[Original] Program Startup Flow Analysis Based on Linux 6.12.32

Posted: 2025-10-23 00:58

On Linux, the main executable file format is ELF.


This article is mainly meant to help build an intuition for what roughly happens when Linux starts a program, and to give some idea of things such as where the interpreter ends up being loaded. Limited by my own level, there may be some mistakes; the emphasis is on the code itself.

Every executable file format supported by Linux has a corresponding object of type linux_binfmt in the kernel.

All linux_binfmt objects are kept on a doubly linked list, and the address of the first element of this list is stored in the kernel global variable formats.
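As a rough sketch of how this list is maintained and consumed (condensed from fs/exec.c in recent kernels; locking details, module refcounting and error handling are omitted, so treat it as illustrative rather than verbatim):

/* fs/exec.c (condensed): the global list of registered binary formats */
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

void __register_binfmt(struct linux_binfmt *fmt, int insert)
{
        write_lock(&binfmt_lock);
        insert ? list_add(&fmt->lh, &formats) :
                 list_add_tail(&fmt->lh, &formats);
        write_unlock(&binfmt_lock);
}

/* condensed sketch of search_binary_handler(): walk the formats list and
 * let each handler try the image until one accepts it */
static int search_binary_handler_sketch(struct linux_binprm *bprm)
{
        struct linux_binfmt *fmt;
        int retval = -ENOEXEC;

        list_for_each_entry(fmt, &formats, lh) {
                retval = fmt->load_binary(bprm);   /* load_elf_binary() for ELF */
                if (retval != -ENOEXEC)
                        break;
        }
        return retval;
}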

A program is normally started by calling fork() to create a copy of the current process as a new process; after fork() returns in the child, the child calls exec(), which traps into the kernel.
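As a minimal user-space illustration of that fork()-then-exec() pattern (an ordinary example program, not kernel code):

#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        pid_t pid = fork();                 /* duplicate the current process */
        if (pid < 0) {
                perror("fork");
                return 1;
        }
        if (pid == 0) {
                /* child: replace its image; on success execve() never returns */
                char *argv[] = { "/bin/ls", "-l", NULL };
                char *envp[] = { NULL };
                execve("/bin/ls", argv, envp);
                perror("execve");           /* reached only on failure */
                _exit(127);
        }
        waitpid(pid, NULL, 0);              /* parent: wait for the child */
        return 0;
}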

The analysis below is based on Linux 6.12.32.

In short, what exec() does is discard the newly created process's existing .text, .data, .bss and other segments, then load the new program and run it.

It flushes the old execution environment and sets a few flags.

When the first PT_LOAD segment of the ELF is mapped, the load address is bound once, depending on the ELF type.

This part of the code is mainly about determining load_bias; all subsequent address calculations are then load_bias + offset.

For an executable built without PIE (ET_EXEC), load_bias is simply 0 and the addresses from the file are used as-is.

The ET_DYN type

This goes down the PIE handling path. In practice, ET_DYN ELF binaries fall into two kinds: programs (PIE: ET_DYN with PT_INTERP) and loaders (static PIE: ET_DYN without PT_INTERP, usually the ELF interpreter itself).

Loaders must be loaded away from programs, since a program might otherwise collide with the loader (especially an ET_EXEC program, which has no randomized load address). For example, when running "./ld.so someprog" to test a new version of the loader, the program that the loader subsequently loads must avoid the loader's own address range, so the two cannot share the same load range.

In addition, sufficient brk room must be reserved along with the loader, because brk must remain usable when running under the loader.

Therefore, when the ELF being loaded is a program rather than the interpreter, it is loaded at an offset from ELF_ET_DYN_BASE, while the loader is loaded into the independently randomized mmap region (a load_bias of 0, without MAP_FIXED or MAP_FIXED_NOREPLACE).
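This split can be observed directly from user space. The small helper below (an ordinary program compiled as a PIE, not kernel code) simply dumps its own memory map; on x86-64 with ASLR the executable typically shows up around 0x55... (derived from ELF_ET_DYN_BASE) while ld-linux and the libraries sit in the 0x7f... mmap region:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/proc/self/maps", "r");
        char line[512];

        if (!f) {
                perror("fopen");
                return 1;
        }
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);        /* one VMA per line */
        fclose(f);
        return 0;
}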

The details of "brk" handling are covered further below; that behavior is likewise affected by whether the binary is a program or a loader, and by ASLR.

elf_ex->e_entry: the program entry point virtual address recorded in the ELF header (the address of the first instruction the program should execute).

After conversion (e_entry = elf_ex->e_entry + load_bias): e_entry is the actual in-memory address of the entry point, where the CPU starts executing once the process runs.

phdr_addr += load_bias;

phdr_addr: the address of the program header table (PHDR) within the ELF's virtual address space, computed earlier when the kernel found which PT_LOAD segment contains the program header table.

After conversion: the actual in-memory address of the program header table, which the dynamic linker (e.g. ld.so) uses when it walks the program headers to resolve the program's dependencies.

elf_brk += load_bias;

elf_brk: the starting virtual address of the heap as derived from the ELF (the end of all loaded segments; the heap grows upward from here).

After conversion: the actual in-memory start of the heap; brk/sbrk operations performed by the process are based on this address.

start_code += load_bias;

start_code: the starting virtual address of the code (executable) segments (the lowest start address among all executable segments).

After conversion: the actual in-memory start of the code segment, which the kernel uses when managing the code pages (e.g. execute permission).

end_code += load_bias;

end_code: the ending virtual address of the code segments (the highest end address among all executable segments).

After conversion: the actual in-memory end of the code segment, delimiting the code range from the data that follows.

start_data += load_bias;

start_data: the starting virtual address of the data segment (the highest segment start address, normally after the code segment).

After conversion: the actual in-memory start of the data segment, which the kernel uses when managing the data pages (e.g. read/write permission).

end_data += load_bias;

end_data: the ending virtual address of the data segment (the highest end address of the initialized parts of all segments).

After conversion: the actual in-memory end of the data segment, delimiting the data range.
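To make the rebasing concrete, here is a small worked sketch with made-up numbers (the ELF_PAGESTART macro mirrors the kernel's definition with a 4 KiB ELF_MIN_ALIGN; all addresses are hypothetical):

#include <stdio.h>

#define ELF_MIN_ALIGN    4096UL
#define ELF_PAGESTART(v) ((v) & ~(ELF_MIN_ALIGN - 1))

int main(void)
{
        /* hypothetical base the kernel picked for the first PT_LOAD */
        unsigned long chosen_base = 0x555555554000UL;
        /* values as recorded in the ELF file */
        unsigned long first_vaddr = 0x0UL;      /* p_vaddr of first PT_LOAD */
        unsigned long e_entry     = 0x1040UL;   /* file entry point         */
        unsigned long phdr_vaddr  = 0x40UL;     /* phdr table vaddr         */

        /* load_bias as computed in load_elf_binary() for ET_DYN */
        unsigned long load_bias = ELF_PAGESTART(chosen_base - first_vaddr);

        /* every later address is simply load_bias + file vaddr */
        printf("load_bias       = %#lx\n", load_bias);
        printf("entry in memory = %#lx\n", e_entry + load_bias);
        printf("phdr in memory  = %#lx\n", phdr_vaddr + load_bias);
        return 0;
}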

The main work here is done by create_elf_tables, which pushes the information that the ELF program and the interpreter may need (argc, argv, envp and the auxiliary vector) onto the new stack.

Architecture-specific information is also copied onto the new program's stack.
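What ends up on the stack can be inspected from the running program itself via glibc's getauxval() (a user-space check of the auxiliary vector written by create_elf_tables(); the AT_* entries below are the standard ones, and the printed values vary per run with ASLR):

#include <stdio.h>
#include <elf.h>
#include <sys/auxv.h>

int main(void)
{
        const char *platform = (const char *)getauxval(AT_PLATFORM);

        printf("AT_PHDR     = %#lx\n", getauxval(AT_PHDR));   /* rebased phdr_addr */
        printf("AT_ENTRY    = %#lx\n", getauxval(AT_ENTRY));  /* rebased e_entry   */
        printf("AT_BASE     = %#lx\n", getauxval(AT_BASE));   /* interp_load_addr  */
        printf("AT_PHNUM    = %lu\n",  getauxval(AT_PHNUM));
        printf("AT_PAGESZ   = %lu\n",  getauxval(AT_PAGESZ));
        printf("AT_PLATFORM = %s\n",   platform ? platform : "(none)");
        return 0;
}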

Once this code has run, the process has fully become the new program; when the kernel returns to user mode, execution starts directly at the new program's entry point. This is the point in execve() where the old image gives way to the new one.
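START_THREAD ultimately boils down to the architecture's start_thread(). The sketch below follows the spirit of the x86-64 implementation in arch/x86/kernel/process_64.c (heavily simplified: segment reloads, FPU state and other details are omitted, so read it as an approximation rather than the real function):

/* simplified sketch, not the verbatim kernel implementation */
void start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
        regs->cs    = __USER_CS;      /* user-mode code segment            */
        regs->ss    = __USER_DS;      /* user-mode stack segment           */
        regs->flags = X86_EFLAGS_IF;  /* fresh flags, interrupts enabled   */
        regs->ip    = new_ip;         /* elf_entry: program or ld.so entry */
        regs->sp    = new_sp;         /* bprm->p: top of the new stack     */
        /* when the kernel returns to user space, execution resumes here */
}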

/*
 * This structure defines the functions that are used to load the binary formats that
 * linux accepts.
 */
struct linux_binfmt {
        struct list_head lh;
        struct module *module;
        int (*load_binary)(struct linux_binprm *);
        int (*load_shlib)(struct file *);
#ifdef CONFIG_COREDUMP
        int (*core_dump)(struct coredump_params *cprm);
        unsigned long min_coredump;     /* minimal dump size */
#endif
} __randomize_layout;
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
#ifdef CONFIG_COREDUMP
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
#endif
};
/*
 * This structure is used to hold the arguments that are used when loading binaries.
 */
struct linux_binprm {
#ifdef CONFIG_MMU
        struct vm_area_struct *vma;
        unsigned long vma_pages;
        unsigned long argmin; /* rlimit marker for copy_strings() */
#else
# define MAX_ARG_PAGES  32
        struct page *page[MAX_ARG_PAGES];
#endif
        struct mm_struct *mm;
        unsigned long p; /* current top of mem */
        unsigned int
                /* Should an execfd be passed to userspace? */
                have_execfd:1,
 
                /* Use the creds of a script (see binfmt_misc) */
                execfd_creds:1,
                /*
                 * Set by bprm_creds_for_exec hook to indicate a
                 * privilege-gaining exec has happened. Used to set
                 * AT_SECURE auxv for glibc.
                 */
                secureexec:1,
                /*
                 * Set when errors can no longer be returned to the
                 * original userspace.
                 */
                point_of_no_return:1,
                /* Set when "comm" must come from the dentry. */
                comm_from_dentry:1;
        struct file *executable; /* Executable to pass to the interpreter */
        struct file *interpreter;
        struct file *file;
        struct cred *cred;      /* new credentials */
        int unsafe;             /* how unsafe this exec is (mask of LSM_UNSAFE_*) */
        unsigned int per_clear; /* bits to clear in current->personality */
        int argc, envc;
        const char *filename;   /* Name of binary as seen by procps */
        const char *interp;     /* Name of the binary really executed. Most
                                   of the time same as filename, but could be
                                   different for binfmt_{misc,script} */
        const char *fdpath;     /* generated filename for execveat */
        unsigned interp_flags;
        int execfd;             /* File descriptor of the executable */
        unsigned long loader, exec;
 
        struct rlimit rlim_stack; /* Saved RLIMIT_STACK used during exec. */
 
        char buf[BINPRM_BUF_SIZE];
} __randomize_layout;
do_execve() [entry point]
    do_execveat_common() [prepare the environment]
        bprm_execve()
            exec_binprm()
                search_binary_handler()
                    load_elf_binary() [core ELF loading logic]
         
        
static int do_execve(struct filename *filename,
        const char __user *const __user *__argv,
        const char __user *const __user *__envp)
{
        struct user_arg_ptr argv = { .ptr.native = __argv };
        struct user_arg_ptr envp = { .ptr.native = __envp };
        return do_execveat_common(AT_FDCWD, filename, argv, envp, 0);
}
static int do_execveat_common(int fd, struct filename *filename,
                              struct user_arg_ptr argv,
                              struct user_arg_ptr envp,
                              int flags)
{
        struct linux_binprm *bprm;
        int retval;
        /*
                argument sanity checks
        */
        if (IS_ERR(filename))
                return PTR_ERR(filename);
 
        /*
         * We move the actual failure in case of RLIMIT_NPROC excess from
         * set*uid() to execve() because too many poorly written programs
         * don't check setuid() return code.  Here we additionally recheck
         * whether NPROC limit is still exceeded.
         */
        if ((current->flags & PF_NPROC_EXCEEDED) &&
            is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {
                retval = -EAGAIN;
                goto out_ret;
        }
 
        /* We're below the limit (still or again), so we don't want to make
         * further execve() calls fail. */
        current->flags &= ~PF_NPROC_EXCEEDED;
 
        bprm = alloc_bprm(fd, filename, flags);
        if (IS_ERR(bprm)) {
                retval = PTR_ERR(bprm);
                goto out_ret;
        }
        /*
                count the argv and envp strings
        */
        retval = count(argv, MAX_ARG_STRINGS);
        if (retval == 0)
                pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n",
                             current->comm, bprm->filename);
        if (retval < 0)
                goto out_free;
        bprm->argc = retval;
 
        retval = count(envp, MAX_ARG_STRINGS);
        if (retval < 0)
                goto out_free;
        bprm->envc = retval;
 
        /*
                stack limit checks and string copying
        */
        retval = bprm_stack_limits(bprm);
        if (retval < 0)
                goto out_free;
 
        retval = copy_string_kernel(bprm->filename, bprm);
        if (retval < 0)
                goto out_free;
        bprm->exec = bprm->p;
 
        retval = copy_strings(bprm->envc, envp, bprm);
        if (retval < 0)
                goto out_free;
 
        retval = copy_strings(bprm->argc, argv, bprm);
        if (retval < 0)
                goto out_free;
 
        /*
         * When argv is empty, add an empty string ("") as argv[0] to
         * ensure confused userspace programs that start processing
         * from argv[1] won't end up walking envp. See also
         * bprm_stack_limits().
         */
        if (bprm->argc == 0) {
                retval = copy_string_kernel("", bprm);
                if (retval < 0)
                        goto out_free;
                bprm->argc = 1;
        }
        /*
                execute the program
        */
        retval = bprm_execve(bprm);
out_free:
        free_bprm(bprm);
 
out_ret:
        putname(filename);
        return retval;
}
static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_bias = 0, phdr_addr = 0;
        int first_pt_load = 1;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
        struct elf_phdr *elf_property_phdata = NULL;
        unsigned long elf_brk;
        bool brk_moved = false;
        int retval, i;
        unsigned long elf_entry;
        unsigned long e_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
        struct elfhdr *interp_elf_ex = NULL;
        struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
        struct mm_struct *mm;
        struct pt_regs *regs;
 
        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                goto out;
 
        if (elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(elf_ex))
                goto out;
        if (elf_check_fdpic(elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;
 
        elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
        if (!elf_phdata)
                goto out;
 
        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
                char *elf_interpreter;
 
                if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
                        elf_property_phdata = elf_ppnt;
                        continue;
                }
 
                if (elf_ppnt->p_type != PT_INTERP)
                        continue;
 
                /*
                 * This is the program interpreter used for shared libraries -
                 * for now assume that this is an a.out format binary.
                 */
                retval = -ENOEXEC;
                if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
                        goto out_free_ph;
 
                retval = -ENOMEM;
                elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
                if (!elf_interpreter)
                        goto out_free_ph;
 
                retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
                                  elf_ppnt->p_offset);
                if (retval < 0)
                        goto out_free_interp;
                /* make sure path is NULL terminated */
                retval = -ENOEXEC;
                if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                        goto out_free_interp;
 
                interpreter = open_exec(elf_interpreter);
                kfree(elf_interpreter);
                retval = PTR_ERR(interpreter);
                if (IS_ERR(interpreter))
                        goto out_free_ph;
 
                /*
                 * If the binary is not readable then enforce mm->dumpable = 0
                 * regardless of the interpreter's permissions.
                 */
                would_dump(bprm, interpreter);
 
                interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
                if (!interp_elf_ex) {
                        retval = -ENOMEM;
                        goto out_free_file;
                }
 
                /* Get the exec headers */
                retval = elf_read(interpreter, interp_elf_ex,
                                  sizeof(*interp_elf_ex), 0);
                if (retval < 0)
                        goto out_free_dentry;
 
                break;
 
out_free_interp:
                kfree(elf_interpreter);
                goto out_free_ph;
        }
 
        elf_ppnt = elf_phdata;
        for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++)
                switch (elf_ppnt->p_type) {
                case PT_GNU_STACK:
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
 
                case PT_LOPROC ... PT_HIPROC:
                        retval = arch_elf_pt_proc(elf_ex, elf_ppnt,
                                                  bprm->file, false,
                                                  &arch_state);
                        if (retval)
                                goto out_free_dentry;
                        break;
                }
 
        /* Some simple consistency checks for the interpreter */
        if (interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(interp_elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(interp_elf_ex) ||
                    elf_check_fdpic(interp_elf_ex))
                        goto out_free_dentry;
 
                /* Load the interpreter program headers */
                interp_elf_phdata = load_elf_phdrs(interp_elf_ex,
                                                   interpreter);
                if (!interp_elf_phdata)
                        goto out_free_dentry;
 
                /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
                elf_property_phdata = NULL;
                elf_ppnt = interp_elf_phdata;
                for (i = 0; i < interp_elf_ex->e_phnum; i++, elf_ppnt++)
                        switch (elf_ppnt->p_type) {
                        case PT_GNU_PROPERTY:
                                elf_property_phdata = elf_ppnt;
                                break;
 
                        case PT_LOPROC ... PT_HIPROC:
                                retval = arch_elf_pt_proc(interp_elf_ex,
                                                          elf_ppnt, interpreter,
                                                          true, &arch_state);
                                if (retval)
                                        goto out_free_dentry;
                                break;
                        }
        }
 
        retval = parse_elf_properties(interpreter ?: bprm->file,
                                      elf_property_phdata, &arch_state);
        if (retval)
                goto out_free_dentry;
 
        /*
         * Allow arch code to reject the ELF at this point, whilst it's
         * still possible to return an error to the code that invoked
         * the exec syscall.
         */
        retval = arch_check_elf(elf_ex,
                                !!interpreter, interp_elf_ex,
                                &arch_state);
        if (retval)
                goto out_free_dentry;
 
        /* Flush all traces of the currently running executable */
        retval = begin_new_exec(bprm);
        if (retval)
                goto out_free_dentry;
 
        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY2(*elf_ex, &arch_state);
        if (elf_read_implies_exec(*elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;
 
        const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space);
        if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space)
                current->flags |= PF_RANDOMIZE;
 
        setup_new_exec(bprm);
 
        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0)
                goto out_free_dentry;
 
        elf_brk = 0;
 
        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;
 
        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < elf_ex->e_phnum; i++, elf_ppnt++) {
                int elf_prot, elf_flags;
                unsigned long k, vaddr;
                unsigned long total_size = 0;
                unsigned long alignment;
 
                if (elf_ppnt->p_type != PT_LOAD)
                        continue;
 
                elf_prot = make_prot(elf_ppnt->p_flags, &arch_state,
                                     !!interpreter, false);
 
                elf_flags = MAP_PRIVATE;
 
                vaddr = elf_ppnt->p_vaddr;
                /*
                 * The first time through the loop, first_pt_load is true:
                 * layout will be calculated. Once set, use MAP_FIXED since
                 * we know we've already safely mapped the entire region with
                 * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
                 */
                if (!first_pt_load) {
                        elf_flags |= MAP_FIXED;
                } else if (elf_ex->e_type == ET_EXEC) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_EXEC binaries. No special handling
                         * is needed.
                         */
                        elf_flags |= MAP_FIXED_NOREPLACE;
                } else if (elf_ex->e_type == ET_DYN) {
                        /*
                         * This logic is run once for the first LOAD Program
                         * Header for ET_DYN binaries to calculate the
                         * randomization (load_bias) for all the LOAD
                         * Program Headers.
                         */
 
                        /*
                         * Calculate the entire size of the ELF mapping
                         * (total_size), used for the initial mapping,
                         * due to load_addr_set which is set to true later
                         * once the initial mapping is performed.
                         *
                         * Note that this is only sensible when the LOAD
                         * segments are contiguous (or overlapping). If
                         * used for LOADs that are far apart, this would
                         * cause the holes between LOADs to be mapped,
                         * running the risk of having the mapping fail,
                         * as it would be larger than the ELF file itself.
                         *
                         * As a result, only ET_DYN does this, since
                         * some ET_EXEC (e.g. ia64) may have large virtual
                         * memory holes between LOADs.
                         *
                         */
                        total_size = total_mapping_size(elf_phdata,
                                                        elf_ex->e_phnum);
                        if (!total_size) {
                                retval = -EINVAL;
                                goto out_free_dentry;
                        }
 
                        /* Calculate any requested alignment. */
                        alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
 
                        /**
                         * DOC: PIE handling
                         *
                         * There are effectively two types of ET_DYN ELF
                         * binaries: programs (i.e. PIE: ET_DYN with
                         * PT_INTERP) and loaders (i.e. static PIE: ET_DYN
                         * without PT_INTERP, usually the ELF interpreter
                         * itself). Loaders must be loaded away from programs
                         * since the program may otherwise collide with the
                         * loader (especially for ET_EXEC which does not have
                         * a randomized position).
                         *
                         * For example, to handle invocations of
                         * "./ld.so someprog" to test out a new version of
                         * the loader, the subsequent program that the
                         * loader loads must avoid the loader itself, so
                         * they cannot share the same load range. Sufficient
                         * room for the brk must be allocated with the
                         * loader as well, since brk must be available with
                         * the loader.
                         *
                         * Therefore, programs are loaded offset from
                         * ELF_ET_DYN_BASE and loaders are loaded into the
                         * independently randomized mmap region (0 load_bias
                         * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
                         *
                         * See below for "brk" handling details, which is
                         * also affected by program vs loader and ASLR.
                         */
                        if (interpreter) {
                                /* On ET_DYN with PT_INTERP, we do the ASLR. */
                                load_bias = ELF_ET_DYN_BASE;
                                if (current->flags & PF_RANDOMIZE)
                                        load_bias += arch_mmap_rnd();
                                /* Adjust alignment as requested. */
                                if (alignment)
                                        load_bias &= ~(alignment - 1);
                                elf_flags |= MAP_FIXED_NOREPLACE;
                        } else {
                                /*
                                 * For ET_DYN without PT_INTERP, we rely on
                                 * the architectures's (potentially ASLR) mmap
                                 * base address (via a load_bias of 0).
                                 *
                                 * When a large alignment is requested, we
                                 * must do the allocation at address "0" right
                                 * now to discover where things will load so
                                 * that we can adjust the resulting alignment.
                                 * In this case (load_bias != 0), we can use
                                 * MAP_FIXED_NOREPLACE to make sure the mapping
                                 * doesn't collide with anything.
                                 */
                                if (alignment > ELF_MIN_ALIGN) {
                                        load_bias = elf_load(bprm->file, 0, elf_ppnt,
                                                             elf_prot, elf_flags, total_size);
                                        if (BAD_ADDR(load_bias)) {
                                                retval = IS_ERR_VALUE(load_bias) ?
                                                         PTR_ERR((void*)load_bias) : -EINVAL;
                                                goto out_free_dentry;
                                        }
                                        vm_munmap(load_bias, total_size);
                                        /* Adjust alignment as requested. */
                                        if (alignment)
                                                load_bias &= ~(alignment - 1);
                                        elf_flags |= MAP_FIXED_NOREPLACE;
                                } else
                                        load_bias = 0;
                        }
 
                        /*
                         * Since load_bias is used for all subsequent loading
                         * calculations, we must lower it by the first vaddr
                         * so that the remaining calculations based on the
                         * ELF vaddrs will be correctly offset. The result
                         * is then page aligned.
                         */
                        load_bias = ELF_PAGESTART(load_bias - vaddr);
                }
 
                error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, total_size);
                if (BAD_ADDR(error)) {
                        retval = IS_ERR_VALUE(error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }
 
                if (first_pt_load) {
                        first_pt_load = 0;
                        if (elf_ex->e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                reloc_func_desc = load_bias;
                        }
                }
 
                /*
                 * Figure out which segment in the file contains the Program
                 * Header table, and map to the associated memory address.
                 */
                if (elf_ppnt->p_offset <= elf_ex->e_phoff &&
                    elf_ex->e_phoff < elf_ppnt->p_offset + elf_ppnt->p_filesz) {
                        phdr_addr = elf_ex->e_phoff - elf_ppnt->p_offset +
                                    elf_ppnt->p_vaddr;
                }
 
                k = elf_ppnt->p_vaddr;
                if ((elf_ppnt->p_flags & PF_X) && k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;
 
                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
 
                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
 
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }
 
        e_entry = elf_ex->e_entry + load_bias;
        phdr_addr += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;
 
        if (interpreter) {
                elf_entry = load_elf_interp(interp_elf_ex,
                                            interpreter,
                                            load_bias, interp_elf_phdata,
                                            &arch_state);
                if (!IS_ERR_VALUE(elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += interp_elf_ex->e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        retval = IS_ERR_VALUE(elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;
 
                allow_write_access(interpreter);
                fput(interpreter);
 
                kfree(interp_elf_ex);
                kfree(interp_elf_phdata);
        } else {
                elf_entry = e_entry;
                if (BAD_ADDR(elf_entry)) {
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }
 
        kfree(elf_phdata);
 
        set_binfmt(&elf_format);
 
#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = ARCH_SETUP_ADDITIONAL_PAGES(bprm, elf_ex, !!interpreter);
        if (retval < 0)
                goto out;
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
 
        retval = create_elf_tables(bprm, elf_ex, interp_load_addr,
                                   e_entry, phdr_addr);
        if (retval < 0)
                goto out;
 
        mm = current->mm;
        mm->end_code = end_code;
        mm->start_code = start_code;
        mm->start_data = start_data;
        mm->end_data = end_data;
        mm->start_stack = bprm->p;
 
        /**
         * DOC: "brk" handling
         *
         * For architectures with ELF randomization, when executing a
         * loader directly (i.e. static PIE: ET_DYN without PT_INTERP),
         * move the brk area out of the mmap region and into the unused
         * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
         * early with the stack growing down or other regions being put
         * into the mmap region by the kernel (e.g. vdso).
         *
         * In the CONFIG_COMPAT_BRK case, though, everything is turned
         * off because we're not allowed to move the brk at all.
         */
        if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
            IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
            elf_ex->e_type == ET_DYN && !interpreter) {
                elf_brk = ELF_ET_DYN_BASE;
                /* This counts as moving the brk, so let brk(2) know. */
                brk_moved = true;
        }
        mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
 
        if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
                /*
                 * If we didn't move the brk to ELF_ET_DYN_BASE (above),
                 * leave a gap between .bss and brk.
                 */
                if (!brk_moved)
                        mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
 
                mm->brk = mm->start_brk = arch_randomize_brk(mm);
                brk_moved = true;
        }
 
#ifdef compat_brk_randomized
        if (brk_moved)
                current->brk_randomized = 1;
#endif
 
        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
 
                retval = do_mseal(0, PAGE_SIZE, 0);
                if (retval)
                        pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
                                            task_pid_nr(current), retval);
        }
 
        regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically links apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif
 
        finalize_exec(bprm);
        START_THREAD(elf_ex, regs, elf_entry, bprm->p);
        retval = 0;
out:
        return retval;
 
        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_ex);
        kfree(interp_elf_phdata);
out_free_file:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}
         * ELF_ET_DYN_BASE region. Since "brk" grows up it may collide
         * early with the stack growing down or other regions being put
         * into the mmap region by the kernel (e.g. vdso).
         *
         * In the CONFIG_COMPAT_BRK case, though, everything is turned
         * off because we're not allowed to move the brk at all.
         */
        if (!IS_ENABLED(CONFIG_COMPAT_BRK) &&
            IS_ENABLED(CONFIG_ARCH_HAS_ELF_RANDOMIZE) &&
            elf_ex->e_type == ET_DYN && !interpreter) {
                elf_brk = ELF_ET_DYN_BASE;
                /* This counts as moving the brk, so let brk(2) know. */
                brk_moved = true;
        }
        mm->start_brk = mm->brk = ELF_PAGEALIGN(elf_brk);
 
        if ((current->flags & PF_RANDOMIZE) && snapshot_randomize_va_space > 1) {
                /*
                 * If we didn't move the brk to ELF_ET_DYN_BASE (above),
                 * leave a gap between .bss and brk.
                 */
                if (!brk_moved)
                        mm->brk = mm->start_brk = mm->brk + PAGE_SIZE;
 
                mm->brk = mm->start_brk = arch_randomize_brk(mm);
                brk_moved = true;
        }
 
#ifdef compat_brk_randomized
        if (brk_moved)
                current->brk_randomized = 1;
#endif
 
        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
 
                retval = do_mseal(0, PAGE_SIZE, 0);
                if (retval)
                        pr_warn_ratelimited("pid=%d, couldn't seal address 0, ret=%d.\n",
                                            task_pid_nr(current), retval);
        }
 
        regs = current_pt_regs();
#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example.  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically links apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif
 
        finalize_exec(bprm);
        START_THREAD(elf_ex, regs, elf_entry, bprm->p);
        retval = 0;
out:
        return retval;
 
        /* error cleanup */
out_free_dentry:
        kfree(interp_elf_ex);
        kfree(interp_elf_phdata);
out_free_file:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}
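The values computed above surface in userspace through the auxiliary vector that create_elf_tables() placed on the new program's stack: AT_ENTRY carries e_entry (already adjusted by load_bias), AT_PHDR carries phdr_addr, and AT_BASE carries the interpreter's load address. Below is a minimal userspace sketch (not kernel code, assuming glibc's getauxval() is available) that prints these entries from inside a freshly exec'd program:

#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
        /* These aux entries are filled in by create_elf_tables() during execve(). */
        printf("AT_ENTRY (e_entry + load_bias): %#lx\n", getauxval(AT_ENTRY));
        printf("AT_PHDR  (phdr_addr):           %#lx\n", getauxval(AT_PHDR));
        printf("AT_BASE  (interpreter base):    %#lx\n", getauxval(AT_BASE));
        printf("AT_PHNUM (e_phnum):             %lu\n",  getauxval(AT_PHNUM));
        return 0;
}

On a PIE executable, the AT_ENTRY printed here differs from the e_entry recorded in the file by exactly the load_bias chosen above; on a non-PIE ET_EXEC binary the two are identical. The program-header loading and PT_INTERP lookup shown next actually run earlier in load_elf_binary(), before the segment-mapping loop above: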
struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
elf_phdata = load_elf_phdrs(elf_ex, bprm->file);
if (!elf_phdata)
        goto out;
 
elf_ppnt = elf_phdata;
// Walk the program headers looking for the PT_INTERP segment (the interpreter)
for (i = 0; i < elf_ex->e_phnum; i++, elf_ppnt++) {
        char *elf_interpreter;
        // Record the PT_GNU_PROPERTY segment and keep scanning
        if (elf_ppnt->p_type == PT_GNU_PROPERTY) {
                elf_property_phdata = elf_ppnt;
                continue;
        }
        // Only the PT_INTERP segment is handled here
        if (elf_ppnt->p_type != PT_INTERP)
                continue;
 
        /*
         * This is the program interpreter used for shared libraries -
         * for now assume that this is an a.out format binary.
         */
        retval = -ENOEXEC;
        // Sanity-check the interpreter path length
        if (elf_ppnt->p_filesz > PATH_MAX || elf_ppnt->p_filesz < 2)
                goto out_free_ph;
 
        retval = -ENOMEM;
        elf_interpreter = kmalloc(elf_ppnt->p_filesz, GFP_KERNEL);
        if (!elf_interpreter)
                goto out_free_ph;
        // Read the interpreter path stored in the ELF file
        retval = elf_read(bprm->file, elf_interpreter, elf_ppnt->p_filesz,
                          elf_ppnt->p_offset);
        if (retval < 0)
                goto out_free_interp;
        /* make sure path is NULL terminated */
        retval = -ENOEXEC;
        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                goto out_free_interp;
        // Open the interpreter file
        interpreter = open_exec(elf_interpreter);
        kfree(elf_interpreter);
        retval = PTR_ERR(interpreter);
        if (IS_ERR(interpreter))
                goto out_free_ph;
 
        /*
         * If the binary is not readable then enforce mm->dumpable = 0
         * regardless of the interpreter's permissions.
         */
        would_dump(bprm, interpreter);
        /* Read the interpreter's ELF header */
        interp_elf_ex = kmalloc(sizeof(*interp_elf_ex), GFP_KERNEL);
        if (!interp_elf_ex) {
                retval = -ENOMEM;
                goto out_free_file;
        }
 
        /* Get the exec headers */
        retval = elf_read(interpreter, interp_elf_ex,
                          sizeof(*interp_elf_ex), 0);
        if (retval < 0)
                goto out_free_dentry;
 
        break;
 
out_free_interp:
        kfree(elf_interpreter);
        goto out_free_ph;
}
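The same PT_INTERP lookup can be reproduced from userspace by parsing the program headers directly. The sketch below is illustrative only (it assumes a native 64-bit ELF and skips the class/endianness checks a real tool would need); it mirrors the kernel's checks that the path is reasonably sized and NUL-terminated, then prints the interpreter path (typically /lib64/ld-linux-x86-64.so.2 on x86-64 glibc systems):

#include <elf.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        if (argc != 2) {
                fprintf(stderr, "usage: %s <elf-file>\n", argv[0]);
                return 1;
        }
        int fd = open(argv[1], O_RDONLY);
        if (fd < 0)
                return 1;

        Elf64_Ehdr eh;
        if (pread(fd, &eh, sizeof(eh), 0) != (ssize_t)sizeof(eh))
                return 1;

        /* Walk the program header table, like the kernel loop above. */
        for (int i = 0; i < eh.e_phnum; i++) {
                Elf64_Phdr ph;
                off_t off = eh.e_phoff + (off_t)i * eh.e_phentsize;
                if (pread(fd, &ph, sizeof(ph), off) != (ssize_t)sizeof(ph))
                        return 1;
                if (ph.p_type != PT_INTERP)
                        continue;

                /* Same sanity checks as the kernel: a bounded size and a
                 * NUL-terminated path string. */
                if (ph.p_filesz < 2 || ph.p_filesz > 4096)
                        return 1;
                char *interp = malloc(ph.p_filesz);
                if (!interp)
                        return 1;
                if (pread(fd, interp, ph.p_filesz, ph.p_offset) !=
                    (ssize_t)ph.p_filesz || interp[ph.p_filesz - 1] != '\0') {
                        free(interp);
                        return 1;
                }
                printf("PT_INTERP: %s\n", interp);
                free(interp);
                break;
        }
        close(fd);
        return 0;
}

A statically linked or static-PIE binary has no PT_INTERP segment at all, which is exactly the case where the kernel code above leaves interpreter == NULL and later uses e_entry directly as elf_entry.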