1/*
2 * linux/fs/binfmt_elf.c
3 *
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines.  Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7 * Tools".
8 *
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10 */
11
12#include <linux/module.h>
13#include <linux/kernel.h>
14#include <linux/fs.h>
15#include <linux/mm.h>
16#include <linux/mman.h>
17#include <linux/errno.h>
18#include <linux/signal.h>
19#include <linux/binfmts.h>
20#include <linux/string.h>
21#include <linux/file.h>
22#include <linux/slab.h>
23#include <linux/personality.h>
24#include <linux/elfcore.h>
25#include <linux/init.h>
26#include <linux/highuid.h>
27#include <linux/compiler.h>
28#include <linux/highmem.h>
29#include <linux/pagemap.h>
30#include <linux/vmalloc.h>
31#include <linux/security.h>
32#include <linux/random.h>
33#include <linux/elf.h>
34#include <linux/elf-randomize.h>
35#include <linux/utsname.h>
36#include <linux/coredump.h>
37#include <linux/sched.h>
38#include <asm/uaccess.h>
39#include <asm/param.h>
40#include <asm/page.h>
41
42#ifndef user_long_t
43#define user_long_t long
44#endif
45#ifndef user_siginfo_t
46#define user_siginfo_t siginfo_t
47#endif
48
49static int load_elf_binary(struct linux_binprm *bprm);
50static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
51				int, int, unsigned long);
52
53#ifdef CONFIG_USELIB
54static int load_elf_library(struct file *);
55#else
56#define load_elf_library NULL
57#endif
58
59/*
60 * If we don't support core dumping, then supply a NULL so we
61 * don't even try.
62 */
63#ifdef CONFIG_ELF_CORE
64static int elf_core_dump(struct coredump_params *cprm);
65#else
66#define elf_core_dump	NULL
67#endif
68
69#if ELF_EXEC_PAGESIZE > PAGE_SIZE
70#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
71#else
72#define ELF_MIN_ALIGN	PAGE_SIZE
73#endif
74
75#ifndef ELF_CORE_EFLAGS
76#define ELF_CORE_EFLAGS	0
77#endif
78
79#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
80#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
81#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
82
83static struct linux_binfmt elf_format = {
84	.module		= THIS_MODULE,
85	.load_binary	= load_elf_binary,
86	.load_shlib	= load_elf_library,
87	.core_dump	= elf_core_dump,
88	.min_coredump	= ELF_EXEC_PAGESIZE,
89};
90
91#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
92
93static int set_brk(unsigned long start, unsigned long end)
94{
95	start = ELF_PAGEALIGN(start);
96	end = ELF_PAGEALIGN(end);
97	if (end > start) {
98		unsigned long addr;
99		addr = vm_brk(start, end - start);
100		if (BAD_ADDR(addr))
101			return addr;
102	}
103	current->mm->start_brk = current->mm->brk = end;
104	return 0;
105}
106
107/* We need to explicitly zero any fractional pages
108   after the data section (i.e. bss).  This would
109   contain the junk from the file that should not
110   be in memory
111 */
112static int padzero(unsigned long elf_bss)
113{
114	unsigned long nbyte;
115
116	nbyte = ELF_PAGEOFFSET(elf_bss);
117	if (nbyte) {
118		nbyte = ELF_MIN_ALIGN - nbyte;
119		if (clear_user((void __user *) elf_bss, nbyte))
120			return -EFAULT;
121	}
122	return 0;
123}
124
125/* Let's use some macros to make this stack manipulation a little clearer */
126#ifdef CONFIG_STACK_GROWSUP
127#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
128#define STACK_ROUND(sp, items) \
129	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
130#define STACK_ALLOC(sp, len) ({ \
131	elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
132	old_sp; })
133#else
134#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
135#define STACK_ROUND(sp, items) \
136	(((unsigned long) (sp - items)) &~ 15UL)
137#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
138#endif
139
140#ifndef ELF_BASE_PLATFORM
141/*
142 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
143 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
144 * will be copied to the user stack in the same manner as AT_PLATFORM.
145 */
146#define ELF_BASE_PLATFORM NULL
147#endif
148
149static int
150create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
151		unsigned long load_addr, unsigned long interp_load_addr)
152{
153	unsigned long p = bprm->p;
154	int argc = bprm->argc;
155	int envc = bprm->envc;
156	elf_addr_t __user *argv;
157	elf_addr_t __user *envp;
158	elf_addr_t __user *sp;
159	elf_addr_t __user *u_platform;
160	elf_addr_t __user *u_base_platform;
161	elf_addr_t __user *u_rand_bytes;
162	const char *k_platform = ELF_PLATFORM;
163	const char *k_base_platform = ELF_BASE_PLATFORM;
164	unsigned char k_rand_bytes[16];
165	int items;
166	elf_addr_t *elf_info;
167	int ei_index = 0;
168	const struct cred *cred = current_cred();
169	struct vm_area_struct *vma;
170
171	/*
172	 * In some cases (e.g. Hyper-Threading), we want to avoid L1
173	 * evictions by the processes running on the same package. One
174	 * thing we can do is to shuffle the initial stack for them.
175	 */
176
177	p = arch_align_stack(p);
178
179	/*
180	 * If this architecture has a platform capability string, copy it
181	 * to userspace.  In some cases (Sparc), this info is impossible
182	 * for userspace to get any other way, in others (i386) it is
183	 * merely difficult.
184	 */
185	u_platform = NULL;
186	if (k_platform) {
187		size_t len = strlen(k_platform) + 1;
188
189		u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190		if (__copy_to_user(u_platform, k_platform, len))
191			return -EFAULT;
192	}
193
194	/*
195	 * If this architecture has a "base" platform capability
196	 * string, copy it to userspace.
197	 */
198	u_base_platform = NULL;
199	if (k_base_platform) {
200		size_t len = strlen(k_base_platform) + 1;
201
202		u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
203		if (__copy_to_user(u_base_platform, k_base_platform, len))
204			return -EFAULT;
205	}
206
207	/*
208	 * Generate 16 random bytes for userspace PRNG seeding.
209	 */
210	get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
211	u_rand_bytes = (elf_addr_t __user *)
212		       STACK_ALLOC(p, sizeof(k_rand_bytes));
213	if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
214		return -EFAULT;
215
216	/* Create the ELF interpreter info */
217	elf_info = (elf_addr_t *)current->mm->saved_auxv;
218	/* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
219#define NEW_AUX_ENT(id, val) \
220	do { \
221		elf_info[ei_index++] = id; \
222		elf_info[ei_index++] = val; \
223	} while (0)
224
225#ifdef ARCH_DLINFO
226	/*
227	 * ARCH_DLINFO must come first so PPC can do its special alignment of
228	 * AUXV.
229	 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
230	 * ARCH_DLINFO changes
231	 */
232	ARCH_DLINFO;
233#endif
234	NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
235	NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
236	NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
237	NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
238	NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
239	NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
240	NEW_AUX_ENT(AT_BASE, interp_load_addr);
241	NEW_AUX_ENT(AT_FLAGS, 0);
242	NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
243	NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
244	NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
245	NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
246	NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
247 	NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
248	NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
249#ifdef ELF_HWCAP2
250	NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
251#endif
252	NEW_AUX_ENT(AT_EXECFN, bprm->exec);
253	if (k_platform) {
254		NEW_AUX_ENT(AT_PLATFORM,
255			    (elf_addr_t)(unsigned long)u_platform);
256	}
257	if (k_base_platform) {
258		NEW_AUX_ENT(AT_BASE_PLATFORM,
259			    (elf_addr_t)(unsigned long)u_base_platform);
260	}
261	if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
262		NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
263	}
264#undef NEW_AUX_ENT
265	/* AT_NULL is zero; clear the rest too */
266	memset(&elf_info[ei_index], 0,
267	       sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
268
269	/* And advance past the AT_NULL entry.  */
270	ei_index += 2;
271
272	sp = STACK_ADD(p, ei_index);
273
274	items = (argc + 1) + (envc + 1) + 1;
275	bprm->p = STACK_ROUND(sp, items);
276
277	/* Point sp at the lowest address on the stack */
278#ifdef CONFIG_STACK_GROWSUP
279	sp = (elf_addr_t __user *)bprm->p - items - ei_index;
280	bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
281#else
282	sp = (elf_addr_t __user *)bprm->p;
283#endif
284
285
286	/*
287	 * Grow the stack manually; some architectures have a limit on how
288	 * far ahead a user-space access may be in order to grow the stack.
289	 */
290	vma = find_extend_vma(current->mm, bprm->p);
291	if (!vma)
292		return -EFAULT;
293
294	/* Now, let's put argc (and argv, envp if appropriate) on the stack */
295	if (__put_user(argc, sp++))
296		return -EFAULT;
297	argv = sp;
298	envp = argv + argc + 1;
299
300	/* Populate argv and envp */
301	p = current->mm->arg_end = current->mm->arg_start;
302	while (argc-- > 0) {
303		size_t len;
304		if (__put_user((elf_addr_t)p, argv++))
305			return -EFAULT;
306		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
307		if (!len || len > MAX_ARG_STRLEN)
308			return -EINVAL;
309		p += len;
310	}
311	if (__put_user(0, argv))
312		return -EFAULT;
313	current->mm->arg_end = current->mm->env_start = p;
314	while (envc-- > 0) {
315		size_t len;
316		if (__put_user((elf_addr_t)p, envp++))
317			return -EFAULT;
318		len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
319		if (!len || len > MAX_ARG_STRLEN)
320			return -EINVAL;
321		p += len;
322	}
323	if (__put_user(0, envp))
324		return -EFAULT;
325	current->mm->env_end = p;
326
327	/* Put the elf_info on the stack in the right place.  */
328	sp = (elf_addr_t __user *)envp + 1;
329	if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
330		return -EFAULT;
331	return 0;
332}
333
334#ifndef elf_map
335
336static unsigned long elf_map(struct file *filep, unsigned long addr,
337		struct elf_phdr *eppnt, int prot, int type,
338		unsigned long total_size)
339{
340	unsigned long map_addr;
341	unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
342	unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
343	addr = ELF_PAGESTART(addr);
344	size = ELF_PAGEALIGN(size);
345
346	/* mmap() will return -EINVAL if given a zero size, but a
347	 * segment with zero filesize is perfectly valid */
348	if (!size)
349		return addr;
350
351	/*
352	* total_size is the size of the ELF (interpreter) image.
353	* The _first_ mmap needs to know the full size, otherwise
354	* randomization might put this image into an overlapping
355	* position with the ELF binary image. (since size < total_size)
356	* So we first map the 'big' image - and unmap the remainder at
357	* the end. (which unmap is needed for ELF images with holes.)
358	*/
359	if (total_size) {
360		total_size = ELF_PAGEALIGN(total_size);
361		map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
362		if (!BAD_ADDR(map_addr))
363			vm_munmap(map_addr+size, total_size-size);
364	} else
365		map_addr = vm_mmap(filep, addr, size, prot, type, off);
366
367	return(map_addr);
368}
369
370#endif /* !elf_map */
371
372static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
373{
374	int i, first_idx = -1, last_idx = -1;
375
376	for (i = 0; i < nr; i++) {
377		if (cmds[i].p_type == PT_LOAD) {
378			last_idx = i;
379			if (first_idx == -1)
380				first_idx = i;
381		}
382	}
383	if (first_idx == -1)
384		return 0;
385
386	return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
387				ELF_PAGESTART(cmds[first_idx].p_vaddr);
388}
389
390/**
391 * load_elf_phdrs() - load ELF program headers
392 * @elf_ex:   ELF header of the binary whose program headers should be loaded
393 * @elf_file: the opened ELF binary file
394 *
395 * Loads ELF program headers from the binary file elf_file, which has the ELF
396 * header pointed to by elf_ex, into a newly allocated array. The caller is
397 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
398 */
399static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
400				       struct file *elf_file)
401{
402	struct elf_phdr *elf_phdata = NULL;
403	int retval, size, err = -1;
404
405	/*
406	 * If the size of this structure has changed, then punt, since
407	 * we will be doing the wrong thing.
408	 */
409	if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
410		goto out;
411
412	/* Sanity check the number of program headers... */
413	if (elf_ex->e_phnum < 1 ||
414		elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
415		goto out;
416
417	/* ...and their total size. */
418	size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
419	if (size > ELF_MIN_ALIGN)
420		goto out;
421
422	elf_phdata = kmalloc(size, GFP_KERNEL);
423	if (!elf_phdata)
424		goto out;
425
426	/* Read in the program headers */
427	retval = kernel_read(elf_file, elf_ex->e_phoff,
428			     (char *)elf_phdata, size);
429	if (retval != size) {
430		err = (retval < 0) ? retval : -EIO;
431		goto out;
432	}
433
434	/* Success! */
435	err = 0;
436out:
437	if (err) {
438		kfree(elf_phdata);
439		elf_phdata = NULL;
440	}
441	return elf_phdata;
442}
443
444#ifndef CONFIG_ARCH_BINFMT_ELF_STATE
445
446/**
447 * struct arch_elf_state - arch-specific ELF loading state
448 *
449 * This structure is used to preserve architecture specific data during
450 * the loading of an ELF file, throughout the checking of architecture
451 * specific ELF headers & through to the point where the ELF load is
452 * known to be proceeding (ie. SET_PERSONALITY).
453 *
454 * This implementation is a dummy for architectures which require no
455 * specific state.
456 */
457struct arch_elf_state {
458};
459
460#define INIT_ARCH_ELF_STATE {}
461
462/**
463 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
464 * @ehdr:	The main ELF header
465 * @phdr:	The program header to check
466 * @elf:	The open ELF file
467 * @is_interp:	True if the phdr is from the interpreter of the ELF being
468 *		loaded, else false.
469 * @state:	Architecture-specific state preserved throughout the process
470 *		of loading the ELF.
471 *
472 * Inspects the program header phdr to validate its correctness and/or
473 * suitability for the system. Called once per ELF program header in the
474 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
475 * interpreter.
476 *
477 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
478 *         with that return code.
479 */
480static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
481				   struct elf_phdr *phdr,
482				   struct file *elf, bool is_interp,
483				   struct arch_elf_state *state)
484{
485	/* Dummy implementation, always proceed */
486	return 0;
487}
488
489/**
490 * arch_check_elf() - check a PT_LOPROC..PT_HIPROC ELF program header
491 * @ehdr:	The main ELF header
492 * @has_interp:	True if the ELF has an interpreter, else false.
493 * @state:	Architecture-specific state preserved throughout the process
494 *		of loading the ELF.
495 *
496 * Provides a final opportunity for architecture code to reject the loading
497 * of the ELF & cause an exec syscall to return an error. This is called after
498 * all program headers to be checked by arch_elf_pt_proc have been.
499 *
500 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
501 *         with that return code.
502 */
503static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
504				 struct arch_elf_state *state)
505{
506	/* Dummy implementation, always proceed */
507	return 0;
508}
509
510#endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
511
512/* This is much more generalized than the library routine read function,
513   so we keep this separate.  Technically the library read function
514   is only provided so that we can read a.out libraries that have
515   an ELF header */
516
517static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
518		struct file *interpreter, unsigned long *interp_map_addr,
519		unsigned long no_base, struct elf_phdr *interp_elf_phdata)
520{
521	struct elf_phdr *eppnt;
522	unsigned long load_addr = 0;
523	int load_addr_set = 0;
524	unsigned long last_bss = 0, elf_bss = 0;
525	unsigned long error = ~0UL;
526	unsigned long total_size;
527	int i;
528
529	/* First of all, some simple consistency checks */
530	if (interp_elf_ex->e_type != ET_EXEC &&
531	    interp_elf_ex->e_type != ET_DYN)
532		goto out;
533	if (!elf_check_arch(interp_elf_ex))
534		goto out;
535	if (!interpreter->f_op->mmap)
536		goto out;
537
538	total_size = total_mapping_size(interp_elf_phdata,
539					interp_elf_ex->e_phnum);
540	if (!total_size) {
541		error = -EINVAL;
542		goto out;
543	}
544
545	eppnt = interp_elf_phdata;
546	for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
547		if (eppnt->p_type == PT_LOAD) {
548			int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
549			int elf_prot = 0;
550			unsigned long vaddr = 0;
551			unsigned long k, map_addr;
552
553			if (eppnt->p_flags & PF_R)
554		    		elf_prot = PROT_READ;
555			if (eppnt->p_flags & PF_W)
556				elf_prot |= PROT_WRITE;
557			if (eppnt->p_flags & PF_X)
558				elf_prot |= PROT_EXEC;
559			vaddr = eppnt->p_vaddr;
560			if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
561				elf_type |= MAP_FIXED;
562			else if (no_base && interp_elf_ex->e_type == ET_DYN)
563				load_addr = -vaddr;
564
565			map_addr = elf_map(interpreter, load_addr + vaddr,
566					eppnt, elf_prot, elf_type, total_size);
567			total_size = 0;
568			if (!*interp_map_addr)
569				*interp_map_addr = map_addr;
570			error = map_addr;
571			if (BAD_ADDR(map_addr))
572				goto out;
573
574			if (!load_addr_set &&
575			    interp_elf_ex->e_type == ET_DYN) {
576				load_addr = map_addr - ELF_PAGESTART(vaddr);
577				load_addr_set = 1;
578			}
579
580			/*
581			 * Check to see if the section's size will overflow the
582			 * allowed task size. Note that p_filesz must always be
583			 * <= p_memsize so it's only necessary to check p_memsz.
584			 */
585			k = load_addr + eppnt->p_vaddr;
586			if (BAD_ADDR(k) ||
587			    eppnt->p_filesz > eppnt->p_memsz ||
588			    eppnt->p_memsz > TASK_SIZE ||
589			    TASK_SIZE - eppnt->p_memsz < k) {
590				error = -ENOMEM;
591				goto out;
592			}
593
594			/*
595			 * Find the end of the file mapping for this phdr, and
596			 * keep track of the largest address we see for this.
597			 */
598			k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
599			if (k > elf_bss)
600				elf_bss = k;
601
602			/*
603			 * Do the same thing for the memory mapping - between
604			 * elf_bss and last_bss is the bss section.
605			 */
606			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
607			if (k > last_bss)
608				last_bss = k;
609		}
610	}
611
612	if (last_bss > elf_bss) {
613		/*
614		 * Now fill out the bss section.  First pad the last page up
615		 * to the page boundary, and then perform a mmap to make sure
616		 * that there are zero-mapped pages up to and including the
617		 * last bss page.
618		 */
619		if (padzero(elf_bss)) {
620			error = -EFAULT;
621			goto out;
622		}
623
624		/* What we have mapped so far */
625		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
626
627		/* Map the last of the bss segment */
628		error = vm_brk(elf_bss, last_bss - elf_bss);
629		if (BAD_ADDR(error))
630			goto out;
631	}
632
633	error = load_addr;
634out:
635	return error;
636}
637
638/*
639 * These are the functions used to load ELF style executables and shared
640 * libraries.  There is no binary dependent code anywhere else.
641 */
642
643#ifndef STACK_RND_MASK
644#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
645#endif
646
647static unsigned long randomize_stack_top(unsigned long stack_top)
648{
649	unsigned long random_variable = 0;
650
651	if ((current->flags & PF_RANDOMIZE) &&
652		!(current->personality & ADDR_NO_RANDOMIZE)) {
653		random_variable = (unsigned long) get_random_int();
654		random_variable &= STACK_RND_MASK;
655		random_variable <<= PAGE_SHIFT;
656	}
657#ifdef CONFIG_STACK_GROWSUP
658	return PAGE_ALIGN(stack_top) + random_variable;
659#else
660	return PAGE_ALIGN(stack_top) - random_variable;
661#endif
662}
663
664static int load_elf_binary(struct linux_binprm *bprm)
665{
666	struct file *interpreter = NULL; /* to shut gcc up */
667 	unsigned long load_addr = 0, load_bias = 0;
668	int load_addr_set = 0;
669	char * elf_interpreter = NULL;
670	unsigned long error;
671	struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
672	unsigned long elf_bss, elf_brk;
673	int retval, i;
674	unsigned long elf_entry;
675	unsigned long interp_load_addr = 0;
676	unsigned long start_code, end_code, start_data, end_data;
677	unsigned long reloc_func_desc __maybe_unused = 0;
678	int executable_stack = EXSTACK_DEFAULT;
679	struct pt_regs *regs = current_pt_regs();
680	struct {
681		struct elfhdr elf_ex;
682		struct elfhdr interp_elf_ex;
683	} *loc;
684	struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
685
686	loc = kmalloc(sizeof(*loc), GFP_KERNEL);
687	if (!loc) {
688		retval = -ENOMEM;
689		goto out_ret;
690	}
691
692	/* Get the exec-header */
693	loc->elf_ex = *((struct elfhdr *)bprm->buf);
694
695	retval = -ENOEXEC;
696	/* First of all, some simple consistency checks */
697	if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
698		goto out;
699
700	if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
701		goto out;
702	if (!elf_check_arch(&loc->elf_ex))
703		goto out;
704	if (!bprm->file->f_op->mmap)
705		goto out;
706
707	elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
708	if (!elf_phdata)
709		goto out;
710
711	elf_ppnt = elf_phdata;
712	elf_bss = 0;
713	elf_brk = 0;
714
715	start_code = ~0UL;
716	end_code = 0;
717	start_data = 0;
718	end_data = 0;
719
720	for (i = 0; i < loc->elf_ex.e_phnum; i++) {
721		if (elf_ppnt->p_type == PT_INTERP) {
722			/* This is the program interpreter used for
723			 * shared libraries - for now assume that this
724			 * is an a.out format binary
725			 */
726			retval = -ENOEXEC;
727			if (elf_ppnt->p_filesz > PATH_MAX ||
728			    elf_ppnt->p_filesz < 2)
729				goto out_free_ph;
730
731			retval = -ENOMEM;
732			elf_interpreter = kmalloc(elf_ppnt->p_filesz,
733						  GFP_KERNEL);
734			if (!elf_interpreter)
735				goto out_free_ph;
736
737			retval = kernel_read(bprm->file, elf_ppnt->p_offset,
738					     elf_interpreter,
739					     elf_ppnt->p_filesz);
740			if (retval != elf_ppnt->p_filesz) {
741				if (retval >= 0)
742					retval = -EIO;
743				goto out_free_interp;
744			}
745			/* make sure path is NULL terminated */
746			retval = -ENOEXEC;
747			if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
748				goto out_free_interp;
749
750			interpreter = open_exec(elf_interpreter);
751			retval = PTR_ERR(interpreter);
752			if (IS_ERR(interpreter))
753				goto out_free_interp;
754
755			/*
756			 * If the binary is not readable then enforce
757			 * mm->dumpable = 0 regardless of the interpreter's
758			 * permissions.
759			 */
760			would_dump(bprm, interpreter);
761
762			retval = kernel_read(interpreter, 0, bprm->buf,
763					     BINPRM_BUF_SIZE);
764			if (retval != BINPRM_BUF_SIZE) {
765				if (retval >= 0)
766					retval = -EIO;
767				goto out_free_dentry;
768			}
769
770			/* Get the exec headers */
771			loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
772			break;
773		}
774		elf_ppnt++;
775	}
776
777	elf_ppnt = elf_phdata;
778	for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
779		switch (elf_ppnt->p_type) {
780		case PT_GNU_STACK:
781			if (elf_ppnt->p_flags & PF_X)
782				executable_stack = EXSTACK_ENABLE_X;
783			else
784				executable_stack = EXSTACK_DISABLE_X;
785			break;
786
787		case PT_LOPROC ... PT_HIPROC:
788			retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
789						  bprm->file, false,
790						  &arch_state);
791			if (retval)
792				goto out_free_dentry;
793			break;
794		}
795
796	/* Some simple consistency checks for the interpreter */
797	if (elf_interpreter) {
798		retval = -ELIBBAD;
799		/* Not an ELF interpreter */
800		if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
801			goto out_free_dentry;
802		/* Verify the interpreter has a valid arch */
803		if (!elf_check_arch(&loc->interp_elf_ex))
804			goto out_free_dentry;
805
806		/* Load the interpreter program headers */
807		interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
808						   interpreter);
809		if (!interp_elf_phdata)
810			goto out_free_dentry;
811
812		/* Pass PT_LOPROC..PT_HIPROC headers to arch code */
813		elf_ppnt = interp_elf_phdata;
814		for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
815			switch (elf_ppnt->p_type) {
816			case PT_LOPROC ... PT_HIPROC:
817				retval = arch_elf_pt_proc(&loc->interp_elf_ex,
818							  elf_ppnt, interpreter,
819							  true, &arch_state);
820				if (retval)
821					goto out_free_dentry;
822				break;
823			}
824	}
825
826	/*
827	 * Allow arch code to reject the ELF at this point, whilst it's
828	 * still possible to return an error to the code that invoked
829	 * the exec syscall.
830	 */
831	retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
832	if (retval)
833		goto out_free_dentry;
834
835	/* Flush all traces of the currently running executable */
836	retval = flush_old_exec(bprm);
837	if (retval)
838		goto out_free_dentry;
839
840	/* Do this immediately, since STACK_TOP as used in setup_arg_pages
841	   may depend on the personality.  */
842	SET_PERSONALITY2(loc->elf_ex, &arch_state);
843	if (elf_read_implies_exec(loc->elf_ex, executable_stack))
844		current->personality |= READ_IMPLIES_EXEC;
845
846	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
847		current->flags |= PF_RANDOMIZE;
848
849	setup_new_exec(bprm);
850
851	/* Do this so that we can load the interpreter, if need be.  We will
852	   change some of these later */
853	retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
854				 executable_stack);
855	if (retval < 0)
856		goto out_free_dentry;
857
858	current->mm->start_stack = bprm->p;
859
860	/* Now we do a little grungy work by mmapping the ELF image into
861	   the correct location in memory. */
862	for(i = 0, elf_ppnt = elf_phdata;
863	    i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
864		int elf_prot = 0, elf_flags;
865		unsigned long k, vaddr;
866		unsigned long total_size = 0;
867
868		if (elf_ppnt->p_type != PT_LOAD)
869			continue;
870
871		if (unlikely (elf_brk > elf_bss)) {
872			unsigned long nbyte;
873
874			/* There was a PT_LOAD segment with p_memsz > p_filesz
875			   before this one. Map anonymous pages, if needed,
876			   and clear the area.  */
877			retval = set_brk(elf_bss + load_bias,
878					 elf_brk + load_bias);
879			if (retval)
880				goto out_free_dentry;
881			nbyte = ELF_PAGEOFFSET(elf_bss);
882			if (nbyte) {
883				nbyte = ELF_MIN_ALIGN - nbyte;
884				if (nbyte > elf_brk - elf_bss)
885					nbyte = elf_brk - elf_bss;
886				if (clear_user((void __user *)elf_bss +
887							load_bias, nbyte)) {
888					/*
889					 * This bss-zeroing can fail if the ELF
890					 * file specifies odd protections. So
891					 * we don't check the return value
892					 */
893				}
894			}
895		}
896
897		if (elf_ppnt->p_flags & PF_R)
898			elf_prot |= PROT_READ;
899		if (elf_ppnt->p_flags & PF_W)
900			elf_prot |= PROT_WRITE;
901		if (elf_ppnt->p_flags & PF_X)
902			elf_prot |= PROT_EXEC;
903
904		elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
905
906		vaddr = elf_ppnt->p_vaddr;
907		if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
908			elf_flags |= MAP_FIXED;
909		} else if (loc->elf_ex.e_type == ET_DYN) {
910			/* Try and get dynamic programs out of the way of the
911			 * default mmap base, as well as whatever program they
912			 * might try to exec.  This is because the brk will
913			 * follow the loader, and is not movable.  */
914			load_bias = ELF_ET_DYN_BASE - vaddr;
915			if (current->flags & PF_RANDOMIZE)
916				load_bias += arch_mmap_rnd();
917			load_bias = ELF_PAGESTART(load_bias);
918			total_size = total_mapping_size(elf_phdata,
919							loc->elf_ex.e_phnum);
920			if (!total_size) {
921				retval = -EINVAL;
922				goto out_free_dentry;
923			}
924		}
925
926		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
927				elf_prot, elf_flags, total_size);
928		if (BAD_ADDR(error)) {
929			retval = IS_ERR((void *)error) ?
930				PTR_ERR((void*)error) : -EINVAL;
931			goto out_free_dentry;
932		}
933
934		if (!load_addr_set) {
935			load_addr_set = 1;
936			load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
937			if (loc->elf_ex.e_type == ET_DYN) {
938				load_bias += error -
939				             ELF_PAGESTART(load_bias + vaddr);
940				load_addr += load_bias;
941				reloc_func_desc = load_bias;
942			}
943		}
944		k = elf_ppnt->p_vaddr;
945		if (k < start_code)
946			start_code = k;
947		if (start_data < k)
948			start_data = k;
949
950		/*
951		 * Check to see if the section's size will overflow the
952		 * allowed task size. Note that p_filesz must always be
953		 * <= p_memsz so it is only necessary to check p_memsz.
954		 */
955		if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
956		    elf_ppnt->p_memsz > TASK_SIZE ||
957		    TASK_SIZE - elf_ppnt->p_memsz < k) {
958			/* set_brk can never work. Avoid overflows. */
959			retval = -EINVAL;
960			goto out_free_dentry;
961		}
962
963		k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
964
965		if (k > elf_bss)
966			elf_bss = k;
967		if ((elf_ppnt->p_flags & PF_X) && end_code < k)
968			end_code = k;
969		if (end_data < k)
970			end_data = k;
971		k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
972		if (k > elf_brk)
973			elf_brk = k;
974	}
975
976	loc->elf_ex.e_entry += load_bias;
977	elf_bss += load_bias;
978	elf_brk += load_bias;
979	start_code += load_bias;
980	end_code += load_bias;
981	start_data += load_bias;
982	end_data += load_bias;
983
984	/* Calling set_brk effectively mmaps the pages that we need
985	 * for the bss and break sections.  We must do this before
986	 * mapping in the interpreter, to make sure it doesn't wind
987	 * up getting placed where the bss needs to go.
988	 */
989	retval = set_brk(elf_bss, elf_brk);
990	if (retval)
991		goto out_free_dentry;
992	if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
993		retval = -EFAULT; /* Nobody gets to see this, but.. */
994		goto out_free_dentry;
995	}
996
997	if (elf_interpreter) {
998		unsigned long interp_map_addr = 0;
999
1000		elf_entry = load_elf_interp(&loc->interp_elf_ex,
1001					    interpreter,
1002					    &interp_map_addr,
1003					    load_bias, interp_elf_phdata);
1004		if (!IS_ERR((void *)elf_entry)) {
1005			/*
1006			 * load_elf_interp() returns relocation
1007			 * adjustment
1008			 */
1009			interp_load_addr = elf_entry;
1010			elf_entry += loc->interp_elf_ex.e_entry;
1011		}
1012		if (BAD_ADDR(elf_entry)) {
1013			retval = IS_ERR((void *)elf_entry) ?
1014					(int)elf_entry : -EINVAL;
1015			goto out_free_dentry;
1016		}
1017		reloc_func_desc = interp_load_addr;
1018
1019		allow_write_access(interpreter);
1020		fput(interpreter);
1021		kfree(elf_interpreter);
1022	} else {
1023		elf_entry = loc->elf_ex.e_entry;
1024		if (BAD_ADDR(elf_entry)) {
1025			retval = -EINVAL;
1026			goto out_free_dentry;
1027		}
1028	}
1029
1030	kfree(interp_elf_phdata);
1031	kfree(elf_phdata);
1032
1033	set_binfmt(&elf_format);
1034
1035#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1036	retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1037	if (retval < 0)
1038		goto out;
1039#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1040
1041	install_exec_creds(bprm);
1042	retval = create_elf_tables(bprm, &loc->elf_ex,
1043			  load_addr, interp_load_addr);
1044	if (retval < 0)
1045		goto out;
1046	/* N.B. passed_fileno might not be initialized? */
1047	current->mm->end_code = end_code;
1048	current->mm->start_code = start_code;
1049	current->mm->start_data = start_data;
1050	current->mm->end_data = end_data;
1051	current->mm->start_stack = bprm->p;
1052
1053	if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1054		current->mm->brk = current->mm->start_brk =
1055			arch_randomize_brk(current->mm);
1056#ifdef compat_brk_randomized
1057		current->brk_randomized = 1;
1058#endif
1059	}
1060
1061	if (current->personality & MMAP_PAGE_ZERO) {
1062		/* Why this, you ask???  Well SVr4 maps page 0 as read-only,
1063		   and some applications "depend" upon this behavior.
1064		   Since we do not have the power to recompile these, we
1065		   emulate the SVr4 behavior. Sigh. */
1066		error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1067				MAP_FIXED | MAP_PRIVATE, 0);
1068	}
1069
1070#ifdef ELF_PLAT_INIT
1071	/*
1072	 * The ABI may specify that certain registers be set up in special
1073	 * ways (on i386 %edx is the address of a DT_FINI function, for
1074	 * example.  In addition, it may also specify (eg, PowerPC64 ELF)
1075	 * that the e_entry field is the address of the function descriptor
1076	 * for the startup routine, rather than the address of the startup
1077	 * routine itself.  This macro performs whatever initialization to
1078	 * the regs structure is required as well as any relocations to the
1079	 * function descriptor entries when executing dynamically links apps.
1080	 */
1081	ELF_PLAT_INIT(regs, reloc_func_desc);
1082#endif
1083
1084	start_thread(regs, elf_entry, bprm->p);
1085	retval = 0;
1086out:
1087	kfree(loc);
1088out_ret:
1089	return retval;
1090
1091	/* error cleanup */
1092out_free_dentry:
1093	kfree(interp_elf_phdata);
1094	allow_write_access(interpreter);
1095	if (interpreter)
1096		fput(interpreter);
1097out_free_interp:
1098	kfree(elf_interpreter);
1099out_free_ph:
1100	kfree(elf_phdata);
1101	goto out;
1102}
1103
1104#ifdef CONFIG_USELIB
1105/* This is really simpleminded and specialized - we are loading an
1106   a.out library that is given an ELF header. */
1107static int load_elf_library(struct file *file)
1108{
1109	struct elf_phdr *elf_phdata;
1110	struct elf_phdr *eppnt;
1111	unsigned long elf_bss, bss, len;
1112	int retval, error, i, j;
1113	struct elfhdr elf_ex;
1114
1115	error = -ENOEXEC;
1116	retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1117	if (retval != sizeof(elf_ex))
1118		goto out;
1119
1120	if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1121		goto out;
1122
1123	/* First of all, some simple consistency checks */
1124	if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1125	    !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1126		goto out;
1127
1128	/* Now read in all of the header information */
1129
1130	j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1131	/* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1132
1133	error = -ENOMEM;
1134	elf_phdata = kmalloc(j, GFP_KERNEL);
1135	if (!elf_phdata)
1136		goto out;
1137
1138	eppnt = elf_phdata;
1139	error = -ENOEXEC;
1140	retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1141	if (retval != j)
1142		goto out_free_ph;
1143
1144	for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1145		if ((eppnt + i)->p_type == PT_LOAD)
1146			j++;
1147	if (j != 1)
1148		goto out_free_ph;
1149
1150	while (eppnt->p_type != PT_LOAD)
1151		eppnt++;
1152
1153	/* Now use mmap to map the library into memory. */
1154	error = vm_mmap(file,
1155			ELF_PAGESTART(eppnt->p_vaddr),
1156			(eppnt->p_filesz +
1157			 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1158			PROT_READ | PROT_WRITE | PROT_EXEC,
1159			MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1160			(eppnt->p_offset -
1161			 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1162	if (error != ELF_PAGESTART(eppnt->p_vaddr))
1163		goto out_free_ph;
1164
1165	elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1166	if (padzero(elf_bss)) {
1167		error = -EFAULT;
1168		goto out_free_ph;
1169	}
1170
1171	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1172			    ELF_MIN_ALIGN - 1);
1173	bss = eppnt->p_memsz + eppnt->p_vaddr;
1174	if (bss > len)
1175		vm_brk(len, bss - len);
1176	error = 0;
1177
1178out_free_ph:
1179	kfree(elf_phdata);
1180out:
1181	return error;
1182}
1183#endif /* #ifdef CONFIG_USELIB */
1184
1185#ifdef CONFIG_ELF_CORE
1186/*
1187 * ELF core dumper
1188 *
1189 * Modelled on fs/exec.c:aout_core_dump()
1190 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1191 */
1192
1193/*
1194 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1195 * that are useful for post-mortem analysis are included in every core dump.
1196 * In that way we ensure that the core dump is fully interpretable later
1197 * without matching up the same kernel and hardware config to see what PC values
1198 * meant. These special mappings include - vDSO, vsyscall, and other
1199 * architecture specific mappings
1200 */
1201static bool always_dump_vma(struct vm_area_struct *vma)
1202{
1203	/* Any vsyscall mappings? */
1204	if (vma == get_gate_vma(vma->vm_mm))
1205		return true;
1206
1207	/*
1208	 * Assume that all vmas with a .name op should always be dumped.
1209	 * If this changes, a new vm_ops field can easily be added.
1210	 */
1211	if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1212		return true;
1213
1214	/*
1215	 * arch_vma_name() returns non-NULL for special architecture mappings,
1216	 * such as vDSO sections.
1217	 */
1218	if (arch_vma_name(vma))
1219		return true;
1220
1221	return false;
1222}
1223
1224/*
1225 * Decide what to dump of a segment, part, all or none.
1226 */
1227static unsigned long vma_dump_size(struct vm_area_struct *vma,
1228				   unsigned long mm_flags)
1229{
1230#define FILTER(type)	(mm_flags & (1UL << MMF_DUMP_##type))
1231
1232	/* always dump the vdso and vsyscall sections */
1233	if (always_dump_vma(vma))
1234		goto whole;
1235
1236	if (vma->vm_flags & VM_DONTDUMP)
1237		return 0;
1238
1239	/* Hugetlb memory check */
1240	if (vma->vm_flags & VM_HUGETLB) {
1241		if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1242			goto whole;
1243		if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1244			goto whole;
1245		return 0;
1246	}
1247
1248	/* Do not dump I/O mapped devices or special mappings */
1249	if (vma->vm_flags & VM_IO)
1250		return 0;
1251
1252	/* By default, dump shared memory if mapped from an anonymous file. */
1253	if (vma->vm_flags & VM_SHARED) {
1254		if (file_inode(vma->vm_file)->i_nlink == 0 ?
1255		    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1256			goto whole;
1257		return 0;
1258	}
1259
1260	/* Dump segments that have been written to.  */
1261	if (vma->anon_vma && FILTER(ANON_PRIVATE))
1262		goto whole;
1263	if (vma->vm_file == NULL)
1264		return 0;
1265
1266	if (FILTER(MAPPED_PRIVATE))
1267		goto whole;
1268
1269	/*
1270	 * If this looks like the beginning of a DSO or executable mapping,
1271	 * check for an ELF header.  If we find one, dump the first page to
1272	 * aid in determining what was mapped here.
1273	 */
1274	if (FILTER(ELF_HEADERS) &&
1275	    vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1276		u32 __user *header = (u32 __user *) vma->vm_start;
1277		u32 word;
1278		mm_segment_t fs = get_fs();
1279		/*
1280		 * Doing it this way gets the constant folded by GCC.
1281		 */
1282		union {
1283			u32 cmp;
1284			char elfmag[SELFMAG];
1285		} magic;
1286		BUILD_BUG_ON(SELFMAG != sizeof word);
1287		magic.elfmag[EI_MAG0] = ELFMAG0;
1288		magic.elfmag[EI_MAG1] = ELFMAG1;
1289		magic.elfmag[EI_MAG2] = ELFMAG2;
1290		magic.elfmag[EI_MAG3] = ELFMAG3;
1291		/*
1292		 * Switch to the user "segment" for get_user(),
1293		 * then put back what elf_core_dump() had in place.
1294		 */
1295		set_fs(USER_DS);
1296		if (unlikely(get_user(word, header)))
1297			word = 0;
1298		set_fs(fs);
1299		if (word == magic.cmp)
1300			return PAGE_SIZE;
1301	}
1302
1303#undef	FILTER
1304
1305	return 0;
1306
1307whole:
1308	return vma->vm_end - vma->vm_start;
1309}
1310
1311/* An ELF note in memory */
1312struct memelfnote
1313{
1314	const char *name;
1315	int type;
1316	unsigned int datasz;
1317	void *data;
1318};
1319
1320static int notesize(struct memelfnote *en)
1321{
1322	int sz;
1323
1324	sz = sizeof(struct elf_note);
1325	sz += roundup(strlen(en->name) + 1, 4);
1326	sz += roundup(en->datasz, 4);
1327
1328	return sz;
1329}
1330
1331static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1332{
1333	struct elf_note en;
1334	en.n_namesz = strlen(men->name) + 1;
1335	en.n_descsz = men->datasz;
1336	en.n_type = men->type;
1337
1338	return dump_emit(cprm, &en, sizeof(en)) &&
1339	    dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1340	    dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
1341}
1342
1343static void fill_elf_header(struct elfhdr *elf, int segs,
1344			    u16 machine, u32 flags)
1345{
1346	memset(elf, 0, sizeof(*elf));
1347
1348	memcpy(elf->e_ident, ELFMAG, SELFMAG);
1349	elf->e_ident[EI_CLASS] = ELF_CLASS;
1350	elf->e_ident[EI_DATA] = ELF_DATA;
1351	elf->e_ident[EI_VERSION] = EV_CURRENT;
1352	elf->e_ident[EI_OSABI] = ELF_OSABI;
1353
1354	elf->e_type = ET_CORE;
1355	elf->e_machine = machine;
1356	elf->e_version = EV_CURRENT;
1357	elf->e_phoff = sizeof(struct elfhdr);
1358	elf->e_flags = flags;
1359	elf->e_ehsize = sizeof(struct elfhdr);
1360	elf->e_phentsize = sizeof(struct elf_phdr);
1361	elf->e_phnum = segs;
1362
1363	return;
1364}
1365
1366static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1367{
1368	phdr->p_type = PT_NOTE;
1369	phdr->p_offset = offset;
1370	phdr->p_vaddr = 0;
1371	phdr->p_paddr = 0;
1372	phdr->p_filesz = sz;
1373	phdr->p_memsz = 0;
1374	phdr->p_flags = 0;
1375	phdr->p_align = 0;
1376	return;
1377}
1378
1379static void fill_note(struct memelfnote *note, const char *name, int type,
1380		unsigned int sz, void *data)
1381{
1382	note->name = name;
1383	note->type = type;
1384	note->datasz = sz;
1385	note->data = data;
1386	return;
1387}
1388
1389/*
1390 * fill up all the fields in prstatus from the given task struct, except
1391 * registers which need to be filled up separately.
1392 */
1393static void fill_prstatus(struct elf_prstatus *prstatus,
1394		struct task_struct *p, long signr)
1395{
1396	prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1397	prstatus->pr_sigpend = p->pending.signal.sig[0];
1398	prstatus->pr_sighold = p->blocked.sig[0];
1399	rcu_read_lock();
1400	prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1401	rcu_read_unlock();
1402	prstatus->pr_pid = task_pid_vnr(p);
1403	prstatus->pr_pgrp = task_pgrp_vnr(p);
1404	prstatus->pr_sid = task_session_vnr(p);
1405	if (thread_group_leader(p)) {
1406		struct task_cputime cputime;
1407
1408		/*
1409		 * This is the record for the group leader.  It shows the
1410		 * group-wide total, not its individual thread total.
1411		 */
1412		thread_group_cputime(p, &cputime);
1413		cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1414		cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1415	} else {
1416		cputime_t utime, stime;
1417
1418		task_cputime(p, &utime, &stime);
1419		cputime_to_timeval(utime, &prstatus->pr_utime);
1420		cputime_to_timeval(stime, &prstatus->pr_stime);
1421	}
1422	cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1423	cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1424}
1425
1426static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1427		       struct mm_struct *mm)
1428{
1429	const struct cred *cred;
1430	unsigned int i, len;
1431
1432	/* first copy the parameters from user space */
1433	memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1434
1435	len = mm->arg_end - mm->arg_start;
1436	if (len >= ELF_PRARGSZ)
1437		len = ELF_PRARGSZ-1;
1438	if (copy_from_user(&psinfo->pr_psargs,
1439		           (const char __user *)mm->arg_start, len))
1440		return -EFAULT;
1441	for(i = 0; i < len; i++)
1442		if (psinfo->pr_psargs[i] == 0)
1443			psinfo->pr_psargs[i] = ' ';
1444	psinfo->pr_psargs[len] = 0;
1445
1446	rcu_read_lock();
1447	psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1448	rcu_read_unlock();
1449	psinfo->pr_pid = task_pid_vnr(p);
1450	psinfo->pr_pgrp = task_pgrp_vnr(p);
1451	psinfo->pr_sid = task_session_vnr(p);
1452
1453	i = p->state ? ffz(~p->state) + 1 : 0;
1454	psinfo->pr_state = i;
1455	psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1456	psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1457	psinfo->pr_nice = task_nice(p);
1458	psinfo->pr_flag = p->flags;
1459	rcu_read_lock();
1460	cred = __task_cred(p);
1461	SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1462	SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
1463	rcu_read_unlock();
1464	strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1465
1466	return 0;
1467}
1468
1469static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1470{
1471	elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1472	int i = 0;
1473	do
1474		i += 2;
1475	while (auxv[i - 2] != AT_NULL);
1476	fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1477}
1478
1479static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1480		const siginfo_t *siginfo)
1481{
1482	mm_segment_t old_fs = get_fs();
1483	set_fs(KERNEL_DS);
1484	copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1485	set_fs(old_fs);
1486	fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1487}
1488
1489#define MAX_FILE_NOTE_SIZE (4*1024*1024)
1490/*
1491 * Format of NT_FILE note:
1492 *
1493 * long count     -- how many files are mapped
1494 * long page_size -- units for file_ofs
1495 * array of [COUNT] elements of
1496 *   long start
1497 *   long end
1498 *   long file_ofs
1499 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1500 */
1501static int fill_files_note(struct memelfnote *note)
1502{
1503	struct vm_area_struct *vma;
1504	unsigned count, size, names_ofs, remaining, n;
1505	user_long_t *data;
1506	user_long_t *start_end_ofs;
1507	char *name_base, *name_curpos;
1508
1509	/* *Estimated* file count and total data size needed */
1510	count = current->mm->map_count;
1511	size = count * 64;
1512
1513	names_ofs = (2 + 3 * count) * sizeof(data[0]);
1514 alloc:
1515	if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1516		return -EINVAL;
1517	size = round_up(size, PAGE_SIZE);
1518	data = vmalloc(size);
1519	if (!data)
1520		return -ENOMEM;
1521
1522	start_end_ofs = data + 2;
1523	name_base = name_curpos = ((char *)data) + names_ofs;
1524	remaining = size - names_ofs;
1525	count = 0;
1526	for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1527		struct file *file;
1528		const char *filename;
1529
1530		file = vma->vm_file;
1531		if (!file)
1532			continue;
1533		filename = d_path(&file->f_path, name_curpos, remaining);
1534		if (IS_ERR(filename)) {
1535			if (PTR_ERR(filename) == -ENAMETOOLONG) {
1536				vfree(data);
1537				size = size * 5 / 4;
1538				goto alloc;
1539			}
1540			continue;
1541		}
1542
1543		/* d_path() fills at the end, move name down */
1544		/* n = strlen(filename) + 1: */
1545		n = (name_curpos + remaining) - filename;
1546		remaining = filename - name_curpos;
1547		memmove(name_curpos, filename, n);
1548		name_curpos += n;
1549
1550		*start_end_ofs++ = vma->vm_start;
1551		*start_end_ofs++ = vma->vm_end;
1552		*start_end_ofs++ = vma->vm_pgoff;
1553		count++;
1554	}
1555
1556	/* Now we know exact count of files, can store it */
1557	data[0] = count;
1558	data[1] = PAGE_SIZE;
1559	/*
1560	 * Count usually is less than current->mm->map_count,
1561	 * we need to move filenames down.
1562	 */
1563	n = current->mm->map_count - count;
1564	if (n != 0) {
1565		unsigned shift_bytes = n * 3 * sizeof(data[0]);
1566		memmove(name_base - shift_bytes, name_base,
1567			name_curpos - name_base);
1568		name_curpos -= shift_bytes;
1569	}
1570
1571	size = name_curpos - (char *)data;
1572	fill_note(note, "CORE", NT_FILE, size, data);
1573	return 0;
1574}
1575
1576#ifdef CORE_DUMP_USE_REGSET
1577#include <linux/regset.h>
1578
1579struct elf_thread_core_info {
1580	struct elf_thread_core_info *next;
1581	struct task_struct *task;
1582	struct elf_prstatus prstatus;
1583	struct memelfnote notes[0];
1584};
1585
1586struct elf_note_info {
1587	struct elf_thread_core_info *thread;
1588	struct memelfnote psinfo;
1589	struct memelfnote signote;
1590	struct memelfnote auxv;
1591	struct memelfnote files;
1592	user_siginfo_t csigdata;
1593	size_t size;
1594	int thread_notes;
1595};
1596
1597/*
1598 * When a regset has a writeback hook, we call it on each thread before
1599 * dumping user memory.  On register window machines, this makes sure the
1600 * user memory backing the register data is up to date before we read it.
1601 */
1602static void do_thread_regset_writeback(struct task_struct *task,
1603				       const struct user_regset *regset)
1604{
1605	if (regset->writeback)
1606		regset->writeback(task, regset, 1);
1607}
1608
1609#ifndef PR_REG_SIZE
1610#define PR_REG_SIZE(S) sizeof(S)
1611#endif
1612
1613#ifndef PRSTATUS_SIZE
1614#define PRSTATUS_SIZE(S) sizeof(S)
1615#endif
1616
1617#ifndef PR_REG_PTR
1618#define PR_REG_PTR(S) (&((S)->pr_reg))
1619#endif
1620
1621#ifndef SET_PR_FPVALID
1622#define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1623#endif
1624
1625static int fill_thread_core_info(struct elf_thread_core_info *t,
1626				 const struct user_regset_view *view,
1627				 long signr, size_t *total)
1628{
1629	unsigned int i;
1630
1631	/*
1632	 * NT_PRSTATUS is the one special case, because the regset data
1633	 * goes into the pr_reg field inside the note contents, rather
1634	 * than being the whole note contents.  We fill the reset in here.
1635	 * We assume that regset 0 is NT_PRSTATUS.
1636	 */
1637	fill_prstatus(&t->prstatus, t->task, signr);
1638	(void) view->regsets[0].get(t->task, &view->regsets[0],
1639				    0, PR_REG_SIZE(t->prstatus.pr_reg),
1640				    PR_REG_PTR(&t->prstatus), NULL);
1641
1642	fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1643		  PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1644	*total += notesize(&t->notes[0]);
1645
1646	do_thread_regset_writeback(t->task, &view->regsets[0]);
1647
1648	/*
1649	 * Each other regset might generate a note too.  For each regset
1650	 * that has no core_note_type or is inactive, we leave t->notes[i]
1651	 * all zero and we'll know to skip writing it later.
1652	 */
1653	for (i = 1; i < view->n; ++i) {
1654		const struct user_regset *regset = &view->regsets[i];
1655		do_thread_regset_writeback(t->task, regset);
1656		if (regset->core_note_type && regset->get &&
1657		    (!regset->active || regset->active(t->task, regset))) {
1658			int ret;
1659			size_t size = regset->n * regset->size;
1660			void *data = kmalloc(size, GFP_KERNEL);
1661			if (unlikely(!data))
1662				return 0;
1663			ret = regset->get(t->task, regset,
1664					  0, size, data, NULL);
1665			if (unlikely(ret))
1666				kfree(data);
1667			else {
1668				if (regset->core_note_type != NT_PRFPREG)
1669					fill_note(&t->notes[i], "LINUX",
1670						  regset->core_note_type,
1671						  size, data);
1672				else {
1673					SET_PR_FPVALID(&t->prstatus, 1);
1674					fill_note(&t->notes[i], "CORE",
1675						  NT_PRFPREG, size, data);
1676				}
1677				*total += notesize(&t->notes[i]);
1678			}
1679		}
1680	}
1681
1682	return 1;
1683}
1684
1685static int fill_note_info(struct elfhdr *elf, int phdrs,
1686			  struct elf_note_info *info,
1687			  const siginfo_t *siginfo, struct pt_regs *regs)
1688{
1689	struct task_struct *dump_task = current;
1690	const struct user_regset_view *view = task_user_regset_view(dump_task);
1691	struct elf_thread_core_info *t;
1692	struct elf_prpsinfo *psinfo;
1693	struct core_thread *ct;
1694	unsigned int i;
1695
1696	info->size = 0;
1697	info->thread = NULL;
1698
1699	psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1700	if (psinfo == NULL) {
1701		info->psinfo.data = NULL; /* So we don't free this wrongly */
1702		return 0;
1703	}
1704
1705	fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1706
1707	/*
1708	 * Figure out how many notes we're going to need for each thread.
1709	 */
1710	info->thread_notes = 0;
1711	for (i = 0; i < view->n; ++i)
1712		if (view->regsets[i].core_note_type != 0)
1713			++info->thread_notes;
1714
1715	/*
1716	 * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1717	 * since it is our one special case.
1718	 */
1719	if (unlikely(info->thread_notes == 0) ||
1720	    unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1721		WARN_ON(1);
1722		return 0;
1723	}
1724
1725	/*
1726	 * Initialize the ELF file header.
1727	 */
1728	fill_elf_header(elf, phdrs,
1729			view->e_machine, view->e_flags);
1730
1731	/*
1732	 * Allocate a structure for each thread.
1733	 */
1734	for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1735		t = kzalloc(offsetof(struct elf_thread_core_info,
1736				     notes[info->thread_notes]),
1737			    GFP_KERNEL);
1738		if (unlikely(!t))
1739			return 0;
1740
1741		t->task = ct->task;
1742		if (ct->task == dump_task || !info->thread) {
1743			t->next = info->thread;
1744			info->thread = t;
1745		} else {
1746			/*
1747			 * Make sure to keep the original task at
1748			 * the head of the list.
1749			 */
1750			t->next = info->thread->next;
1751			info->thread->next = t;
1752		}
1753	}
1754
1755	/*
1756	 * Now fill in each thread's information.
1757	 */
1758	for (t = info->thread; t != NULL; t = t->next)
1759		if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1760			return 0;
1761
1762	/*
1763	 * Fill in the two process-wide notes.
1764	 */
1765	fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1766	info->size += notesize(&info->psinfo);
1767
1768	fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1769	info->size += notesize(&info->signote);
1770
1771	fill_auxv_note(&info->auxv, current->mm);
1772	info->size += notesize(&info->auxv);
1773
1774	if (fill_files_note(&info->files) == 0)
1775		info->size += notesize(&info->files);
1776
1777	return 1;
1778}
1779
1780static size_t get_note_info_size(struct elf_note_info *info)
1781{
1782	return info->size;
1783}
1784
1785/*
1786 * Write all the notes for each thread.  When writing the first thread, the
1787 * process-wide notes are interleaved after the first thread-specific note.
1788 */
1789static int write_note_info(struct elf_note_info *info,
1790			   struct coredump_params *cprm)
1791{
1792	bool first = true;
1793	struct elf_thread_core_info *t = info->thread;
1794
1795	do {
1796		int i;
1797
1798		if (!writenote(&t->notes[0], cprm))
1799			return 0;
1800
1801		if (first && !writenote(&info->psinfo, cprm))
1802			return 0;
1803		if (first && !writenote(&info->signote, cprm))
1804			return 0;
1805		if (first && !writenote(&info->auxv, cprm))
1806			return 0;
1807		if (first && info->files.data &&
1808				!writenote(&info->files, cprm))
1809			return 0;
1810
1811		for (i = 1; i < info->thread_notes; ++i)
1812			if (t->notes[i].data &&
1813			    !writenote(&t->notes[i], cprm))
1814				return 0;
1815
1816		first = false;
1817		t = t->next;
1818	} while (t);
1819
1820	return 1;
1821}
1822
1823static void free_note_info(struct elf_note_info *info)
1824{
1825	struct elf_thread_core_info *threads = info->thread;
1826	while (threads) {
1827		unsigned int i;
1828		struct elf_thread_core_info *t = threads;
1829		threads = t->next;
1830		WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1831		for (i = 1; i < info->thread_notes; ++i)
1832			kfree(t->notes[i].data);
1833		kfree(t);
1834	}
1835	kfree(info->psinfo.data);
1836	vfree(info->files.data);
1837}
1838
1839#else
1840
1841/* Here is the structure in which status of each thread is captured. */
1842struct elf_thread_status
1843{
1844	struct list_head list;
1845	struct elf_prstatus prstatus;	/* NT_PRSTATUS */
1846	elf_fpregset_t fpu;		/* NT_PRFPREG */
1847	struct task_struct *thread;
1848#ifdef ELF_CORE_COPY_XFPREGS
1849	elf_fpxregset_t xfpu;		/* ELF_CORE_XFPREG_TYPE */
1850#endif
1851	struct memelfnote notes[3];
1852	int num_notes;
1853};
1854
1855/*
1856 * In order to add the specific thread information for the elf file format,
1857 * we need to keep a linked list of every threads pr_status and then create
1858 * a single section for them in the final core file.
1859 */
1860static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1861{
1862	int sz = 0;
1863	struct task_struct *p = t->thread;
1864	t->num_notes = 0;
1865
1866	fill_prstatus(&t->prstatus, p, signr);
1867	elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1868
1869	fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1870		  &(t->prstatus));
1871	t->num_notes++;
1872	sz += notesize(&t->notes[0]);
1873
1874	if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1875								&t->fpu))) {
1876		fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1877			  &(t->fpu));
1878		t->num_notes++;
1879		sz += notesize(&t->notes[1]);
1880	}
1881
1882#ifdef ELF_CORE_COPY_XFPREGS
1883	if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1884		fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1885			  sizeof(t->xfpu), &t->xfpu);
1886		t->num_notes++;
1887		sz += notesize(&t->notes[2]);
1888	}
1889#endif
1890	return sz;
1891}
1892
1893struct elf_note_info {
1894	struct memelfnote *notes;
1895	struct memelfnote *notes_files;
1896	struct elf_prstatus *prstatus;	/* NT_PRSTATUS */
1897	struct elf_prpsinfo *psinfo;	/* NT_PRPSINFO */
1898	struct list_head thread_list;
1899	elf_fpregset_t *fpu;
1900#ifdef ELF_CORE_COPY_XFPREGS
1901	elf_fpxregset_t *xfpu;
1902#endif
1903	user_siginfo_t csigdata;
1904	int thread_status_size;
1905	int numnote;
1906};
1907
1908static int elf_note_info_init(struct elf_note_info *info)
1909{
1910	memset(info, 0, sizeof(*info));
1911	INIT_LIST_HEAD(&info->thread_list);
1912
1913	/* Allocate space for ELF notes */
1914	info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1915	if (!info->notes)
1916		return 0;
1917	info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1918	if (!info->psinfo)
1919		return 0;
1920	info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1921	if (!info->prstatus)
1922		return 0;
1923	info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1924	if (!info->fpu)
1925		return 0;
1926#ifdef ELF_CORE_COPY_XFPREGS
1927	info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1928	if (!info->xfpu)
1929		return 0;
1930#endif
1931	return 1;
1932}
1933
1934static int fill_note_info(struct elfhdr *elf, int phdrs,
1935			  struct elf_note_info *info,
1936			  const siginfo_t *siginfo, struct pt_regs *regs)
1937{
1938	struct list_head *t;
1939	struct core_thread *ct;
1940	struct elf_thread_status *ets;
1941
1942	if (!elf_note_info_init(info))
1943		return 0;
1944
1945	for (ct = current->mm->core_state->dumper.next;
1946					ct; ct = ct->next) {
1947		ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1948		if (!ets)
1949			return 0;
1950
1951		ets->thread = ct->task;
1952		list_add(&ets->list, &info->thread_list);
1953	}
1954
1955	list_for_each(t, &info->thread_list) {
1956		int sz;
1957
1958		ets = list_entry(t, struct elf_thread_status, list);
1959		sz = elf_dump_thread_status(siginfo->si_signo, ets);
1960		info->thread_status_size += sz;
1961	}
1962	/* now collect the dump for the current */
1963	memset(info->prstatus, 0, sizeof(*info->prstatus));
1964	fill_prstatus(info->prstatus, current, siginfo->si_signo);
1965	elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1966
1967	/* Set up header */
1968	fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1969
1970	/*
1971	 * Set up the notes in similar form to SVR4 core dumps made
1972	 * with info from their /proc.
1973	 */
1974
1975	fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1976		  sizeof(*info->prstatus), info->prstatus);
1977	fill_psinfo(info->psinfo, current->group_leader, current->mm);
1978	fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1979		  sizeof(*info->psinfo), info->psinfo);
1980
1981	fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1982	fill_auxv_note(info->notes + 3, current->mm);
1983	info->numnote = 4;
1984
1985	if (fill_files_note(info->notes + info->numnote) == 0) {
1986		info->notes_files = info->notes + info->numnote;
1987		info->numnote++;
1988	}
1989
1990	/* Try to dump the FPU. */
1991	info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1992							       info->fpu);
1993	if (info->prstatus->pr_fpvalid)
1994		fill_note(info->notes + info->numnote++,
1995			  "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1996#ifdef ELF_CORE_COPY_XFPREGS
1997	if (elf_core_copy_task_xfpregs(current, info->xfpu))
1998		fill_note(info->notes + info->numnote++,
1999			  "LINUX", ELF_CORE_XFPREG_TYPE,
2000			  sizeof(*info->xfpu), info->xfpu);
2001#endif
2002
2003	return 1;
2004}
2005
2006static size_t get_note_info_size(struct elf_note_info *info)
2007{
2008	int sz = 0;
2009	int i;
2010
2011	for (i = 0; i < info->numnote; i++)
2012		sz += notesize(info->notes + i);
2013
2014	sz += info->thread_status_size;
2015
2016	return sz;
2017}
2018
2019static int write_note_info(struct elf_note_info *info,
2020			   struct coredump_params *cprm)
2021{
2022	int i;
2023	struct list_head *t;
2024
2025	for (i = 0; i < info->numnote; i++)
2026		if (!writenote(info->notes + i, cprm))
2027			return 0;
2028
2029	/* write out the thread status notes section */
2030	list_for_each(t, &info->thread_list) {
2031		struct elf_thread_status *tmp =
2032				list_entry(t, struct elf_thread_status, list);
2033
2034		for (i = 0; i < tmp->num_notes; i++)
2035			if (!writenote(&tmp->notes[i], cprm))
2036				return 0;
2037	}
2038
2039	return 1;
2040}
2041
2042static void free_note_info(struct elf_note_info *info)
2043{
2044	while (!list_empty(&info->thread_list)) {
2045		struct list_head *tmp = info->thread_list.next;
2046		list_del(tmp);
2047		kfree(list_entry(tmp, struct elf_thread_status, list));
2048	}
2049
2050	/* Free data possibly allocated by fill_files_note(): */
2051	if (info->notes_files)
2052		vfree(info->notes_files->data);
2053
2054	kfree(info->prstatus);
2055	kfree(info->psinfo);
2056	kfree(info->notes);
2057	kfree(info->fpu);
2058#ifdef ELF_CORE_COPY_XFPREGS
2059	kfree(info->xfpu);
2060#endif
2061}
2062
2063#endif
2064
2065static struct vm_area_struct *first_vma(struct task_struct *tsk,
2066					struct vm_area_struct *gate_vma)
2067{
2068	struct vm_area_struct *ret = tsk->mm->mmap;
2069
2070	if (ret)
2071		return ret;
2072	return gate_vma;
2073}
2074/*
2075 * Helper function for iterating across a vma list.  It ensures that the caller
2076 * will visit `gate_vma' prior to terminating the search.
2077 */
2078static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2079					struct vm_area_struct *gate_vma)
2080{
2081	struct vm_area_struct *ret;
2082
2083	ret = this_vma->vm_next;
2084	if (ret)
2085		return ret;
2086	if (this_vma == gate_vma)
2087		return NULL;
2088	return gate_vma;
2089}
2090
2091static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2092			     elf_addr_t e_shoff, int segs)
2093{
2094	elf->e_shoff = e_shoff;
2095	elf->e_shentsize = sizeof(*shdr4extnum);
2096	elf->e_shnum = 1;
2097	elf->e_shstrndx = SHN_UNDEF;
2098
2099	memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2100
2101	shdr4extnum->sh_type = SHT_NULL;
2102	shdr4extnum->sh_size = elf->e_shnum;
2103	shdr4extnum->sh_link = elf->e_shstrndx;
2104	shdr4extnum->sh_info = segs;
2105}
2106
2107/*
2108 * Actual dumper
2109 *
2110 * This is a two-pass process; first we find the offsets of the bits,
2111 * and then they are actually written out.  If we run out of core limit
2112 * we just truncate.
2113 */
2114static int elf_core_dump(struct coredump_params *cprm)
2115{
2116	int has_dumped = 0;
2117	mm_segment_t fs;
2118	int segs, i;
2119	size_t vma_data_size = 0;
2120	struct vm_area_struct *vma, *gate_vma;
2121	struct elfhdr *elf = NULL;
2122	loff_t offset = 0, dataoff;
2123	struct elf_note_info info = { };
2124	struct elf_phdr *phdr4note = NULL;
2125	struct elf_shdr *shdr4extnum = NULL;
2126	Elf_Half e_phnum;
2127	elf_addr_t e_shoff;
2128	elf_addr_t *vma_filesz = NULL;
2129
2130	/*
2131	 * We no longer stop all VM operations.
2132	 *
2133	 * This is because those proceses that could possibly change map_count
2134	 * or the mmap / vma pages are now blocked in do_exit on current
2135	 * finishing this core dump.
2136	 *
2137	 * Only ptrace can touch these memory addresses, but it doesn't change
2138	 * the map_count or the pages allocated. So no possibility of crashing
2139	 * exists while dumping the mm->vm_next areas to the core file.
2140	 */
2141
2142	/* alloc memory for large data structures: too large to be on stack */
2143	elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2144	if (!elf)
2145		goto out;
2146	/*
2147	 * The number of segs are recored into ELF header as 16bit value.
2148	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2149	 */
2150	segs = current->mm->map_count;
2151	segs += elf_core_extra_phdrs();
2152
2153	gate_vma = get_gate_vma(current->mm);
2154	if (gate_vma != NULL)
2155		segs++;
2156
2157	/* for notes section */
2158	segs++;
2159
2160	/* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2161	 * this, kernel supports extended numbering. Have a look at
2162	 * include/linux/elf.h for further information. */
2163	e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2164
2165	/*
2166	 * Collect all the non-memory information about the process for the
2167	 * notes.  This also sets up the file header.
2168	 */
2169	if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2170		goto cleanup;
2171
2172	has_dumped = 1;
2173
2174	fs = get_fs();
2175	set_fs(KERNEL_DS);
2176
2177	offset += sizeof(*elf);				/* Elf header */
2178	offset += segs * sizeof(struct elf_phdr);	/* Program headers */
2179
2180	/* Write notes phdr entry */
2181	{
2182		size_t sz = get_note_info_size(&info);
2183
2184		sz += elf_coredump_extra_notes_size();
2185
2186		phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2187		if (!phdr4note)
2188			goto end_coredump;
2189
2190		fill_elf_note_phdr(phdr4note, sz, offset);
2191		offset += sz;
2192	}
2193
2194	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2195
2196	vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2197	if (!vma_filesz)
2198		goto end_coredump;
2199
2200	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2201			vma = next_vma(vma, gate_vma)) {
2202		unsigned long dump_size;
2203
2204		dump_size = vma_dump_size(vma, cprm->mm_flags);
2205		vma_filesz[i++] = dump_size;
2206		vma_data_size += dump_size;
2207	}
2208
2209	offset += vma_data_size;
2210	offset += elf_core_extra_data_size();
2211	e_shoff = offset;
2212
2213	if (e_phnum == PN_XNUM) {
2214		shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2215		if (!shdr4extnum)
2216			goto end_coredump;
2217		fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2218	}
2219
2220	offset = dataoff;
2221
2222	if (!dump_emit(cprm, elf, sizeof(*elf)))
2223		goto end_coredump;
2224
2225	if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2226		goto end_coredump;
2227
2228	/* Write program headers for segments dump */
2229	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2230			vma = next_vma(vma, gate_vma)) {
2231		struct elf_phdr phdr;
2232
2233		phdr.p_type = PT_LOAD;
2234		phdr.p_offset = offset;
2235		phdr.p_vaddr = vma->vm_start;
2236		phdr.p_paddr = 0;
2237		phdr.p_filesz = vma_filesz[i++];
2238		phdr.p_memsz = vma->vm_end - vma->vm_start;
2239		offset += phdr.p_filesz;
2240		phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2241		if (vma->vm_flags & VM_WRITE)
2242			phdr.p_flags |= PF_W;
2243		if (vma->vm_flags & VM_EXEC)
2244			phdr.p_flags |= PF_X;
2245		phdr.p_align = ELF_EXEC_PAGESIZE;
2246
2247		if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2248			goto end_coredump;
2249	}
2250
2251	if (!elf_core_write_extra_phdrs(cprm, offset))
2252		goto end_coredump;
2253
2254 	/* write out the notes section */
2255	if (!write_note_info(&info, cprm))
2256		goto end_coredump;
2257
2258	if (elf_coredump_extra_notes_write(cprm))
2259		goto end_coredump;
2260
2261	/* Align to page */
2262	if (!dump_skip(cprm, dataoff - cprm->written))
2263		goto end_coredump;
2264
2265	for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2266			vma = next_vma(vma, gate_vma)) {
2267		unsigned long addr;
2268		unsigned long end;
2269
2270		end = vma->vm_start + vma_filesz[i++];
2271
2272		for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2273			struct page *page;
2274			int stop;
2275
2276			page = get_dump_page(addr);
2277			if (page) {
2278				void *kaddr = kmap(page);
2279				stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2280				kunmap(page);
2281				page_cache_release(page);
2282			} else
2283				stop = !dump_skip(cprm, PAGE_SIZE);
2284			if (stop)
2285				goto end_coredump;
2286		}
2287	}
2288
2289	if (!elf_core_write_extra_data(cprm))
2290		goto end_coredump;
2291
2292	if (e_phnum == PN_XNUM) {
2293		if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2294			goto end_coredump;
2295	}
2296
2297end_coredump:
2298	set_fs(fs);
2299
2300cleanup:
2301	free_note_info(&info);
2302	kfree(shdr4extnum);
2303	kfree(vma_filesz);
2304	kfree(phdr4note);
2305	kfree(elf);
2306out:
2307	return has_dumped;
2308}
2309
2310#endif		/* CONFIG_ELF_CORE */
2311
2312static int __init init_elf_binfmt(void)
2313{
2314	register_binfmt(&elf_format);
2315	return 0;
2316}
2317
2318static void __exit exit_elf_binfmt(void)
2319{
2320	/* Remove the COFF and ELF loaders. */
2321	unregister_binfmt(&elf_format);
2322}
2323
2324core_initcall(init_elf_binfmt);
2325module_exit(exit_elf_binfmt);
2326MODULE_LICENSE("GPL");
2327