From 0e8f989a253b1bf85ea1c8d7987d67c054f4af91 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:46 +0000 Subject: NOMMU: Fix cleanup handling in ramfs_nommu_get_unmapped_area() Fix cleanup handling in ramfs_nommu_get_unmapped_area() by only freeing the number of pages that find_get_pages() said it had returned (nr) rather than attempting to free the number of pages we asked for (lpages) - thus avoiding the situation whereby put_page() may be handed NULL pointers if find_get_pages() returned fewer pages than were requested. Also avoid a warning about nr being uninitialised and the need for an if-statement in the cleanup path by using appropriate gotos. Signed-off-by: David Howells --- fs/ramfs/file-nommu.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 76acdbc3461..b9b567a2837 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -262,11 +262,11 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file, ret = -ENOMEM; pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL); if (!pages) - goto out; + goto out_free; nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages); if (nr != lpages) - goto out; /* leave if some pages were missing */ + goto out_free_pages; /* leave if some pages were missing */ /* check the pages for physical adjacency */ ptr = pages; @@ -274,19 +274,18 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file, page++; for (loop = lpages; loop > 1; loop--) if (*ptr++ != page++) - goto out; + goto out_free_pages; /* okay - all conditions fulfilled */ ret = (unsigned long) page_address(pages[0]); - out: - if (pages) { - ptr = pages; - for (loop = lpages; loop > 0; loop--) - put_page(*ptr++); - kfree(pages); - } - +out_free_pages: + ptr = pages; + for (loop = nr; loop > 0; loop--) + put_page(*ptr++); +out_free: + kfree(pages); +out: return ret; } -- cgit v1.2.3 From 
8feae13110d60cc6287afabc2887366b0eb226c2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Make VMAs per MM as for MMU-mode linux Make VMAs per mm_struct as for MMU-mode linux. This solves two problems: (1) In SYSV SHM where nattch for a segment does not reflect the number of shmat's (and forks) done. (2) In mmap() where the VMA's vm_mm is set to point to the parent mm by an exec'ing process when VM_EXECUTABLE is specified, regardless of the fact that a VMA might be shared and already have its vm_mm assigned to another process or a dead process. A new struct (vm_region) is introduced to track a mapped region and to remember the circumstances under which it may be shared and the vm_list_struct structure is discarded as it's no longer required. This patch makes the following additional changes: (1) Regions are now allocated with alloc_pages() rather than kmalloc() and with no recourse to __GFP_COMP, so the pages are not composite. Instead, each page has a reference on it held by the region. Anything else that is interested in such a page will have to get a reference on it to retain it. When the pages are released due to unmapping, each page is passed to put_page() and will be freed when the page usage count reaches zero. (2) Excess pages are trimmed after an allocation as the allocation must be made as a power-of-2 quantity of pages. (3) VMAs are added to the parent MM's R/B tree and mmap lists. As an MM may end up with overlapping VMAs within the tree, the VMA struct address is appended to the sort key. (4) Non-anonymous VMAs are now added to the backing inode's prio list. (5) Holes may be punched in anonymous VMAs with munmap(), releasing parts of the backing region. The VMA and region structs will be split if necessary. (6) sys_shmdt() only releases one attachment to a SYSV IPC shared memory segment instead of all the attachments at that address. 
Multiple shmat()'s return the same address under NOMMU-mode instead of different virtual addresses as under MMU-mode. (7) Core dumping for ELF-FDPIC requires fewer exceptions for NOMMU-mode. (8) /proc/maps is now the global list of mapped regions, and may list bits that aren't actually mapped anywhere. (9) /proc/meminfo gains a line (tagged "MmapCopy") that indicates the amount of RAM currently allocated by mmap to hold mappable regions that can't be mapped directly. These are copies of the backing device or file if not anonymous. These changes make NOMMU mode more similar to MMU mode. The downside is that NOMMU mode requires some extra memory to track things over NOMMU without this patch (VMAs are no longer shared, and there are now region structs). Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- fs/binfmt_elf_fdpic.c | 27 ++----------- fs/proc/internal.h | 2 - fs/proc/meminfo.c | 6 +++ fs/proc/nommu.c | 71 +++++++++++++++------------------ fs/proc/task_nommu.c | 108 +++++++++++++++++++++++++++++++++++--------------- 5 files changed, 116 insertions(+), 98 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index aa5b43205e3..22baf1b1349 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1567,11 +1567,9 @@ end_coredump: static int elf_fdpic_dump_segments(struct file *file, size_t *size, unsigned long *limit, unsigned long mm_flags) { - struct vm_list_struct *vml; - - for (vml = current->mm->context.vmlist; vml; vml = vml->next) { - struct vm_area_struct *vma = vml->vma; + struct vm_area_struct *vma; + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { if (!maydump(vma, mm_flags)) continue; @@ -1617,9 +1615,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, elf_fpxregset_t *xfpu = NULL; #endif int thread_status_size = 0; -#ifndef CONFIG_MMU - struct vm_list_struct *vml; -#endif elf_addr_t *auxv; unsigned long mm_flags; @@ -1685,13 +1680,7 @@ static int 
elf_fdpic_core_dump(long signr, struct pt_regs *regs, fill_prstatus(prstatus, current, signr); elf_core_copy_regs(&prstatus->pr_reg, regs); -#ifdef CONFIG_MMU segs = current->mm->map_count; -#else - segs = 0; - for (vml = current->mm->context.vmlist; vml; vml = vml->next) - segs++; -#endif #ifdef ELF_CORE_EXTRA_PHDRS segs += ELF_CORE_EXTRA_PHDRS; #endif @@ -1766,20 +1755,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs, mm_flags = current->mm->flags; /* write program headers for segments dump */ - for ( -#ifdef CONFIG_MMU - vma = current->mm->mmap; vma; vma = vma->vm_next -#else - vml = current->mm->context.vmlist; vml; vml = vml->next -#endif - ) { + for (vma = current->mm->mmap; vma; vma = vma->vm_next) { struct elf_phdr phdr; size_t sz; -#ifndef CONFIG_MMU - vma = vml->vma; -#endif - sz = vma->vm_end - vma->vm_start; phdr.p_type = PT_LOAD; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3e8aeb8b61c..cd53ff83849 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -41,8 +41,6 @@ do { \ (vmi)->used = 0; \ (vmi)->largest_chunk = 0; \ } while(0) - -extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); #endif extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index b1675c4e66d..43d23948384 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -73,6 +73,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) "HighFree: %8lu kB\n" "LowTotal: %8lu kB\n" "LowFree: %8lu kB\n" +#endif +#ifndef CONFIG_MMU + "MmapCopy: %8lu kB\n" #endif "SwapTotal: %8lu kB\n" "SwapFree: %8lu kB\n" @@ -115,6 +118,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freehigh), K(i.totalram-i.totalhigh), K(i.freeram-i.freehigh), +#endif +#ifndef CONFIG_MMU + K((unsigned long) atomic_read(&mmap_pages_allocated)), #endif K(i.totalswap), K(i.freeswap), diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 3f87d263294..b446d7ad0b0 100644 --- 
a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -33,33 +33,33 @@ #include "internal.h" /* - * display a single VMA to a sequenced file + * display a single region to a sequenced file */ -int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) +static int nommu_region_show(struct seq_file *m, struct vm_region *region) { unsigned long ino = 0; struct file *file; dev_t dev = 0; int flags, len; - flags = vma->vm_flags; - file = vma->vm_file; + flags = region->vm_flags; + file = region->vm_file; if (file) { - struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + struct inode *inode = region->vm_file->f_path.dentry->d_inode; dev = inode->i_sb->s_dev; ino = inode->i_ino; } seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", - vma->vm_start, - vma->vm_end, + region->vm_start, + region->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', - ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, + ((loff_t)region->vm_pgoff) << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); if (file) { @@ -75,61 +75,54 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) } /* - * display a list of all the VMAs the kernel knows about + * display a list of all the REGIONs the kernel knows about * - nommu kernals have a single flat list */ -static int nommu_vma_list_show(struct seq_file *m, void *v) +static int nommu_region_list_show(struct seq_file *m, void *_p) { - struct vm_area_struct *vma; + struct rb_node *p = _p; - vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb); - return nommu_vma_show(m, vma); + return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb)); } -static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos) +static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos) { - struct rb_node *_rb; + struct rb_node *p; loff_t pos = *_pos; - void *next = NULL; - down_read(&nommu_vma_sem); + 
down_read(&nommu_region_sem); - for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) { - if (pos == 0) { - next = _rb; - break; - } - pos--; - } - - return next; + for (p = rb_first(&nommu_region_tree); p; p = rb_next(p)) + if (pos-- == 0) + return p; + return NULL; } -static void nommu_vma_list_stop(struct seq_file *m, void *v) +static void nommu_region_list_stop(struct seq_file *m, void *v) { - up_read(&nommu_vma_sem); + up_read(&nommu_region_sem); } -static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos) +static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return rb_next((struct rb_node *) v); } -static const struct seq_operations proc_nommu_vma_list_seqop = { - .start = nommu_vma_list_start, - .next = nommu_vma_list_next, - .stop = nommu_vma_list_stop, - .show = nommu_vma_list_show +static struct seq_operations proc_nommu_region_list_seqop = { + .start = nommu_region_list_start, + .next = nommu_region_list_next, + .stop = nommu_region_list_stop, + .show = nommu_region_list_show }; -static int proc_nommu_vma_list_open(struct inode *inode, struct file *file) +static int proc_nommu_region_list_open(struct inode *inode, struct file *file) { - return seq_open(file, &proc_nommu_vma_list_seqop); + return seq_open(file, &proc_nommu_region_list_seqop); } -static const struct file_operations proc_nommu_vma_list_operations = { - .open = proc_nommu_vma_list_open, +static const struct file_operations proc_nommu_region_list_operations = { + .open = proc_nommu_region_list_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -137,7 +130,7 @@ static const struct file_operations proc_nommu_vma_list_operations = { static int __init proc_nommu_init(void) { - proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations); + proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations); return 0; } diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 
d4a8be32b90..ca4a48d0d31 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -15,25 +15,25 @@ */ void task_mem(struct seq_file *m, struct mm_struct *mm) { - struct vm_list_struct *vml; + struct vm_area_struct *vma; + struct rb_node *p; unsigned long bytes = 0, sbytes = 0, slack = 0; down_read(&mm->mmap_sem); - for (vml = mm->context.vmlist; vml; vml = vml->next) { - if (!vml->vma) - continue; + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { + vma = rb_entry(p, struct vm_area_struct, vm_rb); - bytes += kobjsize(vml); + bytes += kobjsize(vma); if (atomic_read(&mm->mm_count) > 1 || - atomic_read(&vml->vma->vm_usage) > 1 - ) { - sbytes += kobjsize((void *) vml->vma->vm_start); - sbytes += kobjsize(vml->vma); + vma->vm_region || + vma->vm_flags & VM_MAYSHARE) { + sbytes += kobjsize((void *) vma->vm_start); + if (vma->vm_region) + sbytes += kobjsize(vma->vm_region); } else { - bytes += kobjsize((void *) vml->vma->vm_start); - bytes += kobjsize(vml->vma); - slack += kobjsize((void *) vml->vma->vm_start) - - (vml->vma->vm_end - vml->vma->vm_start); + bytes += kobjsize((void *) vma->vm_start); + slack += kobjsize((void *) vma->vm_start) - + (vma->vm_end - vma->vm_start); } } @@ -70,13 +70,14 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) unsigned long task_vsize(struct mm_struct *mm) { - struct vm_list_struct *tbp; + struct vm_area_struct *vma; + struct rb_node *p; unsigned long vsize = 0; down_read(&mm->mmap_sem); - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { - if (tbp->vma) - vsize += kobjsize((void *) tbp->vma->vm_start); + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { + vma = rb_entry(p, struct vm_area_struct, vm_rb); + vsize += vma->vm_region->vm_end - vma->vm_region->vm_start; } up_read(&mm->mmap_sem); return vsize; @@ -85,16 +86,15 @@ unsigned long task_vsize(struct mm_struct *mm) int task_statm(struct mm_struct *mm, int *shared, int *text, int *data, int *resident) { - struct vm_list_struct *tbp; + struct vm_area_struct 
*vma; + struct rb_node *p; int size = kobjsize(mm); down_read(&mm->mmap_sem); - for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) { - size += kobjsize(tbp); - if (tbp->vma) { - size += kobjsize(tbp->vma); - size += kobjsize((void *) tbp->vma->vm_start); - } + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { + vma = rb_entry(p, struct vm_area_struct, vm_rb); + size += kobjsize(vma); + size += kobjsize((void *) vma->vm_start); } size += (*text = mm->end_code - mm->start_code); @@ -104,21 +104,63 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, return size; } +/* + * display a single VMA to a sequenced file + */ +static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) +{ + unsigned long ino = 0; + struct file *file; + dev_t dev = 0; + int flags, len; + + flags = vma->vm_flags; + file = vma->vm_file; + + if (file) { + struct inode *inode = vma->vm_file->f_path.dentry->d_inode; + dev = inode->i_sb->s_dev; + ino = inode->i_ino; + } + + seq_printf(m, + "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + vma->vm_start, + vma->vm_end, + flags & VM_READ ? 'r' : '-', + flags & VM_WRITE ? 'w' : '-', + flags & VM_EXEC ? 'x' : '-', + flags & VM_MAYSHARE ? flags & VM_SHARED ? 
'S' : 's' : 'p', + vma->vm_pgoff << PAGE_SHIFT, + MAJOR(dev), MINOR(dev), ino, &len); + + if (file) { + len = 25 + sizeof(void *) * 6 - len; + if (len < 1) + len = 1; + seq_printf(m, "%*c", len, ' '); + seq_path(m, &file->f_path, ""); + } + + seq_putc(m, '\n'); + return 0; +} + /* * display mapping lines for a particular process's /proc/pid/maps */ -static int show_map(struct seq_file *m, void *_vml) +static int show_map(struct seq_file *m, void *_p) { - struct vm_list_struct *vml = _vml; + struct rb_node *p = _p; - return nommu_vma_show(m, vml->vma); + return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb)); } static void *m_start(struct seq_file *m, loff_t *pos) { struct proc_maps_private *priv = m->private; - struct vm_list_struct *vml; struct mm_struct *mm; + struct rb_node *p; loff_t n = *pos; /* pin the task and mm whilst we play with them */ @@ -134,9 +176,9 @@ static void *m_start(struct seq_file *m, loff_t *pos) } /* start from the Nth VMA */ - for (vml = mm->context.vmlist; vml; vml = vml->next) + for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) if (n-- == 0) - return vml; + return p; return NULL; } @@ -152,12 +194,12 @@ static void m_stop(struct seq_file *m, void *_vml) } } -static void *m_next(struct seq_file *m, void *_vml, loff_t *pos) +static void *m_next(struct seq_file *m, void *_p, loff_t *pos) { - struct vm_list_struct *vml = _vml; + struct rb_node *p = _p; (*pos)++; - return vml ? vml->next : NULL; + return p ? rb_next(p) : NULL; } static const struct seq_operations proc_pid_maps_ops = { -- cgit v1.2.3 From 38f714795b7cf4103c54152200ca66b524f8ed6e Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: NOMMU: Improve procfs output using per-MM VMAs Improve procfs output using per-MM VMAs for process memory accounting. 
Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- fs/proc/task_nommu.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index ca4a48d0d31..343ea1216bc 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -16,24 +16,31 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) { struct vm_area_struct *vma; + struct vm_region *region; struct rb_node *p; - unsigned long bytes = 0, sbytes = 0, slack = 0; + unsigned long bytes = 0, sbytes = 0, slack = 0, size; down_read(&mm->mmap_sem); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); bytes += kobjsize(vma); + + region = vma->vm_region; + if (region) { + size = kobjsize(region); + size += region->vm_end - region->vm_start; + } else { + size = vma->vm_end - vma->vm_start; + } + if (atomic_read(&mm->mm_count) > 1 || - vma->vm_region || vma->vm_flags & VM_MAYSHARE) { - sbytes += kobjsize((void *) vma->vm_start); - if (vma->vm_region) - sbytes += kobjsize(vma->vm_region); + sbytes += size; } else { - bytes += kobjsize((void *) vma->vm_start); - slack += kobjsize((void *) vma->vm_start) - - (vma->vm_end - vma->vm_start); + bytes += size; + if (region) + slack = region->vm_end - vma->vm_end; } } @@ -77,7 +84,7 @@ unsigned long task_vsize(struct mm_struct *mm) down_read(&mm->mmap_sem); for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); - vsize += vma->vm_region->vm_end - vma->vm_region->vm_start; + vsize += vma->vm_end - vma->vm_start; } up_read(&mm->mmap_sem); return vsize; @@ -87,6 +94,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, int *data, int *resident) { struct vm_area_struct *vma; + struct vm_region *region; struct rb_node *p; int size = kobjsize(mm); @@ -94,7 +102,11 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, for (p = 
rb_first(&mm->mm_rb); p; p = rb_next(p)) { vma = rb_entry(p, struct vm_area_struct, vm_rb); size += kobjsize(vma); - size += kobjsize((void *) vma->vm_start); + region = vma->vm_region; + if (region) { + size += kobjsize(region); + size += region->vm_end - region->vm_start; + } } size += (*text = mm->end_code - mm->start_code); -- cgit v1.2.3 From f4bbf51050a1e1dd485e9cd89eef4619a7453d71 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: FDPIC: Don't attempt to expand the userspace stack to fill the space allocated Stop the ELF-FDPIC binfmt from attempting to expand the userspace stack and brk segments to fill the space actually allocated for it. The space allocated may be rounded up by mmap(), and may be wasted. However, finding out how much space we actually obtained uses the contentious kobjsize() function which we'd like to get rid of as it doesn't necessarily work for all slab allocators. Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- fs/binfmt_elf_fdpic.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 22baf1b1349..f3e72c5c19f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -168,9 +168,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, struct elf_fdpic_params exec_params, interp_params; struct elf_phdr *phdr; unsigned long stack_size, entryaddr; -#ifndef CONFIG_MMU - unsigned long fullsize; -#endif #ifdef ELF_FDPIC_PLAT_INIT unsigned long dynaddr; #endif @@ -390,11 +387,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, goto error_kill; } - /* expand the stack mapping to use up the entire allocation granule */ - fullsize = kobjsize((char *) current->mm->start_brk); - if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size, - fullsize, 0, 0))) - stack_size = fullsize; up_write(&current->mm->mmap_sem); current->mm->brk = current->mm->start_brk; -- cgit v1.2.3 From 
0f3e442a403a344a5d0a49af9ecd7632b7e7343a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 8 Jan 2009 12:04:47 +0000 Subject: FLAT: Don't attempt to expand the userspace stack to fill the space allocated Stop the FLAT binfmt from attempting to expand the userspace stack and brk segments to fill the space actually allocated for it. The space allocated may be rounded up by mmap(), and may be wasted. However, finding out how much space we actually obtained uses the contentious kobjsize() function which we'd like to get rid of as it doesn't necessarily work for all slab allocators. Signed-off-by: David Howells Tested-by: Mike Frysinger Acked-by: Paul Mundt --- fs/binfmt_flat.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) (limited to 'fs') diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 7bbd5c6b372..5cebf0b3779 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -417,8 +417,8 @@ static int load_flat_file(struct linux_binprm * bprm, unsigned long textpos = 0, datapos = 0, result; unsigned long realdatastart = 0; unsigned long text_len, data_len, bss_len, stack_len, flags; - unsigned long len, reallen, memp = 0; - unsigned long extra, rlim; + unsigned long len, memp = 0; + unsigned long memp_size, extra, rlim; unsigned long *reloc = 0, *rp; struct inode *inode; int i, rev, relocs = 0; @@ -543,17 +543,10 @@ static int load_flat_file(struct linux_binprm * bprm, } len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); + len = PAGE_ALIGN(len); down_write(&current->mm->mmap_sem); realdatastart = do_mmap(0, 0, len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); - /* Remap to use all availabe slack region space */ - if (realdatastart && (realdatastart < (unsigned long)-4096)) { - reallen = kobjsize((void *)realdatastart); - if (reallen > len) { - realdatastart = do_mremap(realdatastart, len, - reallen, MREMAP_FIXED, realdatastart); - } - } up_write(&current->mm->mmap_sem); if (realdatastart == 0 || realdatastart >= 
(unsigned long)-4096) { @@ -591,21 +584,14 @@ static int load_flat_file(struct linux_binprm * bprm, reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len)); memp = realdatastart; - + memp_size = len; } else { len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); + len = PAGE_ALIGN(len); down_write(&current->mm->mmap_sem); textpos = do_mmap(0, 0, len, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); - /* Remap to use all availabe slack region space */ - if (textpos && (textpos < (unsigned long) -4096)) { - reallen = kobjsize((void *)textpos); - if (reallen > len) { - textpos = do_mremap(textpos, len, reallen, - MREMAP_FIXED, textpos); - } - } up_write(&current->mm->mmap_sem); if (!textpos || textpos >= (unsigned long) -4096) { @@ -622,7 +608,7 @@ static int load_flat_file(struct linux_binprm * bprm, reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) + MAX_SHARED_LIBS * sizeof(unsigned long)); memp = textpos; - + memp_size = len; #ifdef CONFIG_BINFMT_ZFLAT /* * load it all in and treat it like a RAM load from now on @@ -680,10 +666,12 @@ static int load_flat_file(struct linux_binprm * bprm, * set up the brk stuff, uses any slack left in data/bss/stack * allocation. We put the brk after the bss (between the bss * and stack) like other platforms. + * Userspace code relies on the stack pointer starting out at + * an address right at the end of a page. 
*/ current->mm->start_brk = datapos + data_len + bss_len; current->mm->brk = (current->mm->start_brk + 3) & ~3; - current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len; + current->mm->context.end_brk = memp + memp_size - stack_len; } if (flags & FLAT_FLAG_KTRACE) @@ -790,8 +778,8 @@ static int load_flat_file(struct linux_binprm * bprm, /* zero the BSS, BRK and stack areas */ memset((void*)(datapos + data_len), 0, bss_len + - (memp + kobjsize((void *) memp) - stack_len - /* end brk */ - libinfo->lib_list[id].start_brk) + /* start brk */ + (memp + memp_size - stack_len - /* end brk */ + libinfo->lib_list[id].start_brk) + /* start brk */ stack_len); return 0; -- cgit v1.2.3