From f52111b1546943545e67573c4dde1c7613ca33d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 18:19:16 -0400 Subject: [PATCH] take init_files to fs/file.c Signed-off-by: Al Viro --- fs/file.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/file.c') diff --git a/fs/file.c b/fs/file.c index 4c6f0ea12c4..754cd05b06a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -275,3 +275,16 @@ void __init files_defer_init(void) for_each_possible_cpu(i) fdtable_defer_list_init(i); } + +struct files_struct init_files = { + .count = ATOMIC_INIT(1), + .fdt = &init_files.fdtab, + .fdtab = { + .max_fds = NR_OPEN_DEFAULT, + .fd = &init_files.fd_array[0], + .close_on_exec = (fd_set *)&init_files.close_on_exec_init, + .open_fds = (fd_set *)&init_files.open_fds_init, + .rcu = RCU_HEAD_INIT, + }, + .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), +}; -- cgit v1.2.3 From 02afc6267f6d55d47aba9fcafdbd1b7230d2294a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 19:42:56 -0400 Subject: [PATCH] dup_fd() fixes, part 1 Move the sucker to fs/file.c in preparation to the rest Signed-off-by: Al Viro --- fs/file.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) (limited to 'fs/file.c') diff --git a/fs/file.c b/fs/file.c index 754cd05b06a..7dbadaaf00f 100644 --- a/fs/file.c +++ b/fs/file.c @@ -261,6 +261,136 @@ int expand_files(struct files_struct *files, int nr) return expand_fdtable(files, nr); } +static int count_open_files(struct fdtable *fdt) +{ + int size = fdt->max_fds; + int i; + + /* Find the last open fd */ + for (i = size/(8*sizeof(long)); i > 0; ) { + if (fdt->open_fds->fds_bits[--i]) + break; + } + i = (i+1) * 8 * sizeof(long); + return i; +} + +static struct files_struct *alloc_files(void) +{ + struct files_struct *newf; + struct fdtable *fdt; + + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); + if (!newf) + goto out; + + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + fdt = &newf->fdtab; + fdt->max_fds = NR_OPEN_DEFAULT; + fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + fdt->open_fds = (fd_set *)&newf->open_fds_init; + fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&fdt->rcu); + fdt->next = NULL; + rcu_assign_pointer(newf->fdt, fdt); +out: + return newf; +} + +/* + * Allocate a new files structure and copy contents from the + * passed in files structure. + * errorp will be valid only when the returned files_struct is NULL. + */ +struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) +{ + struct files_struct *newf; + struct file **old_fds, **new_fds; + int open_files, size, i; + struct fdtable *old_fdt, *new_fdt; + + *errorp = -ENOMEM; + newf = alloc_files(); + if (!newf) + goto out; + + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + new_fdt = files_fdtable(newf); + open_files = count_open_files(old_fdt); + + /* + * Check whether we need to allocate a larger fd array and fd set. + * Note: we're not a clone task, so the open count won't change. + */ + if (open_files > new_fdt->max_fds) { + new_fdt->max_fds = 0; + spin_unlock(&oldf->file_lock); + spin_lock(&newf->file_lock); + *errorp = expand_files(newf, open_files-1); + spin_unlock(&newf->file_lock); + if (*errorp < 0) + goto out_release; + new_fdt = files_fdtable(newf); + /* + * Reacquire the oldf lock and a pointer to its fd table + * who knows it may have a new bigger fd table. We need + * the latest pointer. + */ + spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); + } + + old_fds = old_fdt->fd; + new_fds = new_fdt->fd; + + memcpy(new_fdt->open_fds->fds_bits, + old_fdt->open_fds->fds_bits, open_files/8); + memcpy(new_fdt->close_on_exec->fds_bits, + old_fdt->close_on_exec->fds_bits, open_files/8); + + for (i = open_files; i != 0; i--) { + struct file *f = *old_fds++; + if (f) { + get_file(f); + } else { + /* + * The fd may be claimed in the fd bitmap but not yet + * instantiated in the files array if a sibling thread + * is partway through open(). So make sure that this + * fd is available to the new process. + */ + FD_CLR(open_files - i, new_fdt->open_fds); + } + rcu_assign_pointer(*new_fds++, f); + } + spin_unlock(&oldf->file_lock); + + /* compute the remainder to be cleared */ + size = (new_fdt->max_fds - open_files) * sizeof(struct file *); + + /* This is long word aligned thus could use a optimized version */ + memset(new_fds, 0, size); + + if (new_fdt->max_fds > open_files) { + int left = (new_fdt->max_fds-open_files)/8; + int start = open_files / (8 * sizeof(unsigned long)); + + memset(&new_fdt->open_fds->fds_bits[start], 0, left); + memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); + } + + return newf; + +out_release: + kmem_cache_free(files_cachep, newf); +out: + return NULL; +} + static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); -- cgit v1.2.3 From 9dec3c4d306b09b31331e475e895bb9674e16d81 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:02:45 -0400 Subject: [PATCH] dup_fd() part 2 use alloc_fdtable() instead of expand_files(), get rid of pointless grabbing newf->file_lock, kill magic in copy_fdtable() that used to be there only to skip copying when called from dup_fd(). Signed-off-by: Al Viro --- fs/file.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'fs/file.c') diff --git a/fs/file.c b/fs/file.c index 7dbadaaf00f..6491b2b5bc3 100644 --- a/fs/file.c +++ b/fs/file.c @@ -119,8 +119,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) unsigned int cpy, set; BUG_ON(nfdt->max_fds < ofdt->max_fds); - if (ofdt->max_fds == 0) - return; cpy = ofdt->max_fds * sizeof(struct file *); set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); @@ -327,14 +325,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) * Note: we're not a clone task, so the open count won't change. */ if (open_files > new_fdt->max_fds) { - new_fdt->max_fds = 0; spin_unlock(&oldf->file_lock); - spin_lock(&newf->file_lock); - *errorp = expand_files(newf, open_files-1); - spin_unlock(&newf->file_lock); - if (*errorp < 0) + + new_fdt = alloc_fdtable(open_files - 1); + if (!new_fdt) { + *errorp = -ENOMEM; + goto out_release; + } + + /* beyond sysctl_nr_open; nothing to do */ + if (unlikely(new_fdt->max_fds < open_files)) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + *errorp = -EMFILE; goto out_release; - new_fdt = files_fdtable(newf); + } + rcu_assign_pointer(files->fdt, new_fdt); + /* * Reacquire the oldf lock and a pointer to its fd table * who knows it may have a new bigger fd table. We need -- cgit v1.2.3 From afbec7fff4928c273a1f1bb14dfdfdf62688a193 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:11:17 -0400 Subject: [PATCH] dup_fd() - part 3 merge alloc_files() into dup_fd(), leave setting newf->fdt until the end Signed-off-by: Al Viro --- fs/file.c | 43 +++++++++++++++---------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) (limited to 'fs/file.c') diff --git a/fs/file.c b/fs/file.c index 6491b2b5bc3..689d2b6947e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -273,31 +273,6 @@ static int count_open_files(struct fdtable *fdt) return i; } -static struct files_struct *alloc_files(void) -{ - struct files_struct *newf; - struct fdtable *fdt; - - newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); - if (!newf) - goto out; - - atomic_set(&newf->count, 1); - - spin_lock_init(&newf->file_lock); - newf->next_fd = 0; - fdt = &newf->fdtab; - fdt->max_fds = NR_OPEN_DEFAULT; - fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; - fdt->open_fds = (fd_set *)&newf->open_fds_init; - fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&fdt->rcu); - fdt->next = NULL; - rcu_assign_pointer(newf->fdt, fdt); -out: - return newf; -} - /* * Allocate a new files structure and copy contents from the * passed in files structure. @@ -311,13 +286,24 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; - newf = alloc_files(); + newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); if (!newf) goto out; + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + newf->next_fd = 0; + new_fdt = &newf->fdtab; + new_fdt->max_fds = NR_OPEN_DEFAULT; + new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; + new_fdt->open_fds = (fd_set *)&newf->open_fds_init; + new_fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&new_fdt->rcu); + new_fdt->next = NULL; + spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); - new_fdt = files_fdtable(newf); open_files = count_open_files(old_fdt); /* @@ -341,7 +327,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) *errorp = -EMFILE; goto out_release; } - rcu_assign_pointer(files->fdt, new_fdt); /* * Reacquire the oldf lock and a pointer to its fd table @@ -391,6 +376,8 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); } + rcu_assign_pointer(newf->fdt, new_fdt); + return newf; out_release: -- cgit v1.2.3 From adbecb128cd2cc5d14b0ebef6d020ced0efd0ec6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 8 May 2008 21:19:42 -0400 Subject: [PATCH] dup_fd() part 4 - race fix Parent _can_ be a clone task, contrary to the comment. Moreover, more files could be opened while we allocate a copy, in which case we end up copying only part into new descriptor table. Since what we get _is_ affected by all changes in the old range, we can get rather weird effects - e.g. dup2(0, 1024); close(0); in parallel with fork() resulting in child that sees the effect of close(), but not that of dup2() done just before that close(). What we need is to recalculate the open_count after having reacquired ->file_lock and if external fdtable we'd just allocated is too small for it, free the sucker and redo allocation. Signed-off-by: Al Viro --- fs/file.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/file.c') diff --git a/fs/file.c b/fs/file.c index 689d2b6947e..0f705c7cfef 100644 --- a/fs/file.c +++ b/fs/file.c @@ -308,11 +308,16 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) /* * Check whether we need to allocate a larger fd array and fd set. - * Note: we're not a clone task, so the open count won't change. */ - if (open_files > new_fdt->max_fds) { + while (unlikely(open_files > new_fdt->max_fds)) { spin_unlock(&oldf->file_lock); + if (new_fdt != &newf->fdtab) { + free_fdarr(new_fdt); + free_fdset(new_fdt); + kfree(new_fdt); + } + new_fdt = alloc_fdtable(open_files - 1); if (!new_fdt) { *errorp = -ENOMEM; @@ -335,6 +340,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) */ spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); + open_files = count_open_files(old_fdt); } old_fds = old_fdt->fd; -- cgit v1.2.3 From eceea0b3df05ed262ae32e0c6340cc7a3626632d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 May 2008 10:08:32 -0400 Subject: [PATCH] avoid multiplication overflows and signedness issues for max_fds Limit sysctl_nr_open - we don't want ->max_fds to exceed MAX_INT and we don't want size calculation for ->fd[] to overflow. Signed-off-by: Al Viro --- fs/file.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/file.c') diff --git a/fs/file.c b/fs/file.c index 0f705c7cfef..7b3887e054d 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,6 +26,8 @@ struct fdtable_defer { }; int sysctl_nr_open __read_mostly = 1024*1024; +int sysctl_nr_open_min = BITS_PER_LONG; +int sysctl_nr_open_max = 1024 * 1024; /* raised later */ /* * We use this list to defer free fdtables that have vmalloced @@ -405,6 +407,8 @@ void __init files_defer_init(void) int i; for_each_possible_cpu(i) fdtable_defer_list_init(i); + sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & + -BITS_PER_LONG; } struct files_struct init_files = { -- cgit v1.2.3