diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-10-03 19:28:46 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-03 19:28:46 +0200 |
commit | f68ec0c24755e5cdb779be6240925f2175311d84 (patch) | |
tree | a7b7128e61a8456385d82bd1c7ca5f14eecbf2ca /ipc | |
parent | 98920dc3d1113b883cbc73e3293446d3525c6042 (diff) | |
parent | 94aca1dac6f6d21f4b07e4864baf7768cabcc6e7 (diff) |
Merge commit 'v2.6.27-rc8' into x86/setup
Diffstat (limited to 'ipc')
-rw-r--r-- | ipc/ipc_sysctl.c | 72 | ||||
-rw-r--r-- | ipc/ipcns_notifier.c | 20 | ||||
-rw-r--r-- | ipc/mqueue.c | 31 | ||||
-rw-r--r-- | ipc/sem.c | 316 | ||||
-rw-r--r-- | ipc/shm.c | 24 | ||||
-rw-r--r-- | ipc/util.c | 61 | ||||
-rw-r--r-- | ipc/util.h | 6 |
7 files changed, 263 insertions, 267 deletions
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index d3497465cc0..69bc85978ba 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -27,15 +27,17 @@ static void *get_ipc(ctl_table *table) } /* - * Routine that is called when a tunable has successfully been changed by - * hand and it has a callback routine registered on the ipc namespace notifier - * chain: we don't want such tunables to be recomputed anymore upon memory - * add/remove or ipc namespace creation/removal. - * They can come back to a recomputable state by being set to a <0 value. + * Routine that is called when the file "auto_msgmni" has successfully been + * written. + * Two values are allowed: + * 0: unregister msgmni's callback routine from the ipc namespace notifier + * chain. This means that msgmni won't be recomputed anymore upon memory + * add/remove or ipc namespace creation/removal. + * 1: register back the callback routine. */ -static void tunable_set_callback(int val) +static void ipc_auto_callback(int val) { - if (val >= 0) + if (!val) unregister_ipcns_notifier(current->nsproxy->ipc_ns); else { /* @@ -71,7 +73,12 @@ static int proc_ipc_callback_dointvec(ctl_table *table, int write, rc = proc_dointvec(&ipc_table, write, filp, buffer, lenp, ppos); if (write && !rc && lenp_bef == *lenp) - tunable_set_callback(*((int *)(ipc_table.data))); + /* + * Tunable has successfully been changed by hand. Disable its + * automatic adjustment. This simply requires unregistering + * the notifiers that trigger recalculation. + */ + unregister_ipcns_notifier(current->nsproxy->ipc_ns); return rc; } @@ -87,10 +94,39 @@ static int proc_ipc_doulongvec_minmax(ctl_table *table, int write, lenp, ppos); } +static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, + struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + size_t lenp_bef = *lenp; + int oldval; + int rc; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + oldval = *((int *)(ipc_table.data)); + + rc = proc_dointvec_minmax(&ipc_table, write, filp, buffer, lenp, ppos); + + if (write && !rc && lenp_bef == *lenp) { + int newval = *((int *)(ipc_table.data)); + /* + * The file "auto_msgmni" has correctly been set. + * React by (un)registering the corresponding tunable, if the + * value has changed. + */ + if (newval != oldval) + ipc_auto_callback(newval); + } + + return rc; +} + #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL #define proc_ipc_callback_dointvec NULL +#define proc_ipcauto_dointvec_minmax NULL #endif #ifdef CONFIG_SYSCTL_SYSCALL @@ -142,14 +178,11 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, rc = sysctl_ipc_data(table, name, nlen, oldval, oldlenp, newval, newlen); - if (newval && newlen && rc > 0) { + if (newval && newlen && rc > 0) /* * Tunable has successfully been changed from userland */ - int *data = get_ipc(table); - - tunable_set_callback(*data); - } + unregister_ipcns_notifier(current->nsproxy->ipc_ns); return rc; } @@ -158,6 +191,9 @@ static int sysctl_ipc_registered_data(ctl_table *table, int __user *name, #define sysctl_ipc_registered_data NULL #endif +static int zero; +static int one = 1; + static struct ctl_table ipc_kern_table[] = { { .ctl_name = KERN_SHMMAX, @@ -222,6 +258,16 @@ static struct ctl_table ipc_kern_table[] = { .proc_handler = proc_ipc_dointvec, .strategy = sysctl_ipc_data, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "auto_msgmni", + .data = &init_ipc_ns.auto_msgmni, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_ipcauto_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, {} }; diff --git a/ipc/ipcns_notifier.c b/ipc/ipcns_notifier.c index 70ff09183f7..b9b31a4f77e 100644 --- a/ipc/ipcns_notifier.c +++ b/ipc/ipcns_notifier.c @@ -55,25 +55,35 @@ static int ipcns_callback(struct notifier_block *self, int register_ipcns_notifier(struct ipc_namespace *ns) { + int rc; + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); ns->ipcns_nb.notifier_call = ipcns_callback; ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; - return blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); + rc = blocking_notifier_chain_register(&ipcns_chain, &ns->ipcns_nb); + if (!rc) + ns->auto_msgmni = 1; + return rc; } int cond_register_ipcns_notifier(struct ipc_namespace *ns) { + int rc; + memset(&ns->ipcns_nb, 0, sizeof(ns->ipcns_nb)); ns->ipcns_nb.notifier_call = ipcns_callback; ns->ipcns_nb.priority = IPCNS_CALLBACK_PRI; - return blocking_notifier_chain_cond_register(&ipcns_chain, + rc = blocking_notifier_chain_cond_register(&ipcns_chain, &ns->ipcns_nb); + if (!rc) + ns->auto_msgmni = 1; + return rc; } -int unregister_ipcns_notifier(struct ipc_namespace *ns) +void unregister_ipcns_notifier(struct ipc_namespace *ns) { - return blocking_notifier_chain_unregister(&ipcns_chain, - &ns->ipcns_nb); + blocking_notifier_chain_unregister(&ipcns_chain, &ns->ipcns_nb); + ns->auto_msgmni = 0; } int ipcns_notify(unsigned long val) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index b3b69fd5133..96fb36cd987 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -207,7 +207,7 @@ static int mqueue_get_sb(struct file_system_type *fs_type, return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct mqueue_inode_info *p = (struct mqueue_inode_info *) foo; @@ -314,15 +314,11 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry) * through std routines) */ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, - size_t count, loff_t * off) + size_t count, loff_t *off) { struct mqueue_inode_info *info = MQUEUE_I(filp->f_path.dentry->d_inode); char buffer[FILENT_SIZE]; - size_t slen; - loff_t o; - - if (!count) - return 0; + ssize_t ret; spin_lock(&info->lock); snprintf(buffer, sizeof(buffer), @@ -335,21 +331,14 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data, pid_vnr(info->notify_owner)); spin_unlock(&info->lock); buffer[sizeof(buffer)-1] = '\0'; - slen = strlen(buffer)+1; - - o = *off; - if (o > slen) - return 0; - - if (o + count > slen) - count = slen - o; - if (copy_to_user(u_data, buffer + o, count)) - return -EFAULT; + ret = simple_read_from_buffer(u_data, count, off, buffer, + strlen(buffer)); + if (ret <= 0) + return ret; - *off = o + count; filp->f_path.dentry->d_inode->i_atime = filp->f_path.dentry->d_inode->i_ctime = CURRENT_TIME; - return count; + return ret; } static int mqueue_flush_file(struct file *filp, fl_owner_t id) @@ -649,7 +638,7 @@ static int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, return ERR_PTR(-EINVAL); } - if (permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE], NULL)) { + if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { dput(dentry); mntput(mqueue_mnt); return ERR_PTR(-EACCES); @@ -1054,7 +1043,7 @@ retry: } timeo = MAX_SCHEDULE_TIMEOUT; - ret = netlink_attachskb(sock, nc, 0, &timeo, NULL); + ret = netlink_attachskb(sock, nc, &timeo, NULL); if (ret == 1) goto retry; if (ret) { diff --git a/ipc/sem.c b/ipc/sem.c index e9418df5ff3..bf1bc36cb7e 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -272,9 +272,8 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) ns->used_sems += nsems; sma->sem_base = (struct sem *) &sma[1]; - /* sma->sem_pending = NULL; */ - sma->sem_pending_last = &sma->sem_pending; - /* sma->undo = NULL; */ + INIT_LIST_HEAD(&sma->sem_pending); + INIT_LIST_HEAD(&sma->list_id); sma->sem_nsems = nsems; sma->sem_ctime = get_seconds(); sem_unlock(sma); @@ -331,38 +330,6 @@ asmlinkage long sys_semget(key_t key, int nsems, int semflg) return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params); } -/* Manage the doubly linked list sma->sem_pending as a FIFO: - * insert new queue elements at the tail sma->sem_pending_last. - */ -static inline void append_to_queue (struct sem_array * sma, - struct sem_queue * q) -{ - *(q->prev = sma->sem_pending_last) = q; - *(sma->sem_pending_last = &q->next) = NULL; -} - -static inline void prepend_to_queue (struct sem_array * sma, - struct sem_queue * q) -{ - q->next = sma->sem_pending; - *(q->prev = &sma->sem_pending) = q; - if (q->next) - q->next->prev = &q->next; - else /* sma->sem_pending_last == &sma->sem_pending */ - sma->sem_pending_last = &q->next; -} - -static inline void remove_from_queue (struct sem_array * sma, - struct sem_queue * q) -{ - *(q->prev) = q->next; - if (q->next) - q->next->prev = q->prev; - else /* sma->sem_pending_last == &q->next */ - sma->sem_pending_last = q->prev; - q->prev = NULL; /* mark as removed */ -} - /* * Determine whether a sequence of semaphore operations would succeed * all at once. Return 0 if yes, 1 if need to sleep, else return error code. @@ -438,16 +405,15 @@ static void update_queue (struct sem_array * sma) int error; struct sem_queue * q; - q = sma->sem_pending; - while(q) { + q = list_entry(sma->sem_pending.next, struct sem_queue, list); + while (&q->list != &sma->sem_pending) { error = try_atomic_semop(sma, q->sops, q->nsops, q->undo, q->pid); /* Does q->sleeper still need to sleep? */ if (error <= 0) { struct sem_queue *n; - remove_from_queue(sma,q); - q->status = IN_WAKEUP; + /* * Continue scanning. The next operation * that must be checked depends on the type of the @@ -458,11 +424,26 @@ static void update_queue (struct sem_array * sma) * for semaphore values to become 0. * - if the operation didn't modify the array, * then just continue. + * The order of list_del() and reading ->next + * is crucial: In the former case, the list_del() + * must be done first [because we might be the + * first entry in ->sem_pending], in the latter + * case the list_del() must be done last + * [because the list is invalid after the list_del()] */ - if (q->alter) - n = sma->sem_pending; - else - n = q->next; + if (q->alter) { + list_del(&q->list); + n = list_entry(sma->sem_pending.next, + struct sem_queue, list); + } else { + n = list_entry(q->list.next, struct sem_queue, + list); + list_del(&q->list); + } + + /* wake up the waiting thread */ + q->status = IN_WAKEUP; + wake_up_process(q->sleeper); /* hands-off: q will disappear immediately after * writing q->status. @@ -471,7 +452,7 @@ static void update_queue (struct sem_array * sma) q->status = error; q = n; } else { - q = q->next; + q = list_entry(q->list.next, struct sem_queue, list); } } } @@ -491,7 +472,7 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) struct sem_queue * q; semncnt = 0; - for (q = sma->sem_pending; q; q = q->next) { + list_for_each_entry(q, &sma->sem_pending, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -503,13 +484,14 @@ static int count_semncnt (struct sem_array * sma, ushort semnum) } return semncnt; } + static int count_semzcnt (struct sem_array * sma, ushort semnum) { int semzcnt; struct sem_queue * q; semzcnt = 0; - for (q = sma->sem_pending; q; q = q->next) { + list_for_each_entry(q, &sma->sem_pending, list) { struct sembuf * sops = q->sops; int nsops = q->nsops; int i; @@ -522,35 +504,41 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum) return semzcnt; } +void free_un(struct rcu_head *head) +{ + struct sem_undo *un = container_of(head, struct sem_undo, rcu); + kfree(un); +} + /* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex * remains locked on exit. */ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) { - struct sem_undo *un; - struct sem_queue *q; + struct sem_undo *un, *tu; + struct sem_queue *q, *tq; struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm); - /* Invalidate the existing undo structures for this semaphore set. - * (They will be freed without any further action in exit_sem() - * or during the next semop.) - */ - for (un = sma->undo; un; un = un->id_next) + /* Free the existing undo structures for this semaphore set. */ + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry_safe(un, tu, &sma->list_id, list_id) { + list_del(&un->list_id); + spin_lock(&un->ulp->lock); un->semid = -1; + list_del_rcu(&un->list_proc); + spin_unlock(&un->ulp->lock); + call_rcu(&un->rcu, free_un); + } /* Wake up all pending processes and let them fail with EIDRM. */ - q = sma->sem_pending; - while(q) { - struct sem_queue *n; - /* lazy remove_from_queue: we are killing the whole queue */ - q->prev = NULL; - n = q->next; + list_for_each_entry_safe(q, tq, &sma->sem_pending, list) { + list_del(&q->list); + q->status = IN_WAKEUP; wake_up_process(q->sleeper); /* doesn't sleep */ smp_wmb(); q->status = -EIDRM; /* hands-off q */ - q = n; } /* Remove the semaphore set from the IDR */ @@ -763,9 +751,12 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, for (i = 0; i < nsems; i++) sma->sem_base[i].semval = sem_io[i]; - for (un = sma->undo; un; un = un->id_next) + + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) { for (i = 0; i < nsems; i++) un->semadj[i] = 0; + } sma->sem_ctime = get_seconds(); /* maybe some queued-up processes were waiting for this */ update_queue(sma); @@ -797,12 +788,15 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, { int val = arg.val; struct sem_undo *un; + err = -ERANGE; if (val > SEMVMX || val < 0) goto out_unlock; - for (un = sma->undo; un; un = un->id_next) + assert_spin_locked(&sma->sem_perm.lock); + list_for_each_entry(un, &sma->list_id, list_id) un->semadj[semnum] = 0; + curr->semval = val; curr->sempid = task_tgid_vnr(current); sma->sem_ctime = get_seconds(); @@ -952,6 +946,8 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) return -ENOMEM; spin_lock_init(&undo_list->lock); atomic_set(&undo_list->refcnt, 1); + INIT_LIST_HEAD(&undo_list->list_proc); + current->sysvsem.undo_list = undo_list; } *undo_listp = undo_list; @@ -960,25 +956,27 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp) static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid) { - struct sem_undo **last, *un; + struct sem_undo *walk; - last = &ulp->proc_list; - un = *last; - while(un != NULL) { - if(un->semid==semid) - break; - if(un->semid==-1) { - *last=un->proc_next; - kfree(un); - } else { - last=&un->proc_next; - } - un=*last; + list_for_each_entry_rcu(walk, &ulp->list_proc, list_proc) { + if (walk->semid == semid) + return walk; } - return un; + return NULL; } -static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) +/** + * find_alloc_undo - Lookup (and if not present create) undo array + * @ns: namespace + * @semid: semaphore array id + * + * The function looks up (and if not present creates) the undo structure. + * The size of the undo structure depends on the size of the semaphore + * array, thus the alloc path is not that straightforward. + * Lifetime-rules: sem_undo is rcu-protected, on success, the function + * performs a rcu_read_lock(). + */ +static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) { struct sem_array *sma; struct sem_undo_list *ulp; @@ -990,13 +988,16 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) if (error) return ERR_PTR(error); + rcu_read_lock(); spin_lock(&ulp->lock); un = lookup_undo(ulp, semid); spin_unlock(&ulp->lock); if (likely(un!=NULL)) goto out; + rcu_read_unlock(); /* no undo structure around - allocate one. */ + /* step 1: figure out the size of the semaphore array */ sma = sem_lock_check(ns, semid); if (IS_ERR(sma)) return ERR_PTR(PTR_ERR(sma)); @@ -1004,37 +1005,45 @@ static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid) nsems = sma->sem_nsems; sem_getref_and_unlock(sma); + /* step 2: allocate new undo structure */ new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); if (!new) { sem_putref(sma); return ERR_PTR(-ENOMEM); } - new->semadj = (short *) &new[1]; - new->semid = semid; - spin_lock(&ulp->lock); - un = lookup_undo(ulp, semid); - if (un) { - spin_unlock(&ulp->lock); - kfree(new); - sem_putref(sma); - goto out; - } + /* step 3: Acquire the lock on semaphore array */ sem_lock_and_putref(sma); if (sma->sem_perm.deleted) { sem_unlock(sma); - spin_unlock(&ulp->lock); kfree(new); un = ERR_PTR(-EIDRM); goto out; } - new->proc_next = ulp->proc_list; - ulp->proc_list = new; - new->id_next = sma->undo; - sma->undo = new; - sem_unlock(sma); + spin_lock(&ulp->lock); + + /* + * step 4: check for races: did someone else allocate the undo struct? + */ + un = lookup_undo(ulp, semid); + if (un) { + kfree(new); + goto success; + } + /* step 5: initialize & link new undo structure */ + new->semadj = (short *) &new[1]; + new->ulp = ulp; + new->semid = semid; + assert_spin_locked(&ulp->lock); + list_add_rcu(&new->list_proc, &ulp->list_proc); + assert_spin_locked(&sma->sem_perm.lock); + list_add(&new->list_id, &sma->list_id); un = new; + +success: spin_unlock(&ulp->lock); + rcu_read_lock(); + sem_unlock(sma); out: return un; } @@ -1090,9 +1099,8 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops, alter = 1; } -retry_undos: if (undos) { - un = find_undo(ns, semid); + un = find_alloc_undo(ns, semid); if (IS_ERR(un)) { error = PTR_ERR(un); goto out_free; @@ -1102,19 +1110,37 @@ retry_undos: sma = sem_lock_check(ns, semid); if (IS_ERR(sma)) { + if (un) + rcu_read_unlock(); error = PTR_ERR(sma); goto out_free; } /* - * semid identifiers are not unique - find_undo may have + * semid identifiers are not unique - find_alloc_undo may have * allocated an undo structure, it was invalidated by an RMID - * and now a new array with received the same id. Check and retry. + * and now a new array with received the same id. Check and fail. + * This case can be detected checking un->semid. The existance of + * "un" itself is guaranteed by rcu. */ - if (un && un->semid == -1) { - sem_unlock(sma); - goto retry_undos; + error = -EIDRM; + if (un) { + if (un->semid == -1) { + rcu_read_unlock(); + goto out_unlock_free; + } else { + /* + * rcu lock can be released, "un" cannot disappear: + * - sem_lock is acquired, thus IPC_RMID is + * impossible. + * - exit_sem is impossible, it always operates on + * current (or a dead task). + */ + + rcu_read_unlock(); + } } + error = -EFBIG; if (max >= sma->sem_nsems) goto out_unlock_free; @@ -1138,17 +1164,15 @@ retry_undos: * task into the pending queue and go to sleep. */ - queue.sma = sma; queue.sops = sops; queue.nsops = nsops; queue.undo = un; queue.pid = task_tgid_vnr(current); - queue.id = semid; queue.alter = alter; if (alter) - append_to_queue(sma ,&queue); + list_add_tail(&queue.list, &sma->sem_pending); else - prepend_to_queue(sma ,&queue); + list_add(&queue.list, &sma->sem_pending); queue.status = -EINTR; queue.sleeper = current; @@ -1174,7 +1198,6 @@ retry_undos: sma = sem_lock(ns, semid); if (IS_ERR(sma)) { - BUG_ON(queue.prev != NULL); error = -EIDRM; goto out_free; } @@ -1192,7 +1215,7 @@ retry_undos: */ if (timeout && jiffies_left == 0) error = -EAGAIN; - remove_from_queue(sma,&queue); + list_del(&queue.list); goto out_unlock_free; out_unlock_free: @@ -1243,56 +1266,62 @@ int copy_semundo(unsigned long clone_flags, struct task_struct *tsk) */ void exit_sem(struct task_struct *tsk) { - struct sem_undo_list *undo_list; - struct sem_undo *u, **up; - struct ipc_namespace *ns; + struct sem_undo_list *ulp; - undo_list = tsk->sysvsem.undo_list; - if (!undo_list) + ulp = tsk->sysvsem.undo_list; + if (!ulp) return; tsk->sysvsem.undo_list = NULL; - if (!atomic_dec_and_test(&undo_list->refcnt)) + if (!atomic_dec_and_test(&ulp->refcnt)) return; - ns = tsk->nsproxy->ipc_ns; - /* There's no need to hold the semundo list lock, as current - * is the last task exiting for this undo list. - */ - for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) { + for (;;) { struct sem_array *sma; - int nsems, i; - struct sem_undo *un, **unp; + struct sem_undo *un; int semid; - - semid = u->semid; + int i; - if(semid == -1) - continue; - sma = sem_lock(ns, semid); + rcu_read_lock(); + un = list_entry(rcu_dereference(ulp->list_proc.next), + struct sem_undo, list_proc); + if (&un->list_proc == &ulp->list_proc) + semid = -1; + else + semid = un->semid; + rcu_read_unlock(); + + if (semid == -1) + break; + + sma = sem_lock_check(tsk->nsproxy->ipc_ns, un->semid); + + /* exit_sem raced with IPC_RMID, nothing to do */ if (IS_ERR(sma)) continue; - if (u->semid == -1) - goto next_entry; + un = lookup_undo(ulp, semid); + if (un == NULL) { + /* exit_sem raced with IPC_RMID+semget() that created + * exactly the same semid. Nothing to do. + */ + sem_unlock(sma); + continue; + } - BUG_ON(sem_checkid(sma, u->semid)); + /* remove un from the linked lists */ + assert_spin_locked(&sma->sem_perm.lock); + list_del(&un->list_id); - /* remove u from the sma->undo list */ - for (unp = &sma->undo; (un = *unp); unp = &un->id_next) { - if (u == un) - goto found; - } - printk ("exit_sem undo list error id=%d\n", u->semid); - goto next_entry; -found: - *unp = un->id_next; - /* perform adjustments registered in u */ - nsems = sma->sem_nsems; - for (i = 0; i < nsems; i++) { + spin_lock(&ulp->lock); + list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); + + /* perform adjustments registered in un */ + for (i = 0; i < sma->sem_nsems; i++) { struct sem * semaphore = &sma->sem_base[i]; - if (u->semadj[i]) { - semaphore->semval += u->semadj[i]; + if (un->semadj[i]) { + semaphore->semval += un->semadj[i]; /* * Range checks of the new semaphore value, * not defined by sus: @@ -1316,10 +1345,11 @@ found: sma->sem_otime = get_seconds(); /* maybe some queued-up processes were waiting for this */ update_queue(sma); -next_entry: sem_unlock(sma); + + call_rcu(&un->rcu, free_un); } - kfree(undo_list); + kfree(ulp); } #ifdef CONFIG_PROC_FS diff --git a/ipc/shm.c b/ipc/shm.c index 790240cd067..e77ec698cf4 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -112,23 +112,8 @@ void __init shm_init (void) } /* - * shm_lock_(check_)down routines are called in the paths where the rw_mutex - * is held to protect access to the idr tree. - */ -static inline struct shmid_kernel *shm_lock_down(struct ipc_namespace *ns, - int id) -{ - struct kern_ipc_perm *ipcp = ipc_lock_down(&shm_ids(ns), id); - - if (IS_ERR(ipcp)) - return (struct shmid_kernel *)ipcp; - - return container_of(ipcp, struct shmid_kernel, shm_perm); -} - -/* * shm_lock_(check_) routines are called in the paths where the rw_mutex - * is not held. + * is not necessarily held. */ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) { @@ -211,7 +196,7 @@ static void shm_close(struct vm_area_struct *vma) down_write(&shm_ids(ns).rw_mutex); /* remove from the list of attaches of the shm segment */ - shp = shm_lock_down(ns, sfd->id); + shp = shm_lock(ns, sfd->id); BUG_ON(IS_ERR(shp)); shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); @@ -577,7 +562,8 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, if (is_file_hugepages(shp->shm_file)) { struct address_space *mapping = inode->i_mapping; - *rss += (HPAGE_SIZE/PAGE_SIZE)*mapping->nrpages; + struct hstate *h = hstate_file(shp->shm_file); + *rss += pages_per_huge_page(h) * mapping->nrpages; } else { struct shmem_inode_info *info = SHMEM_I(inode); spin_lock(&info->lock); @@ -931,7 +917,7 @@ invalid: out_nattch: down_write(&shm_ids(ns).rw_mutex); - shp = shm_lock_down(ns, shmid); + shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; if(shp->shm_nattch == 0 && diff --git a/ipc/util.c b/ipc/util.c index 3339177b336..49b3ea615dc 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -688,10 +688,6 @@ void ipc64_perm_to_ipc_perm (struct ipc64_perm *in, struct ipc_perm *out) * Look for an id in the ipc ids idr and lock the associated ipc object. * * The ipc object is locked on exit. - * - * This is the routine that should be called when the rw_mutex is not already - * held, i.e. idr tree not protected: it protects the idr tree in read mode - * during the idr_find(). */ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) @@ -699,18 +695,13 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) struct kern_ipc_perm *out; int lid = ipcid_to_idx(id); - down_read(&ids->rw_mutex); - rcu_read_lock(); out = idr_find(&ids->ipcs_idr, lid); if (out == NULL) { rcu_read_unlock(); - up_read(&ids->rw_mutex); return ERR_PTR(-EINVAL); } - up_read(&ids->rw_mutex); - spin_lock(&out->lock); /* ipc_rmid() may have already freed the ID while ipc_lock @@ -725,56 +716,6 @@ struct kern_ipc_perm *ipc_lock(struct ipc_ids *ids, int id) return out; } -/** - * ipc_lock_down - Lock an ipc structure with rw_sem held - * @ids: IPC identifier set - * @id: ipc id to look for - * - * Look for an id in the ipc ids idr and lock the associated ipc object. - * - * The ipc object is locked on exit. - * - * This is the routine that should be called when the rw_mutex is already - * held, i.e. idr tree protected. - */ - -struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *ids, int id) -{ - struct kern_ipc_perm *out; - int lid = ipcid_to_idx(id); - - rcu_read_lock(); - out = idr_find(&ids->ipcs_idr, lid); - if (out == NULL) { - rcu_read_unlock(); - return ERR_PTR(-EINVAL); - } - - spin_lock(&out->lock); - - /* - * No need to verify that the structure is still valid since the - * rw_mutex is held. - */ - return out; -} - -struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id) -{ - struct kern_ipc_perm *out; - - out = ipc_lock_down(ids, id); - if (IS_ERR(out)) - return out; - - if (ipc_checkid(out, id)) { - ipc_unlock(out); - return ERR_PTR(-EIDRM); - } - - return out; -} - struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id) { struct kern_ipc_perm *out; @@ -846,7 +787,7 @@ struct kern_ipc_perm *ipcctl_pre_down(struct ipc_ids *ids, int id, int cmd, int err; down_write(&ids->rw_mutex); - ipcp = ipc_lock_check_down(ids, id); + ipcp = ipc_lock_check(ids, id); if (IS_ERR(ipcp)) { err = PTR_ERR(ipcp); goto out_up; diff --git a/ipc/util.h b/ipc/util.h index cdb966aebe0..3646b45a03c 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -102,11 +102,6 @@ void* ipc_rcu_alloc(int size); void ipc_rcu_getref(void *ptr); void ipc_rcu_putref(void *ptr); -/* - * ipc_lock_down: called with rw_mutex held - * ipc_lock: called without that lock held - */ -struct kern_ipc_perm *ipc_lock_down(struct ipc_ids *, int); struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); void kernel_to_ipc64_perm(struct kern_ipc_perm *in, struct ipc64_perm *out); @@ -155,7 +150,6 @@ static inline void ipc_unlock(struct kern_ipc_perm *perm) rcu_read_unlock(); } -struct kern_ipc_perm *ipc_lock_check_down(struct ipc_ids *ids, int id); struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id); int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids, struct ipc_ops *ops, struct ipc_params *params); |