From 146aa1bd0511f88ddb4e92fafa2b8aad4f2f65f3 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Sat, 18 Oct 2008 20:28:03 -0700
Subject: cgroups: fix probable race with put_css_set[_taskexit] and find_css_set

put_css_set_taskexit may be called while find_css_set is running on
another CPU, and the following race can occur:

put_css_set_taskexit side                    find_css_set side

                                        |
atomic_dec_and_test(&kref->refcount)    |
    /* kref->refcount = 0 */            |
....................................................................
                                        |  read_lock(&css_set_lock)
                                        |  find_existing_css_set
                                        |  get_css_set
                                        |  read_unlock(&css_set_lock);
....................................................................
__release_css_set                       |
....................................................................
                                        | /* use a released css_set */
                                        |

[put_css_set has the same problem. In the current code, however, every
put_css_set call is made inside the cgroup mutex critical region, the
same as find_css_set.]

[akpm@linux-foundation.org: repair comments]
[menage@google.com: eliminate race in css_set refcounting]
Signed-off-by: Lai Jiangshan
Cc: Balbir Singh
Cc: KAMEZAWA Hiroyuki
Signed-off-by: Paul Menage
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/cgroup.c | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

(limited to 'kernel/cgroup.c')

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c17e6d..1e49218457e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
 
-	write_lock(&css_set_lock);
 	hlist_del(&cg->hlist);
 	css_set_count--;
 
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
 		list_del(&link->cgrp_link_list);
 		kfree(link);
 	}
-
-	write_unlock(&css_set_lock);
 }
 
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	int i;
-	struct css_set *cg = container_of(k, struct css_set, ref);
-
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
+	write_lock(&css_set_lock);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
+	}
 	unlink_css_set(cg);
+	write_unlock(&css_set_lock);
 
 	rcu_read_lock();
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
 	kfree(cg);
 }
 
-static void release_css_set(struct kref *k)
-{
-	__release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-	__release_css_set(k, 1);
-}
-
 /*
  * refcounted get/put for css_set objects
  */
 static inline void get_css_set(struct css_set *cg)
 {
-	kref_get(&cg->ref);
+	atomic_inc(&cg->refcount);
 }
 
 static inline void put_css_set(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set);
+	__put_css_set(cg, 0);
 }
 
 static inline void put_css_set_taskexit(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set_taskexit);
+	__put_css_set(cg, 1);
 }
 
 /*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
 		return NULL;
 	}
 
-	kref_init(&res->ref);
+	atomic_set(&res->refcount, 1);
 	INIT_LIST_HEAD(&res->cg_links);
 	INIT_LIST_HEAD(&res->tasks);
 	INIT_HLIST_NODE(&res->hlist);
@@ -1728,7 +1726,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
 
 	read_lock(&css_set_lock);
 	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-		count += atomic_read(&link->cg->ref.refcount);
+		count += atomic_read(&link->cg->refcount);
 	}
 	read_unlock(&css_set_lock);
 	return count;
@@ -2495,8 +2493,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 int __init cgroup_init_early(void)
 {
 	int i;
-	kref_init(&init_css_set.ref);
-	kref_get(&init_css_set.ref);
+	atomic_set(&init_css_set.refcount, 1);
 	INIT_LIST_HEAD(&init_css_set.cg_links);
 	INIT_LIST_HEAD(&init_css_set.tasks);
 	INIT_HLIST_NODE(&init_css_set.hlist);
-- 
cgit v1.2.3