From baef99a08a2e23d9386b47e53fa5f0d44fc98f66 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 29 Jan 2009 14:25:10 -0800 Subject: cgroups: use hierarchy mutex in creation failure path Now, cgrp->sibling is handled under hierarchy mutex. error route should do so, too. Signed-off-by: KAMEZAWA Hiroyuki Cc: Li Zefan Acked-by Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c29831076e7..2ae7cb47dbf 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2434,7 +2434,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, err_remove: + cgroup_lock_hierarchy(root); list_del(&cgrp->sibling); + cgroup_unlock_hierarchy(root); root->number_of_cgroups--; err_destroy: -- cgit v1.2.3 From 1404f06565ee89e0ce04d4a5859c00b0e3a0dc8d Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 29 Jan 2009 14:25:21 -0800 Subject: cgroups: fix lock inconsistency in cgroup_clone() I fixed a bug in cgroup_clone() in Linus' tree in commit 7b574b7 ("cgroups: fix a race between cgroup_clone and umount") without noticing there was a cleanup patch in -mm tree that should be rebased (now commit 104cbd5, "cgroups: use task_lock() for access tsk->cgroups safe in cgroup_clone()"), thus resulted in lock inconsistency. Signed-off-by: Li Zefan Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2ae7cb47dbf..0066092de19 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2993,20 +2993,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_unlock(&cgroup_mutex); return 0; } - task_lock(tsk); - cg = tsk->cgroups; - parent = task_cgroup(tsk, subsys->subsys_id); /* Pin the hierarchy */ - if (!atomic_inc_not_zero(&parent->root->sb->s_active)) { + if (!atomic_inc_not_zero(&root->sb->s_active)) { /* We race with the final deactivate_super() */ mutex_unlock(&cgroup_mutex); return 0; } /* Keep the cgroup alive */ + task_lock(tsk); + parent = task_cgroup(tsk, subsys->subsys_id); + cg = tsk->cgroups; get_css_set(cg); task_unlock(tsk); + mutex_unlock(&cgroup_mutex); /* Now do the VFS work to create a cgroup */ @@ -3045,7 +3046,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_unlock(&inode->i_mutex); put_css_set(cg); - deactivate_super(parent->root->sb); + deactivate_super(root->sb); /* The cgroup is still accessible in the VFS, but * we're not going to try to rmdir() it at this * point. */ @@ -3071,7 +3072,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, mutex_lock(&cgroup_mutex); put_css_set(cg); mutex_unlock(&cgroup_mutex); - deactivate_super(parent->root->sb); + deactivate_super(root->sb); return ret; } -- cgit v1.2.3 From 804b3c28a4e4fa1c224571bf76edb534b9c4b1ed Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Thu, 29 Jan 2009 14:25:21 -0800 Subject: cgroups: add cpu_relax() calls in css_tryget() and cgroup_clear_css_refs() css_tryget() and cgroup_clear_css_refs() contain polling loops; these loops should have cpu_relax calls in them to reduce cross-cache traffic. Signed-off-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0066092de19..492215d67fa 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2509,7 +2509,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) for_each_subsys(cgrp->root, ss) { struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; int refcnt; - do { + while (1) { /* We can only remove a CSS with a refcnt==1 */ refcnt = atomic_read(&css->refcnt); if (refcnt > 1) { @@ -2523,7 +2523,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp) * css_tryget() to spin until we set the * CSS_REMOVED bits or abort */ - } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); + if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt) + break; + cpu_relax(); + } } done: for_each_subsys(cgrp->root, ss) { -- cgit v1.2.3 From 839ec5452ebfd5905b9c69b20ceb640903a8ea1a Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Thu, 29 Jan 2009 14:25:22 -0800 Subject: cgroup: fix root_count when mount fails due to busy subsystem root_count was being incremented in cgroup_get_sb() after all error checking was complete, but decremented in cgroup_kill_sb(), which can be called on a superblock that we gave up on due to an error. This patch changes cgroup_kill_sb() to only decrement root_count if the root was previously linked into the list of roots. Signed-off-by: Paul Menage Tested-by: Serge Hallyn Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cgroup.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/cgroup.c') diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 492215d67fa..5a54ff42874 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1115,8 +1115,10 @@ static void cgroup_kill_sb(struct super_block *sb) { } write_unlock(&css_set_lock); - list_del(&root->root_list); - root_count--; + if (!list_empty(&root->root_list)) { + list_del(&root->root_list); + root_count--; + } mutex_unlock(&cgroup_mutex); -- cgit v1.2.3