From 817929ec274bcfe771586d338bb31d1659615686 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Thu, 18 Oct 2007 23:39:36 -0700 Subject: Task Control Groups: shared cgroup subsystem group arrays Replace the struct css_set embedded in task_struct with a pointer; all tasks that have the same set of memberships across all hierarchies will share a css_set object, and will be linked via their css_sets field to the "tasks" list_head in the css_set. Assuming that many tasks share the same cgroup assignments, this reduces overall space usage and keeps the size of the task_struct down (three pointers added to task_struct compared to a non-cgroups kernel, no matter how many subsystems are registered). [akpm@linux-foundation.org: fix a printk] [akpm@linux-foundation.org: build fix] Signed-off-by: Paul Menage Cc: Serge E. Hallyn Cc: "Eric W. Biederman" Cc: Dave Hansen Cc: Balbir Singh Cc: Paul Jackson Cc: Kirill Korotaev Cc: Herbert Poetzl Cc: Srivatsa Vaddagiri Cc: Cedric Le Goater Cc: Serge E. Hallyn Cc: "Eric W. 
Biederman" Cc: Dave Hansen Cc: Balbir Singh Cc: Paul Jackson Cc: Kirill Korotaev Cc: Herbert Poetzl Cc: Srivatsa Vaddagiri Cc: Cedric Le Goater Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 89 ++++++++++++++++++++++++++++++++++++++++++++------ include/linux/sched.h | 33 +++---------------- 2 files changed, 83 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a9553568118..836b3557bb7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -27,10 +27,19 @@ extern void cgroup_lock(void); extern void cgroup_unlock(void); extern void cgroup_fork(struct task_struct *p); extern void cgroup_fork_callbacks(struct task_struct *p); +extern void cgroup_post_fork(struct task_struct *p); extern void cgroup_exit(struct task_struct *p, int run_callbacks); extern struct file_operations proc_cgroup_operations; +/* Define the enumeration of all cgroup subsystems */ +#define SUBSYS(_x) _x ## _subsys_id, +enum cgroup_subsys_id { +#include <linux/cgroup_subsys.h> + CGROUP_SUBSYS_COUNT +}; +#undef SUBSYS + /* Per-subsystem/per-cgroup state maintained by the system. */ struct cgroup_subsys_state { /* The cgroup that this subsystem is attached to. Useful @@ -97,6 +106,52 @@ struct cgroup { struct cgroupfs_root *root; struct cgroup *top_cgroup; + + /* + * List of cg_cgroup_links pointing at css_sets with + * tasks in this cgroup. Protected by css_set_lock + */ + struct list_head css_sets; +}; + +/* A css_set is a structure holding pointers to a set of + * cgroup_subsys_state objects. This saves space in the task struct + * object and speeds up fork()/exit(), since a single inc/dec and a + * list_add()/del() can bump the reference count on the entire + * cgroup set for a task. + */ + +struct css_set { + + /* Reference count */ + struct kref ref; + + /* + * List running through all cgroup groups. 
Protected by + * css_set_lock + */ + struct list_head list; + + /* + * List running through all tasks using this cgroup + * group. Protected by css_set_lock + */ + struct list_head tasks; + + /* + * List of cg_cgroup_link objects on link chains from + * cgroups referenced from this css_set. Protected by + * css_set_lock + */ + struct list_head cg_links; + + /* + * Set of subsystem states, one for each subsystem. This array + * is immutable after creation apart from the init_css_set + * during subsystem registration (at boot time). + */ + struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; + }; /* struct cftype: @@ -157,15 +212,7 @@ int cgroup_is_removed(const struct cgroup *cont); int cgroup_path(const struct cgroup *cont, char *buf, int buflen); -int __cgroup_task_count(const struct cgroup *cont); -static inline int cgroup_task_count(const struct cgroup *cont) -{ - int task_count; - rcu_read_lock(); - task_count = __cgroup_task_count(cont); - rcu_read_unlock(); - return task_count; -} +int cgroup_task_count(const struct cgroup *cont); /* Return true if the cgroup is a descendant of the current cgroup */ int cgroup_is_descendant(const struct cgroup *cont); @@ -213,7 +260,7 @@ static inline struct cgroup_subsys_state *cgroup_subsys_state( static inline struct cgroup_subsys_state *task_subsys_state( struct task_struct *task, int subsys_id) { - return rcu_dereference(task->cgroups.subsys[subsys_id]); + return rcu_dereference(task->cgroups->subsys[subsys_id]); } static inline struct cgroup* task_cgroup(struct task_struct *task, @@ -226,6 +273,27 @@ int cgroup_path(const struct cgroup *cont, char *buf, int buflen); int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss); +/* A cgroup_iter should be treated as an opaque object */ +struct cgroup_iter { + struct list_head *cg_link; + struct list_head *task; +}; + +/* To iterate across the tasks in a cgroup: + * + * 1) call cgroup_iter_start to initialize an iterator + * + * 2) call cgroup_iter_next() to 
retrieve member tasks until it + * returns NULL or until you want to end the iteration + * + * 3) call cgroup_iter_end() to destroy the iterator. + */ +void cgroup_iter_start(struct cgroup *cont, struct cgroup_iter *it); +struct task_struct *cgroup_iter_next(struct cgroup *cont, + struct cgroup_iter *it); +void cgroup_iter_end(struct cgroup *cont, struct cgroup_iter *it); + + #else /* !CONFIG_CGROUPS */ static inline int cgroup_init_early(void) { return 0; } @@ -233,6 +301,7 @@ static inline int cgroup_init(void) { return 0; } static inline void cgroup_init_smp(void) {} static inline void cgroup_fork(struct task_struct *p) {} static inline void cgroup_fork_callbacks(struct task_struct *p) {} +static inline void cgroup_post_fork(struct task_struct *p) {} static inline void cgroup_exit(struct task_struct *p, int callbacks) {} static inline void cgroup_lock(void) {} diff --git a/include/linux/sched.h b/include/linux/sched.h index af2ed4bae67..1aa1cfa63b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -894,34 +894,6 @@ struct sched_entity { #endif }; -#ifdef CONFIG_CGROUPS - -#define SUBSYS(_x) _x ## _subsys_id, -enum cgroup_subsys_id { -#include <linux/cgroup_subsys.h> - CGROUP_SUBSYS_COUNT -}; -#undef SUBSYS - -/* A css_set is a structure holding pointers to a set of - * cgroup_subsys_state objects. - */ - -struct css_set { - - /* Set of subsystem states, one for each subsystem. NULL for - * subsystems that aren't part of this hierarchy. These - * pointers reduce the number of dereferences required to get - * from a task to its state for a given cgroup, but result - * in increased space usage if tasks are in wildly different - * groupings across different hierarchies. 
This array is - * immutable after creation */ - struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; - -}; - -#endif /* CONFIG_CGROUPS */ - struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1159,7 +1131,10 @@ struct task_struct { int cpuset_mem_spread_rotor; #endif #ifdef CONFIG_CGROUPS - struct css_set cgroups; + /* Control Group info protected by css_set_lock */ + struct css_set *cgroups; + /* cg_list protected by css_set_lock and tsk->alloc_lock */ + struct list_head cg_list; #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; -- cgit v1.2.3