From 967fc04671feea4dbf780c9e55a0bc8fcf68a14e Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 29 Dec 2008 09:39:52 -0500 Subject: sched: add sched_class->needs_post_schedule() member We currently run class->post_schedule() outside of the rq->lock, which means that we need to test for the need to post_schedule outside of the lock to avoid a forced reacquistion. This is currently not a problem as we only look at rq->rt.overloaded. However, we want to enhance this going forward to look at more state to reduce the need to post_schedule to a bare minimum set. Therefore, we introduce a new member-func called needs_post_schedule() which tests for the post_schedule condtion without actually performing the work. Therefore it is safe to call this function before the rq->lock is released, because we are guaranteed not to drop the lock at an intermediate point (such as what post_schedule() may do). We will use this later in the series [ rostedt: removed paranoid BUG_ON ] Signed-off-by: Gregory Haskins --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e5f928a079e..836a86c32a6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1012,6 +1012,7 @@ struct sched_class { struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); + int (*needs_post_schedule) (struct rq *this_rq); void (*post_schedule) (struct rq *this_rq); void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); -- cgit v1.2.3 From 4075134e40804821f90866d7de56802e4dcecb1e Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 29 Dec 2008 09:39:53 -0500 Subject: plist: fix PLIST_NODE_INIT to work with debug enabled It seems that PLIST_NODE_INIT breaks if used and DEBUG_PI_LIST is defined. Since there are no current users of PLIST_NODE_INIT, this has gone undetected. This patch fixes the build issue that enables the DEBUG_PI_LIST later in the series when we use it in init_task.h Signed-off-by: Gregory Haskins --- include/linux/plist.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/plist.h b/include/linux/plist.h index 85de2f05587..45926d77d6a 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -96,6 +96,10 @@ struct plist_node { # define PLIST_HEAD_LOCK_INIT(_lock) #endif +#define _PLIST_HEAD_INIT(head) \ + .prio_list = LIST_HEAD_INIT((head).prio_list), \ + .node_list = LIST_HEAD_INIT((head).node_list) + /** * PLIST_HEAD_INIT - static struct plist_head initializer * @head: struct plist_head variable name @@ -103,8 +107,7 @@ struct plist_node { */ #define PLIST_HEAD_INIT(head, _lock) \ { \ - .prio_list = LIST_HEAD_INIT((head).prio_list), \ - .node_list = LIST_HEAD_INIT((head).node_list), \ + _PLIST_HEAD_INIT(head), \ PLIST_HEAD_LOCK_INIT(&(_lock)) \ } @@ -116,7 +119,7 @@ struct plist_node { #define PLIST_NODE_INIT(node, __prio) \ { \ .prio = (__prio), \ - .plist = PLIST_HEAD_INIT((node).plist, NULL), \ + .plist = { _PLIST_HEAD_INIT((node).plist) }, \ } /** -- cgit v1.2.3 From 917b627d4d981dc614519d7b34ea31a976b14e12 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Mon, 29 Dec 2008 09:39:53 -0500 Subject: sched: create "pushable_tasks" list to limit pushing to one attempt The RT scheduler employs a "push/pull" design to actively balance tasks within the system (on a per disjoint cpuset basis). When a task is awoken, it is immediately determined if there are any lower priority cpus which should be preempted. This is opposed to the way normal SCHED_OTHER tasks behave, which will wait for a periodic rebalancing operation to occur before spreading out load. When a particular RQ has more than 1 active RT task, it is said to be in an "overloaded" state. Once this occurs, the system enters the active balancing mode, where it will try to push the task away, or persuade a different cpu to pull it over. The system will stay in this state until the system falls back below the <= 1 queued RT task per RQ. However, the current implementation suffers from a limitation in the push logic. Once overloaded, all tasks (other than current) on the RQ are analyzed on every push operation, even if it was previously unpushable (due to affinity, etc). Whats more, the operation stops at the first task that is unpushable and will not look at items lower in the queue. This causes two problems: 1) We can have the same tasks analyzed over and over again during each push, which extends out the fast path in the scheduler for no gain. Consider a RQ that has dozens of tasks that are bound to a core. Each one of those tasks will be encountered and skipped for each push operation while they are queued. 2) There may be lower-priority tasks under the unpushable task that could have been successfully pushed, but will never be considered until either the unpushable task is cleared, or a pull operation succeeds. The net result is a potential latency source for mid priority tasks. This patch aims to rectify these two conditions by introducing a new priority sorted list: "pushable_tasks". A task is added to the list each time a task is activated or preempted. It is removed from the list any time it is deactivated, made current, or fails to push. This works because a task only needs to be attempted to push once. After an initial failure to push, the other cpus will eventually try to pull the task when the conditions are proper. This also solves the problem that we don't completely analyze all tasks due to encountering an unpushable tasks. Now every task will have a push attempted (when appropriate). This reduces latency both by shorting the critical section of the rq->lock for certain workloads, and by making sure the algorithm considers all eligible tasks in the system. [ rostedt: added a couple more BUG_ONs ] Signed-off-by: Gregory Haskins Acked-by: Steven Rostedt --- include/linux/init_task.h | 1 + include/linux/sched.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e..6851225f44a 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -140,6 +140,7 @@ extern struct group_info init_groups; .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ + .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \ .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ .real_parent = &tsk, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 836a86c32a6..440cabb2d43 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1179,6 +1179,7 @@ struct task_struct { #endif struct list_head tasks; + struct plist_node pushable_tasks; struct mm_struct *mm, *active_mm; -- cgit v1.2.3 From 831451ac4e44d3a20b581ce726ef1d1144373f7d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2009 12:39:18 +0100 Subject: sched: introduce avg_wakeup Introduce a new avg_wakeup statistic. avg_wakeup is a measure of how frequently a task wakes up other tasks, it represents the average time between wakeups, with a limit of avg_runtime for when it doesn't wake up anybody. Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4cae9b81a1f..daf4e07bc97 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1046,6 +1046,9 @@ struct sched_entity { u64 exec_max; u64 slice_max; + u64 start_runtime; + u64 avg_wakeup; + u64 nr_migrations; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; -- cgit v1.2.3 From 34cb61359b503d7aff6447acb037a5efd6ce93b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 16 Jan 2009 13:36:06 +0100 Subject: sched: fix !CONFIG_SCHEDSTATS build failure Stephen Rothwell reported this linux-next build failure with !CONFIG_SCHEDSTATS: | In file included from kernel/sched.c:1703: | kernel/sched_fair.c: In function 'adaptive_gran': | kernel/sched_fair.c:1324: error: 'struct sched_entity' has no member named 'avg_wakeup' The start_runtime and avg_wakeup metrics are now not just for statistics, but also for scheduling - so they always need to be available. (Also move out the nr_migrations fields - for future perfcounters usage.) Reported-by: Stephen Rothwell Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index daf4e07bc97..5d56b54350a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1031,6 +1031,10 @@ struct sched_entity { u64 last_wakeup; u64 avg_overlap; + u64 start_runtime; + u64 avg_wakeup; + u64 nr_migrations; + #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; @@ -1046,10 +1050,6 @@ struct sched_entity { u64 exec_max; u64 slice_max; - u64 start_runtime; - u64 avg_wakeup; - - u64 nr_migrations; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; u64 nr_failed_migrations_running; -- cgit v1.2.3 From ad0b0fd554dfc126b5750d14908dccc3bbf602be Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 10 Feb 2009 11:42:26 -0800 Subject: sched, latencytop: incorporate review feedback from Andrew Morton Andrew had some suggestions for the latencytop file; this patch takes care of most of these: * Add documentation * Turn account_scheduler_latency into an inline function * Don't report negative values to userspace * Make the file operations struct const * Fix a few checkpatch.pl warnings Signed-off-by: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/latencytop.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index 901c2d6377a..b0e99898527 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -9,6 +9,7 @@ #ifndef _INCLUDE_GUARD_LATENCYTOP_H_ #define _INCLUDE_GUARD_LATENCYTOP_H_ +#include #ifdef CONFIG_LATENCYTOP #define LT_SAVECOUNT 32 @@ -24,7 +25,14 @@ struct latency_record { struct task_struct; -void account_scheduler_latency(struct task_struct *task, int usecs, int inter); +extern int latencytop_enabled; +void __account_scheduler_latency(struct task_struct *task, int usecs, int inter); +static inline void +account_scheduler_latency(struct task_struct *task, int usecs, int inter) +{ + if (unlikely(latencytop_enabled)) + __account_scheduler_latency(task, usecs, inter); +} void clear_all_latency_tracing(struct task_struct *p); -- cgit v1.2.3 From b342501cd31e5546d0c9ca8ceff5ded1832f9e5b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Feb 2009 20:20:29 +0100 Subject: sched: allow architectures to specify sched_clock_stable Allow CONFIG_HAVE_UNSTABLE_SCHED_CLOCK architectures to still specify that their sched_clock() implementation is reliable. This will be used by x86 to switch on a faster sched_clock_cpu() implementation on certain CPU types. Signed-off-by: Ingo Molnar --- include/linux/sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8981e52c714..a063d19b7a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1670,6 +1670,16 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) return set_cpus_allowed_ptr(p, &new_mask); } +/* + * Architectures can set this to 1 if they have specified + * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, + * but then during bootup it turns out that sched_clock() + * is reliable after all: + */ +#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +extern int sched_clock_stable; +#endif + extern unsigned long long sched_clock(void); extern void sched_clock_init(void); -- cgit v1.2.3