aboutsummaryrefslogtreecommitdiff
path: root/kernel/sched.c
diff options
context:
space:
mode:
authorJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>2009-04-07 13:34:16 -0700
committerJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>2009-04-07 13:34:16 -0700
commit38f4b8c0da01ae7cd9b93386842ce272d6fde9ab (patch)
tree3c8c52201aac038094bfea7efdd0984a8f62045e /kernel/sched.c
parenta811454027352c762e0d5bba1b1d8f7d26bf96ae (diff)
parent8e2c4f2844c0e8dcdfe312e5f2204854ca8532c6 (diff)
Merge commit 'origin/master' into for-linus/xen/master
* commit 'origin/master': (4825 commits) Fix build errors due to CONFIG_BRANCH_TRACER=y parport: Use the PCI IRQ if offered tty: jsm cleanups Adjust path to gpio headers KGDB_SERIAL_CONSOLE check for module Change KCONFIG name tty: Blackin CTS/RTS Change hardware flow control from poll to interrupt driven Add support for the MAX3100 SPI UART. lanana: assign a device name and numbering for MAX3100 serqt: initial clean up pass for tty side tty: Use the generic RS485 ioctl on CRIS tty: Correct inline types for tty_driver_kref_get() splice: fix deadlock in splicing to file nilfs2: support nanosecond timestamp nilfs2: introduce secondary super block nilfs2: simplify handling of active state of segments nilfs2: mark minor flag for checkpoint created by internal operation nilfs2: clean up sketch file nilfs2: super block operations fix endian bug ... Conflicts: arch/x86/include/asm/thread_info.h arch/x86/lguest/boot.c drivers/xen/manage.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--kernel/sched.c161
1 files changed, 125 insertions, 36 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 133762aece5..b38bd96098f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,13 +231,20 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
spin_lock(&rt_b->rt_runtime_lock);
for (;;) {
+ unsigned long delta;
+ ktime_t soft, hard;
+
if (hrtimer_active(&rt_b->rt_period_timer))
break;
now = hrtimer_cb_get_time(&rt_b->rt_period_timer);
hrtimer_forward(&rt_b->rt_period_timer, now, rt_b->rt_period);
- hrtimer_start_expires(&rt_b->rt_period_timer,
- HRTIMER_MODE_ABS);
+
+ soft = hrtimer_get_softexpires(&rt_b->rt_period_timer);
+ hard = hrtimer_get_expires(&rt_b->rt_period_timer);
+ delta = ktime_to_ns(ktime_sub(hard, soft));
+ __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
+ HRTIMER_MODE_ABS, 0);
}
spin_unlock(&rt_b->rt_runtime_lock);
}
@@ -1110,7 +1117,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
if (rq == this_rq()) {
hrtimer_restart(timer);
} else if (!rq->hrtick_csd_pending) {
- __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+ __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
rq->hrtick_csd_pending = 1;
}
}
@@ -1146,7 +1153,8 @@ static __init void init_hrtick(void)
*/
static void hrtick_start(struct rq *rq, u64 delay)
{
- hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+ __hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
+ HRTIMER_MODE_REL, 0);
}
static inline void init_hrtick(void)
@@ -3818,19 +3826,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
*/
#define MAX_PINNED_INTERVAL 512
+/* Working cpumask for load_balance and load_balance_newidle. */
+static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
+
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
*/
static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle,
- int *balance, struct cpumask *cpus)
+ int *balance)
{
int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group;
unsigned long imbalance;
struct rq *busiest;
unsigned long flags;
+ struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
cpumask_setall(cpus);
@@ -3985,8 +3997,7 @@ out:
* this_rq is locked.
*/
static int
-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
- struct cpumask *cpus)
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
{
struct sched_group *group;
struct rq *busiest = NULL;
@@ -3994,6 +4005,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
int ld_moved = 0;
int sd_idle = 0;
int all_pinned = 0;
+ struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
cpumask_setall(cpus);
@@ -4134,10 +4146,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
struct sched_domain *sd;
int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;
- cpumask_var_t tmpmask;
-
- if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
- return;
for_each_domain(this_cpu, sd) {
unsigned long interval;
@@ -4148,7 +4156,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
if (sd->flags & SD_BALANCE_NEWIDLE)
/* If we've pulled tasks over stop searching: */
pulled_task = load_balance_newidle(this_cpu, this_rq,
- sd, tmpmask);
+ sd);
interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval))
@@ -4163,7 +4171,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
*/
this_rq->next_balance = next_balance;
}
- free_cpumask_var(tmpmask);
}
/*
@@ -4313,11 +4320,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
unsigned long next_balance = jiffies + 60*HZ;
int update_next_balance = 0;
int need_serialize;
- cpumask_var_t tmp;
-
- /* Fails alloc? Rebalancing probably not a priority right now. */
- if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
- return;
for_each_domain(cpu, sd) {
if (!(sd->flags & SD_LOAD_BALANCE))
@@ -4342,7 +4344,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
}
if (time_after_eq(jiffies, sd->last_balance + interval)) {
- if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+ if (load_balance(cpu, rq, sd, idle, &balance)) {
/*
* We've pulled tasks over so either we're no
* longer idle, or one of our SMT siblings is
@@ -4376,8 +4378,6 @@ out:
*/
if (likely(update_next_balance))
rq->next_balance = next_balance;
-
- free_cpumask_var(tmp);
}
/*
@@ -4781,10 +4781,7 @@ void scheduler_tick(void)
#endif
}
-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
- defined(CONFIG_PREEMPT_TRACER))
-
-static inline unsigned long get_parent_ip(unsigned long addr)
+unsigned long get_parent_ip(unsigned long addr)
{
if (in_lock_functions(addr)) {
addr = CALLER_ADDR2;
@@ -4794,6 +4791,9 @@ static inline unsigned long get_parent_ip(unsigned long addr)
return addr;
}
+#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
+ defined(CONFIG_PREEMPT_TRACER))
+
void __kprobes add_preempt_count(int val)
{
#ifdef CONFIG_DEBUG_PREEMPT
@@ -4942,15 +4942,13 @@ pick_next_task(struct rq *rq)
/*
* schedule() is the main scheduler function.
*/
-asmlinkage void __sched schedule(void)
+asmlinkage void __sched __schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;
-need_resched:
- preempt_disable();
cpu = smp_processor_id();
rq = cpu_rq(cpu);
rcu_qsctr_inc(cpu);
@@ -5007,13 +5005,80 @@ need_resched_nonpreemptible:
if (unlikely(reacquire_kernel_lock(current) < 0))
goto need_resched_nonpreemptible;
+}
+asmlinkage void __sched schedule(void)
+{
+need_resched:
+ preempt_disable();
+ __schedule();
preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
}
EXPORT_SYMBOL(schedule);
+#ifdef CONFIG_SMP
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+{
+ unsigned int cpu;
+ struct rq *rq;
+
+ if (!sched_feat(OWNER_SPIN))
+ return 0;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * Need to access the cpu field knowing that
+ * DEBUG_PAGEALLOC could have unmapped it if
+ * the mutex owner just released it and exited.
+ */
+ if (probe_kernel_address(&owner->cpu, cpu))
+ goto out;
+#else
+ cpu = owner->cpu;
+#endif
+
+ /*
+ * Even if the access succeeded (likely case),
+ * the cpu field may no longer be valid.
+ */
+ if (cpu >= nr_cpumask_bits)
+ goto out;
+
+ /*
+ * We need to validate that we can do a
+ * get_cpu() and that we have the percpu area.
+ */
+ if (!cpu_online(cpu))
+ goto out;
+
+ rq = cpu_rq(cpu);
+
+ for (;;) {
+ /*
+ * Owner changed, break to re-assess state.
+ */
+ if (lock->owner != owner)
+ break;
+
+ /*
+ * Is that owner really running on that cpu?
+ */
+ if (task_thread_info(rq->curr) != owner || need_resched())
+ return 0;
+
+ cpu_relax();
+ }
+out:
+ return 1;
+}
+#endif
+
#ifdef CONFIG_PREEMPT
/*
* this is the entry point to schedule() from in-kernel preemption
@@ -5131,11 +5196,17 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
__wake_up_common(q, mode, 1, 0, NULL);
}
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
+{
+ __wake_up_common(q, mode, 1, 0, key);
+}
+
/**
- * __wake_up_sync - wake up threads blocked on a waitqueue.
+ * __wake_up_sync_key - wake up threads blocked on a waitqueue.
* @q: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
+ * @key: opaque value to be passed to wakeup targets
*
* The sync wakeup differs that the waker knows that it will schedule
* away soon, so while the target thread will be woken up, it will not
@@ -5144,8 +5215,8 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
*
* On UP it can prevent extra preemption.
*/
-void
-__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
+ int nr_exclusive, void *key)
{
unsigned long flags;
int sync = 1;
@@ -5157,9 +5228,18 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
sync = 0;
spin_lock_irqsave(&q->lock, flags);
- __wake_up_common(q, mode, nr_exclusive, sync, NULL);
+ __wake_up_common(q, mode, nr_exclusive, sync, key);
spin_unlock_irqrestore(&q->lock, flags);
}
+EXPORT_SYMBOL_GPL(__wake_up_sync_key);
+
+/*
+ * __wake_up_sync - see __wake_up_sync_key()
+ */
+void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
+{
+ __wake_up_sync_key(q, mode, nr_exclusive, NULL);
+}
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
/**
@@ -7648,7 +7728,7 @@ cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
{
int group;
- cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+ cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
group = cpumask_first(mask);
if (sg)
*sg = &per_cpu(sched_group_core, group).sg;
@@ -7677,7 +7757,7 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
group = cpumask_first(mask);
#elif defined(CONFIG_SCHED_SMT)
- cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+ cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
group = cpumask_first(mask);
#else
group = cpu;
@@ -8020,7 +8100,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
SD_INIT(sd, SIBLING);
set_domain_attribute(sd, attr);
cpumask_and(sched_domain_span(sd),
- &per_cpu(cpu_sibling_map, i), cpu_map);
+ topology_thread_cpumask(i), cpu_map);
sd->parent = p;
p->child = sd;
cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -8031,7 +8111,7 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
/* Set up CPU (sibling) groups */
for_each_cpu(i, cpu_map) {
cpumask_and(this_sibling_map,
- &per_cpu(cpu_sibling_map, i), cpu_map);
+ topology_thread_cpumask(i), cpu_map);
if (i != cpumask_first(this_sibling_map))
continue;
@@ -8707,6 +8787,9 @@ void __init sched_init(void)
#ifdef CONFIG_USER_SCHED
alloc_size *= 2;
#endif
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ alloc_size += num_possible_cpus() * cpumask_size();
+#endif
/*
* As sched_init() is called before page_alloc is setup,
* we use alloc_bootmem().
@@ -8744,6 +8827,12 @@ void __init sched_init(void)
ptr += nr_cpu_ids * sizeof(void **);
#endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */
+#ifdef CONFIG_CPUMASK_OFFSTACK
+ for_each_possible_cpu(i) {
+ per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+ ptr += cpumask_size();
+ }
+#endif /* CONFIG_CPUMASK_OFFSTACK */
}
#ifdef CONFIG_SMP