Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/irq/Makefile                   |   1
-rw-r--r-- | kernel/irq/internals.h                |   2
-rw-r--r-- | kernel/irq/manage.c                   |  31
-rw-r--r-- | kernel/irq/pm.c                       |  79
-rw-r--r-- | kernel/kexec.c                        |  19
-rw-r--r-- | kernel/lockdep.c                      | 528
-rw-r--r-- | kernel/lockdep_internals.h            |  45
-rw-r--r-- | kernel/lockdep_proc.c                 |  22
-rw-r--r-- | kernel/lockdep_states.h               |   9
-rw-r--r-- | kernel/mutex-debug.c                  |   9
-rw-r--r-- | kernel/mutex-debug.h                  |  18
-rw-r--r-- | kernel/mutex.c                        | 121
-rw-r--r-- | kernel/mutex.h                        |  22
-rw-r--r-- | kernel/power/disk.c                   | 138
-rw-r--r-- | kernel/power/main.c                   |  55
-rw-r--r-- | kernel/sched.c                        |  71
-rw-r--r-- | kernel/sched_features.h               |   1
-rw-r--r-- | kernel/timer.c                        |  68
-rw-r--r-- | kernel/trace/trace_functions_graph.c  |  75
19 files changed, 905 insertions, 409 deletions
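
Much of the churn in kernel/lockdep.c and kernel/lockdep_internals.h in the patch below comes from generating the per-state usage bits, bitmasks and name strings from one list of states (the new kernel/lockdep_states.h), by re-including that header with different definitions of LOCKDEP_STATE(). As a rough, self-contained userspace sketch of that x-macro pattern — not the kernel sources themselves: the state list is inlined here as a hypothetical STATE_LIST macro and __stringify is redefined locally — the following program expands a single list into both the usage-bit enum and the usage_str[] name table:

    #include <stdio.h>

    /*
     * One authoritative list of states. The kernel keeps this list in
     * lockdep_states.h and pulls it in with #include between
     * #define/#undef LOCKDEP_STATE; here it is a plain macro instead.
     */
    #define STATE_LIST(X) \
            X(HARDIRQ)    \
            X(SOFTIRQ)    \
            X(RECLAIM_FS)

    #define __stringify(x) #x   /* local stand-in for linux/stringify.h */

    /* Four usage bits per state: USED_IN, USED_IN_READ, ENABLED, ENABLED_READ. */
    enum lock_usage_bit {
    #define LOCKDEP_STATE(S)                \
            LOCK_USED_IN_##S,               \
            LOCK_USED_IN_##S##_READ,        \
            LOCK_ENABLED_##S,               \
            LOCK_ENABLED_##S##_READ,
            STATE_LIST(LOCKDEP_STATE)
    #undef LOCKDEP_STATE
            LOCK_USED,
            LOCK_USAGE_STATES
    };

    /* Human-readable names generated from the same list. */
    static const char *usage_str[] = {
    #define LOCKDEP_STATE(S)                                              \
            [LOCK_USED_IN_##S]        = "IN-"__stringify(S)"-W",          \
            [LOCK_ENABLED_##S]        = __stringify(S)"-ON-W",            \
            [LOCK_USED_IN_##S##_READ] = "IN-"__stringify(S)"-R",          \
            [LOCK_ENABLED_##S##_READ] = __stringify(S)"-ON-R",
            STATE_LIST(LOCKDEP_STATE)
    #undef LOCKDEP_STATE
            [LOCK_USED] = "INITIAL USE",
    };

    int main(void)
    {
            int bit;

            /* Print every generated usage bit next to its generated name. */
            for (bit = 0; bit <= LOCK_USED; bit++)
                    printf("%2d: %s\n", bit, usage_str[bit]);

            return 0;
    }

The point of the pattern is that adding a new state — as this series does with RECLAIM_FS — only requires one new LOCKDEP_STATE() line; the enum values, LOCKF_* masks and printable names all follow from it automatically.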
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index 4dd5b1edac9..3394f8f5296 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o +obj-$(CONFIG_PM_SLEEP) += pm.o diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index ee1aa9f8e8b..01ce20eab38 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -12,6 +12,8 @@ extern void compat_irq_chip_set_default_handler(struct irq_desc *desc); extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, unsigned long flags); +extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp); +extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume); extern struct lock_class_key irq_desc_lock_class; extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 6458e99984c..1516ab77355 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -162,6 +162,20 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *desc) } #endif +void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend) +{ + if (suspend) { + if (!desc->action || (desc->action->flags & IRQF_TIMER)) + return; + desc->status |= IRQ_SUSPENDED; + } + + if (!desc->depth++) { + desc->status |= IRQ_DISABLED; + desc->chip->disable(irq); + } +} + /** * disable_irq_nosync - disable an irq without waiting * @irq: Interrupt to disable @@ -182,10 +196,7 @@ void disable_irq_nosync(unsigned int irq) return; spin_lock_irqsave(&desc->lock, flags); - if (!desc->depth++) { - desc->status |= IRQ_DISABLED; - desc->chip->disable(irq); - } + __disable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(disable_irq_nosync); @@ -215,15 +226,21 @@ void disable_irq(unsigned int irq) } EXPORT_SYMBOL(disable_irq); -static void __enable_irq(struct irq_desc *desc, unsigned int irq) +void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) { + if (resume) + desc->status &= ~IRQ_SUSPENDED; + switch (desc->depth) { case 0: + err_out: WARN(1, KERN_WARNING "Unbalanced enable for IRQ %d\n", irq); break; case 1: { unsigned int status = desc->status & ~IRQ_DISABLED; + if (desc->status & IRQ_SUSPENDED) + goto err_out; /* Prevent probing on this irq: */ desc->status = status | IRQ_NOPROBE; check_irq_resend(desc, irq); @@ -253,7 +270,7 @@ void enable_irq(unsigned int irq) return; spin_lock_irqsave(&desc->lock, flags); - __enable_irq(desc, irq); + __enable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); } EXPORT_SYMBOL(enable_irq); @@ -511,7 +528,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ if (shared && (desc->status & IRQ_SPURIOUS_DISABLED)) { desc->status &= ~IRQ_SPURIOUS_DISABLED; - __enable_irq(desc, irq); + __enable_irq(desc, irq, false); } spin_unlock_irqrestore(&desc->lock, flags); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c new file mode 100644 index 00000000000..638d8bedec1 --- /dev/null +++ b/kernel/irq/pm.c @@ -0,0 +1,79 @@ +/* + * linux/kernel/irq/pm.c + * + * Copyright (C) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. + * + * This file contains power management functions related to interrupts. 
+ */ + +#include <linux/irq.h> +#include <linux/module.h> +#include <linux/interrupt.h> + +#include "internals.h" + +/** + * suspend_device_irqs - disable all currently enabled interrupt lines + * + * During system-wide suspend or hibernation device interrupts need to be + * disabled at the chip level and this function is provided for this purpose. + * It disables all interrupt lines that are enabled at the moment and sets the + * IRQ_SUSPENDED flag for them. + */ +void suspend_device_irqs(void) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + __disable_irq(desc, irq, true); + spin_unlock_irqrestore(&desc->lock, flags); + } + + for_each_irq_desc(irq, desc) + if (desc->status & IRQ_SUSPENDED) + synchronize_irq(irq); +} +EXPORT_SYMBOL_GPL(suspend_device_irqs); + +/** + * resume_device_irqs - enable interrupt lines disabled by suspend_device_irqs() + * + * Enable all interrupt lines previously disabled by suspend_device_irqs() that + * have the IRQ_SUSPENDED flag set. + */ +void resume_device_irqs(void) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) { + unsigned long flags; + + if (!(desc->status & IRQ_SUSPENDED)) + continue; + + spin_lock_irqsave(&desc->lock, flags); + __enable_irq(desc, irq, true); + spin_unlock_irqrestore(&desc->lock, flags); + } +} +EXPORT_SYMBOL_GPL(resume_device_irqs); + +/** + * check_wakeup_irqs - check if any wake-up interrupts are pending + */ +int check_wakeup_irqs(void) +{ + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) + if ((desc->status & IRQ_WAKEUP) && (desc->status & IRQ_PENDING)) + return -EBUSY; + + return 0; +} diff --git a/kernel/kexec.c b/kernel/kexec.c index c7fd6692939..93eed85fe01 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1450,11 +1450,7 @@ int kernel_kexec(void) error = device_suspend(PMSG_FREEZE); if (error) goto Resume_console; - error = disable_nonboot_cpus(); - if (error) - goto Resume_devices; device_pm_lock(); - local_irq_disable(); /* At this point, device_suspend() has been called, * but *not* device_power_down(). We *must* * device_power_down() now. 
Otherwise, drivers for @@ -1464,12 +1460,15 @@ int kernel_kexec(void) */ error = device_power_down(PMSG_FREEZE); if (error) - goto Enable_irqs; - + goto Resume_devices; + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; + local_irq_disable(); /* Suspend system devices */ error = sysdev_suspend(PMSG_FREEZE); if (error) - goto Power_up_devices; + goto Enable_irqs; } else #endif { @@ -1483,13 +1482,13 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { sysdev_resume(); - Power_up_devices: - device_power_up(PMSG_RESTORE); Enable_irqs: local_irq_enable(); - device_pm_unlock(); + Enable_cpus: enable_nonboot_cpus(); + device_power_up(PMSG_RESTORE); Resume_devices: + device_pm_unlock(); device_resume(PMSG_RESTORE); Resume_console: resume_console(); diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 06b0c3568f0..3673a3f44d9 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -41,6 +41,7 @@ #include <linux/utsname.h> #include <linux/hash.h> #include <linux/ftrace.h> +#include <linux/stringify.h> #include <asm/sections.h> @@ -310,12 +311,14 @@ EXPORT_SYMBOL(lockdep_on); #if VERBOSE # define HARDIRQ_VERBOSE 1 # define SOFTIRQ_VERBOSE 1 +# define RECLAIM_VERBOSE 1 #else # define HARDIRQ_VERBOSE 0 # define SOFTIRQ_VERBOSE 0 +# define RECLAIM_VERBOSE 0 #endif -#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE +#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE /* * Quick filtering for interesting events: */ @@ -443,17 +446,18 @@ atomic_t nr_find_usage_backwards_recursions; * Locking printouts: */ +#define __USAGE(__STATE) \ + [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \ + [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \ + [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\ + [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R", + static const char *usage_str[] = { - [LOCK_USED] = "initial-use ", - [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W", - [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W", - [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W", - [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W", - [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R", - [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R", - [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R", - [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R", +#define LOCKDEP_STATE(__STATE) __USAGE(__STATE) +#include "lockdep_states.h" +#undef LOCKDEP_STATE + [LOCK_USED] = "INITIAL USE", }; const char * __get_key_name(struct lockdep_subclass_key *key, char *str) @@ -461,46 +465,45 @@ const char * __get_key_name(struct lockdep_subclass_key *key, char *str) return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); } -void -get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4) +static inline unsigned long lock_flag(enum lock_usage_bit bit) { - *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; - - if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) - *c1 = '+'; - else - if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) - *c1 = '-'; + return 1UL << bit; +} - if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) - *c2 = '+'; - else - if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) - *c2 = '-'; +static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) +{ + char c = '.'; - if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) - *c3 = '-'; - if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { - *c3 = '+'; - if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) - *c3 = '?'; + if (class->usage_mask & lock_flag(bit + 2)) + c = '+'; + if (class->usage_mask & 
lock_flag(bit)) { + c = '-'; + if (class->usage_mask & lock_flag(bit + 2)) + c = '?'; } - if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) - *c4 = '-'; - if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { - *c4 = '+'; - if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) - *c4 = '?'; - } + return c; +} + +void get_usage_chars(struct lock_class *class, char usage[LOCK_USAGE_CHARS]) +{ + int i = 0; + +#define LOCKDEP_STATE(__STATE) \ + usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \ + usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ); +#include "lockdep_states.h" +#undef LOCKDEP_STATE + + usage[i] = '\0'; } static void print_lock_name(struct lock_class *class) { - char str[KSYM_NAME_LEN], c1, c2, c3, c4; + char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS]; const char *name; - get_usage_chars(class, &c1, &c2, &c3, &c4); + get_usage_chars(class, usage); name = class->name; if (!name) { @@ -513,7 +516,7 @@ static void print_lock_name(struct lock_class *class) if (class->subclass) printk("/%d", class->subclass); } - printk("){%c%c%c%c}", c1, c2, c3, c4); + printk("){%s}", usage); } static void print_lockdep_cache(struct lockdep_map *lock) @@ -1263,9 +1266,49 @@ check_usage(struct task_struct *curr, struct held_lock *prev, bit_backwards, bit_forwards, irqclass); } -static int -check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, - struct held_lock *next) +static const char *state_names[] = { +#define LOCKDEP_STATE(__STATE) \ + __stringify(__STATE), +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + +static const char *state_rnames[] = { +#define LOCKDEP_STATE(__STATE) \ + __stringify(__STATE)"-READ", +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + +static inline const char *state_name(enum lock_usage_bit bit) +{ + return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2]; +} + +static int exclusive_bit(int new_bit) +{ + /* + * USED_IN + * USED_IN_READ + * ENABLED + * ENABLED_READ + * + * bit 0 - write/read + * bit 1 - used_in/enabled + * bit 2+ state + */ + + int state = new_bit & ~3; + int dir = new_bit & 2; + + /* + * keep state, bit flip the direction and strip read. 
+ */ + return state | (dir ^ 2); +} + +static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next, enum lock_usage_bit bit) { /* * Prove that the new dependency does not connect a hardirq-safe @@ -1273,38 +1316,34 @@ check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, * the backwards-subgraph starting at <prev>, and the * forwards-subgraph starting at <next>: */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, - LOCK_ENABLED_HARDIRQS, "hard")) + if (!check_usage(curr, prev, next, bit, + exclusive_bit(bit), state_name(bit))) return 0; + bit++; /* _READ */ + /* * Prove that the new dependency does not connect a hardirq-safe-read * lock with a hardirq-unsafe lock - to achieve this we search * the backwards-subgraph starting at <prev>, and the * forwards-subgraph starting at <next>: */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, - LOCK_ENABLED_HARDIRQS, "hard-read")) + if (!check_usage(curr, prev, next, bit, + exclusive_bit(bit), state_name(bit))) return 0; - /* - * Prove that the new dependency does not connect a softirq-safe - * lock with a softirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, - LOCK_ENABLED_SOFTIRQS, "soft")) - return 0; - /* - * Prove that the new dependency does not connect a softirq-safe-read - * lock with a softirq-unsafe lock - to achieve this we search - * the backwards-subgraph starting at <prev>, and the - * forwards-subgraph starting at <next>: - */ - if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, - LOCK_ENABLED_SOFTIRQS, "soft")) + return 1; +} + +static int +check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, + struct held_lock *next) +{ +#define LOCKDEP_STATE(__STATE) \ + if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ return 0; +#include "lockdep_states.h" +#undef LOCKDEP_STATE return 1; } @@ -1933,7 +1972,7 @@ void print_irqtrace_events(struct task_struct *curr) print_ip_sym(curr->softirq_disable_ip); } -static int hardirq_verbose(struct lock_class *class) +static int HARDIRQ_verbose(struct lock_class *class) { #if HARDIRQ_VERBOSE return class_filter(class); @@ -1941,7 +1980,7 @@ static int hardirq_verbose(struct lock_class *class) return 0; } -static int softirq_verbose(struct lock_class *class) +static int SOFTIRQ_verbose(struct lock_class *class) { #if SOFTIRQ_VERBOSE return class_filter(class); @@ -1949,185 +1988,94 @@ static int softirq_verbose(struct lock_class *class) return 0; } +static int RECLAIM_FS_verbose(struct lock_class *class) +{ +#if RECLAIM_VERBOSE + return class_filter(class); +#endif + return 0; +} + #define STRICT_READ_CHECKS 1 -static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, - enum lock_usage_bit new_bit) +static int (*state_verbose_f[])(struct lock_class *class) = { +#define LOCKDEP_STATE(__STATE) \ + __STATE##_verbose, +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + +static inline int state_verbose(enum lock_usage_bit bit, + struct lock_class *class) { - int ret = 1; + return state_verbose_f[bit >> 2](class); +} - switch(new_bit) { - case LOCK_USED_IN_HARDIRQ: - if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) - return 0; - if (!valid_state(curr, this, new_bit, - LOCK_ENABLED_HARDIRQS_READ)) - return 0; - /* - * just marked it hardirq-safe, check that this lock - * took no hardirq-unsafe lock in the 
past: - */ - if (!check_usage_forwards(curr, this, - LOCK_ENABLED_HARDIRQS, "hard")) - return 0; -#if STRICT_READ_CHECKS - /* - * just marked it hardirq-safe, check that this lock - * took no hardirq-unsafe-read lock in the past: - */ - if (!check_usage_forwards(curr, this, - LOCK_ENABLED_HARDIRQS_READ, "hard-read")) - return 0; -#endif - if (hardirq_verbose(hlock_class(this))) - ret = 2; - break; - case LOCK_USED_IN_SOFTIRQ: - if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) - return 0; - if (!valid_state(curr, this, new_bit, - LOCK_ENABLED_SOFTIRQS_READ)) - return 0; - /* - * just marked it softirq-safe, check that this lock - * took no softirq-unsafe lock in the past: - */ - if (!check_usage_forwards(curr, this, - LOCK_ENABLED_SOFTIRQS, "soft")) - return 0; -#if STRICT_READ_CHECKS - /* - * just marked it softirq-safe, check that this lock - * took no softirq-unsafe-read lock in the past: - */ - if (!check_usage_forwards(curr, this, - LOCK_ENABLED_SOFTIRQS_READ, "soft-read")) - return 0; -#endif - if (softirq_verbose(hlock_class(this))) - ret = 2; - break; - case LOCK_USED_IN_HARDIRQ_READ: - if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) - return 0; - /* - * just marked it hardirq-read-safe, check that this lock - * took no hardirq-unsafe lock in the past: - */ - if (!check_usage_forwards(curr, this, - LOCK_ENABLED_HARDIRQS, "hard")) - return 0; - if (hardirq_verbose(hlock_class(this))) - ret = 2; - break; - case LOCK_USED_IN_SOFTIRQ_READ: - if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) - return 0; - /* - * just marked it softirq-read-safe, check that this lock - * took no softirq-unsafe lock in the past: - */ - if (!check_usage_forwards(curr, this, - LOCK_ENABLED_SOFTIRQS, "soft")) - return 0; - if (softirq_verbose(hlock_class(this))) - ret = 2; - break; - case LOCK_ENABLED_HARDIRQS: - if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) - return 0; - if (!valid_state(curr, this, new_bit, - LOCK_USED_IN_HARDIRQ_READ)) - return 0; - /* - * just marked it hardirq-unsafe, check that no hardirq-safe - * lock in the system ever took it in the past: - */ - if (!check_usage_backwards(curr, this, - LOCK_USED_IN_HARDIRQ, "hard")) - return 0; -#if STRICT_READ_CHECKS - /* - * just marked it hardirq-unsafe, check that no - * hardirq-safe-read lock in the system ever took - * it in the past: - */ - if (!check_usage_backwards(curr, this, - LOCK_USED_IN_HARDIRQ_READ, "hard-read")) - return 0; -#endif - if (hardirq_verbose(hlock_class(this))) - ret = 2; - break; - case LOCK_ENABLED_SOFTIRQS: - if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) - return 0; - if (!valid_state(curr, this, new_bit, - LOCK_USED_IN_SOFTIRQ_READ)) - return 0; - /* - * just marked it softirq-unsafe, check that no softirq-safe - * lock in the system ever took it in the past: - */ - if (!check_usage_backwards(curr, this, - LOCK_USED_IN_SOFTIRQ, "soft")) - return 0; -#if STRICT_READ_CHECKS - /* - * just marked it softirq-unsafe, check that no - * softirq-safe-read lock in the system ever took - * it in the past: - */ - if (!check_usage_backwards(curr, this, - LOCK_USED_IN_SOFTIRQ_READ, "soft-read")) - return 0; -#endif - if (softirq_verbose(hlock_class(this))) - ret = 2; - break; - case LOCK_ENABLED_HARDIRQS_READ: - if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) - return 0; -#if STRICT_READ_CHECKS - /* - * just marked it hardirq-read-unsafe, check that no - * hardirq-safe lock in the system ever took it in the past: - */ - if (!check_usage_backwards(curr, 
this, - LOCK_USED_IN_HARDIRQ, "hard")) - return 0; -#endif - if (hardirq_verbose(hlock_class(this))) - ret = 2; - break; - case LOCK_ENABLED_SOFTIRQS_READ: - if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) +typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, + enum lock_usage_bit bit, const char *name); + +static int +mark_lock_irq(struct task_struct *curr, struct held_lock *this, int new_bit) +{ + int excl_bit = exclusive_bit(new_bit); + int read = new_bit & 1; + int dir = new_bit & 2; + + /* + * mark USED_IN has to look forwards -- to ensure no dependency + * has ENABLED state, which would allow recursion deadlocks. + * + * mark ENABLED has to look backwards -- to ensure no dependee + * has USED_IN state, which, again, would allow recursion deadlocks. + */ + check_usage_f usage = dir ? + check_usage_backwards : check_usage_forwards; + + /* + * Validate that this particular lock does not have conflicting + * usage states. + */ + if (!valid_state(curr, this, new_bit, excl_bit)) + return 0; + + /* + * Validate that the lock dependencies don't have conflicting usage + * states. + */ + if ((!read || !dir || STRICT_READ_CHECKS) && + !usage(curr, this, excl_bit, state_name(new_bit))) + return 0; + + /* + * Check for read in write conflicts + */ + if (!read) { + if (!valid_state(curr, this, new_bit, excl_bit + 1)) return 0; -#if STRICT_READ_CHECKS - /* - * just marked it softirq-read-unsafe, check that no - * softirq-safe lock in the system ever took it in the past: - */ - if (!check_usage_backwards(curr, this, - LOCK_USED_IN_SOFTIRQ, "soft")) + + if (STRICT_READ_CHECKS && + !usage(curr, this, excl_bit + 1, + state_name(new_bit + 1))) return 0; -#endif - if (softirq_verbose(hlock_class(this))) - ret = 2; - break; - default: - WARN_ON(1); - break; } - return ret; + if (state_verbose(new_bit, hlock_class(this))) + return 2; + + return 1; } +enum mark_type { +#define LOCKDEP_STATE(__STATE) __STATE, +#include "lockdep_states.h" +#undef LOCKDEP_STATE +}; + /* * Mark all held locks with a usage bit: */ static int -mark_held_locks(struct task_struct *curr, int hardirq) +mark_held_locks(struct task_struct *curr, enum mark_type mark) { enum lock_usage_bit usage_bit; struct held_lock *hlock; @@ -2136,17 +2084,12 @@ mark_held_locks(struct task_struct *curr, int hardirq) for (i = 0; i < curr->lockdep_depth; i++) { hlock = curr->held_locks + i; - if (hardirq) { - if (hlock->read) - usage_bit = LOCK_ENABLED_HARDIRQS_READ; - else - usage_bit = LOCK_ENABLED_HARDIRQS; - } else { - if (hlock->read) - usage_bit = LOCK_ENABLED_SOFTIRQS_READ; - else - usage_bit = LOCK_ENABLED_SOFTIRQS; - } + usage_bit = 2 + (mark << 2); /* ENABLED */ + if (hlock->read) + usage_bit += 1; /* READ */ + + BUG_ON(usage_bit >= LOCK_USAGE_STATES); + if (!mark_lock(curr, hlock, usage_bit)) return 0; } @@ -2200,7 +2143,7 @@ void trace_hardirqs_on_caller(unsigned long ip) * We are going to turn hardirqs on, so set the * usage bit for all held locks: */ - if (!mark_held_locks(curr, 1)) + if (!mark_held_locks(curr, HARDIRQ)) return; /* * If we have softirqs enabled, then set the usage @@ -2208,7 +2151,7 @@ void trace_hardirqs_on_caller(unsigned long ip) * this bit from being set before) */ if (curr->softirqs_enabled) - if (!mark_held_locks(curr, 0)) + if (!mark_held_locks(curr, SOFTIRQ)) return; curr->hardirq_enable_ip = ip; @@ -2288,7 +2231,7 @@ void trace_softirqs_on(unsigned long ip) * enabled too: */ if (curr->hardirqs_enabled) - mark_held_locks(curr, 0); + mark_held_locks(curr, SOFTIRQ); } /* @@ -2317,6 
+2260,48 @@ void trace_softirqs_off(unsigned long ip) debug_atomic_inc(&redundant_softirqs_off); } +static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) +{ + struct task_struct *curr = current; + + if (unlikely(!debug_locks)) + return; + + /* no reclaim without waiting on it */ + if (!(gfp_mask & __GFP_WAIT)) + return; + + /* this guy won't enter reclaim */ + if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) + return; + + /* We're only interested __GFP_FS allocations for now */ + if (!(gfp_mask & __GFP_FS)) + return; + + if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) + return; + + mark_held_locks(curr, RECLAIM_FS); +} + +static void check_flags(unsigned long flags); + +void lockdep_trace_alloc(gfp_t gfp_mask) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + __lockdep_trace_alloc(gfp_mask, flags); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} + static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) { /* @@ -2345,19 +2330,35 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) if (!hlock->hardirqs_off) { if (hlock->read) { if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQS_READ)) + LOCK_ENABLED_HARDIRQ_READ)) return 0; if (curr->softirqs_enabled) if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQS_READ)) + LOCK_ENABLED_SOFTIRQ_READ)) return 0; } else { if (!mark_lock(curr, hlock, - LOCK_ENABLED_HARDIRQS)) + LOCK_ENABLED_HARDIRQ)) return 0; if (curr->softirqs_enabled) if (!mark_lock(curr, hlock, - LOCK_ENABLED_SOFTIRQS)) + LOCK_ENABLED_SOFTIRQ)) + return 0; + } + } + + /* + * We reuse the irq context infrastructure more broadly as a general + * context checking code. This tests GFP_FS recursion (a lock taken + * during reclaim for a GFP_FS allocation is held over a GFP_FS + * allocation). 
+ */ + if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { + if (hlock->read) { + if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) + return 0; + } else { + if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) return 0; } } @@ -2412,6 +2413,10 @@ static inline int separate_irq_context(struct task_struct *curr, return 0; } +void lockdep_trace_alloc(gfp_t gfp_mask) +{ +} + #endif /* @@ -2445,14 +2450,13 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, return 0; switch (new_bit) { - case LOCK_USED_IN_HARDIRQ: - case LOCK_USED_IN_SOFTIRQ: - case LOCK_USED_IN_HARDIRQ_READ: - case LOCK_USED_IN_SOFTIRQ_READ: - case LOCK_ENABLED_HARDIRQS: - case LOCK_ENABLED_SOFTIRQS: - case LOCK_ENABLED_HARDIRQS_READ: - case LOCK_ENABLED_SOFTIRQS_READ: +#define LOCKDEP_STATE(__STATE) \ + case LOCK_USED_IN_##__STATE: \ + case LOCK_USED_IN_##__STATE##_READ: \ + case LOCK_ENABLED_##__STATE: \ + case LOCK_ENABLED_##__STATE##_READ: +#include "lockdep_states.h" +#undef LOCKDEP_STATE ret = mark_lock_irq(curr, this, new_bit); if (!ret) return 0; @@ -2966,6 +2970,16 @@ void lock_release(struct lockdep_map *lock, int nested, } EXPORT_SYMBOL_GPL(lock_release); +void lockdep_set_current_reclaim_state(gfp_t gfp_mask) +{ + current->lockdep_reclaim_gfp = gfp_mask; +} + +void lockdep_clear_current_reclaim_state(void) +{ + current->lockdep_reclaim_gfp = 0; +} + #ifdef CONFIG_LOCK_STAT static int print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index 56b196932c0..a2cc7e9a6e8 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -7,6 +7,45 @@ */ /* + * Lock-class usage-state bits: + */ +enum lock_usage_bit { +#define LOCKDEP_STATE(__STATE) \ + LOCK_USED_IN_##__STATE, \ + LOCK_USED_IN_##__STATE##_READ, \ + LOCK_ENABLED_##__STATE, \ + LOCK_ENABLED_##__STATE##_READ, +#include "lockdep_states.h" +#undef LOCKDEP_STATE + LOCK_USED, + LOCK_USAGE_STATES +}; + +/* + * Usage-state bitmasks: + */ +#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE), + +enum { +#define LOCKDEP_STATE(__STATE) \ + __LOCKF(USED_IN_##__STATE) \ + __LOCKF(USED_IN_##__STATE##_READ) \ + __LOCKF(ENABLED_##__STATE) \ + __LOCKF(ENABLED_##__STATE##_READ) +#include "lockdep_states.h" +#undef LOCKDEP_STATE + __LOCKF(USED) +}; + +#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) +#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) + +#define LOCKF_ENABLED_IRQ_READ \ + (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) +#define LOCKF_USED_IN_IRQ_READ \ + (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) + +/* * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies * we track. 
* @@ -31,8 +70,10 @@ extern struct list_head all_lock_classes; extern struct lock_chain lock_chains[]; -extern void -get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); +#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2) + +extern void get_usage_chars(struct lock_class *class, + char usage[LOCK_USAGE_CHARS]); extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c index 13716b81389..d7135aa2d2c 100644 --- a/kernel/lockdep_proc.c +++ b/kernel/lockdep_proc.c @@ -84,7 +84,7 @@ static int l_show(struct seq_file *m, void *v) { struct lock_class *class = v; struct lock_list *entry; - char c1, c2, c3, c4; + char usage[LOCK_USAGE_CHARS]; if (v == SEQ_START_TOKEN) { seq_printf(m, "all lock classes:\n"); @@ -100,8 +100,8 @@ static int l_show(struct seq_file *m, void *v) seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); #endif - get_usage_chars(class, &c1, &c2, &c3, &c4); - seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); + get_usage_chars(class, usage); + seq_printf(m, " %s", usage); seq_printf(m, ": "); print_name(m, class); @@ -300,27 +300,27 @@ static int lockdep_stats_show(struct seq_file *m, void *v) nr_uncategorized++; if (class->usage_mask & LOCKF_USED_IN_IRQ) nr_irq_safe++; - if (class->usage_mask & LOCKF_ENABLED_IRQS) + if (class->usage_mask & LOCKF_ENABLED_IRQ) nr_irq_unsafe++; if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) nr_softirq_safe++; - if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ) nr_softirq_unsafe++; if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) nr_hardirq_safe++; - if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) + if (class->usage_mask & LOCKF_ENABLED_HARDIRQ) nr_hardirq_unsafe++; if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) nr_irq_read_safe++; - if (class->usage_mask & LOCKF_ENABLED_IRQS_READ) + if (class->usage_mask & LOCKF_ENABLED_IRQ_READ) nr_irq_read_unsafe++; if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) nr_softirq_read_safe++; - if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) + if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ) nr_softirq_read_unsafe++; if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) nr_hardirq_read_safe++; - if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) + if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ) nr_hardirq_read_unsafe++; #ifdef CONFIG_PROVE_LOCKING @@ -601,6 +601,10 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data) static void seq_header(struct seq_file *m) { seq_printf(m, "lock_stat version 0.3\n"); + + if (unlikely(!debug_locks)) + seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " "%14s %14s\n", diff --git a/kernel/lockdep_states.h b/kernel/lockdep_states.h new file mode 100644 index 00000000000..995b0cc2b84 --- /dev/null +++ b/kernel/lockdep_states.h @@ -0,0 +1,9 @@ +/* + * Lockdep states, + * + * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever + * you add one, or come up with a nice dynamic solution. + */ +LOCKDEP_STATE(HARDIRQ) +LOCKDEP_STATE(SOFTIRQ) +LOCKDEP_STATE(RECLAIM_FS) diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index 1d94160eb53..50d022e5a56 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -26,11 +26,6 @@ /* * Must be called with lock->wait_lock held. 
*/ -void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner) -{ - lock->owner = new_owner; -} - void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) { memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); @@ -59,7 +54,6 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, /* Mark the current thread as blocked on the lock: */ ti->task->blocked_on = waiter; - waiter->lock = lock; } void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, @@ -82,7 +76,7 @@ void debug_mutex_unlock(struct mutex *lock) DEBUG_LOCKS_WARN_ON(lock->magic != lock); DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); + mutex_clear_owner(lock); } void debug_mutex_init(struct mutex *lock, const char *name, @@ -95,7 +89,6 @@ void debug_mutex_init(struct mutex *lock, const char *name, debug_check_no_locks_freed((void *)lock, sizeof(*lock)); lockdep_init_map(&lock->dep_map, name, key, 0); #endif - lock->owner = NULL; lock->magic = lock; } diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index babfbdfc534..6b2d735846a 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h @@ -13,14 +13,6 @@ /* * This must be called with lock->wait_lock held. */ -extern void -debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner); - -static inline void debug_mutex_clear_owner(struct mutex *lock) -{ - lock->owner = NULL; -} - extern void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter); extern void debug_mutex_wake_waiter(struct mutex *lock, @@ -35,6 +27,16 @@ extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); +static inline void mutex_set_owner(struct mutex *lock) +{ + lock->owner = current_thread_info(); +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ + lock->owner = NULL; +} + #define spin_lock_mutex(lock, flags) \ do { \ struct mutex *l = container_of(lock, struct mutex, wait_lock); \ diff --git a/kernel/mutex.c b/kernel/mutex.c index 4f45d4b658e..5d79781394a 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -10,6 +10,11 @@ * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and * David Howells for suggestions and improvements. * + * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline + * from the -rt tree, where it was originally implemented for rtmutexes + * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale + * and Sven Dietrich. + * * Also see Documentation/mutex-design.txt. */ #include <linux/mutex.h> @@ -46,6 +51,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) atomic_set(&lock->count, 1); spin_lock_init(&lock->wait_lock); INIT_LIST_HEAD(&lock->wait_list); + mutex_clear_owner(lock); debug_mutex_init(lock, name, key); } @@ -91,6 +97,7 @@ void inline __sched mutex_lock(struct mutex *lock) * 'unlocked' into 'locked' state. 
*/ __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); + mutex_set_owner(lock); } EXPORT_SYMBOL(mutex_lock); @@ -115,6 +122,14 @@ void __sched mutex_unlock(struct mutex *lock) * The unlocking fastpath is the 0->1 transition from 'locked' * into 'unlocked' state: */ +#ifndef CONFIG_DEBUG_MUTEXES + /* + * When debugging is enabled we must not clear the owner before time, + * the slow path will always be taken, and that clears the owner field + * after verifying that it was indeed current. + */ + mutex_clear_owner(lock); +#endif __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); } @@ -129,21 +144,75 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, { struct task_struct *task = current; struct mutex_waiter waiter; - unsigned int old_val; unsigned long flags; + preempt_disable(); + mutex_acquire(&lock->dep_map, subclass, 0, ip); +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) + /* + * Optimistic spinning. + * + * We try to spin for acquisition when we find that there are no + * pending waiters and the lock owner is currently running on a + * (different) CPU. + * + * The rationale is that if the lock owner is running, it is likely to + * release the lock soon. + * + * Since this needs the lock owner, and this mutex implementation + * doesn't track the owner atomically in the lock field, we need to + * track it non-atomically. + * + * We can't do this for DEBUG_MUTEXES because that relies on wait_lock + * to serialize everything. + */ + + for (;;) { + struct thread_info *owner; + + /* + * If there's an owner, wait for it to either + * release the lock or go to sleep. + */ + owner = ACCESS_ONCE(lock->owner); + if (owner && !mutex_spin_on_owner(lock, owner)) + break; + + if (atomic_cmpxchg(&lock->count, 1, 0) == 1) { + lock_acquired(&lock->dep_map, ip); + mutex_set_owner(lock); + preempt_enable(); + return 0; + } + + /* + * When there's no owner, we might have preempted between the + * owner acquiring the lock and setting the owner field. If + * we're an RT task that will live-lock because we won't let + * the owner complete. + */ + if (!owner && (need_resched() || rt_task(task))) + break; + + /* + * The cpu_relax() call is a compiler barrier which forces + * everything in this loop to be re-loaded. We don't need + * memory barriers as we'll eventually observe the right + * values at the cost of a few extra spins. 
+ */ + cpu_relax(); + } +#endif spin_lock_mutex(&lock->wait_lock, flags); debug_mutex_lock_common(lock, &waiter); - mutex_acquire(&lock->dep_map, subclass, 0, ip); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); waiter.task = task; - old_val = atomic_xchg(&lock->count, -1); - if (old_val == 1) + if (atomic_xchg(&lock->count, -1) == 1) goto done; lock_contended(&lock->dep_map, ip); @@ -158,8 +227,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, * that when we release the lock, we properly wake up the * other waiters: */ - old_val = atomic_xchg(&lock->count, -1); - if (old_val == 1) + if (atomic_xchg(&lock->count, -1) == 1) break; /* @@ -173,21 +241,22 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); + preempt_enable(); return -EINTR; } __set_task_state(task, state); /* didnt get the lock, go to sleep: */ spin_unlock_mutex(&lock->wait_lock, flags); - schedule(); + __schedule(); spin_lock_mutex(&lock->wait_lock, flags); } done: lock_acquired(&lock->dep_map, ip); /* got the lock - rejoice! */ - mutex_remove_waiter(lock, &waiter, task_thread_info(task)); - debug_mutex_set_owner(lock, task_thread_info(task)); + mutex_remove_waiter(lock, &waiter, current_thread_info()); + mutex_set_owner(lock); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) @@ -196,6 +265,7 @@ done: spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); + preempt_enable(); return 0; } @@ -222,7 +292,8 @@ int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, + subclass, _RET_IP_); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -260,8 +331,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) wake_up_process(waiter->task); } - debug_mutex_clear_owner(lock); - spin_unlock_mutex(&lock->wait_lock, flags); } @@ -298,18 +367,30 @@ __mutex_lock_interruptible_slowpath(atomic_t *lock_count); */ int __sched mutex_lock_interruptible(struct mutex *lock) { + int ret; + might_sleep(); - return __mutex_fastpath_lock_retval + ret = __mutex_fastpath_lock_retval (&lock->count, __mutex_lock_interruptible_slowpath); + if (!ret) + mutex_set_owner(lock); + + return ret; } EXPORT_SYMBOL(mutex_lock_interruptible); int __sched mutex_lock_killable(struct mutex *lock) { + int ret; + might_sleep(); - return __mutex_fastpath_lock_retval + ret = __mutex_fastpath_lock_retval (&lock->count, __mutex_lock_killable_slowpath); + if (!ret) + mutex_set_owner(lock); + + return ret; } EXPORT_SYMBOL(mutex_lock_killable); @@ -352,9 +433,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) prev = atomic_xchg(&lock->count, -1); if (likely(prev == 1)) { - debug_mutex_set_owner(lock, current_thread_info()); + mutex_set_owner(lock); mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); } + /* Set it back to 0 if there are no waiters: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); @@ -380,8 +462,13 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) */ int __sched mutex_trylock(struct mutex *lock) { - return __mutex_fastpath_trylock(&lock->count, - __mutex_trylock_slowpath); + int ret; + + ret = 
__mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); + if (ret) + mutex_set_owner(lock); + + return ret; } EXPORT_SYMBOL(mutex_trylock); diff --git a/kernel/mutex.h b/kernel/mutex.h index a075dafbb29..67578ca48f9 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h @@ -16,8 +16,26 @@ #define mutex_remove_waiter(lock, waiter, ti) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#define debug_mutex_set_owner(lock, new_owner) do { } while (0) -#define debug_mutex_clear_owner(lock) do { } while (0) +#ifdef CONFIG_SMP +static inline void mutex_set_owner(struct mutex *lock) +{ + lock->owner = current_thread_info(); +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ + lock->owner = NULL; +} +#else +static inline void mutex_set_owner(struct mutex *lock) +{ +} + +static inline void mutex_clear_owner(struct mutex *lock) +{ +} +#endif + #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) #define debug_mutex_free_waiter(waiter) do { } while (0) #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 4a4a206b197..e886d1332a1 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -214,7 +214,7 @@ static int create_image(int platform_mode) return error; device_pm_lock(); - local_irq_disable(); + /* At this point, device_suspend() has been called, but *not* * device_power_down(). We *must* call device_power_down() now. * Otherwise, drivers for some devices (e.g. interrupt controllers) @@ -225,13 +225,25 @@ static int create_image(int platform_mode) if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); - goto Enable_irqs; + goto Unlock; } + + error = platform_pre_snapshot(platform_mode); + if (error || hibernation_test(TEST_PLATFORM)) + goto Platform_finish; + + error = disable_nonboot_cpus(); + if (error || hibernation_test(TEST_CPUS) + || hibernation_testmode(HIBERNATION_TEST)) + goto Enable_cpus; + + local_irq_disable(); + sysdev_suspend(PMSG_FREEZE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting hibernation\n"); - goto Power_up_devices; + goto Enable_irqs; } if (hibernation_test(TEST_CORE)) @@ -247,17 +259,28 @@ static int create_image(int platform_mode) restore_processor_state(); if (!in_suspend) platform_leave(platform_mode); + Power_up: sysdev_resume(); /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ - Power_up_devices: - device_power_up(in_suspend ? - (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + Enable_irqs: local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Platform_finish: + platform_finish(platform_mode); + + device_power_up(in_suspend ? + (error ? 
PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); + + Unlock: device_pm_unlock(); + return error; } @@ -291,25 +314,9 @@ int hibernation_snapshot(int platform_mode) if (hibernation_test(TEST_DEVICES)) goto Recover_platform; - error = platform_pre_snapshot(platform_mode); - if (error || hibernation_test(TEST_PLATFORM)) - goto Finish; - - error = disable_nonboot_cpus(); - if (!error) { - if (hibernation_test(TEST_CPUS)) - goto Enable_cpus; - - if (hibernation_testmode(HIBERNATION_TEST)) - goto Enable_cpus; + error = create_image(platform_mode); + /* Control returns here after successful restore */ - error = create_image(platform_mode); - /* Control returns here after successful restore */ - } - Enable_cpus: - enable_nonboot_cpus(); - Finish: - platform_finish(platform_mode); Resume_devices: device_resume(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); @@ -331,19 +338,33 @@ int hibernation_snapshot(int platform_mode) * kernel. */ -static int resume_target_kernel(void) +static int resume_target_kernel(bool platform_mode) { int error; device_pm_lock(); - local_irq_disable(); + error = device_power_down(PMSG_QUIESCE); if (error) { printk(KERN_ERR "PM: Some devices failed to power down, " "aborting resume\n"); - goto Enable_irqs; + goto Unlock; } - sysdev_suspend(PMSG_QUIESCE); + + error = platform_pre_restore(platform_mode); + if (error) + goto Cleanup; + + error = disable_nonboot_cpus(); + if (error) + goto Enable_cpus; + + local_irq_disable(); + + error = sysdev_suspend(PMSG_QUIESCE); + if (error) + goto Enable_irqs; + /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); error = restore_highmem(); @@ -366,11 +387,23 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); + sysdev_resume(); - device_power_up(PMSG_RECOVER); + Enable_irqs: local_irq_enable(); + + Enable_cpus: + enable_nonboot_cpus(); + + Cleanup: + platform_restore_cleanup(platform_mode); + + device_power_up(PMSG_RECOVER); + + Unlock: device_pm_unlock(); + return error; } @@ -390,19 +423,10 @@ int hibernation_restore(int platform_mode) pm_prepare_console(); suspend_console(); error = device_suspend(PMSG_QUIESCE); - if (error) - goto Finish; - - error = platform_pre_restore(platform_mode); if (!error) { - error = disable_nonboot_cpus(); - if (!error) - error = resume_target_kernel(); - enable_nonboot_cpus(); + error = resume_target_kernel(platform_mode); + device_resume(PMSG_RECOVER); } - platform_restore_cleanup(platform_mode); - device_resume(PMSG_RECOVER); - Finish: resume_console(); pm_restore_console(); return error; @@ -438,38 +462,46 @@ int hibernation_platform_enter(void) goto Resume_devices; } + device_pm_lock(); + + error = device_power_down(PMSG_HIBERNATE); + if (error) + goto Unlock; + error = hibernation_ops->prepare(); if (error) - goto Resume_devices; + goto Platofrm_finish; error = disable_nonboot_cpus(); if (error) - goto Finish; + goto Platofrm_finish; - device_pm_lock(); local_irq_disable(); - error = device_power_down(PMSG_HIBERNATE); - if (!error) { - sysdev_suspend(PMSG_HIBERNATE); - hibernation_ops->enter(); - /* We should never get here */ - while (1); - } - local_irq_enable(); - device_pm_unlock(); + sysdev_suspend(PMSG_HIBERNATE); + hibernation_ops->enter(); + /* We should never get here */ + while (1); /* * We don't need to reenable the nonboot CPUs or resume consoles, since * the system is going to be halted anyway. 
*/ - Finish: + Platofrm_finish: hibernation_ops->finish(); + + device_power_up(PMSG_RESTORE); + + Unlock: + device_pm_unlock(); + Resume_devices: entering_platform_hibernation = false; device_resume(PMSG_RESTORE); resume_console(); + Close: hibernation_ops->end(); + return error; } diff --git a/kernel/power/main.c b/kernel/power/main.c index c9632f841f6..f172f41858b 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -287,17 +287,32 @@ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) */ static int suspend_enter(suspend_state_t state) { - int error = 0; + int error; device_pm_lock(); - arch_suspend_disable_irqs(); - BUG_ON(!irqs_disabled()); - if ((error = device_power_down(PMSG_SUSPEND))) { + error = device_power_down(PMSG_SUSPEND); + if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); goto Done; } + if (suspend_ops->prepare) { + error = suspend_ops->prepare(); + if (error) + goto Power_up_devices; + } + + if (suspend_test(TEST_PLATFORM)) + goto Platfrom_finish; + + error = disable_nonboot_cpus(); + if (error || suspend_test(TEST_CPUS)) + goto Enable_cpus; + + arch_suspend_disable_irqs(); + BUG_ON(!irqs_disabled()); + error = sysdev_suspend(PMSG_SUSPEND); if (!error) { if (!suspend_test(TEST_CORE)) @@ -305,11 +320,22 @@ static int suspend_enter(suspend_state_t state) sysdev_resume(); } - device_power_up(PMSG_RESUME); - Done: arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); + + Enable_cpus: + enable_nonboot_cpus(); + + Platfrom_finish: + if (suspend_ops->finish) + suspend_ops->finish(); + + Power_up_devices: + device_power_up(PMSG_RESUME); + + Done: device_pm_unlock(); + return error; } @@ -341,23 +367,8 @@ int suspend_devices_and_enter(suspend_state_t state) if (suspend_test(TEST_DEVICES)) goto Recover_platform; - if (suspend_ops->prepare) { - error = suspend_ops->prepare(); - if (error) - goto Resume_devices; - } - - if (suspend_test(TEST_PLATFORM)) - goto Finish; - - error = disable_nonboot_cpus(); - if (!error && !suspend_test(TEST_CPUS)) - suspend_enter(state); + suspend_enter(state); - enable_nonboot_cpus(); - Finish: - if (suspend_ops->finish) - suspend_ops->finish(); Resume_devices: suspend_test_start(); device_resume(PMSG_RESUME); diff --git a/kernel/sched.c b/kernel/sched.c index 5757e03cfac..196d48babbe 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4942,15 +4942,13 @@ pick_next_task(struct rq *rq) /* * schedule() is the main scheduler function. */ -asmlinkage void __sched schedule(void) +asmlinkage void __sched __schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; struct rq *rq; int cpu; -need_resched: - preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); rcu_qsctr_inc(cpu); @@ -5007,13 +5005,80 @@ need_resched_nonpreemptible: if (unlikely(reacquire_kernel_lock(current) < 0)) goto need_resched_nonpreemptible; +} +asmlinkage void __sched schedule(void) +{ +need_resched: + preempt_disable(); + __schedule(); preempt_enable_no_resched(); if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) goto need_resched; } EXPORT_SYMBOL(schedule); +#ifdef CONFIG_SMP +/* + * Look out! "owner" is an entirely speculative pointer + * access and not reliable. + */ +int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) +{ + unsigned int cpu; + struct rq *rq; + + if (!sched_feat(OWNER_SPIN)) + return 0; + +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * Need to access the cpu field knowing that + * DEBUG_PAGEALLOC could have unmapped it if + * the mutex owner just released it and exited. 
+ */ + if (probe_kernel_address(&owner->cpu, cpu)) + goto out; +#else + cpu = owner->cpu; +#endif + + /* + * Even if the access succeeded (likely case), + * the cpu field may no longer be valid. + */ + if (cpu >= nr_cpumask_bits) + goto out; + + /* + * We need to validate that we can do a + * get_cpu() and that we have the percpu area. + */ + if (!cpu_online(cpu)) + goto out; + + rq = cpu_rq(cpu); + + for (;;) { + /* + * Owner changed, break to re-assess state. + */ + if (lock->owner != owner) + break; + + /* + * Is that owner really running on that cpu? + */ + if (task_thread_info(rq->curr) != owner || need_resched()) + return 0; + + cpu_relax(); + } +out: + return 1; +} +#endif + #ifdef CONFIG_PREEMPT /* * this is the entry point to schedule() from in-kernel preemption diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 76f61756e67..4569bfa7df9 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h @@ -14,3 +14,4 @@ SCHED_FEAT(LB_WAKEUP_UPDATE, 1) SCHED_FEAT(ASYM_EFF_LOAD, 1) SCHED_FEAT(WAKEUP_OVERLAP, 0) SCHED_FEAT(LAST_BUDDY, 1) +SCHED_FEAT(OWNER_SPIN, 1) diff --git a/kernel/timer.c b/kernel/timer.c index 9b77fc9a9ac..b4555568b4e 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -491,14 +491,18 @@ static inline void debug_timer_free(struct timer_list *timer) debug_object_free(timer, &timer_debug_descr); } -static void __init_timer(struct timer_list *timer); +static void __init_timer(struct timer_list *timer, + const char *name, + struct lock_class_key *key); -void init_timer_on_stack(struct timer_list *timer) +void init_timer_on_stack_key(struct timer_list *timer, + const char *name, + struct lock_class_key *key) { debug_object_init_on_stack(timer, &timer_debug_descr); - __init_timer(timer); + __init_timer(timer, name, key); } -EXPORT_SYMBOL_GPL(init_timer_on_stack); +EXPORT_SYMBOL_GPL(init_timer_on_stack_key); void destroy_timer_on_stack(struct timer_list *timer) { @@ -512,7 +516,9 @@ static inline void debug_timer_activate(struct timer_list *timer) { } static inline void debug_timer_deactivate(struct timer_list *timer) { } #endif -static void __init_timer(struct timer_list *timer) +static void __init_timer(struct timer_list *timer, + const char *name, + struct lock_class_key *key) { timer->entry.next = NULL; timer->base = __raw_get_cpu_var(tvec_bases); @@ -521,6 +527,7 @@ static void __init_timer(struct timer_list *timer) timer->start_pid = -1; memset(timer->start_comm, 0, TASK_COMM_LEN); #endif + lockdep_init_map(&timer->lockdep_map, name, key, 0); } /** @@ -530,19 +537,23 @@ static void __init_timer(struct timer_list *timer) * init_timer() must be done to a timer prior calling *any* of the * other timer functions. 
*/ -void init_timer(struct timer_list *timer) +void init_timer_key(struct timer_list *timer, + const char *name, + struct lock_class_key *key) { debug_timer_init(timer); - __init_timer(timer); + __init_timer(timer, name, key); } -EXPORT_SYMBOL(init_timer); +EXPORT_SYMBOL(init_timer_key); -void init_timer_deferrable(struct timer_list *timer) +void init_timer_deferrable_key(struct timer_list *timer, + const char *name, + struct lock_class_key *key) { - init_timer(timer); + init_timer_key(timer, name, key); timer_set_deferrable(timer); } -EXPORT_SYMBOL(init_timer_deferrable); +EXPORT_SYMBOL(init_timer_deferrable_key); static inline void detach_timer(struct timer_list *timer, int clear_pending) @@ -826,6 +837,15 @@ EXPORT_SYMBOL(try_to_del_timer_sync); */ int del_timer_sync(struct timer_list *timer) { +#ifdef CONFIG_LOCKDEP + unsigned long flags; + + local_irq_save(flags); + lock_map_acquire(&timer->lockdep_map); + lock_map_release(&timer->lockdep_map); + local_irq_restore(flags); +#endif + for (;;) { int ret = try_to_del_timer_sync(timer); if (ret >= 0) @@ -897,10 +917,36 @@ static inline void __run_timers(struct tvec_base *base) set_running_timer(base, timer); detach_timer(timer, 1); + spin_unlock_irq(&base->lock); { int preempt_count = preempt_count(); + +#ifdef CONFIG_LOCKDEP + /* + * It is permissible to free the timer from + * inside the function that is called from + * it, this we need to take into account for + * lockdep too. To avoid bogus "held lock + * freed" warnings as well as problems when + * looking into timer->lockdep_map, make a + * copy and use that here. + */ + struct lockdep_map lockdep_map = + timer->lockdep_map; +#endif + /* + * Couple the lock chain with the lock chain at + * del_timer_sync() by acquiring the lock_map + * around the fn() call here and in + * del_timer_sync(). + */ + lock_map_acquire(&lockdep_map); + fn(data); + + lock_map_release(&lockdep_map); + if (preempt_count != preempt_count()) { printk(KERN_ERR "huh, entered %p " "with preempt_count %08x, exited" diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 930c08e5b38..dce71a5b51b 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -42,6 +42,81 @@ static struct tracer_flags tracer_flags = { /* pid on the last trace processed */ static pid_t last_pid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = -1 }; +/* Add a function return address to the trace stack on thread info.*/ +int +ftrace_push_return_trace(unsigned long ret, unsigned long long time, + unsigned long func, int *depth) +{ + int index; + + if (!current->ret_stack) + return -EBUSY; + + /* The return trace stack is full */ + if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + atomic_inc(¤t->trace_overrun); + return -EBUSY; + } + + index = ++current->curr_ret_stack; + barrier(); + current->ret_stack[index].ret = ret; + current->ret_stack[index].func = func; + current->ret_stack[index].calltime = time; + *depth = index; + + return 0; +} + +/* Retrieve a function return address to the trace stack on thread info.*/ +void +ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) +{ + int index; + + index = current->curr_ret_stack; + + if (unlikely(index < 0)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic, otherwise we have no where to go */ + *ret = (unsigned long)panic; + return; + } + + *ret = current->ret_stack[index].ret; + trace->func = current->ret_stack[index].func; + trace->calltime = current->ret_stack[index].calltime; + trace->overrun = atomic_read(¤t->trace_overrun); + trace->depth = index; + barrier(); + current->curr_ret_stack--; + +} + +/* + * Send the trace to the ring-buffer. + * @return the original return address. + */ +unsigned long ftrace_return_to_handler(void) +{ + struct ftrace_graph_ret trace; + unsigned long ret; + + ftrace_pop_return_trace(&trace, &ret); + trace.rettime = cpu_clock(raw_smp_processor_id()); + ftrace_graph_return(&trace); + + if (unlikely(!ret)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic. What else to do? */ + ret = (unsigned long)panic; + } + + return ret; +} + static int graph_trace_init(struct trace_array *tr) { int cpu, ret; |