aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile8
-rw-r--r--kernel/acct.c1
-rw-r--r--kernel/audit.c8
-rw-r--r--kernel/audit.h1
-rw-r--r--kernel/auditfilter.c209
-rw-r--r--kernel/auditsc.c65
-rw-r--r--kernel/capability.c8
-rw-r--r--kernel/configs.c1
-rw-r--r--kernel/cpuset.c3
-rw-r--r--kernel/exec_domain.c1
-rw-r--r--kernel/exit.c41
-rw-r--r--kernel/fork.c52
-rw-r--r--kernel/futex.c36
-rw-r--r--kernel/hrtimer.c6
-rw-r--r--kernel/irq/Makefile2
-rw-r--r--kernel/irq/autoprobe.c56
-rw-r--r--kernel/irq/chip.c537
-rw-r--r--kernel/irq/handle.c158
-rw-r--r--kernel/irq/internals.h46
-rw-r--r--kernel/irq/manage.c185
-rw-r--r--kernel/irq/migration.c20
-rw-r--r--kernel/irq/proc.c30
-rw-r--r--kernel/irq/resend.c78
-rw-r--r--kernel/irq/spurious.c37
-rw-r--r--kernel/kexec.c6
-rw-r--r--kernel/kmod.c3
-rw-r--r--kernel/ksysfs.c1
-rw-r--r--kernel/lockdep.c2702
-rw-r--r--kernel/lockdep_internals.h78
-rw-r--r--kernel/lockdep_proc.c345
-rw-r--r--kernel/module.c128
-rw-r--r--kernel/mutex-debug.c399
-rw-r--r--kernel/mutex-debug.h94
-rw-r--r--kernel/mutex.c74
-rw-r--r--kernel/mutex.h19
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/params.c1
-rw-r--r--kernel/pid.c6
-rw-r--r--kernel/power/Kconfig12
-rw-r--r--kernel/printk.c24
-rw-r--r--kernel/profile.c1
-rw-r--r--kernel/ptrace.c6
-rw-r--r--kernel/rcupdate.c4
-rw-r--r--kernel/resource.c53
-rw-r--r--kernel/rtmutex-debug.c307
-rw-r--r--kernel/rtmutex-debug.h8
-rw-r--r--kernel/rtmutex-tester.c4
-rw-r--r--kernel/rtmutex.c57
-rw-r--r--kernel/rtmutex.h3
-rw-r--r--kernel/rwsem.c147
-rw-r--r--kernel/sched.c773
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/softirq.c141
-rw-r--r--kernel/spinlock.c80
-rw-r--r--kernel/stacktrace.c24
-rw-r--r--kernel/stop_machine.c17
-rw-r--r--kernel/sys.c1
-rw-r--r--kernel/sysctl.c15
-rw-r--r--kernel/timer.c13
-rw-r--r--kernel/wait.c5
-rw-r--r--kernel/workqueue.c2
61 files changed, 5545 insertions, 1606 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 82fb182f6f6..47dbcd570cd 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,10 +8,15 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
- hrtimer.o
+ hrtimer.o rwsem.o
+obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += time/
obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o
+obj-$(CONFIG_LOCKDEP) += lockdep.o
+ifeq ($(CONFIG_PROC_FS),y)
+obj-$(CONFIG_LOCKDEP) += lockdep_proc.o
+endif
obj-$(CONFIG_FUTEX) += futex.o
ifeq ($(CONFIG_COMPAT),y)
obj-$(CONFIG_FUTEX) += futex_compat.o
@@ -22,6 +27,7 @@ obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
+obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 126ca43d5d2..f18e0b8df3e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -43,7 +43,6 @@
* a struct file opened for write. Fixed. 2/6/2000, AV.
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/acct.h>
diff --git a/kernel/audit.c b/kernel/audit.c
index 82443fb433e..d417ca1db79 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -445,7 +445,7 @@ void audit_send_reply(int pid, int seq, int type, int done, int multi,
* Check for appropriate CAP_AUDIT_ capabilities on incoming audit
* control messages.
*/
-static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type)
+static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
{
int err = 0;
@@ -459,13 +459,13 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type)
case AUDIT_DEL:
case AUDIT_DEL_RULE:
case AUDIT_SIGNAL_INFO:
- if (!cap_raised(eff_cap, CAP_AUDIT_CONTROL))
+ if (security_netlink_recv(skb, CAP_AUDIT_CONTROL))
err = -EPERM;
break;
case AUDIT_USER:
case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG:
case AUDIT_FIRST_USER_MSG2...AUDIT_LAST_USER_MSG2:
- if (!cap_raised(eff_cap, CAP_AUDIT_WRITE))
+ if (security_netlink_recv(skb, CAP_AUDIT_WRITE))
err = -EPERM;
break;
default: /* bad msg */
@@ -488,7 +488,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
char *ctx;
u32 len;
- err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type);
+ err = audit_netlink_ok(skb, msg_type);
if (err)
return err;
diff --git a/kernel/audit.h b/kernel/audit.h
index 8323e4132a3..6aa33b848cf 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -81,6 +81,7 @@ struct audit_krule {
u32 mask[AUDIT_BITMASK_SIZE];
u32 buflen; /* for data alloc on list rules */
u32 field_count;
+ char *filterkey; /* ties events to rules */
struct audit_field *fields;
struct audit_field *inode_f; /* quick access to an inode field */
struct audit_watch *watch; /* associated watch */
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 4c99d2c586e..5b4e16276ca 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -141,6 +141,7 @@ static inline void audit_free_rule(struct audit_entry *e)
selinux_audit_rule_free(f->se_rule);
}
kfree(e->rule.fields);
+ kfree(e->rule.filterkey);
kfree(e);
}
@@ -278,6 +279,29 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len,
return 0;
}
+static __u32 *classes[AUDIT_SYSCALL_CLASSES];
+
+int __init audit_register_class(int class, unsigned *list)
+{
+ __u32 *p = kzalloc(AUDIT_BITMASK_SIZE * sizeof(__u32), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ while (*list != ~0U) {
+ unsigned n = *list++;
+ if (n >= AUDIT_BITMASK_SIZE * 32 - AUDIT_SYSCALL_CLASSES) {
+ kfree(p);
+ return -EINVAL;
+ }
+ p[AUDIT_WORD(n)] |= AUDIT_BIT(n);
+ }
+ if (class >= AUDIT_SYSCALL_CLASSES || classes[class]) {
+ kfree(p);
+ return -EINVAL;
+ }
+ classes[class] = p;
+ return 0;
+}
+
/* Common user-space to kernel rule translation. */
static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
{
@@ -321,6 +345,22 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
entry->rule.mask[i] = rule->mask[i];
+ for (i = 0; i < AUDIT_SYSCALL_CLASSES; i++) {
+ int bit = AUDIT_BITMASK_SIZE * 32 - i - 1;
+ __u32 *p = &entry->rule.mask[AUDIT_WORD(bit)];
+ __u32 *class;
+
+ if (!(*p & AUDIT_BIT(bit)))
+ continue;
+ *p &= ~AUDIT_BIT(bit);
+ class = classes[i];
+ if (class) {
+ int j;
+ for (j = 0; j < AUDIT_BITMASK_SIZE; j++)
+ entry->rule.mask[j] |= class[j];
+ }
+ }
+
return entry;
exit_err:
@@ -469,11 +509,16 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
case AUDIT_ARG2:
case AUDIT_ARG3:
break;
- case AUDIT_SE_USER:
- case AUDIT_SE_ROLE:
- case AUDIT_SE_TYPE:
- case AUDIT_SE_SEN:
- case AUDIT_SE_CLR:
+ case AUDIT_SUBJ_USER:
+ case AUDIT_SUBJ_ROLE:
+ case AUDIT_SUBJ_TYPE:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ case AUDIT_OBJ_USER:
+ case AUDIT_OBJ_ROLE:
+ case AUDIT_OBJ_TYPE:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
str = audit_unpack_string(&bufp, &remain, f->val);
if (IS_ERR(str))
goto exit_free;
@@ -511,6 +556,16 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
if (err)
goto exit_free;
break;
+ case AUDIT_FILTERKEY:
+ err = -EINVAL;
+ if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN)
+ goto exit_free;
+ str = audit_unpack_string(&bufp, &remain, f->val);
+ if (IS_ERR(str))
+ goto exit_free;
+ entry->rule.buflen += f->val;
+ entry->rule.filterkey = str;
+ break;
default:
goto exit_free;
}
@@ -600,11 +655,16 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->fields[i] = f->type;
data->fieldflags[i] = f->op;
switch(f->type) {
- case AUDIT_SE_USER:
- case AUDIT_SE_ROLE:
- case AUDIT_SE_TYPE:
- case AUDIT_SE_SEN:
- case AUDIT_SE_CLR:
+ case AUDIT_SUBJ_USER:
+ case AUDIT_SUBJ_ROLE:
+ case AUDIT_SUBJ_TYPE:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ case AUDIT_OBJ_USER:
+ case AUDIT_OBJ_ROLE:
+ case AUDIT_OBJ_TYPE:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
data->buflen += data->values[i] =
audit_pack_string(&bufp, f->se_str);
break;
@@ -612,6 +672,10 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->buflen += data->values[i] =
audit_pack_string(&bufp, krule->watch->path);
break;
+ case AUDIT_FILTERKEY:
+ data->buflen += data->values[i] =
+ audit_pack_string(&bufp, krule->filterkey);
+ break;
default:
data->values[i] = f->val;
}
@@ -639,11 +703,16 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
return 1;
switch(a->fields[i].type) {
- case AUDIT_SE_USER:
- case AUDIT_SE_ROLE:
- case AUDIT_SE_TYPE:
- case AUDIT_SE_SEN:
- case AUDIT_SE_CLR:
+ case AUDIT_SUBJ_USER:
+ case AUDIT_SUBJ_ROLE:
+ case AUDIT_SUBJ_TYPE:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ case AUDIT_OBJ_USER:
+ case AUDIT_OBJ_ROLE:
+ case AUDIT_OBJ_TYPE:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
return 1;
break;
@@ -651,6 +720,11 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
if (strcmp(a->watch->path, b->watch->path))
return 1;
break;
+ case AUDIT_FILTERKEY:
+ /* both filterkeys exist based on above type compare */
+ if (strcmp(a->filterkey, b->filterkey))
+ return 1;
+ break;
default:
if (a->fields[i].val != b->fields[i].val)
return 1;
@@ -730,6 +804,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
u32 fcount = old->field_count;
struct audit_entry *entry;
struct audit_krule *new;
+ char *fk;
int i, err = 0;
entry = audit_init_entry(fcount);
@@ -753,13 +828,25 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
* the originals will all be freed when the old rule is freed. */
for (i = 0; i < fcount; i++) {
switch (new->fields[i].type) {
- case AUDIT_SE_USER:
- case AUDIT_SE_ROLE:
- case AUDIT_SE_TYPE:
- case AUDIT_SE_SEN:
- case AUDIT_SE_CLR:
+ case AUDIT_SUBJ_USER:
+ case AUDIT_SUBJ_ROLE:
+ case AUDIT_SUBJ_TYPE:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ case AUDIT_OBJ_USER:
+ case AUDIT_OBJ_ROLE:
+ case AUDIT_OBJ_TYPE:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
err = audit_dupe_selinux_field(&new->fields[i],
&old->fields[i]);
+ break;
+ case AUDIT_FILTERKEY:
+ fk = kstrdup(old->filterkey, GFP_KERNEL);
+ if (unlikely(!fk))
+ err = -ENOMEM;
+ else
+ new->filterkey = fk;
}
if (err) {
audit_free_rule(entry);
@@ -1245,6 +1332,34 @@ static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
skb_queue_tail(q, skb);
}
+/* Log rule additions and removals */
+static void audit_log_rule_change(uid_t loginuid, u32 sid, char *action,
+ struct audit_krule *rule, int res)
+{
+ struct audit_buffer *ab;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ if (!ab)
+ return;
+ audit_log_format(ab, "auid=%u", loginuid);
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ if (selinux_ctxid_to_string(sid, &ctx, &len))
+ audit_log_format(ab, " ssid=%u", sid);
+ else
+ audit_log_format(ab, " subj=%s", ctx);
+ kfree(ctx);
+ }
+ audit_log_format(ab, " %s rule key=", action);
+ if (rule->filterkey)
+ audit_log_untrustedstring(ab, rule->filterkey);
+ else
+ audit_log_format(ab, "(null)");
+ audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
+ audit_log_end(ab);
+}
+
/**
* audit_receive_filter - apply all rules to the specified message type
* @type: audit message type
@@ -1304,24 +1419,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
err = audit_add_rule(entry,
&audit_filter_list[entry->rule.listnr]);
-
- if (sid) {
- char *ctx = NULL;
- u32 len;
- if (selinux_ctxid_to_string(sid, &ctx, &len)) {
- /* Maybe call audit_panic? */
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u ssid=%u add rule to list=%d res=%d",
- loginuid, sid, entry->rule.listnr, !err);
- } else
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u subj=%s add rule to list=%d res=%d",
- loginuid, ctx, entry->rule.listnr, !err);
- kfree(ctx);
- } else
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u add rule to list=%d res=%d",
- loginuid, entry->rule.listnr, !err);
+ audit_log_rule_change(loginuid, sid, "add", &entry->rule, !err);
if (err)
audit_free_rule(entry);
@@ -1337,24 +1435,8 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
err = audit_del_rule(entry,
&audit_filter_list[entry->rule.listnr]);
-
- if (sid) {
- char *ctx = NULL;
- u32 len;
- if (selinux_ctxid_to_string(sid, &ctx, &len)) {
- /* Maybe call audit_panic? */
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u ssid=%u remove rule from list=%d res=%d",
- loginuid, sid, entry->rule.listnr, !err);
- } else
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u subj=%s remove rule from list=%d res=%d",
- loginuid, ctx, entry->rule.listnr, !err);
- kfree(ctx);
- } else
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u remove rule from list=%d res=%d",
- loginuid, entry->rule.listnr, !err);
+ audit_log_rule_change(loginuid, sid, "remove", &entry->rule,
+ !err);
audit_free_rule(entry);
break;
@@ -1514,11 +1596,16 @@ static inline int audit_rule_has_selinux(struct audit_krule *rule)
for (i = 0; i < rule->field_count; i++) {
struct audit_field *f = &rule->fields[i];
switch (f->type) {
- case AUDIT_SE_USER:
- case AUDIT_SE_ROLE:
- case AUDIT_SE_TYPE:
- case AUDIT_SE_SEN:
- case AUDIT_SE_CLR:
+ case AUDIT_SUBJ_USER:
+ case AUDIT_SUBJ_ROLE:
+ case AUDIT_SUBJ_TYPE:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
+ case AUDIT_OBJ_USER:
+ case AUDIT_OBJ_ROLE:
+ case AUDIT_OBJ_TYPE:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
return 1;
}
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index dc5e3f01efe..ae40ac8c39e 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -186,6 +186,7 @@ struct audit_context {
int auditable; /* 1 if record should be written */
int name_count;
struct audit_names names[AUDIT_NAMES];
+ char * filterkey; /* key for rule that triggered record */
struct dentry * pwd;
struct vfsmount * pwdmnt;
struct audit_context *previous; /* For nested syscalls */
@@ -320,11 +321,11 @@ static int audit_filter_rules(struct task_struct *tsk,
if (ctx)
result = audit_comparator(ctx->loginuid, f->op, f->val);
break;
- case AUDIT_SE_USER:
- case AUDIT_SE_ROLE:
- case AUDIT_SE_TYPE:
- case AUDIT_SE_SEN:
- case AUDIT_SE_CLR:
+ case AUDIT_SUBJ_USER:
+ case AUDIT_SUBJ_ROLE:
+ case AUDIT_SUBJ_TYPE:
+ case AUDIT_SUBJ_SEN:
+ case AUDIT_SUBJ_CLR:
/* NOTE: this may return negative values indicating
a temporary error. We simply treat this as a
match for now to avoid losing information that
@@ -341,6 +342,46 @@ static int audit_filter_rules(struct task_struct *tsk,
ctx);
}
break;
+ case AUDIT_OBJ_USER:
+ case AUDIT_OBJ_ROLE:
+ case AUDIT_OBJ_TYPE:
+ case AUDIT_OBJ_LEV_LOW:
+ case AUDIT_OBJ_LEV_HIGH:
+ /* The above note for AUDIT_SUBJ_USER...AUDIT_SUBJ_CLR
+ also applies here */
+ if (f->se_rule) {
+ /* Find files that match */
+ if (name) {
+ result = selinux_audit_rule_match(
+ name->osid, f->type, f->op,
+ f->se_rule, ctx);
+ } else if (ctx) {
+ for (j = 0; j < ctx->name_count; j++) {
+ if (selinux_audit_rule_match(
+ ctx->names[j].osid,
+ f->type, f->op,
+ f->se_rule, ctx)) {
+ ++result;
+ break;
+ }
+ }
+ }
+ /* Find ipc objects that match */
+ if (ctx) {
+ struct audit_aux_data *aux;
+ for (aux = ctx->aux; aux;
+ aux = aux->next) {
+ if (aux->type == AUDIT_IPC) {
+ struct audit_aux_data_ipcctl *axi = (void *)aux;
+ if (selinux_audit_rule_match(axi->osid, f->type, f->op, f->se_rule, ctx)) {
+ ++result;
+ break;
+ }
+ }
+ }
+ }
+ }
+ break;
case AUDIT_ARG0:
case AUDIT_ARG1:
case AUDIT_ARG2:
@@ -348,11 +389,17 @@ static int audit_filter_rules(struct task_struct *tsk,
if (ctx)
result = audit_comparator(ctx->argv[f->type-AUDIT_ARG0], f->op, f->val);
break;
+ case AUDIT_FILTERKEY:
+ /* ignore this field for filtering */
+ result = 1;
+ break;
}
if (!result)
return 0;
}
+ if (rule->filterkey)
+ ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC);
switch (rule->action) {
case AUDIT_NEVER: *state = AUDIT_DISABLED; break;
case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break;
@@ -627,6 +674,7 @@ static inline void audit_free_context(struct audit_context *context)
}
audit_free_names(context);
audit_free_aux(context);
+ kfree(context->filterkey);
kfree(context);
context = previous;
} while (context);
@@ -735,6 +783,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->euid, context->suid, context->fsuid,
context->egid, context->sgid, context->fsgid, tty);
audit_log_task_info(ab, tsk);
+ if (context->filterkey) {
+ audit_log_format(ab, " key=");
+ audit_log_untrustedstring(ab, context->filterkey);
+ } else
+ audit_log_format(ab, " key=(null)");
audit_log_end(ab);
for (aux = context->aux; aux; aux = aux->next) {
@@ -1060,6 +1113,8 @@ void audit_syscall_exit(int valid, long return_code)
} else {
audit_free_names(context);
audit_free_aux(context);
+ kfree(context->filterkey);
+ context->filterkey = NULL;
tsk->audit_context = context;
}
}
diff --git a/kernel/capability.c b/kernel/capability.c
index 1a4d8a40d3f..c7685ad00a9 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -46,7 +46,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
int ret = 0;
pid_t pid;
__u32 version;
- task_t *target;
+ struct task_struct *target;
struct __user_cap_data_struct data;
if (get_user(version, &header->version))
@@ -96,7 +96,7 @@ static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
- task_t *g, *target;
+ struct task_struct *g, *target;
int ret = -EPERM;
int found = 0;
@@ -128,7 +128,7 @@ static inline int cap_set_all(kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
- task_t *g, *target;
+ struct task_struct *g, *target;
int ret = -EPERM;
int found = 0;
@@ -172,7 +172,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
{
kernel_cap_t inheritable, permitted, effective;
__u32 version;
- task_t *target;
+ struct task_struct *target;
int ret;
pid_t pid;
diff --git a/kernel/configs.c b/kernel/configs.c
index 009e1ebdcb8..f9e31974f4a 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -23,7 +23,6 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
-#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 1535af3a912..c232dc07743 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -18,7 +18,6 @@
* distribution for more details.
*/
-#include <linux/config.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
@@ -1064,7 +1063,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
}
/*
- * Frequency meter - How fast is some event occuring?
+ * Frequency meter - How fast is some event occurring?
*
* These routines manage a digitally filtered, constant time based,
* event frequency meter. There are four routines:
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index c01cead2cfd..3c2eaea66b1 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -7,7 +7,6 @@
* 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org)
*/
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
diff --git a/kernel/exit.c b/kernel/exit.c
index ab06b9f88f6..6664c084783 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -4,7 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/config.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
@@ -135,8 +134,8 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
void release_task(struct task_struct * p)
{
+ struct task_struct *leader;
int zap_leader;
- task_t *leader;
repeat:
atomic_dec(&p->user->processes);
write_lock_irq(&tasklist_lock);
@@ -210,7 +209,7 @@ out:
*
* "I ask you, have you ever known what it is to be an orphan?"
*/
-static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
+static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
{
struct task_struct *p;
int ret = 1;
@@ -583,7 +582,8 @@ static void exit_mm(struct task_struct * tsk)
mmput(mm);
}
-static inline void choose_new_parent(task_t *p, task_t *reaper)
+static inline void
+choose_new_parent(struct task_struct *p, struct task_struct *reaper)
{
/*
* Make sure we're not reparenting to ourselves and that
@@ -593,7 +593,8 @@ static inline void choose_new_parent(task_t *p, task_t *reaper)
p->real_parent = reaper;
}
-static void reparent_thread(task_t *p, task_t *father, int traced)
+static void
+reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
{
/* We don't want people slaying init. */
if (p->exit_signal != -1)
@@ -657,8 +658,8 @@ static void reparent_thread(task_t *p, task_t *father, int traced)
* group, and if no such member exists, give it to
* the global child reaper process (ie "init")
*/
-static void forget_original_parent(struct task_struct * father,
- struct list_head *to_release)
+static void
+forget_original_parent(struct task_struct *father, struct list_head *to_release)
{
struct task_struct *p, *reaper = father;
struct list_head *_p, *_n;
@@ -681,7 +682,7 @@ static void forget_original_parent(struct task_struct * father,
*/
list_for_each_safe(_p, _n, &father->children) {
int ptrace;
- p = list_entry(_p,struct task_struct,sibling);
+ p = list_entry(_p, struct task_struct, sibling);
ptrace = p->ptrace;
@@ -710,7 +711,7 @@ static void forget_original_parent(struct task_struct * father,
list_add(&p->ptrace_list, to_release);
}
list_for_each_safe(_p, _n, &father->ptrace_children) {
- p = list_entry(_p,struct task_struct,ptrace_list);
+ p = list_entry(_p, struct task_struct, ptrace_list);
choose_new_parent(p, reaper);
reparent_thread(p, father, 1);
}
@@ -830,7 +831,7 @@ static void exit_notify(struct task_struct *tsk)
list_for_each_safe(_p, _n, &ptrace_dead) {
list_del_init(_p);
- t = list_entry(_p,struct task_struct,ptrace_list);
+ t = list_entry(_p, struct task_struct, ptrace_list);
release_task(t);
}
@@ -934,10 +935,9 @@ fastcall NORET_TYPE void do_exit(long code)
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
/*
- * If DEBUG_MUTEXES is on, make sure we are holding no locks:
+ * Make sure we are holding no locks:
*/
- mutex_debug_check_no_locks_held(tsk);
- rt_mutex_debug_check_no_locks_held(tsk);
+ debug_check_no_locks_held(tsk);
if (tsk->io_context)
exit_io_context();
@@ -1012,7 +1012,7 @@ asmlinkage void sys_exit_group(int error_code)
do_group_exit((error_code & 0xff) << 8);
}
-static int eligible_child(pid_t pid, int options, task_t *p)
+static int eligible_child(pid_t pid, int options, struct task_struct *p)
{
if (pid > 0) {
if (p->pid != pid)
@@ -1053,12 +1053,13 @@ static int eligible_child(pid_t pid, int options, task_t *p)
return 1;
}
-static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
+static int wait_noreap_copyout(struct task_struct *p, pid_t pid, uid_t uid,
int why, int status,
struct siginfo __user *infop,
struct rusage __user *rusagep)
{
int retval = rusagep ? getrusage(p, RUSAGE_BOTH, rusagep) : 0;
+
put_task_struct(p);
if (!retval)
retval = put_user(SIGCHLD, &infop->si_signo);
@@ -1083,7 +1084,7 @@ static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_zombie(task_t *p, int noreap,
+static int wait_task_zombie(struct task_struct *p, int noreap,
struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
@@ -1245,8 +1246,8 @@ static int wait_task_zombie(task_t *p, int noreap,
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
- struct siginfo __user *infop,
+static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
+ int noreap, struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
int retval, exit_code;
@@ -1360,7 +1361,7 @@ bail_ref:
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_continued(task_t *p, int noreap,
+static int wait_task_continued(struct task_struct *p, int noreap,
struct siginfo __user *infop,
int __user *stat_addr, struct rusage __user *ru)
{
@@ -1446,7 +1447,7 @@ repeat:
int ret;
list_for_each(_p,&tsk->children) {
- p = list_entry(_p,struct task_struct,sibling);
+ p = list_entry(_p, struct task_struct, sibling);
ret = eligible_child(pid, options, p);
if (!ret)
diff --git a/kernel/fork.c b/kernel/fork.c
index 628198a4f28..56e4e07e45f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -11,7 +11,6 @@
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
*/
-#include <linux/config.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/unistd.h>
@@ -194,7 +193,10 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
down_write(&oldmm->mmap_sem);
flush_cache_mm(oldmm);
- down_write(&mm->mmap_sem);
+ /*
+ * Not linked in yet - no deadlock potential:
+ */
+ down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
mm->locked_vm = 0;
mm->mmap = NULL;
@@ -920,10 +922,6 @@ static inline void rt_mutex_init_task(struct task_struct *p)
spin_lock_init(&p->pi_lock);
plist_head_init(&p->pi_waiters, &p->pi_lock);
p->pi_blocked_on = NULL;
-# ifdef CONFIG_DEBUG_RT_MUTEXES
- spin_lock_init(&p->held_list_lock);
- INIT_LIST_HEAD(&p->held_list_head);
-# endif
#endif
}
@@ -935,13 +933,13 @@ static inline void rt_mutex_init_task(struct task_struct *p)
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
-static task_t *copy_process(unsigned long clone_flags,
- unsigned long stack_start,
- struct pt_regs *regs,
- unsigned long stack_size,
- int __user *parent_tidptr,
- int __user *child_tidptr,
- int pid)
+static struct task_struct *copy_process(unsigned long clone_flags,
+ unsigned long stack_start,
+ struct pt_regs *regs,
+ unsigned long stack_size,
+ int __user *parent_tidptr,
+ int __user *child_tidptr,
+ int pid)
{
int retval;
struct task_struct *p = NULL;
@@ -973,6 +971,10 @@ static task_t *copy_process(unsigned long clone_flags,
if (!p)
goto fork_out;
+#ifdef CONFIG_TRACE_IRQFLAGS
+ DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
+ DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
+#endif
retval = -EAGAIN;
if (atomic_read(&p->user->processes) >=
p->signal->rlim[RLIMIT_NPROC].rlim_cur) {
@@ -1047,6 +1049,26 @@ static task_t *copy_process(unsigned long clone_flags,
}
mpol_fix_fork_child_flag(p);
#endif
+#ifdef CONFIG_TRACE_IRQFLAGS
+ p->irq_events = 0;
+ p->hardirqs_enabled = 0;
+ p->hardirq_enable_ip = 0;
+ p->hardirq_enable_event = 0;
+ p->hardirq_disable_ip = _THIS_IP_;
+ p->hardirq_disable_event = 0;
+ p->softirqs_enabled = 1;
+ p->softirq_enable_ip = _THIS_IP_;
+ p->softirq_enable_event = 0;
+ p->softirq_disable_ip = 0;
+ p->softirq_disable_event = 0;
+ p->hardirq_context = 0;
+ p->softirq_context = 0;
+#endif
+#ifdef CONFIG_LOCKDEP
+ p->lockdep_depth = 0; /* no locks held yet */
+ p->curr_chain_key = 0;
+ p->lockdep_recursion = 0;
+#endif
rt_mutex_init_task(p);
@@ -1272,9 +1294,9 @@ struct pt_regs * __devinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
return regs;
}
-task_t * __devinit fork_idle(int cpu)
+struct task_struct * __devinit fork_idle(int cpu)
{
- task_t *task;
+ struct task_struct *task;
struct pt_regs regs;
task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL, NULL, 0);
diff --git a/kernel/futex.c b/kernel/futex.c
index 6c91f938005..1dc98e4dd28 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -607,6 +607,22 @@ static int unlock_futex_pi(u32 __user *uaddr, u32 uval)
}
/*
+ * Express the locking dependencies for lockdep:
+ */
+static inline void
+double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
+{
+ if (hb1 <= hb2) {
+ spin_lock(&hb1->lock);
+ if (hb1 < hb2)
+ spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
+ } else { /* hb1 > hb2 */
+ spin_lock(&hb2->lock);
+ spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
+ }
+}
+
+/*
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
*/
@@ -630,8 +646,10 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
list_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key)) {
- if (this->pi_state)
- return -EINVAL;
+ if (this->pi_state) {
+ ret = -EINVAL;
+ break;
+ }
wake_futex(this);
if (++ret >= nr_wake)
break;
@@ -672,11 +690,7 @@ retryfull:
hb2 = hash_futex(&key2);
retry:
- if (hb1 < hb2)
- spin_lock(&hb1->lock);
- spin_lock(&hb2->lock);
- if (hb1 > hb2)
- spin_lock(&hb1->lock);
+ double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
@@ -785,11 +799,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
- if (hb1 < hb2)
- spin_lock(&hb1->lock);
- spin_lock(&hb2->lock);
- if (hb1 > hb2)
- spin_lock(&hb1->lock);
+ double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
u32 curval;
@@ -1208,7 +1218,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
}
down_read(&curr->mm->mmap_sem);
- hb = queue_lock(&q, -1, NULL);
+ spin_lock(q.lock_ptr);
/*
* Got the lock. We might not be the anticipated owner if we
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 8d3dc29ef41..d17766d40da 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -669,7 +669,7 @@ static int hrtimer_wakeup(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, task_t *task)
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
{
sl->timer.function = hrtimer_wakeup;
sl->task = task;
@@ -782,8 +782,10 @@ static void __devinit init_hrtimers_cpu(int cpu)
struct hrtimer_base *base = per_cpu(hrtimer_bases, cpu);
int i;
- for (i = 0; i < MAX_HRTIMER_BASES; i++, base++)
+ for (i = 0; i < MAX_HRTIMER_BASES; i++, base++) {
spin_lock_init(&base->lock);
+ lockdep_set_class(&base->lock, &base->lock_key);
+ }
}
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index 9f77f50d814..1dab0ac3f79 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -1,5 +1,5 @@
-obj-y := handle.o manage.o spurious.o
+obj-y := handle.o manage.o spurious.o resend.o chip.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 3467097ca61..533068cfb60 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -11,12 +11,14 @@
#include <linux/interrupt.h>
#include <linux/delay.h>
+#include "internals.h"
+
/*
* Autodetection depends on the fact that any interrupt that
* comes in on to an unassigned handler will get stuck with
* "IRQ_WAITING" cleared and the interrupt disabled.
*/
-static DECLARE_MUTEX(probe_sem);
+static DEFINE_MUTEX(probing_active);
/**
* probe_irq_on - begin an interrupt autodetect
@@ -27,11 +29,11 @@ static DECLARE_MUTEX(probe_sem);
*/
unsigned long probe_irq_on(void)
{
- unsigned long val;
- irq_desc_t *desc;
+ struct irq_desc *desc;
+ unsigned long mask;
unsigned int i;
- down(&probe_sem);
+ mutex_lock(&probing_active);
/*
* something may have generated an irq long ago and we want to
* flush such a longstanding irq before considering it as spurious.
@@ -40,8 +42,21 @@ unsigned long probe_irq_on(void)
desc = irq_desc + i;
spin_lock_irq(&desc->lock);
- if (!irq_desc[i].action)
- irq_desc[i].handler->startup(i);
+ if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
+ /*
+ * An old-style architecture might still have
+ * the handle_bad_irq handler there:
+ */
+ compat_irq_chip_set_default_handler(desc);
+
+ /*
+ * Some chips need to know about probing in
+ * progress:
+ */
+ if (desc->chip->set_type)
+ desc->chip->set_type(i, IRQ_TYPE_PROBE);
+ desc->chip->startup(i);
+ }
spin_unlock_irq(&desc->lock);
}
@@ -57,9 +72,9 @@ unsigned long probe_irq_on(void)
desc = irq_desc + i;
spin_lock_irq(&desc->lock);
- if (!desc->action) {
+ if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
- if (desc->handler->startup(i))
+ if (desc->chip->startup(i))
desc->status |= IRQ_PENDING;
}
spin_unlock_irq(&desc->lock);
@@ -73,11 +88,11 @@ unsigned long probe_irq_on(void)
/*
* Now filter out any obviously spurious interrupts
*/
- val = 0;
+ mask = 0;
for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_desc + i;
unsigned int status;
+ desc = irq_desc + i;
spin_lock_irq(&desc->lock);
status = desc->status;
@@ -85,17 +100,16 @@ unsigned long probe_irq_on(void)
/* It triggered already - consider it spurious. */
if (!(status & IRQ_WAITING)) {
desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
+ desc->chip->shutdown(i);
} else
if (i < 32)
- val |= 1 << i;
+ mask |= 1 << i;
}
spin_unlock_irq(&desc->lock);
}
- return val;
+ return mask;
}
-
EXPORT_SYMBOL(probe_irq_on);
/**
@@ -117,7 +131,7 @@ unsigned int probe_irq_mask(unsigned long val)
mask = 0;
for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_desc + i;
+ struct irq_desc *desc = irq_desc + i;
unsigned int status;
spin_lock_irq(&desc->lock);
@@ -128,11 +142,11 @@ unsigned int probe_irq_mask(unsigned long val)
mask |= 1 << i;
desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
+ desc->chip->shutdown(i);
}
spin_unlock_irq(&desc->lock);
}
- up(&probe_sem);
+ mutex_unlock(&probing_active);
return mask & val;
}
@@ -160,7 +174,7 @@ int probe_irq_off(unsigned long val)
int i, irq_found = 0, nr_irqs = 0;
for (i = 0; i < NR_IRQS; i++) {
- irq_desc_t *desc = irq_desc + i;
+ struct irq_desc *desc = irq_desc + i;
unsigned int status;
spin_lock_irq(&desc->lock);
@@ -173,16 +187,16 @@ int probe_irq_off(unsigned long val)
nr_irqs++;
}
desc->status = status & ~IRQ_AUTODETECT;
- desc->handler->shutdown(i);
+ desc->chip->shutdown(i);
}
spin_unlock_irq(&desc->lock);
}
- up(&probe_sem);
+ mutex_unlock(&probing_active);
if (nr_irqs > 1)
irq_found = -irq_found;
+
return irq_found;
}
-
EXPORT_SYMBOL(probe_irq_off);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
new file mode 100644
index 00000000000..9336f2e89e4
--- /dev/null
+++ b/kernel/irq/chip.c
@@ -0,0 +1,537 @@
+/*
+ * linux/kernel/irq/chip.c
+ *
+ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
+ *
+ * This file contains the core interrupt handling code, for irq-chip
+ * based architectures.
+ *
+ * Detailed information is available in Documentation/DocBook/genericirq
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+
+#include "internals.h"
+
+/**
+ * set_irq_chip - set the irq chip for an irq
+ * @irq: irq number
+ * @chip: pointer to irq chip description structure
+ */
+int set_irq_chip(unsigned int irq, struct irq_chip *chip)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS) {
+ printk(KERN_ERR "Trying to install chip for IRQ%d\n", irq);
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (!chip)
+ chip = &no_irq_chip;
+
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock, flags);
+ irq_chip_set_defaults(chip);
+ desc->chip = chip;
+ /*
+ * For compatibility only:
+ */
+ desc->chip = chip;
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(set_irq_chip);
+
+/**
+ * set_irq_type - set the irq type for an irq
+ * @irq: irq number
+ * @type: interrupt type - see include/linux/interrupt.h
+ */
+int set_irq_type(unsigned int irq, unsigned int type)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+ int ret = -ENXIO;
+
+ if (irq >= NR_IRQS) {
+ printk(KERN_ERR "Trying to set irq type for IRQ%d\n", irq);
+ return -ENODEV;
+ }
+
+ desc = irq_desc + irq;
+ if (desc->chip->set_type) {
+ spin_lock_irqsave(&desc->lock, flags);
+ ret = desc->chip->set_type(irq, type);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(set_irq_type);
+
+/**
+ * set_irq_data - set irq type data for an irq
+ * @irq: Interrupt number
+ * @data: Pointer to interrupt specific data
+ *
+ * Set the hardware irq controller data for an irq
+ */
+int set_irq_data(unsigned int irq, void *data)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS) {
+ printk(KERN_ERR
+ "Trying to install controller data for IRQ%d\n", irq);
+ return -EINVAL;
+ }
+
+ desc = irq_desc + irq;
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->handler_data = data;
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return 0;
+}
+EXPORT_SYMBOL(set_irq_data);
+
+/**
+ * set_irq_chip_data - set irq chip data for an irq
+ * @irq: Interrupt number
+ * @data: Pointer to chip specific data
+ *
+ * Set the hardware irq chip data for an irq
+ */
+int set_irq_chip_data(unsigned int irq, void *data)
+{
+ struct irq_desc *desc = irq_desc + irq;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS || !desc->chip) {
+ printk(KERN_ERR "BUG: bad set_irq_chip_data(IRQ#%d)\n", irq);
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&desc->lock, flags);
+ desc->chip_data = data;
+ spin_unlock_irqrestore(&desc->lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL(set_irq_chip_data);
+
+/*
+ * default enable function
+ */
+static void default_enable(unsigned int irq)
+{
+ struct irq_desc *desc = irq_desc + irq;
+
+ desc->chip->unmask(irq);
+ desc->status &= ~IRQ_MASKED;
+}
+
+/*
+ * default disable function
+ */
+static void default_disable(unsigned int irq)
+{
+ struct irq_desc *desc = irq_desc + irq;
+
+ if (!(desc->status & IRQ_DELAYED_DISABLE))
+ irq_desc[irq].chip->mask(irq);
+}
+
+/*
+ * default startup function
+ */
+static unsigned int default_startup(unsigned int irq)
+{
+ irq_desc[irq].chip->enable(irq);
+
+ return 0;
+}
+
+/*
+ * Fixup enable/disable function pointers
+ */
+void irq_chip_set_defaults(struct irq_chip *chip)
+{
+ if (!chip->enable)
+ chip->enable = default_enable;
+ if (!chip->disable)
+ chip->disable = default_disable;
+ if (!chip->startup)
+ chip->startup = default_startup;
+ if (!chip->shutdown)
+ chip->shutdown = chip->disable;
+ if (!chip->name)
+ chip->name = chip->typename;
+}
+
+static inline void mask_ack_irq(struct irq_desc *desc, int irq)
+{
+ if (desc->chip->mask_ack)
+ desc->chip->mask_ack(irq);
+ else {
+ desc->chip->mask(irq);
+ desc->chip->ack(irq);
+ }
+}
+
+/**
+ * handle_simple_irq - Simple and software-decoded IRQs.
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ * @regs: pointer to a register structure
+ *
+ * Simple interrupts are either sent from a demultiplexing interrupt
+ * handler or come from hardware, where no interrupt hardware control
+ * is necessary.
+ *
+ * Note: The caller is expected to handle the ack, clear, mask and
+ * unmask issues if necessary.
+ */
+void fastcall
+handle_simple_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+{
+ struct irqaction *action;
+ irqreturn_t action_ret;
+ const unsigned int cpu = smp_processor_id();
+
+ spin_lock(&desc->lock);
+
+ if (unlikely(desc->status & IRQ_INPROGRESS))
+ goto out_unlock;
+ desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ kstat_cpu(cpu).irqs[irq]++;
+
+ action = desc->action;
+ if (unlikely(!action || (desc->status & IRQ_DISABLED)))
+ goto out_unlock;
+
+ desc->status |= IRQ_INPROGRESS;
+ spin_unlock(&desc->lock);
+
+ action_ret = handle_IRQ_event(irq, regs, action);
+ if (!noirqdebug)
+ note_interrupt(irq, desc, action_ret, regs);
+
+ spin_lock(&desc->lock);
+ desc->status &= ~IRQ_INPROGRESS;
+out_unlock:
+ spin_unlock(&desc->lock);
+}
+
+/**
+ * handle_level_irq - Level type irq handler
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ * @regs: pointer to a register structure
+ *
+ * Level type interrupts are active as long as the hardware line has
+ * the active level. This may require to mask the interrupt and unmask
+ * it after the associated handler has acknowledged the device, so the
+ * interrupt line is back to inactive.
+ */
+void fastcall
+handle_level_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+{
+ unsigned int cpu = smp_processor_id();
+ struct irqaction *action;
+ irqreturn_t action_ret;
+
+ spin_lock(&desc->lock);
+ mask_ack_irq(desc, irq);
+
+ if (unlikely(desc->status & IRQ_INPROGRESS))
+ goto out;
+ desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ kstat_cpu(cpu).irqs[irq]++;
+
+ /*
+ * If its disabled or no action available
+ * keep it masked and get out of here
+ */
+ action = desc->action;
+ if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
+ desc->status |= IRQ_PENDING;
+ goto out;
+ }
+
+ desc->status |= IRQ_INPROGRESS;
+ desc->status &= ~IRQ_PENDING;
+ spin_unlock(&desc->lock);
+
+ action_ret = handle_IRQ_event(irq, regs, action);
+ if (!noirqdebug)
+ note_interrupt(irq, desc, action_ret, regs);
+
+ spin_lock(&desc->lock);
+ desc->status &= ~IRQ_INPROGRESS;
+out:
+ if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask)
+ desc->chip->unmask(irq);
+ spin_unlock(&desc->lock);
+}
+
+/**
+ * handle_fasteoi_irq - irq handler for transparent controllers
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ * @regs: pointer to a register structure
+ *
+ * Only a single callback will be issued to the chip: an ->eoi()
+ * call when the interrupt has been serviced. This enables support
+ * for modern forms of interrupt handlers, which handle the flow
+ * details in hardware, transparently.
+ */
+void fastcall
+handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc,
+ struct pt_regs *regs)
+{
+ unsigned int cpu = smp_processor_id();
+ struct irqaction *action;
+ irqreturn_t action_ret;
+
+ spin_lock(&desc->lock);
+
+ if (unlikely(desc->status & IRQ_INPROGRESS))
+ goto out;
+
+ desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+ kstat_cpu(cpu).irqs[irq]++;
+
+ /*
+ * If its disabled or no action available
+ * keep it masked and get out of here
+ */
+ action = desc->action;
+ if (unlikely(!action || (desc->status & IRQ_DISABLED))) {
+ desc->status |= IRQ_PENDING;
+ goto out;
+ }
+
+ desc->status |= IRQ_INPROGRESS;
+ desc->status &= ~IRQ_PENDING;
+ spin_unlock(&desc->lock);
+
+ action_ret = handle_IRQ_event(irq, regs, action);
+ if (!noirqdebug)
+ note_interrupt(irq, desc, action_ret, regs);
+
+ spin_lock(&desc->lock);
+ desc->status &= ~IRQ_INPROGRESS;
+out:
+ desc->chip->eoi(irq);
+
+ spin_unlock(&desc->lock);
+}
+
+/**
+ * handle_edge_irq - edge type IRQ handler
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ * @regs: pointer to a register structure
+ *
+ * Interrupt occures on the falling and/or rising edge of a hardware
+ * signal. The occurence is latched into the irq controller hardware
+ * and must be acked in order to be reenabled. After the ack another
+ * interrupt can happen on the same source even before the first one
+ * is handled by the assosiacted event handler. If this happens it
+ * might be necessary to disable (mask) the interrupt depending on the
+ * controller hardware. This requires to reenable the interrupt inside
+ * of the loop which handles the interrupts which have arrived while
+ * the handler was running. If all pending interrupts are handled, the
+ * loop is left.
+ */
+void fastcall
+handle_edge_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+{
+ const unsigned int cpu = smp_processor_id();
+
+ spin_lock(&desc->lock);
+
+ desc->status &= ~(IRQ_REPLAY | IRQ_WAITING);
+
+ /*
+ * If we're currently running this IRQ, or its disabled,
+ * we shouldn't process the IRQ. Mark it pending, handle
+ * the necessary masking and go out
+ */
+ if (unlikely((desc->status & (IRQ_INPROGRESS | IRQ_DISABLED)) ||
+ !desc->action)) {
+ desc->status |= (IRQ_PENDING | IRQ_MASKED);
+ mask_ack_irq(desc, irq);
+ goto out_unlock;
+ }
+
+ kstat_cpu(cpu).irqs[irq]++;
+
+ /* Start handling the irq */
+ desc->chip->ack(irq);
+
+ /* Mark the IRQ currently in progress.*/
+ desc->status |= IRQ_INPROGRESS;
+
+ do {
+ struct irqaction *action = desc->action;
+ irqreturn_t action_ret;
+
+ if (unlikely(!action)) {
+ desc->chip->mask(irq);
+ goto out_unlock;
+ }
+
+ /*
+ * When another irq arrived while we were handling
+ * one, we could have masked the irq.
+ * Renable it, if it was not disabled in meantime.
+ */
+ if (unlikely((desc->status &
+ (IRQ_PENDING | IRQ_MASKED | IRQ_DISABLED)) ==
+ (IRQ_PENDING | IRQ_MASKED))) {
+ desc->chip->unmask(irq);
+ desc->status &= ~IRQ_MASKED;
+ }
+
+ desc->status &= ~IRQ_PENDING;
+ spin_unlock(&desc->lock);
+ action_ret = handle_IRQ_event(irq, regs, action);
+ if (!noirqdebug)
+ note_interrupt(irq, desc, action_ret, regs);
+ spin_lock(&desc->lock);
+
+ } while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
+
+ desc->status &= ~IRQ_INPROGRESS;
+out_unlock:
+ spin_unlock(&desc->lock);
+}
+
+#ifdef CONFIG_SMP
+/**
+ * handle_percpu_IRQ - Per CPU local irq handler
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ * @regs: pointer to a register structure
+ *
+ * Per CPU interrupts on SMP machines without locking requirements
+ */
+void fastcall
+handle_percpu_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+{
+ irqreturn_t action_ret;
+
+ kstat_this_cpu.irqs[irq]++;
+
+ if (desc->chip->ack)
+ desc->chip->ack(irq);
+
+ action_ret = handle_IRQ_event(irq, regs, desc->action);
+ if (!noirqdebug)
+ note_interrupt(irq, desc, action_ret, regs);
+
+ if (desc->chip->eoi)
+ desc->chip->eoi(irq);
+}
+
+#endif /* CONFIG_SMP */
+
+void
+__set_irq_handler(unsigned int irq,
+ void fastcall (*handle)(unsigned int, irq_desc_t *,
+ struct pt_regs *),
+ int is_chained)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+
+ if (irq >= NR_IRQS) {
+ printk(KERN_ERR
+ "Trying to install type control for IRQ%d\n", irq);
+ return;
+ }
+
+ desc = irq_desc + irq;
+
+ if (!handle)
+ handle = handle_bad_irq;
+
+ if (desc->chip == &no_irq_chip) {
+ printk(KERN_WARNING "Trying to install %sinterrupt handler "
+ "for IRQ%d\n", is_chained ? "chained " : " ", irq);
+ /*
+ * Some ARM implementations install a handler for really dumb
+ * interrupt hardware without setting an irq_chip. This worked
+ * with the ARM no_irq_chip but the check in setup_irq would
+ * prevent us to setup the interrupt at all. Switch it to
+ * dummy_irq_chip for easy transition.
+ */
+ desc->chip = &dummy_irq_chip;
+ }
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ /* Uninstall? */
+ if (handle == handle_bad_irq) {
+ if (desc->chip != &no_irq_chip) {
+ desc->chip->mask(irq);
+ desc->chip->ack(irq);
+ }
+ desc->status |= IRQ_DISABLED;
+ desc->depth = 1;
+ }
+ desc->handle_irq = handle;
+
+ if (handle != handle_bad_irq && is_chained) {
+ desc->status &= ~IRQ_DISABLED;
+ desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE;
+ desc->depth = 0;
+ desc->chip->unmask(irq);
+ }
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
+void
+set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip,
+ void fastcall (*handle)(unsigned int,
+ struct irq_desc *,
+ struct pt_regs *))
+{
+ set_irq_chip(irq, chip);
+ __set_irq_handler(irq, handle, 0);
+}
+
+/*
+ * Get a descriptive string for the highlevel handler, for
+ * /proc/interrupts output:
+ */
+const char *
+handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
+ struct pt_regs *))
+{
+ if (handle == handle_level_irq)
+ return "level ";
+ if (handle == handle_fasteoi_irq)
+ return "fasteoi";
+ if (handle == handle_edge_irq)
+ return "edge ";
+ if (handle == handle_simple_irq)
+ return "simple ";
+#ifdef CONFIG_SMP
+ if (handle == handle_percpu_irq)
+ return "percpu ";
+#endif
+ if (handle == handle_bad_irq)
+ return "bad ";
+
+ return NULL;
+}
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 0f653011710..fc4e906aedb 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -1,9 +1,13 @@
/*
* linux/kernel/irq/handle.c
*
- * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 2005-2006, Thomas Gleixner, Russell King
*
* This file contains the core interrupt handling code.
+ *
+ * Detailed information is available in Documentation/DocBook/genericirq
+ *
*/
#include <linux/irq.h>
@@ -14,11 +18,22 @@
#include "internals.h"
+/**
+ * handle_bad_irq - handle spurious and unhandled irqs
+ */
+void fastcall
+handle_bad_irq(unsigned int irq, struct irq_desc *desc, struct pt_regs *regs)
+{
+ print_irq_desc(irq, desc);
+ kstat_this_cpu.irqs[irq]++;
+ ack_bad_irq(irq);
+}
+
/*
* Linux has a controller-independent interrupt architecture.
* Every controller has a 'controller-template', that is used
* by the main code to do the right thing. Each driver-visible
- * interrupt source is transparently wired to the apropriate
+ * interrupt source is transparently wired to the appropriate
* controller. Thus drivers need not be aware of the
* interrupt-controller.
*
@@ -28,41 +43,68 @@
*
* Controller mappings for all interrupt sources:
*/
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
+struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned = {
[0 ... NR_IRQS-1] = {
.status = IRQ_DISABLED,
- .handler = &no_irq_type,
- .lock = SPIN_LOCK_UNLOCKED
+ .chip = &no_irq_chip,
+ .handle_irq = handle_bad_irq,
+ .depth = 1,
+ .lock = SPIN_LOCK_UNLOCKED,
+#ifdef CONFIG_SMP
+ .affinity = CPU_MASK_ALL
+#endif
}
};
/*
- * Generic 'no controller' code
+ * What should we do if we get a hw irq event on an illegal vector?
+ * Each architecture has to answer this themself.
*/
-static void end_none(unsigned int irq) { }
-static void enable_none(unsigned int irq) { }
-static void disable_none(unsigned int irq) { }
-static void shutdown_none(unsigned int irq) { }
-static unsigned int startup_none(unsigned int irq) { return 0; }
-
-static void ack_none(unsigned int irq)
+static void ack_bad(unsigned int irq)
{
- /*
- * 'what should we do if we get a hw irq event on an illegal vector'.
- * each architecture has to answer this themself.
- */
+ print_irq_desc(irq, irq_desc + irq);
ack_bad_irq(irq);
}
-struct hw_interrupt_type no_irq_type = {
- .typename = "none",
- .startup = startup_none,
- .shutdown = shutdown_none,
- .enable = enable_none,
- .disable = disable_none,
- .ack = ack_none,
- .end = end_none,
- .set_affinity = NULL
+/*
+ * NOP functions
+ */
+static void noop(unsigned int irq)
+{
+}
+
+static unsigned int noop_ret(unsigned int irq)
+{
+ return 0;
+}
+
+/*
+ * Generic no controller implementation
+ */
+struct irq_chip no_irq_chip = {
+ .name = "none",
+ .startup = noop_ret,
+ .shutdown = noop,
+ .enable = noop,
+ .disable = noop,
+ .ack = ack_bad,
+ .end = noop,
+};
+
+/*
+ * Generic dummy implementation which can be used for
+ * real dumb interrupt sources
+ */
+struct irq_chip dummy_irq_chip = {
+ .name = "dummy",
+ .startup = noop_ret,
+ .shutdown = noop,
+ .enable = noop,
+ .disable = noop,
+ .ack = noop,
+ .mask = noop,
+ .unmask = noop,
+ .end = noop,
};
/*
@@ -73,17 +115,24 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
return IRQ_NONE;
}
-/*
- * Have got an event to handle:
+/**
+ * handle_IRQ_event - irq action chain handler
+ * @irq: the interrupt number
+ * @regs: pointer to a register structure
+ * @action: the interrupt action chain for this irq
+ *
+ * Handles the action chain of an irq event
*/
-fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
- struct irqaction *action)
+irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
+ struct irqaction *action)
{
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
- if (!(action->flags & SA_INTERRUPT))
- local_irq_enable();
+ handle_dynamic_tick(action);
+
+ if (!(action->flags & IRQF_DISABLED))
+ local_irq_enable_in_hardirq();
do {
ret = action->handler(irq, action->dev_id, regs);
@@ -93,22 +142,29 @@ fastcall irqreturn_t handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
action = action->next;
} while (action);
- if (status & SA_SAMPLE_RANDOM)
+ if (status & IRQF_SAMPLE_RANDOM)
add_interrupt_randomness(irq);
local_irq_disable();
return retval;
}
-/*
- * do_IRQ handles all normal device IRQ's (the special
+/**
+ * __do_IRQ - original all in one highlevel IRQ handler
+ * @irq: the interrupt number
+ * @regs: pointer to a register structure
+ *
+ * __do_IRQ handles all normal device IRQ's (the special
* SMP cross-CPU interrupts have their own specific
* handlers).
+ *
+ * This is the original x86 implementation which is used for every
+ * interrupt type.
*/
fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
{
- irq_desc_t *desc = irq_desc + irq;
- struct irqaction * action;
+ struct irq_desc *desc = irq_desc + irq;
+ struct irqaction *action;
unsigned int status;
kstat_this_cpu.irqs[irq]++;
@@ -118,16 +174,16 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
/*
* No locking required for CPU-local interrupts:
*/
- if (desc->handler->ack)
- desc->handler->ack(irq);
+ if (desc->chip->ack)
+ desc->chip->ack(irq);
action_ret = handle_IRQ_event(irq, regs, desc->action);
- desc->handler->end(irq);
+ desc->chip->end(irq);
return 1;
}
spin_lock(&desc->lock);
- if (desc->handler->ack)
- desc->handler->ack(irq);
+ if (desc->chip->ack)
+ desc->chip->ack(irq);
/*
* REPLAY is when Linux resends an IRQ that was dropped earlier
* WAITING is used by probe to mark irqs that are being tested
@@ -187,9 +243,25 @@ out:
* The ->end() handler has to deal with interrupts which got
* disabled while the handler was running.
*/
- desc->handler->end(irq);
+ desc->chip->end(irq);
spin_unlock(&desc->lock);
return 1;
}
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * lockdep: we want to handle all irq_desc locks as a single lock-class:
+ */
+static struct lock_class_key irq_desc_lock_class;
+
+void early_init_irq_lock_class(void)
+{
+ int i;
+
+ for (i = 0; i < NR_IRQS; i++)
+ lockdep_set_class(&irq_desc[i].lock, &irq_desc_lock_class);
+}
+
+#endif
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 46feba63026..08a849a2244 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -4,6 +4,12 @@
extern int noirqdebug;
+/* Set default functions for irq_chip structures: */
+extern void irq_chip_set_defaults(struct irq_chip *chip);
+
+/* Set default handler: */
+extern void compat_irq_chip_set_default_handler(struct irq_desc *desc);
+
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq);
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
@@ -16,3 +22,43 @@ static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
#endif
+/*
+ * Debugging printout:
+ */
+
+#include <linux/kallsyms.h>
+
+#define P(f) if (desc->status & f) printk("%14s set\n", #f)
+
+static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
+{
+ printk("irq %d, desc: %p, depth: %d, count: %d, unhandled: %d\n",
+ irq, desc, desc->depth, desc->irq_count, desc->irqs_unhandled);
+ printk("->handle_irq(): %p, ", desc->handle_irq);
+ print_symbol("%s\n", (unsigned long)desc->handle_irq);
+ printk("->chip(): %p, ", desc->chip);
+ print_symbol("%s\n", (unsigned long)desc->chip);
+ printk("->action(): %p\n", desc->action);
+ if (desc->action) {
+ printk("->action->handler(): %p, ", desc->action->handler);
+ print_symbol("%s\n", (unsigned long)desc->action->handler);
+ }
+
+ P(IRQ_INPROGRESS);
+ P(IRQ_DISABLED);
+ P(IRQ_PENDING);
+ P(IRQ_REPLAY);
+ P(IRQ_AUTODETECT);
+ P(IRQ_WAITING);
+ P(IRQ_LEVEL);
+ P(IRQ_MASKED);
+#ifdef CONFIG_IRQ_PER_CPU
+ P(IRQ_PER_CPU);
+#endif
+ P(IRQ_NOPROBE);
+ P(IRQ_NOREQUEST);
+ P(IRQ_NOAUTOEN);
+}
+
+#undef P
+
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1279e349953..4e461438e48 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1,12 +1,12 @@
/*
* linux/kernel/irq/manage.c
*
- * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 2005-2006 Thomas Gleixner
*
* This file contains driver APIs to the irq subsystem.
*/
-#include <linux/config.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
@@ -16,12 +16,6 @@
#ifdef CONFIG_SMP
-cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
-
-#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
-cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
-#endif
-
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
* @irq: interrupt number to wait for
@@ -42,7 +36,6 @@ void synchronize_irq(unsigned int irq)
while (desc->status & IRQ_INPROGRESS)
cpu_relax();
}
-
EXPORT_SYMBOL(synchronize_irq);
#endif
@@ -60,7 +53,7 @@ EXPORT_SYMBOL(synchronize_irq);
*/
void disable_irq_nosync(unsigned int irq)
{
- irq_desc_t *desc = irq_desc + irq;
+ struct irq_desc *desc = irq_desc + irq;
unsigned long flags;
if (irq >= NR_IRQS)
@@ -69,11 +62,10 @@ void disable_irq_nosync(unsigned int irq)
spin_lock_irqsave(&desc->lock, flags);
if (!desc->depth++) {
desc->status |= IRQ_DISABLED;
- desc->handler->disable(irq);
+ desc->chip->disable(irq);
}
spin_unlock_irqrestore(&desc->lock, flags);
}
-
EXPORT_SYMBOL(disable_irq_nosync);
/**
@@ -90,7 +82,7 @@ EXPORT_SYMBOL(disable_irq_nosync);
*/
void disable_irq(unsigned int irq)
{
- irq_desc_t *desc = irq_desc + irq;
+ struct irq_desc *desc = irq_desc + irq;
if (irq >= NR_IRQS)
return;
@@ -99,7 +91,6 @@ void disable_irq(unsigned int irq)
if (desc->action)
synchronize_irq(irq);
}
-
EXPORT_SYMBOL(disable_irq);
/**
@@ -114,7 +105,7 @@ EXPORT_SYMBOL(disable_irq);
*/
void enable_irq(unsigned int irq)
{
- irq_desc_t *desc = irq_desc + irq;
+ struct irq_desc *desc = irq_desc + irq;
unsigned long flags;
if (irq >= NR_IRQS)
@@ -123,17 +114,15 @@ void enable_irq(unsigned int irq)
spin_lock_irqsave(&desc->lock, flags);
switch (desc->depth) {
case 0:
+ printk(KERN_WARNING "Unbalanced enable for IRQ %d\n", irq);
WARN_ON(1);
break;
case 1: {
unsigned int status = desc->status & ~IRQ_DISABLED;
- desc->status = status;
- if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
- desc->status = status | IRQ_REPLAY;
- hw_resend_irq(desc->handler,irq);
- }
- desc->handler->enable(irq);
+ /* Prevent probing on this irq: */
+ desc->status = status | IRQ_NOPROBE;
+ check_irq_resend(desc, irq);
/* fall-through */
}
default:
@@ -141,9 +130,29 @@ void enable_irq(unsigned int irq)
}
spin_unlock_irqrestore(&desc->lock, flags);
}
-
EXPORT_SYMBOL(enable_irq);
+/**
+ * set_irq_wake - control irq power management wakeup
+ * @irq: interrupt to control
+ * @on: enable/disable power management wakeup
+ *
+ * Enable/disable power management wakeup mode
+ */
+int set_irq_wake(unsigned int irq, unsigned int on)
+{
+ struct irq_desc *desc = irq_desc + irq;
+ unsigned long flags;
+ int ret = -ENXIO;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ if (desc->chip->set_wake)
+ ret = desc->chip->set_wake(irq, on);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(set_irq_wake);
+
/*
* Internal function that tells the architecture code whether a
* particular irq has been exclusively allocated or is available
@@ -153,22 +162,33 @@ int can_request_irq(unsigned int irq, unsigned long irqflags)
{
struct irqaction *action;
- if (irq >= NR_IRQS)
+ if (irq >= NR_IRQS || irq_desc[irq].status & IRQ_NOREQUEST)
return 0;
action = irq_desc[irq].action;
if (action)
- if (irqflags & action->flags & SA_SHIRQ)
+ if (irqflags & action->flags & IRQF_SHARED)
action = NULL;
return !action;
}
+void compat_irq_chip_set_default_handler(struct irq_desc *desc)
+{
+ /*
+ * If the architecture still has not overriden
+ * the flow handler then zap the default. This
+ * should catch incorrect flow-type setting.
+ */
+ if (desc->handle_irq == &handle_bad_irq)
+ desc->handle_irq = NULL;
+}
+
/*
* Internal function to register an irqaction - typically used to
* allocate special interrupts that are part of the architecture.
*/
-int setup_irq(unsigned int irq, struct irqaction * new)
+int setup_irq(unsigned int irq, struct irqaction *new)
{
struct irq_desc *desc = irq_desc + irq;
struct irqaction *old, **p;
@@ -178,14 +198,14 @@ int setup_irq(unsigned int irq, struct irqaction * new)
if (irq >= NR_IRQS)
return -EINVAL;
- if (desc->handler == &no_irq_type)
+ if (desc->chip == &no_irq_chip)
return -ENOSYS;
/*
* Some drivers like serial.c use request_irq() heavily,
* so we have to be careful not to interfere with a
* running system.
*/
- if (new->flags & SA_SAMPLE_RANDOM) {
+ if (new->flags & IRQF_SAMPLE_RANDOM) {
/*
* This function might sleep, we want to call it first,
* outside of the atomic block.
@@ -200,16 +220,24 @@ int setup_irq(unsigned int irq, struct irqaction * new)
/*
* The following block of code has to be executed atomically
*/
- spin_lock_irqsave(&desc->lock,flags);
+ spin_lock_irqsave(&desc->lock, flags);
p = &desc->action;
- if ((old = *p) != NULL) {
- /* Can't share interrupts unless both agree to */
- if (!(old->flags & new->flags & SA_SHIRQ))
+ old = *p;
+ if (old) {
+ /*
+ * Can't share interrupts unless both agree to and are
+ * the same type (level, edge, polarity). So both flag
+ * fields must have IRQF_SHARED set and the bits which
+ * set the trigger type must match.
+ */
+ if (!((old->flags & new->flags) & IRQF_SHARED) ||
+ ((old->flags ^ new->flags) & IRQF_TRIGGER_MASK))
goto mismatch;
-#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
+#if defined(CONFIG_IRQ_PER_CPU)
/* All handlers must agree on per-cpuness */
- if ((old->flags & IRQ_PER_CPU) != (new->flags & IRQ_PER_CPU))
+ if ((old->flags & IRQF_PERCPU) !=
+ (new->flags & IRQF_PERCPU))
goto mismatch;
#endif
@@ -222,20 +250,45 @@ int setup_irq(unsigned int irq, struct irqaction * new)
}
*p = new;
-#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
- if (new->flags & SA_PERCPU_IRQ)
+#if defined(CONFIG_IRQ_PER_CPU)
+ if (new->flags & IRQF_PERCPU)
desc->status |= IRQ_PER_CPU;
#endif
if (!shared) {
- desc->depth = 0;
- desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
- IRQ_WAITING | IRQ_INPROGRESS);
- if (desc->handler->startup)
- desc->handler->startup(irq);
- else
- desc->handler->enable(irq);
+ irq_chip_set_defaults(desc->chip);
+
+ /* Setup the type (level, edge polarity) if configured: */
+ if (new->flags & IRQF_TRIGGER_MASK) {
+ if (desc->chip && desc->chip->set_type)
+ desc->chip->set_type(irq,
+ new->flags & IRQF_TRIGGER_MASK);
+ else
+ /*
+ * IRQF_TRIGGER_* but the PIC does not support
+ * multiple flow-types?
+ */
+ printk(KERN_WARNING "No IRQF_TRIGGER set_type "
+ "function for IRQ %d (%s)\n", irq,
+ desc->chip ? desc->chip->name :
+ "unknown");
+ } else
+ compat_irq_chip_set_default_handler(desc);
+
+ desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING |
+ IRQ_INPROGRESS);
+
+ if (!(desc->status & IRQ_NOAUTOEN)) {
+ desc->depth = 0;
+ desc->status &= ~IRQ_DISABLED;
+ if (desc->chip->startup)
+ desc->chip->startup(irq);
+ else
+ desc->chip->enable(irq);
+ } else
+ /* Undo nested disables: */
+ desc->depth = 1;
}
- spin_unlock_irqrestore(&desc->lock,flags);
+ spin_unlock_irqrestore(&desc->lock, flags);
new->irq = irq;
register_irq_proc(irq);
@@ -246,8 +299,8 @@ int setup_irq(unsigned int irq, struct irqaction * new)
mismatch:
spin_unlock_irqrestore(&desc->lock, flags);
- if (!(new->flags & SA_PROBEIRQ)) {
- printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
+ if (!(new->flags & IRQF_PROBE_SHARED)) {
+ printk(KERN_ERR "IRQ handler type mismatch for IRQ %d\n", irq);
dump_stack();
}
return -EBUSY;
@@ -278,10 +331,10 @@ void free_irq(unsigned int irq, void *dev_id)
return;
desc = irq_desc + irq;
- spin_lock_irqsave(&desc->lock,flags);
+ spin_lock_irqsave(&desc->lock, flags);
p = &desc->action;
for (;;) {
- struct irqaction * action = *p;
+ struct irqaction *action = *p;
if (action) {
struct irqaction **pp = p;
@@ -295,18 +348,18 @@ void free_irq(unsigned int irq, void *dev_id)
/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
- if (desc->handler->release)
- desc->handler->release(irq, dev_id);
+ if (desc->chip->release)
+ desc->chip->release(irq, dev_id);
#endif
if (!desc->action) {
desc->status |= IRQ_DISABLED;
- if (desc->handler->shutdown)
- desc->handler->shutdown(irq);
+ if (desc->chip->shutdown)
+ desc->chip->shutdown(irq);
else
- desc->handler->disable(irq);
+ desc->chip->disable(irq);
}
- spin_unlock_irqrestore(&desc->lock,flags);
+ spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
/* Make sure it's not being used on another CPU */
@@ -314,12 +367,11 @@ void free_irq(unsigned int irq, void *dev_id)
kfree(action);
return;
}
- printk(KERN_ERR "Trying to free free IRQ%d\n",irq);
- spin_unlock_irqrestore(&desc->lock,flags);
+ printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
+ spin_unlock_irqrestore(&desc->lock, flags);
return;
}
}
-
EXPORT_SYMBOL(free_irq);
/**
@@ -346,28 +398,36 @@ EXPORT_SYMBOL(free_irq);
*
* Flags:
*
- * SA_SHIRQ Interrupt is shared
- * SA_INTERRUPT Disable local interrupts while processing
- * SA_SAMPLE_RANDOM The interrupt can be used for entropy
+ * IRQF_SHARED Interrupt is shared
+ * IRQF_DISABLED Disable local interrupts while processing
+ * IRQF_SAMPLE_RANDOM The interrupt can be used for entropy
*
*/
int request_irq(unsigned int irq,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
- unsigned long irqflags, const char * devname, void *dev_id)
+ unsigned long irqflags, const char *devname, void *dev_id)
{
- struct irqaction * action;
+ struct irqaction *action;
int retval;
+#ifdef CONFIG_LOCKDEP
+ /*
+ * Lockdep wants atomic interrupt handlers:
+ */
+ irqflags |= SA_INTERRUPT;
+#endif
/*
* Sanity-check: shared interrupts must pass in a real dev-ID,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing
* logic etc).
*/
- if ((irqflags & SA_SHIRQ) && !dev_id)
+ if ((irqflags & IRQF_SHARED) && !dev_id)
return -EINVAL;
if (irq >= NR_IRQS)
return -EINVAL;
+ if (irq_desc[irq].status & IRQ_NOREQUEST)
+ return -EINVAL;
if (!handler)
return -EINVAL;
@@ -390,6 +450,5 @@ int request_irq(unsigned int irq,
return retval;
}
-
EXPORT_SYMBOL(request_irq);
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index a12d00eb5e7..a57ebe9fa6f 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -3,19 +3,19 @@
void set_pending_irq(unsigned int irq, cpumask_t mask)
{
- irq_desc_t *desc = irq_desc + irq;
+ struct irq_desc *desc = irq_desc + irq;
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
desc->move_irq = 1;
- pending_irq_cpumask[irq] = mask;
+ irq_desc[irq].pending_mask = mask;
spin_unlock_irqrestore(&desc->lock, flags);
}
void move_native_irq(int irq)
{
+ struct irq_desc *desc = irq_desc + irq;
cpumask_t tmp;
- irq_desc_t *desc = irq_descp(irq);
if (likely(!desc->move_irq))
return;
@@ -30,15 +30,15 @@ void move_native_irq(int irq)
desc->move_irq = 0;
- if (unlikely(cpus_empty(pending_irq_cpumask[irq])))
+ if (unlikely(cpus_empty(irq_desc[irq].pending_mask)))
return;
- if (!desc->handler->set_affinity)
+ if (!desc->chip->set_affinity)
return;
assert_spin_locked(&desc->lock);
- cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
+ cpus_and(tmp, irq_desc[irq].pending_mask, cpu_online_map);
/*
* If there was a valid mask to work with, please
@@ -51,12 +51,12 @@ void move_native_irq(int irq)
*/
if (likely(!cpus_empty(tmp))) {
if (likely(!(desc->status & IRQ_DISABLED)))
- desc->handler->disable(irq);
+ desc->chip->disable(irq);
- desc->handler->set_affinity(irq,tmp);
+ desc->chip->set_affinity(irq,tmp);
if (likely(!(desc->status & IRQ_DISABLED)))
- desc->handler->enable(irq);
+ desc->chip->enable(irq);
}
- cpus_clear(pending_irq_cpumask[irq]);
+ cpus_clear(irq_desc[irq].pending_mask);
}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index afacd6f585f..607c7809ad0 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -12,15 +12,10 @@
#include "internals.h"
-static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
+static struct proc_dir_entry *root_irq_dir;
#ifdef CONFIG_SMP
-/*
- * The /proc/irq/<irq>/smp_affinity values:
- */
-static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
-
#ifdef CONFIG_GENERIC_PENDING_IRQ
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
@@ -36,15 +31,15 @@ void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
{
set_balance_irq_affinity(irq, mask_val);
- irq_affinity[irq] = mask_val;
- irq_desc[irq].handler->set_affinity(irq, mask_val);
+ irq_desc[irq].affinity = mask_val;
+ irq_desc[irq].chip->set_affinity(irq, mask_val);
}
#endif
static int irq_affinity_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]);
+ int len = cpumask_scnprintf(page, count, irq_desc[(long)data].affinity);
if (count - len < 2)
return -EINVAL;
@@ -59,7 +54,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
unsigned int irq = (int)(long)data, full_count = count, err;
cpumask_t new_value, tmp;
- if (!irq_desc[irq].handler->set_affinity || no_irq_affinity)
+ if (!irq_desc[irq].chip->set_affinity || no_irq_affinity)
return -EIO;
err = cpumask_parse(buffer, count, new_value);
@@ -102,7 +97,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
{
char name [MAX_NAMELEN];
- if (!irq_dir[irq] || action->dir || !action->name ||
+ if (!irq_desc[irq].dir || action->dir || !action->name ||
!name_unique(irq, action))
return;
@@ -110,7 +105,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
snprintf(name, MAX_NAMELEN, "%s", action->name);
/* create /proc/irq/1234/handler/ */
- action->dir = proc_mkdir(name, irq_dir[irq]);
+ action->dir = proc_mkdir(name, irq_desc[irq].dir);
}
#undef MAX_NAMELEN
@@ -122,22 +117,22 @@ void register_irq_proc(unsigned int irq)
char name [MAX_NAMELEN];
if (!root_irq_dir ||
- (irq_desc[irq].handler == &no_irq_type) ||
- irq_dir[irq])
+ (irq_desc[irq].chip == &no_irq_chip) ||
+ irq_desc[irq].dir)
return;
memset(name, 0, MAX_NAMELEN);
sprintf(name, "%d", irq);
/* create /proc/irq/1234 */
- irq_dir[irq] = proc_mkdir(name, root_irq_dir);
+ irq_desc[irq].dir = proc_mkdir(name, root_irq_dir);
#ifdef CONFIG_SMP
{
struct proc_dir_entry *entry;
/* create /proc/irq/<irq>/smp_affinity */
- entry = create_proc_entry("smp_affinity", 0600, irq_dir[irq]);
+ entry = create_proc_entry("smp_affinity", 0600, irq_desc[irq].dir);
if (entry) {
entry->nlink = 1;
@@ -145,7 +140,6 @@ void register_irq_proc(unsigned int irq)
entry->read_proc = irq_affinity_read_proc;
entry->write_proc = irq_affinity_write_proc;
}
- smp_affinity_entry[irq] = entry;
}
#endif
}
@@ -155,7 +149,7 @@ void register_irq_proc(unsigned int irq)
void unregister_handler_proc(unsigned int irq, struct irqaction *action)
{
if (action->dir)
- remove_proc_entry(action->dir->name, irq_dir[irq]);
+ remove_proc_entry(action->dir->name, irq_desc[irq].dir);
}
void init_irq_proc(void)
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
new file mode 100644
index 00000000000..872f91ba2ce
--- /dev/null
+++ b/kernel/irq/resend.c
@@ -0,0 +1,78 @@
+/*
+ * linux/kernel/irq/resend.c
+ *
+ * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar
+ * Copyright (C) 2005-2006, Thomas Gleixner
+ *
+ * This file contains the IRQ-resend code
+ *
+ * If the interrupt is waiting to be processed, we try to re-run it.
+ * We can't directly run it from here since the caller might be in an
+ * interrupt-protected region. Not all irq controller chips can
+ * retrigger interrupts at the hardware level, so in those cases
+ * we allow the resending of IRQs via a tasklet.
+ */
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/random.h>
+#include <linux/interrupt.h>
+
+#include "internals.h"
+
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+
+/* Bitmap to handle software resend of interrupts: */
+static DECLARE_BITMAP(irqs_resend, NR_IRQS);
+
+/*
+ * Run software resends of IRQ's
+ */
+static void resend_irqs(unsigned long arg)
+{
+ struct irq_desc *desc;
+ int irq;
+
+ while (!bitmap_empty(irqs_resend, NR_IRQS)) {
+ irq = find_first_bit(irqs_resend, NR_IRQS);
+ clear_bit(irq, irqs_resend);
+ desc = irq_desc + irq;
+ local_irq_disable();
+ desc->handle_irq(irq, desc, NULL);
+ local_irq_enable();
+ }
+}
+
+/* Tasklet to handle resend: */
+static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
+
+#endif
+
+/*
+ * IRQ resend
+ *
+ * Is called with interrupts disabled and desc->lock held.
+ */
+void check_irq_resend(struct irq_desc *desc, unsigned int irq)
+{
+ unsigned int status = desc->status;
+
+ /*
+ * Make sure the interrupt is enabled, before resending it:
+ */
+ desc->chip->enable(irq);
+
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ desc->status &= ~IRQ_PENDING;
+ desc->status = status | IRQ_REPLAY;
+
+ if (!desc->chip || !desc->chip->retrigger ||
+ !desc->chip->retrigger(irq)) {
+#ifdef CONFIG_HARDIRQS_SW_RESEND
+ /* Set it pending and activate the softirq: */
+ set_bit(irq, irqs_resend);
+ tasklet_schedule(&resend_tasklet);
+#endif
+ }
+ }
+}
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index b2fb3c18d06..417e98092cf 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -16,39 +16,39 @@ static int irqfixup __read_mostly;
/*
* Recovery handler for misrouted interrupts.
*/
-
static int misrouted_irq(int irq, struct pt_regs *regs)
{
int i;
- irq_desc_t *desc;
int ok = 0;
int work = 0; /* Did we do work for a real IRQ */
- for(i = 1; i < NR_IRQS; i++) {
+ for (i = 1; i < NR_IRQS; i++) {
+ struct irq_desc *desc = irq_desc + i;
struct irqaction *action;
if (i == irq) /* Already tried */
continue;
- desc = &irq_desc[i];
+
spin_lock(&desc->lock);
- action = desc->action;
/* Already running on another processor */
if (desc->status & IRQ_INPROGRESS) {
/*
* Already running: If it is shared get the other
* CPU to go looking for our mystery interrupt too
*/
- if (desc->action && (desc->action->flags & SA_SHIRQ))
+ if (desc->action && (desc->action->flags & IRQF_SHARED))
desc->status |= IRQ_PENDING;
spin_unlock(&desc->lock);
continue;
}
/* Honour the normal IRQ locking */
desc->status |= IRQ_INPROGRESS;
+ action = desc->action;
spin_unlock(&desc->lock);
+
while (action) {
/* Only shared IRQ handlers are safe to call */
- if (action->flags & SA_SHIRQ) {
+ if (action->flags & IRQF_SHARED) {
if (action->handler(i, action->dev_id, regs) ==
IRQ_HANDLED)
ok = 1;
@@ -62,9 +62,8 @@ static int misrouted_irq(int irq, struct pt_regs *regs)
/*
* While we were looking for a fixup someone queued a real
- * IRQ clashing with our walk
+ * IRQ clashing with our walk:
*/
-
while ((desc->status & IRQ_PENDING) && action) {
/*
* Perform real IRQ processing for the IRQ we deferred
@@ -80,8 +79,8 @@ static int misrouted_irq(int irq, struct pt_regs *regs)
* If we did actual work for the real IRQ line we must let the
* IRQ controller clean up too
*/
- if(work)
- desc->handler->end(i);
+ if (work && desc->chip && desc->chip->end)
+ desc->chip->end(i);
spin_unlock(&desc->lock);
}
/* So the caller can adjust the irq error counts */
@@ -100,7 +99,8 @@ static int misrouted_irq(int irq, struct pt_regs *regs)
*/
static void
-__report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+__report_bad_irq(unsigned int irq, struct irq_desc *desc,
+ irqreturn_t action_ret)
{
struct irqaction *action;
@@ -113,6 +113,7 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
}
dump_stack();
printk(KERN_ERR "handlers:\n");
+
action = desc->action;
while (action) {
printk(KERN_ERR "[<%p>]", action->handler);
@@ -123,7 +124,8 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
}
}
-static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
+static void
+report_bad_irq(unsigned int irq, struct irq_desc *desc, irqreturn_t action_ret)
{
static int count = 100;
@@ -133,8 +135,8 @@ static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t actio
}
}
-void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
- struct pt_regs *regs)
+void note_interrupt(unsigned int irq, struct irq_desc *desc,
+ irqreturn_t action_ret, struct pt_regs *regs)
{
if (unlikely(action_ret != IRQ_HANDLED)) {
desc->irqs_unhandled++;
@@ -166,7 +168,8 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
*/
printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
desc->status |= IRQ_DISABLED;
- desc->handler->disable(irq);
+ desc->depth = 1;
+ desc->chip->disable(irq);
}
desc->irqs_unhandled = 0;
}
@@ -177,6 +180,7 @@ int __init noirqdebug_setup(char *str)
{
noirqdebug = 1;
printk(KERN_INFO "IRQ lockup detection disabled\n");
+
return 1;
}
@@ -187,6 +191,7 @@ static int __init irqfixup_setup(char *str)
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
+
return 1;
}
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 58f0f382597..50087ecf337 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1042,7 +1042,6 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry,
void crash_kexec(struct pt_regs *regs)
{
- struct kimage *image;
int locked;
@@ -1056,12 +1055,11 @@ void crash_kexec(struct pt_regs *regs)
*/
locked = xchg(&kexec_lock, 1);
if (!locked) {
- image = xchg(&kexec_crash_image, NULL);
- if (image) {
+ if (kexec_crash_image) {
struct pt_regs fixed_regs;
crash_setup_regs(&fixed_regs, regs);
machine_crash_shutdown(&fixed_regs);
- machine_kexec(image);
+ machine_kexec(kexec_crash_image);
}
xchg(&kexec_lock, 0);
}
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 20a997c73c3..1d32defa38a 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -20,7 +20,6 @@
*/
#define __KERNEL_SYSCALLS__
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
@@ -234,7 +233,7 @@ static void __call_usermodehelper(void *data)
int call_usermodehelper_keys(char *path, char **argv, char **envp,
struct key *session_keyring, int wait)
{
- DECLARE_COMPLETION(done);
+ DECLARE_COMPLETION_ONSTACK(done);
struct subprocess_info sub_info = {
.complete = &done,
.path = path,
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 9e28478a17a..e0ffe4ab091 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -8,7 +8,6 @@
*
*/
-#include <linux/config.h>
#include <linux/kobject.h>
#include <linux/string.h>
#include <linux/sysfs.h>
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
new file mode 100644
index 00000000000..f32ca78c198
--- /dev/null
+++ b/kernel/lockdep.c
@@ -0,0 +1,2702 @@
+/*
+ * kernel/lockdep.c
+ *
+ * Runtime locking correctness validator
+ *
+ * Started by Ingo Molnar:
+ *
+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * this code maps all the lock dependencies as they occur in a live kernel
+ * and will warn about the following classes of locking bugs:
+ *
+ * - lock inversion scenarios
+ * - circular lock dependencies
+ * - hardirq/softirq safe/unsafe locking bugs
+ *
+ * Bugs are reported even if the current locking scenario does not cause
+ * any deadlock at this point.
+ *
+ * I.e. if anytime in the past two locks were taken in a different order,
+ * even if it happened for another task, even if those were different
+ * locks (but of the same class as this lock), this code will detect it.
+ *
+ * Thanks to Arjan van de Ven for coming up with the initial idea of
+ * mapping lock dependencies runtime.
+ */
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/kallsyms.h>
+#include <linux/interrupt.h>
+#include <linux/stacktrace.h>
+#include <linux/debug_locks.h>
+#include <linux/irqflags.h>
+
+#include <asm/sections.h>
+
+#include "lockdep_internals.h"
+
+/*
+ * hash_lock: protects the lockdep hashes and class/list/hash allocators.
+ *
+ * This is one of the rare exceptions where it's justified
+ * to use a raw spinlock - we really dont want the spinlock
+ * code to recurse back into the lockdep code.
+ */
+static raw_spinlock_t hash_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
+
+static int lockdep_initialized;
+
+unsigned long nr_list_entries;
+static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
+
+/*
+ * Allocate a lockdep entry. (assumes hash_lock held, returns
+ * with NULL on failure)
+ */
+static struct lock_list *alloc_list_entry(void)
+{
+ if (nr_list_entries >= MAX_LOCKDEP_ENTRIES) {
+ __raw_spin_unlock(&hash_lock);
+ debug_locks_off();
+ printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
+ printk("turning off the locking correctness validator.\n");
+ return NULL;
+ }
+ return list_entries + nr_list_entries++;
+}
+
+/*
+ * All data structures here are protected by the global debug_lock.
+ *
+ * Mutex key structs only get allocated, once during bootup, and never
+ * get freed - this significantly simplifies the debugging code.
+ */
+unsigned long nr_lock_classes;
+static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+
+/*
+ * We keep a global list of all lock classes. The list only grows,
+ * never shrinks. The list is only accessed with the lockdep
+ * spinlock lock held.
+ */
+LIST_HEAD(all_lock_classes);
+
+/*
+ * The lockdep classes are in a hash-table as well, for fast lookup:
+ */
+#define CLASSHASH_BITS (MAX_LOCKDEP_KEYS_BITS - 1)
+#define CLASSHASH_SIZE (1UL << CLASSHASH_BITS)
+#define CLASSHASH_MASK (CLASSHASH_SIZE - 1)
+#define __classhashfn(key) ((((unsigned long)key >> CLASSHASH_BITS) + (unsigned long)key) & CLASSHASH_MASK)
+#define classhashentry(key) (classhash_table + __classhashfn((key)))
+
+static struct list_head classhash_table[CLASSHASH_SIZE];
+
+unsigned long nr_lock_chains;
+static struct lock_chain lock_chains[MAX_LOCKDEP_CHAINS];
+
+/*
+ * We put the lock dependency chains into a hash-table as well, to cache
+ * their existence:
+ */
+#define CHAINHASH_BITS (MAX_LOCKDEP_CHAINS_BITS-1)
+#define CHAINHASH_SIZE (1UL << CHAINHASH_BITS)
+#define CHAINHASH_MASK (CHAINHASH_SIZE - 1)
+#define __chainhashfn(chain) \
+ (((chain >> CHAINHASH_BITS) + chain) & CHAINHASH_MASK)
+#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
+
+static struct list_head chainhash_table[CHAINHASH_SIZE];
+
+/*
+ * The hash key of the lock dependency chains is a hash itself too:
+ * it's a hash of all locks taken up to that lock, including that lock.
+ * It's a 64-bit hash, because it's important for the keys to be
+ * unique.
+ */
+#define iterate_chain_key(key1, key2) \
+ (((key1) << MAX_LOCKDEP_KEYS_BITS/2) ^ \
+ ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS/2)) ^ \
+ (key2))
+
+void lockdep_off(void)
+{
+ current->lockdep_recursion++;
+}
+
+EXPORT_SYMBOL(lockdep_off);
+
+void lockdep_on(void)
+{
+ current->lockdep_recursion--;
+}
+
+EXPORT_SYMBOL(lockdep_on);
+
+int lockdep_internal(void)
+{
+ return current->lockdep_recursion != 0;
+}
+
+EXPORT_SYMBOL(lockdep_internal);
+
+/*
+ * Debugging switches:
+ */
+
+#define VERBOSE 0
+#ifdef VERBOSE
+# define VERY_VERBOSE 0
+#endif
+
+#if VERBOSE
+# define HARDIRQ_VERBOSE 1
+# define SOFTIRQ_VERBOSE 1
+#else
+# define HARDIRQ_VERBOSE 0
+# define SOFTIRQ_VERBOSE 0
+#endif
+
+#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
+/*
+ * Quick filtering for interesting events:
+ */
+static int class_filter(struct lock_class *class)
+{
+ if (class->name_version == 1 &&
+ !strcmp(class->name, "&rl->lock"))
+ return 1;
+ if (class->name_version == 1 &&
+ !strcmp(class->name, "&ni->mrec_lock"))
+ return 1;
+ if (class->name_version == 1 &&
+ !strcmp(class->name, "mft_ni_runlist_lock"))
+ return 1;
+ if (class->name_version == 1 &&
+ !strcmp(class->name, "mft_ni_mrec_lock"))
+ return 1;
+ if (class->name_version == 1 &&
+ !strcmp(class->name, "&vol->lcnbmp_lock"))
+ return 1;
+ return 0;
+}
+#endif
+
+static int verbose(struct lock_class *class)
+{
+#if VERBOSE
+ return class_filter(class);
+#endif
+ return 0;
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+static int hardirq_verbose(struct lock_class *class)
+{
+#if HARDIRQ_VERBOSE
+ return class_filter(class);
+#endif
+ return 0;
+}
+
+static int softirq_verbose(struct lock_class *class)
+{
+#if SOFTIRQ_VERBOSE
+ return class_filter(class);
+#endif
+ return 0;
+}
+
+#endif
+
+/*
+ * Stack-trace: tightly packed array of stack backtrace
+ * addresses. Protected by the hash_lock.
+ */
+unsigned long nr_stack_trace_entries;
+static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
+
+static int save_trace(struct stack_trace *trace)
+{
+ trace->nr_entries = 0;
+ trace->max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
+ trace->entries = stack_trace + nr_stack_trace_entries;
+
+ save_stack_trace(trace, NULL, 0, 3);
+
+ trace->max_entries = trace->nr_entries;
+
+ nr_stack_trace_entries += trace->nr_entries;
+ if (DEBUG_LOCKS_WARN_ON(nr_stack_trace_entries > MAX_STACK_TRACE_ENTRIES))
+ return 0;
+
+ if (nr_stack_trace_entries == MAX_STACK_TRACE_ENTRIES) {
+ __raw_spin_unlock(&hash_lock);
+ if (debug_locks_off()) {
+ printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
+ printk("turning off the locking correctness validator.\n");
+ dump_stack();
+ }
+ return 0;
+ }
+
+ return 1;
+}
+
+unsigned int nr_hardirq_chains;
+unsigned int nr_softirq_chains;
+unsigned int nr_process_chains;
+unsigned int max_lockdep_depth;
+unsigned int max_recursion_depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+/*
+ * We cannot printk in early bootup code. Not even early_printk()
+ * might work. So we mark any initialization errors and printk
+ * about it later on, in lockdep_info().
+ */
+static int lockdep_init_error;
+
+/*
+ * Various lockdep statistics:
+ */
+atomic_t chain_lookup_hits;
+atomic_t chain_lookup_misses;
+atomic_t hardirqs_on_events;
+atomic_t hardirqs_off_events;
+atomic_t redundant_hardirqs_on;
+atomic_t redundant_hardirqs_off;
+atomic_t softirqs_on_events;
+atomic_t softirqs_off_events;
+atomic_t redundant_softirqs_on;
+atomic_t redundant_softirqs_off;
+atomic_t nr_unused_locks;
+atomic_t nr_cyclic_checks;
+atomic_t nr_cyclic_check_recursions;
+atomic_t nr_find_usage_forwards_checks;
+atomic_t nr_find_usage_forwards_recursions;
+atomic_t nr_find_usage_backwards_checks;
+atomic_t nr_find_usage_backwards_recursions;
+# define debug_atomic_inc(ptr) atomic_inc(ptr)
+# define debug_atomic_dec(ptr) atomic_dec(ptr)
+# define debug_atomic_read(ptr) atomic_read(ptr)
+#else
+# define debug_atomic_inc(ptr) do { } while (0)
+# define debug_atomic_dec(ptr) do { } while (0)
+# define debug_atomic_read(ptr) 0
+#endif
+
+/*
+ * Locking printouts:
+ */
+
+static const char *usage_str[] =
+{
+ [LOCK_USED] = "initial-use ",
+ [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W",
+ [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W",
+ [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W",
+ [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W",
+ [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R",
+ [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R",
+ [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R",
+ [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R",
+};
+
+const char * __get_key_name(struct lockdep_subclass_key *key, char *str)
+{
+ unsigned long offs, size;
+ char *modname;
+
+ return kallsyms_lookup((unsigned long)key, &size, &offs, &modname, str);
+}
+
+void
+get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4)
+{
+ *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.';
+
+ if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
+ *c1 = '+';
+ else
+ if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
+ *c1 = '-';
+
+ if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
+ *c2 = '+';
+ else
+ if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
+ *c2 = '-';
+
+ if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
+ *c3 = '-';
+ if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) {
+ *c3 = '+';
+ if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
+ *c3 = '?';
+ }
+
+ if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
+ *c4 = '-';
+ if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) {
+ *c4 = '+';
+ if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
+ *c4 = '?';
+ }
+}
+
+static void print_lock_name(struct lock_class *class)
+{
+ char str[128], c1, c2, c3, c4;
+ const char *name;
+
+ get_usage_chars(class, &c1, &c2, &c3, &c4);
+
+ name = class->name;
+ if (!name) {
+ name = __get_key_name(class->key, str);
+ printk(" (%s", name);
+ } else {
+ printk(" (%s", name);
+ if (class->name_version > 1)
+ printk("#%d", class->name_version);
+ if (class->subclass)
+ printk("/%d", class->subclass);
+ }
+ printk("){%c%c%c%c}", c1, c2, c3, c4);
+}
+
+static void print_lockdep_cache(struct lockdep_map *lock)
+{
+ const char *name;
+ char str[128];
+
+ name = lock->name;
+ if (!name)
+ name = __get_key_name(lock->key->subkeys, str);
+
+ printk("%s", name);
+}
+
+static void print_lock(struct held_lock *hlock)
+{
+ print_lock_name(hlock->class);
+ printk(", at: ");
+ print_ip_sym(hlock->acquire_ip);
+}
+
+static void lockdep_print_held_locks(struct task_struct *curr)
+{
+ int i, depth = curr->lockdep_depth;
+
+ if (!depth) {
+ printk("no locks held by %s/%d.\n", curr->comm, curr->pid);
+ return;
+ }
+ printk("%d lock%s held by %s/%d:\n",
+ depth, depth > 1 ? "s" : "", curr->comm, curr->pid);
+
+ for (i = 0; i < depth; i++) {
+ printk(" #%d: ", i);
+ print_lock(curr->held_locks + i);
+ }
+}
+/*
+ * Helper to print a nice hierarchy of lock dependencies:
+ */
+static void print_spaces(int nr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++)
+ printk(" ");
+}
+
+static void print_lock_class_header(struct lock_class *class, int depth)
+{
+ int bit;
+
+ print_spaces(depth);
+ printk("->");
+ print_lock_name(class);
+ printk(" ops: %lu", class->ops);
+ printk(" {\n");
+
+ for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
+ if (class->usage_mask & (1 << bit)) {
+ int len = depth;
+
+ print_spaces(depth);
+ len += printk(" %s", usage_str[bit]);
+ len += printk(" at:\n");
+ print_stack_trace(class->usage_traces + bit, len);
+ }
+ }
+ print_spaces(depth);
+ printk(" }\n");
+
+ print_spaces(depth);
+ printk(" ... key at: ");
+ print_ip_sym((unsigned long)class->key);
+}
+
+/*
+ * printk all lock dependencies starting at <entry>:
+ */
+static void print_lock_dependencies(struct lock_class *class, int depth)
+{
+ struct lock_list *entry;
+
+ if (DEBUG_LOCKS_WARN_ON(depth >= 20))
+ return;
+
+ print_lock_class_header(class, depth);
+
+ list_for_each_entry(entry, &class->locks_after, entry) {
+ DEBUG_LOCKS_WARN_ON(!entry->class);
+ print_lock_dependencies(entry->class, depth + 1);
+
+ print_spaces(depth);
+ printk(" ... acquired at:\n");
+ print_stack_trace(&entry->trace, 2);
+ printk("\n");
+ }
+}
+
+/*
+ * Add a new dependency to the head of the list:
+ */
+static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
+ struct list_head *head, unsigned long ip)
+{
+ struct lock_list *entry;
+ /*
+ * Lock not present yet - get a new dependency struct and
+ * add it to the list:
+ */
+ entry = alloc_list_entry();
+ if (!entry)
+ return 0;
+
+ entry->class = this;
+ save_trace(&entry->trace);
+
+ /*
+ * Since we never remove from the dependency list, the list can
+ * be walked lockless by other CPUs, it's only allocation
+ * that must be protected by the spinlock. But this also means
+ * we must make new entries visible only once writes to the
+ * entry become visible - hence the RCU op:
+ */
+ list_add_tail_rcu(&entry->entry, head);
+
+ return 1;
+}
+
+/*
+ * Recursive, forwards-direction lock-dependency checking, used for
+ * both noncyclic checking and for hardirq-unsafe/softirq-unsafe
+ * checking.
+ *
+ * (to keep the stackframe of the recursive functions small we
+ * use these global variables, and we also mark various helper
+ * functions as noinline.)
+ */
+static struct held_lock *check_source, *check_target;
+
+/*
+ * Print a dependency chain entry (this is only done when a deadlock
+ * has been detected):
+ */
+static noinline int
+print_circular_bug_entry(struct lock_list *target, unsigned int depth)
+{
+ if (debug_locks_silent)
+ return 0;
+ printk("\n-> #%u", depth);
+ print_lock_name(target->class);
+ printk(":\n");
+ print_stack_trace(&target->trace, 6);
+
+ return 0;
+}
+
+/*
+ * When a circular dependency is detected, print the
+ * header first:
+ */
+static noinline int
+print_circular_bug_header(struct lock_list *entry, unsigned int depth)
+{
+ struct task_struct *curr = current;
+
+ __raw_spin_unlock(&hash_lock);
+ debug_locks_off();
+ if (debug_locks_silent)
+ return 0;
+
+ printk("\n=======================================================\n");
+ printk( "[ INFO: possible circular locking dependency detected ]\n");
+ printk( "-------------------------------------------------------\n");
+ printk("%s/%d is trying to acquire lock:\n",
+ curr->comm, curr->pid);
+ print_lock(check_source);
+ printk("\nbut task is already holding lock:\n");
+ print_lock(check_target);
+ printk("\nwhich lock already depends on the new lock.\n\n");
+ printk("\nthe existing dependency chain (in reverse order) is:\n");
+
+ print_circular_bug_entry(entry, depth);
+
+ return 0;
+}
+
+static noinline int print_circular_bug_tail(void)
+{
+ struct task_struct *curr = current;
+ struct lock_list this;
+
+ if (debug_locks_silent)
+ return 0;
+
+ this.class = check_source->class;
+ save_trace(&this.trace);
+ print_circular_bug_entry(&this, 0);
+
+ printk("\nother info that might help us debug this:\n\n");
+ lockdep_print_held_locks(curr);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
+static int noinline print_infinite_recursion_bug(void)
+{
+ __raw_spin_unlock(&hash_lock);
+ DEBUG_LOCKS_WARN_ON(1);
+
+ return 0;
+}
+
+/*
+ * Prove that the dependency graph starting at <entry> can not
+ * lead to <target>. Print an error and return 0 if it does.
+ */
+static noinline int
+check_noncircular(struct lock_class *source, unsigned int depth)
+{
+ struct lock_list *entry;
+
+ debug_atomic_inc(&nr_cyclic_check_recursions);
+ if (depth > max_recursion_depth)
+ max_recursion_depth = depth;
+ if (depth >= 20)
+ return print_infinite_recursion_bug();
+ /*
+ * Check this lock's dependency list:
+ */
+ list_for_each_entry(entry, &source->locks_after, entry) {
+ if (entry->class == check_target->class)
+ return print_circular_bug_header(entry, depth+1);
+ debug_atomic_inc(&nr_cyclic_checks);
+ if (!check_noncircular(entry->class, depth+1))
+ return print_circular_bug_entry(entry, depth+1);
+ }
+ return 1;
+}
+
+static int very_verbose(struct lock_class *class)
+{
+#if VERY_VERBOSE
+ return class_filter(class);
+#endif
+ return 0;
+}
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * Forwards and backwards subgraph searching, for the purposes of
+ * proving that two subgraphs can be connected by a new dependency
+ * without creating any illegal irq-safe -> irq-unsafe lock dependency.
+ */
+static enum lock_usage_bit find_usage_bit;
+static struct lock_class *forwards_match, *backwards_match;
+
+/*
+ * Find a node in the forwards-direction dependency sub-graph starting
+ * at <source> that matches <find_usage_bit>.
+ *
+ * Return 2 if such a node exists in the subgraph, and put that node
+ * into <forwards_match>.
+ *
+ * Return 1 otherwise and keep <forwards_match> unchanged.
+ * Return 0 on error.
+ */
+static noinline int
+find_usage_forwards(struct lock_class *source, unsigned int depth)
+{
+ struct lock_list *entry;
+ int ret;
+
+ if (depth > max_recursion_depth)
+ max_recursion_depth = depth;
+ if (depth >= 20)
+ return print_infinite_recursion_bug();
+
+ debug_atomic_inc(&nr_find_usage_forwards_checks);
+ if (source->usage_mask & (1 << find_usage_bit)) {
+ forwards_match = source;
+ return 2;
+ }
+
+ /*
+ * Check this lock's dependency list:
+ */
+ list_for_each_entry(entry, &source->locks_after, entry) {
+ debug_atomic_inc(&nr_find_usage_forwards_recursions);
+ ret = find_usage_forwards(entry->class, depth+1);
+ if (ret == 2 || ret == 0)
+ return ret;
+ }
+ return 1;
+}
+
+/*
+ * Find a node in the backwards-direction dependency sub-graph starting
+ * at <source> that matches <find_usage_bit>.
+ *
+ * Return 2 if such a node exists in the subgraph, and put that node
+ * into <backwards_match>.
+ *
+ * Return 1 otherwise and keep <backwards_match> unchanged.
+ * Return 0 on error.
+ */
+static noinline int
+find_usage_backwards(struct lock_class *source, unsigned int depth)
+{
+ struct lock_list *entry;
+ int ret;
+
+ if (depth > max_recursion_depth)
+ max_recursion_depth = depth;
+ if (depth >= 20)
+ return print_infinite_recursion_bug();
+
+ debug_atomic_inc(&nr_find_usage_backwards_checks);
+ if (source->usage_mask & (1 << find_usage_bit)) {
+ backwards_match = source;
+ return 2;
+ }
+
+ /*
+ * Check this lock's dependency list:
+ */
+ list_for_each_entry(entry, &source->locks_before, entry) {
+ debug_atomic_inc(&nr_find_usage_backwards_recursions);
+ ret = find_usage_backwards(entry->class, depth+1);
+ if (ret == 2 || ret == 0)
+ return ret;
+ }
+ return 1;
+}
+
+static int
+print_bad_irq_dependency(struct task_struct *curr,
+ struct held_lock *prev,
+ struct held_lock *next,
+ enum lock_usage_bit bit1,
+ enum lock_usage_bit bit2,
+ const char *irqclass)
+{
+ __raw_spin_unlock(&hash_lock);
+ debug_locks_off();
+ if (debug_locks_silent)
+ return 0;
+
+ printk("\n======================================================\n");
+ printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
+ irqclass, irqclass);
+ printk( "------------------------------------------------------\n");
+ printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
+ curr->comm, curr->pid,
+ curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
+ curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
+ curr->hardirqs_enabled,
+ curr->softirqs_enabled);
+ print_lock(next);
+
+ printk("\nand this task is already holding:\n");
+ print_lock(prev);
+ printk("which would create a new lock dependency:\n");
+ print_lock_name(prev->class);
+ printk(" ->");
+ print_lock_name(next->class);
+ printk("\n");
+
+ printk("\nbut this new dependency connects a %s-irq-safe lock:\n",
+ irqclass);
+ print_lock_name(backwards_match);
+ printk("\n... which became %s-irq-safe at:\n", irqclass);
+
+ print_stack_trace(backwards_match->usage_traces + bit1, 1);
+
+ printk("\nto a %s-irq-unsafe lock:\n", irqclass);
+ print_lock_name(forwards_match);
+ printk("\n... which became %s-irq-unsafe at:\n", irqclass);
+ printk("...");
+
+ print_stack_trace(forwards_match->usage_traces + bit2, 1);
+
+ printk("\nother info that might help us debug this:\n\n");
+ lockdep_print_held_locks(curr);
+
+ printk("\nthe %s-irq-safe lock's dependencies:\n", irqclass);
+ print_lock_dependencies(backwards_match, 0);
+
+ printk("\nthe %s-irq-unsafe lock's dependencies:\n", irqclass);
+ print_lock_dependencies(forwards_match, 0);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
+static int
+check_usage(struct task_struct *curr, struct held_lock *prev,
+ struct held_lock *next, enum lock_usage_bit bit_backwards,
+ enum lock_usage_bit bit_forwards, const char *irqclass)
+{
+ int ret;
+
+ find_usage_bit = bit_backwards;
+ /* fills in <backwards_match> */
+ ret = find_usage_backwards(prev->class, 0);
+ if (!ret || ret == 1)
+ return ret;
+
+ find_usage_bit = bit_forwards;
+ ret = find_usage_forwards(next->class, 0);
+ if (!ret || ret == 1)
+ return ret;
+ /* ret == 2 */
+ return print_bad_irq_dependency(curr, prev, next,
+ bit_backwards, bit_forwards, irqclass);
+}
+
+#endif
+
+static int
+print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
+ struct held_lock *next)
+{
+ debug_locks_off();
+ __raw_spin_unlock(&hash_lock);
+ if (debug_locks_silent)
+ return 0;
+
+ printk("\n=============================================\n");
+ printk( "[ INFO: possible recursive locking detected ]\n");
+ printk( "---------------------------------------------\n");
+ printk("%s/%d is trying to acquire lock:\n",
+ curr->comm, curr->pid);
+ print_lock(next);
+ printk("\nbut task is already holding lock:\n");
+ print_lock(prev);
+
+ printk("\nother info that might help us debug this:\n");
+ lockdep_print_held_locks(curr);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
+/*
+ * Check whether we are holding such a class already.
+ *
+ * (Note that this has to be done separately, because the graph cannot
+ * detect such classes of deadlocks.)
+ *
+ * Returns: 0 on deadlock detected, 1 on OK, 2 on recursive read
+ */
+static int
+check_deadlock(struct task_struct *curr, struct held_lock *next,
+ struct lockdep_map *next_instance, int read)
+{
+ struct held_lock *prev;
+ int i;
+
+ for (i = 0; i < curr->lockdep_depth; i++) {
+ prev = curr->held_locks + i;
+ if (prev->class != next->class)
+ continue;
+ /*
+ * Allow read-after-read recursion of the same
+ * lock class (i.e. read_lock(lock)+read_lock(lock)):
+ */
+ if ((read == 2) && prev->read)
+ return 2;
+ return print_deadlock_bug(curr, prev, next);
+ }
+ return 1;
+}
+
+/*
+ * There was a chain-cache miss, and we are about to add a new dependency
+ * to a previous lock. We recursively validate the following rules:
+ *
+ * - would the adding of the <prev> -> <next> dependency create a
+ * circular dependency in the graph? [== circular deadlock]
+ *
+ * - does the new prev->next dependency connect any hardirq-safe lock
+ * (in the full backwards-subgraph starting at <prev>) with any
+ * hardirq-unsafe lock (in the full forwards-subgraph starting at
+ * <next>)? [== illegal lock inversion with hardirq contexts]
+ *
+ * - does the new prev->next dependency connect any softirq-safe lock
+ * (in the full backwards-subgraph starting at <prev>) with any
+ * softirq-unsafe lock (in the full forwards-subgraph starting at
+ * <next>)? [== illegal lock inversion with softirq contexts]
+ *
+ * any of these scenarios could lead to a deadlock.
+ *
+ * Then if all the validations pass, we add the forwards and backwards
+ * dependency.
+ */
+static int
+check_prev_add(struct task_struct *curr, struct held_lock *prev,
+ struct held_lock *next)
+{
+ struct lock_list *entry;
+ int ret;
+
+ /*
+ * Prove that the new <prev> -> <next> dependency would not
+ * create a circular dependency in the graph. (We do this by
+ * forward-recursing into the graph starting at <next>, and
+ * checking whether we can reach <prev>.)
+ *
+ * We are using global variables to control the recursion, to
+ * keep the stackframe size of the recursive functions low:
+ */
+ check_source = next;
+ check_target = prev;
+ if (!(check_noncircular(next->class, 0)))
+ return print_circular_bug_tail();
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * Prove that the new dependency does not connect a hardirq-safe
+ * lock with a hardirq-unsafe lock - to achieve this we search
+ * the backwards-subgraph starting at <prev>, and the
+ * forwards-subgraph starting at <next>:
+ */
+ if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ,
+ LOCK_ENABLED_HARDIRQS, "hard"))
+ return 0;
+
+ /*
+ * Prove that the new dependency does not connect a hardirq-safe-read
+ * lock with a hardirq-unsafe lock - to achieve this we search
+ * the backwards-subgraph starting at <prev>, and the
+ * forwards-subgraph starting at <next>:
+ */
+ if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ,
+ LOCK_ENABLED_HARDIRQS, "hard-read"))
+ return 0;
+
+ /*
+ * Prove that the new dependency does not connect a softirq-safe
+ * lock with a softirq-unsafe lock - to achieve this we search
+ * the backwards-subgraph starting at <prev>, and the
+ * forwards-subgraph starting at <next>:
+ */
+ if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ,
+ LOCK_ENABLED_SOFTIRQS, "soft"))
+ return 0;
+ /*
+ * Prove that the new dependency does not connect a softirq-safe-read
+ * lock with a softirq-unsafe lock - to achieve this we search
+ * the backwards-subgraph starting at <prev>, and the
+ * forwards-subgraph starting at <next>:
+ */
+ if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ,
+ LOCK_ENABLED_SOFTIRQS, "soft"))
+ return 0;
+#endif
+ /*
+ * For recursive read-locks we do all the dependency checks,
+ * but we dont store read-triggered dependencies (only
+ * write-triggered dependencies). This ensures that only the
+ * write-side dependencies matter, and that if for example a
+ * write-lock never takes any other locks, then the reads are
+ * equivalent to a NOP.
+ */
+ if (next->read == 2 || prev->read == 2)
+ return 1;
+ /*
+ * Is the <prev> -> <next> dependency already present?
+ *
+ * (this may occur even though this is a new chain: consider
+ * e.g. the L1 -> L2 -> L3 -> L4 and the L5 -> L1 -> L2 -> L3
+ * chains - the second one will be new, but L1 already has
+ * L2 added to its dependency list, due to the first chain.)
+ */
+ list_for_each_entry(entry, &prev->class->locks_after, entry) {
+ if (entry->class == next->class)
+ return 2;
+ }
+
+ /*
+ * Ok, all validations passed, add the new lock
+ * to the previous lock's dependency list:
+ */
+ ret = add_lock_to_list(prev->class, next->class,
+ &prev->class->locks_after, next->acquire_ip);
+ if (!ret)
+ return 0;
+ /*
+ * Return value of 2 signals 'dependency already added',
+ * in that case we dont have to add the backlink either.
+ */
+ if (ret == 2)
+ return 2;
+ ret = add_lock_to_list(next->class, prev->class,
+ &next->class->locks_before, next->acquire_ip);
+
+ /*
+ * Debugging printouts:
+ */
+ if (verbose(prev->class) || verbose(next->class)) {
+ __raw_spin_unlock(&hash_lock);
+ printk("\n new dependency: ");
+ print_lock_name(prev->class);
+ printk(" => ");
+ print_lock_name(next->class);
+ printk("\n");
+ dump_stack();
+ __raw_spin_lock(&hash_lock);
+ }
+ return 1;
+}
+
+/*
+ * Add the dependency to all directly-previous locks that are 'relevant'.
+ * The ones that are relevant are (in increasing distance from curr):
+ * all consecutive trylock entries and the final non-trylock entry - or
+ * the end of this context's lock-chain - whichever comes first.
+ */
+static int
+check_prevs_add(struct task_struct *curr, struct held_lock *next)
+{
+ int depth = curr->lockdep_depth;
+ struct held_lock *hlock;
+
+ /*
+ * Debugging checks.
+ *
+ * Depth must not be zero for a non-head lock:
+ */
+ if (!depth)
+ goto out_bug;
+ /*
+ * At least two relevant locks must exist for this
+ * to be a head:
+ */
+ if (curr->held_locks[depth].irq_context !=
+ curr->held_locks[depth-1].irq_context)
+ goto out_bug;
+
+ for (;;) {
+ hlock = curr->held_locks + depth-1;
+ /*
+ * Only non-recursive-read entries get new dependencies
+ * added:
+ */
+ if (hlock->read != 2) {
+ check_prev_add(curr, hlock, next);
+ /*
+ * Stop after the first non-trylock entry,
+ * as non-trylock entries have added their
+ * own direct dependencies already, so this
+ * lock is connected to them indirectly:
+ */
+ if (!hlock->trylock)
+ break;
+ }
+ depth--;
+ /*
+ * End of lock-stack?
+ */
+ if (!depth)
+ break;
+ /*
+ * Stop the search if we cross into another context:
+ */
+ if (curr->held_locks[depth].irq_context !=
+ curr->held_locks[depth-1].irq_context)
+ break;
+ }
+ return 1;
+out_bug:
+ __raw_spin_unlock(&hash_lock);
+ DEBUG_LOCKS_WARN_ON(1);
+
+ return 0;
+}
+
+
+/*
+ * Is this the address of a static object:
+ */
+static int static_obj(void *obj)
+{
+ unsigned long start = (unsigned long) &_stext,
+ end = (unsigned long) &_end,
+ addr = (unsigned long) obj;
+#ifdef CONFIG_SMP
+ int i;
+#endif
+
+ /*
+ * static variable?
+ */
+ if ((addr >= start) && (addr < end))
+ return 1;
+
+#ifdef CONFIG_SMP
+ /*
+ * percpu var?
+ */
+ for_each_possible_cpu(i) {
+ start = (unsigned long) &__per_cpu_start + per_cpu_offset(i);
+ end = (unsigned long) &__per_cpu_end + per_cpu_offset(i);
+
+ if ((addr >= start) && (addr < end))
+ return 1;
+ }
+#endif
+
+ /*
+ * module var?
+ */
+ return is_module_address(addr);
+}
+
+/*
+ * To make lock name printouts unique, we calculate a unique
+ * class->name_version generation counter:
+ */
+static int count_matching_names(struct lock_class *new_class)
+{
+ struct lock_class *class;
+ int count = 0;
+
+ if (!new_class->name)
+ return 0;
+
+ list_for_each_entry(class, &all_lock_classes, lock_entry) {
+ if (new_class->key - new_class->subclass == class->key)
+ return class->name_version;
+ if (class->name && !strcmp(class->name, new_class->name))
+ count = max(count, class->name_version);
+ }
+
+ return count + 1;
+}
+
+extern void __error_too_big_MAX_LOCKDEP_SUBCLASSES(void);
+
+/*
+ * Register a lock's class in the hash-table, if the class is not present
+ * yet. Otherwise we look it up. We cache the result in the lock object
+ * itself, so actual lookup of the hash should be once per lock object.
+ */
+static inline struct lock_class *
+register_lock_class(struct lockdep_map *lock, unsigned int subclass)
+{
+ struct lockdep_subclass_key *key;
+ struct list_head *hash_head;
+ struct lock_class *class;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+ /*
+ * If the architecture calls into lockdep before initializing
+ * the hashes then we'll warn about it later. (we cannot printk
+ * right now)
+ */
+ if (unlikely(!lockdep_initialized)) {
+ lockdep_init();
+ lockdep_init_error = 1;
+ }
+#endif
+
+ /*
+ * Static locks do not have their class-keys yet - for them the key
+ * is the lock object itself:
+ */
+ if (unlikely(!lock->key))
+ lock->key = (void *)lock;
+
+ /*
+ * NOTE: the class-key must be unique. For dynamic locks, a static
+ * lock_class_key variable is passed in through the mutex_init()
+ * (or spin_lock_init()) call - which acts as the key. For static
+ * locks we use the lock object itself as the key.
+ */
+ if (sizeof(struct lock_class_key) > sizeof(struct lock_class))
+ __error_too_big_MAX_LOCKDEP_SUBCLASSES();
+
+ key = lock->key->subkeys + subclass;
+
+ hash_head = classhashentry(key);
+
+ /*
+ * We can walk the hash lockfree, because the hash only
+ * grows, and we are careful when adding entries to the end:
+ */
+ list_for_each_entry(class, hash_head, hash_entry)
+ if (class->key == key)
+ goto out_set;
+
+ /*
+ * Debug-check: all keys must be persistent!
+ */
+ if (!static_obj(lock->key)) {
+ debug_locks_off();
+ printk("INFO: trying to register non-static key.\n");
+ printk("the code is fine but needs lockdep annotation.\n");
+ printk("turning off the locking correctness validator.\n");
+ dump_stack();
+
+ return NULL;
+ }
+
+ __raw_spin_lock(&hash_lock);
+ /*
+ * We have to do the hash-walk again, to avoid races
+ * with another CPU:
+ */
+ list_for_each_entry(class, hash_head, hash_entry)
+ if (class->key == key)
+ goto out_unlock_set;
+ /*
+ * Allocate a new key from the static array, and add it to
+ * the hash:
+ */
+ if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
+ __raw_spin_unlock(&hash_lock);
+ debug_locks_off();
+ printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
+ printk("turning off the locking correctness validator.\n");
+ return NULL;
+ }
+ class = lock_classes + nr_lock_classes++;
+ debug_atomic_inc(&nr_unused_locks);
+ class->key = key;
+ class->name = lock->name;
+ class->subclass = subclass;
+ INIT_LIST_HEAD(&class->lock_entry);
+ INIT_LIST_HEAD(&class->locks_before);
+ INIT_LIST_HEAD(&class->locks_after);
+ class->name_version = count_matching_names(class);
+ /*
+ * We use RCU's safe list-add method to make
+ * parallel walking of the hash-list safe:
+ */
+ list_add_tail_rcu(&class->hash_entry, hash_head);
+
+ if (verbose(class)) {
+ __raw_spin_unlock(&hash_lock);
+ printk("\nnew class %p: %s", class->key, class->name);
+ if (class->name_version > 1)
+ printk("#%d", class->name_version);
+ printk("\n");
+ dump_stack();
+ __raw_spin_lock(&hash_lock);
+ }
+out_unlock_set:
+ __raw_spin_unlock(&hash_lock);
+
+out_set:
+ lock->class[subclass] = class;
+
+ DEBUG_LOCKS_WARN_ON(class->subclass != subclass);
+
+ return class;
+}
+
+/*
+ * Look up a dependency chain. If the key is not present yet then
+ * add it and return 0 - in this case the new dependency chain is
+ * validated. If the key is already hashed, return 1.
+ */
+static inline int lookup_chain_cache(u64 chain_key)
+{
+ struct list_head *hash_head = chainhashentry(chain_key);
+ struct lock_chain *chain;
+
+ DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+ /*
+ * We can walk it lock-free, because entries only get added
+ * to the hash:
+ */
+ list_for_each_entry(chain, hash_head, entry) {
+ if (chain->chain_key == chain_key) {
+cache_hit:
+ debug_atomic_inc(&chain_lookup_hits);
+ /*
+ * In the debugging case, force redundant checking
+ * by returning 1:
+ */
+#ifdef CONFIG_DEBUG_LOCKDEP
+ __raw_spin_lock(&hash_lock);
+ return 1;
+#endif
+ return 0;
+ }
+ }
+ /*
+ * Allocate a new chain entry from the static array, and add
+ * it to the hash:
+ */
+ __raw_spin_lock(&hash_lock);
+ /*
+ * We have to walk the chain again locked - to avoid duplicates:
+ */
+ list_for_each_entry(chain, hash_head, entry) {
+ if (chain->chain_key == chain_key) {
+ __raw_spin_unlock(&hash_lock);
+ goto cache_hit;
+ }
+ }
+ if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) {
+ __raw_spin_unlock(&hash_lock);
+ debug_locks_off();
+ printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
+ printk("turning off the locking correctness validator.\n");
+ return 0;
+ }
+ chain = lock_chains + nr_lock_chains++;
+ chain->chain_key = chain_key;
+ list_add_tail_rcu(&chain->entry, hash_head);
+ debug_atomic_inc(&chain_lookup_misses);
+#ifdef CONFIG_TRACE_IRQFLAGS
+ if (current->hardirq_context)
+ nr_hardirq_chains++;
+ else {
+ if (current->softirq_context)
+ nr_softirq_chains++;
+ else
+ nr_process_chains++;
+ }
+#else
+ nr_process_chains++;
+#endif
+
+ return 1;
+}
+
+/*
+ * We are building curr_chain_key incrementally, so double-check
+ * it from scratch, to make sure that it's done correctly:
+ */
+static void check_chain_key(struct task_struct *curr)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+ struct held_lock *hlock, *prev_hlock = NULL;
+ unsigned int i, id;
+ u64 chain_key = 0;
+
+ for (i = 0; i < curr->lockdep_depth; i++) {
+ hlock = curr->held_locks + i;
+ if (chain_key != hlock->prev_chain_key) {
+ debug_locks_off();
+ printk("hm#1, depth: %u [%u], %016Lx != %016Lx\n",
+ curr->lockdep_depth, i,
+ (unsigned long long)chain_key,
+ (unsigned long long)hlock->prev_chain_key);
+ WARN_ON(1);
+ return;
+ }
+ id = hlock->class - lock_classes;
+ DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS);
+ if (prev_hlock && (prev_hlock->irq_context !=
+ hlock->irq_context))
+ chain_key = 0;
+ chain_key = iterate_chain_key(chain_key, id);
+ prev_hlock = hlock;
+ }
+ if (chain_key != curr->curr_chain_key) {
+ debug_locks_off();
+ printk("hm#2, depth: %u [%u], %016Lx != %016Lx\n",
+ curr->lockdep_depth, i,
+ (unsigned long long)chain_key,
+ (unsigned long long)curr->curr_chain_key);
+ WARN_ON(1);
+ }
+#endif
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+
+/*
+ * print irq inversion bug:
+ */
+static int
+print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
+ struct held_lock *this, int forwards,
+ const char *irqclass)
+{
+ __raw_spin_unlock(&hash_lock);
+ debug_locks_off();
+ if (debug_locks_silent)
+ return 0;
+
+ printk("\n=========================================================\n");
+ printk( "[ INFO: possible irq lock inversion dependency detected ]\n");
+ printk( "---------------------------------------------------------\n");
+ printk("%s/%d just changed the state of lock:\n",
+ curr->comm, curr->pid);
+ print_lock(this);
+ if (forwards)
+ printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
+ else
+ printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
+ print_lock_name(other);
+ printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
+
+ printk("\nother info that might help us debug this:\n");
+ lockdep_print_held_locks(curr);
+
+ printk("\nthe first lock's dependencies:\n");
+ print_lock_dependencies(this->class, 0);
+
+ printk("\nthe second lock's dependencies:\n");
+ print_lock_dependencies(other, 0);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
+/*
+ * Prove that in the forwards-direction subgraph starting at <this>
+ * there is no lock matching <mask>:
+ */
+static int
+check_usage_forwards(struct task_struct *curr, struct held_lock *this,
+ enum lock_usage_bit bit, const char *irqclass)
+{
+ int ret;
+
+ find_usage_bit = bit;
+ /* fills in <forwards_match> */
+ ret = find_usage_forwards(this->class, 0);
+ if (!ret || ret == 1)
+ return ret;
+
+ return print_irq_inversion_bug(curr, forwards_match, this, 1, irqclass);
+}
+
+/*
+ * Prove that in the backwards-direction subgraph starting at <this>
+ * there is no lock matching <mask>:
+ */
+static int
+check_usage_backwards(struct task_struct *curr, struct held_lock *this,
+ enum lock_usage_bit bit, const char *irqclass)
+{
+ int ret;
+
+ find_usage_bit = bit;
+ /* fills in <backwards_match> */
+ ret = find_usage_backwards(this->class, 0);
+ if (!ret || ret == 1)
+ return ret;
+
+ return print_irq_inversion_bug(curr, backwards_match, this, 0, irqclass);
+}
+
+static inline void print_irqtrace_events(struct task_struct *curr)
+{
+ printk("irq event stamp: %u\n", curr->irq_events);
+ printk("hardirqs last enabled at (%u): ", curr->hardirq_enable_event);
+ print_ip_sym(curr->hardirq_enable_ip);
+ printk("hardirqs last disabled at (%u): ", curr->hardirq_disable_event);
+ print_ip_sym(curr->hardirq_disable_ip);
+ printk("softirqs last enabled at (%u): ", curr->softirq_enable_event);
+ print_ip_sym(curr->softirq_enable_ip);
+ printk("softirqs last disabled at (%u): ", curr->softirq_disable_event);
+ print_ip_sym(curr->softirq_disable_ip);
+}
+
+#else
+static inline void print_irqtrace_events(struct task_struct *curr)
+{
+}
+#endif
+
+static int
+print_usage_bug(struct task_struct *curr, struct held_lock *this,
+ enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit)
+{
+ __raw_spin_unlock(&hash_lock);
+ debug_locks_off();
+ if (debug_locks_silent)
+ return 0;
+
+ printk("\n=================================\n");
+ printk( "[ INFO: inconsistent lock state ]\n");
+ printk( "---------------------------------\n");
+
+ printk("inconsistent {%s} -> {%s} usage.\n",
+ usage_str[prev_bit], usage_str[new_bit]);
+
+ printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
+ curr->comm, curr->pid,
+ trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
+ trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
+ trace_hardirqs_enabled(curr),
+ trace_softirqs_enabled(curr));
+ print_lock(this);
+
+ printk("{%s} state was registered at:\n", usage_str[prev_bit]);
+ print_stack_trace(this->class->usage_traces + prev_bit, 1);
+
+ print_irqtrace_events(curr);
+ printk("\nother info that might help us debug this:\n");
+ lockdep_print_held_locks(curr);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
+/*
+ * Print out an error if an invalid bit is set:
+ */
+static inline int
+valid_state(struct task_struct *curr, struct held_lock *this,
+ enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit)
+{
+ if (unlikely(this->class->usage_mask & (1 << bad_bit)))
+ return print_usage_bug(curr, this, bad_bit, new_bit);
+ return 1;
+}
+
+#define STRICT_READ_CHECKS 1
+
+/*
+ * Mark a lock with a usage bit, and validate the state transition:
+ */
+static int mark_lock(struct task_struct *curr, struct held_lock *this,
+ enum lock_usage_bit new_bit, unsigned long ip)
+{
+ unsigned int new_mask = 1 << new_bit, ret = 1;
+
+ /*
+ * If already set then do not dirty the cacheline,
+ * nor do any checks:
+ */
+ if (likely(this->class->usage_mask & new_mask))
+ return 1;
+
+ __raw_spin_lock(&hash_lock);
+ /*
+ * Make sure we didnt race:
+ */
+ if (unlikely(this->class->usage_mask & new_mask)) {
+ __raw_spin_unlock(&hash_lock);
+ return 1;
+ }
+
+ this->class->usage_mask |= new_mask;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ if (new_bit == LOCK_ENABLED_HARDIRQS ||
+ new_bit == LOCK_ENABLED_HARDIRQS_READ)
+ ip = curr->hardirq_enable_ip;
+ else if (new_bit == LOCK_ENABLED_SOFTIRQS ||
+ new_bit == LOCK_ENABLED_SOFTIRQS_READ)
+ ip = curr->softirq_enable_ip;
+#endif
+ if (!save_trace(this->class->usage_traces + new_bit))
+ return 0;
+
+ switch (new_bit) {
+#ifdef CONFIG_TRACE_IRQFLAGS
+ case LOCK_USED_IN_HARDIRQ:
+ if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
+ return 0;
+ if (!valid_state(curr, this, new_bit,
+ LOCK_ENABLED_HARDIRQS_READ))
+ return 0;
+ /*
+ * just marked it hardirq-safe, check that this lock
+ * took no hardirq-unsafe lock in the past:
+ */
+ if (!check_usage_forwards(curr, this,
+ LOCK_ENABLED_HARDIRQS, "hard"))
+ return 0;
+#if STRICT_READ_CHECKS
+ /*
+ * just marked it hardirq-safe, check that this lock
+ * took no hardirq-unsafe-read lock in the past:
+ */
+ if (!check_usage_forwards(curr, this,
+ LOCK_ENABLED_HARDIRQS_READ, "hard-read"))
+ return 0;
+#endif
+ if (hardirq_verbose(this->class))
+ ret = 2;
+ break;
+ case LOCK_USED_IN_SOFTIRQ:
+ if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
+ return 0;
+ if (!valid_state(curr, this, new_bit,
+ LOCK_ENABLED_SOFTIRQS_READ))
+ return 0;
+ /*
+ * just marked it softirq-safe, check that this lock
+ * took no softirq-unsafe lock in the past:
+ */
+ if (!check_usage_forwards(curr, this,
+ LOCK_ENABLED_SOFTIRQS, "soft"))
+ return 0;
+#if STRICT_READ_CHECKS
+ /*
+ * just marked it softirq-safe, check that this lock
+ * took no softirq-unsafe-read lock in the past:
+ */
+ if (!check_usage_forwards(curr, this,
+ LOCK_ENABLED_SOFTIRQS_READ, "soft-read"))
+ return 0;
+#endif
+ if (softirq_verbose(this->class))
+ ret = 2;
+ break;
+ case LOCK_USED_IN_HARDIRQ_READ:
+ if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS))
+ return 0;
+ /*
+ * just marked it hardirq-read-safe, check that this lock
+ * took no hardirq-unsafe lock in the past:
+ */
+ if (!check_usage_forwards(curr, this,
+ LOCK_ENABLED_HARDIRQS, "hard"))
+ return 0;
+ if (hardirq_verbose(this->class))
+ ret = 2;
+ break;
+ case LOCK_USED_IN_SOFTIRQ_READ:
+ if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS))
+ return 0;
+ /*
+ * just marked it softirq-read-safe, check that this lock
+ * took no softirq-unsafe lock in the past:
+ */
+ if (!check_usage_forwards(curr, this,
+ LOCK_ENABLED_SOFTIRQS, "soft"))
+ return 0;
+ if (softirq_verbose(this->class))
+ ret = 2;
+ break;
+ case LOCK_ENABLED_HARDIRQS:
+ if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
+ return 0;
+ if (!valid_state(curr, this, new_bit,
+ LOCK_USED_IN_HARDIRQ_READ))
+ return 0;
+ /*
+ * just marked it hardirq-unsafe, check that no hardirq-safe
+ * lock in the system ever took it in the past:
+ */
+ if (!check_usage_backwards(curr, this,
+ LOCK_USED_IN_HARDIRQ, "hard"))
+ return 0;
+#if STRICT_READ_CHECKS
+ /*
+ * just marked it hardirq-unsafe, check that no
+ * hardirq-safe-read lock in the system ever took
+ * it in the past:
+ */
+ if (!check_usage_backwards(curr, this,
+ LOCK_USED_IN_HARDIRQ_READ, "hard-read"))
+ return 0;
+#endif
+ if (hardirq_verbose(this->class))
+ ret = 2;
+ break;
+ case LOCK_ENABLED_SOFTIRQS:
+ if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
+ return 0;
+ if (!valid_state(curr, this, new_bit,
+ LOCK_USED_IN_SOFTIRQ_READ))
+ return 0;
+ /*
+ * just marked it softirq-unsafe, check that no softirq-safe
+ * lock in the system ever took it in the past:
+ */
+ if (!check_usage_backwards(curr, this,
+ LOCK_USED_IN_SOFTIRQ, "soft"))
+ return 0;
+#if STRICT_READ_CHECKS
+ /*
+ * just marked it softirq-unsafe, check that no
+ * softirq-safe-read lock in the system ever took
+ * it in the past:
+ */
+ if (!check_usage_backwards(curr, this,
+ LOCK_USED_IN_SOFTIRQ_READ, "soft-read"))
+ return 0;
+#endif
+ if (softirq_verbose(this->class))
+ ret = 2;
+ break;
+ case LOCK_ENABLED_HARDIRQS_READ:
+ if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ))
+ return 0;
+#if STRICT_READ_CHECKS
+ /*
+ * just marked it hardirq-read-unsafe, check that no
+ * hardirq-safe lock in the system ever took it in the past:
+ */
+ if (!check_usage_backwards(curr, this,
+ LOCK_USED_IN_HARDIRQ, "hard"))
+ return 0;
+#endif
+ if (hardirq_verbose(this->class))
+ ret = 2;
+ break;
+ case LOCK_ENABLED_SOFTIRQS_READ:
+ if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ))
+ return 0;
+#if STRICT_READ_CHECKS
+ /*
+ * just marked it softirq-read-unsafe, check that no
+ * softirq-safe lock in the system ever took it in the past:
+ */
+ if (!check_usage_backwards(curr, this,
+ LOCK_USED_IN_SOFTIRQ, "soft"))
+ return 0;
+#endif
+ if (softirq_verbose(this->class))
+ ret = 2;
+ break;
+#endif
+ case LOCK_USED:
+ /*
+ * Add it to the global list of classes:
+ */
+ list_add_tail_rcu(&this->class->lock_entry, &all_lock_classes);
+ debug_atomic_dec(&nr_unused_locks);
+ break;
+ default:
+ debug_locks_off();
+ WARN_ON(1);
+ return 0;
+ }
+
+ __raw_spin_unlock(&hash_lock);
+
+ /*
+ * We must printk outside of the hash_lock:
+ */
+ if (ret == 2) {
+ printk("\nmarked lock as {%s}:\n", usage_str[new_bit]);
+ print_lock(this);
+ print_irqtrace_events(curr);
+ dump_stack();
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+/*
+ * Mark all held locks with a usage bit:
+ */
+static int
+mark_held_locks(struct task_struct *curr, int hardirq, unsigned long ip)
+{
+ enum lock_usage_bit usage_bit;
+ struct held_lock *hlock;
+ int i;
+
+ for (i = 0; i < curr->lockdep_depth; i++) {
+ hlock = curr->held_locks + i;
+
+ if (hardirq) {
+ if (hlock->read)
+ usage_bit = LOCK_ENABLED_HARDIRQS_READ;
+ else
+ usage_bit = LOCK_ENABLED_HARDIRQS;
+ } else {
+ if (hlock->read)
+ usage_bit = LOCK_ENABLED_SOFTIRQS_READ;
+ else
+ usage_bit = LOCK_ENABLED_SOFTIRQS;
+ }
+ if (!mark_lock(curr, hlock, usage_bit, ip))
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Debugging helper: via this flag we know that we are in
+ * 'early bootup code', and will warn about any invalid irqs-on event:
+ */
+static int early_boot_irqs_enabled;
+
+void early_boot_irqs_off(void)
+{
+ early_boot_irqs_enabled = 0;
+}
+
+void early_boot_irqs_on(void)
+{
+ early_boot_irqs_enabled = 1;
+}
+
+/*
+ * Hardirqs will be enabled:
+ */
+void trace_hardirqs_on(void)
+{
+ struct task_struct *curr = current;
+ unsigned long ip;
+
+ if (unlikely(!debug_locks || current->lockdep_recursion))
+ return;
+
+ if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
+ return;
+
+ if (unlikely(curr->hardirqs_enabled)) {
+ debug_atomic_inc(&redundant_hardirqs_on);
+ return;
+ }
+ /* we'll do an OFF -> ON transition: */
+ curr->hardirqs_enabled = 1;
+ ip = (unsigned long) __builtin_return_address(0);
+
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return;
+ if (DEBUG_LOCKS_WARN_ON(current->hardirq_context))
+ return;
+ /*
+ * We are going to turn hardirqs on, so set the
+ * usage bit for all held locks:
+ */
+ if (!mark_held_locks(curr, 1, ip))
+ return;
+ /*
+ * If we have softirqs enabled, then set the usage
+ * bit for all held locks. (disabled hardirqs prevented
+ * this bit from being set before)
+ */
+ if (curr->softirqs_enabled)
+ if (!mark_held_locks(curr, 0, ip))
+ return;
+
+ curr->hardirq_enable_ip = ip;
+ curr->hardirq_enable_event = ++curr->irq_events;
+ debug_atomic_inc(&hardirqs_on_events);
+}
+
+EXPORT_SYMBOL(trace_hardirqs_on);
+
+/*
+ * Hardirqs were disabled:
+ */
+void trace_hardirqs_off(void)
+{
+ struct task_struct *curr = current;
+
+ if (unlikely(!debug_locks || current->lockdep_recursion))
+ return;
+
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return;
+
+ if (curr->hardirqs_enabled) {
+ /*
+ * We have done an ON -> OFF transition:
+ */
+ curr->hardirqs_enabled = 0;
+ curr->hardirq_disable_ip = _RET_IP_;
+ curr->hardirq_disable_event = ++curr->irq_events;
+ debug_atomic_inc(&hardirqs_off_events);
+ } else
+ debug_atomic_inc(&redundant_hardirqs_off);
+}
+
+EXPORT_SYMBOL(trace_hardirqs_off);
+
+/*
+ * Softirqs will be enabled:
+ */
+void trace_softirqs_on(unsigned long ip)
+{
+ struct task_struct *curr = current;
+
+ if (unlikely(!debug_locks))
+ return;
+
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return;
+
+ if (curr->softirqs_enabled) {
+ debug_atomic_inc(&redundant_softirqs_on);
+ return;
+ }
+
+ /*
+ * We'll do an OFF -> ON transition:
+ */
+ curr->softirqs_enabled = 1;
+ curr->softirq_enable_ip = ip;
+ curr->softirq_enable_event = ++curr->irq_events;
+ debug_atomic_inc(&softirqs_on_events);
+ /*
+ * We are going to turn softirqs on, so set the
+ * usage bit for all held locks, if hardirqs are
+ * enabled too:
+ */
+ if (curr->hardirqs_enabled)
+ mark_held_locks(curr, 0, ip);
+}
+
+/*
+ * Softirqs were disabled:
+ */
+void trace_softirqs_off(unsigned long ip)
+{
+ struct task_struct *curr = current;
+
+ if (unlikely(!debug_locks))
+ return;
+
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return;
+
+ if (curr->softirqs_enabled) {
+ /*
+ * We have done an ON -> OFF transition:
+ */
+ curr->softirqs_enabled = 0;
+ curr->softirq_disable_ip = ip;
+ curr->softirq_disable_event = ++curr->irq_events;
+ debug_atomic_inc(&softirqs_off_events);
+ DEBUG_LOCKS_WARN_ON(!softirq_count());
+ } else
+ debug_atomic_inc(&redundant_softirqs_off);
+}
+
+#endif
+
+/*
+ * Initialize a lock instance's lock-class mapping info:
+ */
+void lockdep_init_map(struct lockdep_map *lock, const char *name,
+ struct lock_class_key *key)
+{
+ if (unlikely(!debug_locks))
+ return;
+
+ if (DEBUG_LOCKS_WARN_ON(!key))
+ return;
+ if (DEBUG_LOCKS_WARN_ON(!name))
+ return;
+ /*
+ * Sanity check, the lock-class key must be persistent:
+ */
+ if (!static_obj(key)) {
+ printk("BUG: key %p not in .data!\n", key);
+ DEBUG_LOCKS_WARN_ON(1);
+ return;
+ }
+ lock->name = name;
+ lock->key = key;
+ memset(lock->class, 0, sizeof(lock->class[0])*MAX_LOCKDEP_SUBCLASSES);
+}
+
+EXPORT_SYMBOL_GPL(lockdep_init_map);
+
+/*
+ * This gets called for every mutex_lock*()/spin_lock*() operation.
+ * We maintain the dependency maps and validate the locking attempt:
+ */
+static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+ int trylock, int read, int check, int hardirqs_off,
+ unsigned long ip)
+{
+ struct task_struct *curr = current;
+ struct held_lock *hlock;
+ struct lock_class *class;
+ unsigned int depth, id;
+ int chain_head = 0;
+ u64 chain_key;
+
+ if (unlikely(!debug_locks))
+ return 0;
+
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return 0;
+
+ if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
+ debug_locks_off();
+ printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n");
+ printk("turning off the locking correctness validator.\n");
+ return 0;
+ }
+
+ class = lock->class[subclass];
+ /* not cached yet? */
+ if (unlikely(!class)) {
+ class = register_lock_class(lock, subclass);
+ if (!class)
+ return 0;
+ }
+ debug_atomic_inc((atomic_t *)&class->ops);
+ if (very_verbose(class)) {
+ printk("\nacquire class [%p] %s", class->key, class->name);
+ if (class->name_version > 1)
+ printk("#%d", class->name_version);
+ printk("\n");
+ dump_stack();
+ }
+
+ /*
+ * Add the lock to the list of currently held locks.
+ * (we dont increase the depth just yet, up until the
+ * dependency checks are done)
+ */
+ depth = curr->lockdep_depth;
+ if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH))
+ return 0;
+
+ hlock = curr->held_locks + depth;
+
+ hlock->class = class;
+ hlock->acquire_ip = ip;
+ hlock->instance = lock;
+ hlock->trylock = trylock;
+ hlock->read = read;
+ hlock->check = check;
+ hlock->hardirqs_off = hardirqs_off;
+
+ if (check != 2)
+ goto out_calc_hash;
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If non-trylock use in a hardirq or softirq context, then
+ * mark the lock as used in these contexts:
+ */
+ if (!trylock) {
+ if (read) {
+ if (curr->hardirq_context)
+ if (!mark_lock(curr, hlock,
+ LOCK_USED_IN_HARDIRQ_READ, ip))
+ return 0;
+ if (curr->softirq_context)
+ if (!mark_lock(curr, hlock,
+ LOCK_USED_IN_SOFTIRQ_READ, ip))
+ return 0;
+ } else {
+ if (curr->hardirq_context)
+ if (!mark_lock(curr, hlock, LOCK_USED_IN_HARDIRQ, ip))
+ return 0;
+ if (curr->softirq_context)
+ if (!mark_lock(curr, hlock, LOCK_USED_IN_SOFTIRQ, ip))
+ return 0;
+ }
+ }
+ if (!hardirqs_off) {
+ if (read) {
+ if (!mark_lock(curr, hlock,
+ LOCK_ENABLED_HARDIRQS_READ, ip))
+ return 0;
+ if (curr->softirqs_enabled)
+ if (!mark_lock(curr, hlock,
+ LOCK_ENABLED_SOFTIRQS_READ, ip))
+ return 0;
+ } else {
+ if (!mark_lock(curr, hlock,
+ LOCK_ENABLED_HARDIRQS, ip))
+ return 0;
+ if (curr->softirqs_enabled)
+ if (!mark_lock(curr, hlock,
+ LOCK_ENABLED_SOFTIRQS, ip))
+ return 0;
+ }
+ }
+#endif
+ /* mark it as used: */
+ if (!mark_lock(curr, hlock, LOCK_USED, ip))
+ return 0;
+out_calc_hash:
+ /*
+ * Calculate the chain hash: it's the combined has of all the
+ * lock keys along the dependency chain. We save the hash value
+ * at every step so that we can get the current hash easily
+ * after unlock. The chain hash is then used to cache dependency
+ * results.
+ *
+ * The 'key ID' is what is the most compact key value to drive
+ * the hash, not class->key.
+ */
+ id = class - lock_classes;
+ if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+ return 0;
+
+ chain_key = curr->curr_chain_key;
+ if (!depth) {
+ if (DEBUG_LOCKS_WARN_ON(chain_key != 0))
+ return 0;
+ chain_head = 1;
+ }
+
+ hlock->prev_chain_key = chain_key;
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * Keep track of points where we cross into an interrupt context:
+ */
+ hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) +
+ curr->softirq_context;
+ if (depth) {
+ struct held_lock *prev_hlock;
+
+ prev_hlock = curr->held_locks + depth-1;
+ /*
+ * If we cross into another context, reset the
+ * hash key (this also prevents the checking and the
+ * adding of the dependency to 'prev'):
+ */
+ if (prev_hlock->irq_context != hlock->irq_context) {
+ chain_key = 0;
+ chain_head = 1;
+ }
+ }
+#endif
+ chain_key = iterate_chain_key(chain_key, id);
+ curr->curr_chain_key = chain_key;
+
+ /*
+ * Trylock needs to maintain the stack of held locks, but it
+ * does not add new dependencies, because trylock can be done
+ * in any order.
+ *
+ * We look up the chain_key and do the O(N^2) check and update of
+ * the dependencies only if this is a new dependency chain.
+ * (If lookup_chain_cache() returns with 1 it acquires
+ * hash_lock for us)
+ */
+ if (!trylock && (check == 2) && lookup_chain_cache(chain_key)) {
+ /*
+ * Check whether last held lock:
+ *
+ * - is irq-safe, if this lock is irq-unsafe
+ * - is softirq-safe, if this lock is hardirq-unsafe
+ *
+ * And check whether the new lock's dependency graph
+ * could lead back to the previous lock.
+ *
+ * any of these scenarios could lead to a deadlock. If
+ * All validations
+ */
+ int ret = check_deadlock(curr, hlock, lock, read);
+
+ if (!ret)
+ return 0;
+ /*
+ * Mark recursive read, as we jump over it when
+ * building dependencies (just like we jump over
+ * trylock entries):
+ */
+ if (ret == 2)
+ hlock->read = 2;
+ /*
+ * Add dependency only if this lock is not the head
+ * of the chain, and if it's not a secondary read-lock:
+ */
+ if (!chain_head && ret != 2)
+ if (!check_prevs_add(curr, hlock))
+ return 0;
+ __raw_spin_unlock(&hash_lock);
+ }
+ curr->lockdep_depth++;
+ check_chain_key(curr);
+ if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
+ debug_locks_off();
+ printk("BUG: MAX_LOCK_DEPTH too low!\n");
+ printk("turning off the locking correctness validator.\n");
+ return 0;
+ }
+ if (unlikely(curr->lockdep_depth > max_lockdep_depth))
+ max_lockdep_depth = curr->lockdep_depth;
+
+ return 1;
+}
+
+static int
+print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
+ unsigned long ip)
+{
+ if (!debug_locks_off())
+ return 0;
+ if (debug_locks_silent)
+ return 0;
+
+ printk("\n=====================================\n");
+ printk( "[ BUG: bad unlock balance detected! ]\n");
+ printk( "-------------------------------------\n");
+ printk("%s/%d is trying to release lock (",
+ curr->comm, curr->pid);
+ print_lockdep_cache(lock);
+ printk(") at:\n");
+ print_ip_sym(ip);
+ printk("but there are no more locks to release!\n");
+ printk("\nother info that might help us debug this:\n");
+ lockdep_print_held_locks(curr);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+
+ return 0;
+}
+
+/*
+ * Common debugging checks for both nested and non-nested unlock:
+ */
+static int check_unlock(struct task_struct *curr, struct lockdep_map *lock,
+ unsigned long ip)
+{
+ if (unlikely(!debug_locks))
+ return 0;
+ if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+ return 0;
+
+ if (curr->lockdep_depth <= 0)
+ return print_unlock_inbalance_bug(curr, lock, ip);
+
+ return 1;
+}
+
+/*
+ * Remove the lock to the list of currently held locks in a
+ * potentially non-nested (out of order) manner. This is a
+ * relatively rare operation, as all the unlock APIs default
+ * to nested mode (which uses lock_release()):
+ */
+static int
+lock_release_non_nested(struct task_struct *curr,
+ struct lockdep_map *lock, unsigned long ip)
+{
+ struct held_lock *hlock, *prev_hlock;
+ unsigned int depth;
+ int i;
+
+ /*
+ * Check whether the lock exists in the current stack
+ * of held locks:
+ */
+ depth = curr->lockdep_depth;
+ if (DEBUG_LOCKS_WARN_ON(!depth))
+ return 0;
+
+ prev_hlock = NULL;
+ for (i = depth-1; i >= 0; i--) {
+ hlock = curr->held_locks + i;
+ /*
+ * We must not cross into another context:
+ */
+ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context)
+ break;
+ if (hlock->instance == lock)
+ goto found_it;
+ prev_hlock = hlock;
+ }
+ return print_unlock_inbalance_bug(curr, lock, ip);
+
+found_it:
+ /*
+ * We have the right lock to unlock, 'hlock' points to it.
+ * Now we remove it from the stack, and add back the other
+ * entries (if any), recalculating the hash along the way:
+ */
+ curr->lockdep_depth = i;
+ curr->curr_chain_key = hlock->prev_chain_key;
+
+ for (i++; i < depth; i++) {
+ hlock = curr->held_locks + i;
+ if (!__lock_acquire(hlock->instance,
+ hlock->class->subclass, hlock->trylock,
+ hlock->read, hlock->check, hlock->hardirqs_off,
+ hlock->acquire_ip))
+ return 0;
+ }
+
+ if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1))
+ return 0;
+ return 1;
+}
+
+/*
+ * Remove the lock to the list of currently held locks - this gets
+ * called on mutex_unlock()/spin_unlock*() (or on a failed
+ * mutex_lock_interruptible()). This is done for unlocks that nest
+ * perfectly. (i.e. the current top of the lock-stack is unlocked)
+ */
+static int lock_release_nested(struct task_struct *curr,
+ struct lockdep_map *lock, unsigned long ip)
+{
+ struct held_lock *hlock;
+ unsigned int depth;
+
+ /*
+ * Pop off the top of the lock stack:
+ */
+ depth = curr->lockdep_depth - 1;
+ hlock = curr->held_locks + depth;
+
+ /*
+ * Is the unlock non-nested:
+ */
+ if (hlock->instance != lock)
+ return lock_release_non_nested(curr, lock, ip);
+ curr->lockdep_depth--;
+
+ if (DEBUG_LOCKS_WARN_ON(!depth && (hlock->prev_chain_key != 0)))
+ return 0;
+
+ curr->curr_chain_key = hlock->prev_chain_key;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+ hlock->prev_chain_key = 0;
+ hlock->class = NULL;
+ hlock->acquire_ip = 0;
+ hlock->irq_context = 0;
+#endif
+ return 1;
+}
+
+/*
+ * Remove the lock to the list of currently held locks - this gets
+ * called on mutex_unlock()/spin_unlock*() (or on a failed
+ * mutex_lock_interruptible()). This is done for unlocks that nest
+ * perfectly. (i.e. the current top of the lock-stack is unlocked)
+ */
+static void
+__lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
+{
+ struct task_struct *curr = current;
+
+ if (!check_unlock(curr, lock, ip))
+ return;
+
+ if (nested) {
+ if (!lock_release_nested(curr, lock, ip))
+ return;
+ } else {
+ if (!lock_release_non_nested(curr, lock, ip))
+ return;
+ }
+
+ check_chain_key(curr);
+}
+
+/*
+ * Check whether we follow the irq-flags state precisely:
+ */
+static void check_flags(unsigned long flags)
+{
+#if defined(CONFIG_DEBUG_LOCKDEP) && defined(CONFIG_TRACE_IRQFLAGS)
+ if (!debug_locks)
+ return;
+
+ if (irqs_disabled_flags(flags))
+ DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled);
+ else
+ DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled);
+
+ /*
+ * We dont accurately track softirq state in e.g.
+ * hardirq contexts (such as on 4KSTACKS), so only
+ * check if not in hardirq contexts:
+ */
+ if (!hardirq_count()) {
+ if (softirq_count())
+ DEBUG_LOCKS_WARN_ON(current->softirqs_enabled);
+ else
+ DEBUG_LOCKS_WARN_ON(!current->softirqs_enabled);
+ }
+
+ if (!debug_locks)
+ print_irqtrace_events(current);
+#endif
+}
+
+/*
+ * We are not always called with irqs disabled - do that here,
+ * and also avoid lockdep recursion:
+ */
+void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+ int trylock, int read, int check, unsigned long ip)
+{
+ unsigned long flags;
+
+ if (unlikely(current->lockdep_recursion))
+ return;
+
+ raw_local_irq_save(flags);
+ check_flags(flags);
+
+ current->lockdep_recursion = 1;
+ __lock_acquire(lock, subclass, trylock, read, check,
+ irqs_disabled_flags(flags), ip);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL_GPL(lock_acquire);
+
+void lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
+{
+ unsigned long flags;
+
+ if (unlikely(current->lockdep_recursion))
+ return;
+
+ raw_local_irq_save(flags);
+ check_flags(flags);
+ current->lockdep_recursion = 1;
+ __lock_release(lock, nested, ip);
+ current->lockdep_recursion = 0;
+ raw_local_irq_restore(flags);
+}
+
+EXPORT_SYMBOL_GPL(lock_release);
+
+/*
+ * Used by the testsuite, sanitize the validator state
+ * after a simulated failure:
+ */
+
+void lockdep_reset(void)
+{
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ current->curr_chain_key = 0;
+ current->lockdep_depth = 0;
+ current->lockdep_recursion = 0;
+ memset(current->held_locks, 0, MAX_LOCK_DEPTH*sizeof(struct held_lock));
+ nr_hardirq_chains = 0;
+ nr_softirq_chains = 0;
+ nr_process_chains = 0;
+ debug_locks = 1;
+ raw_local_irq_restore(flags);
+}
+
+static void zap_class(struct lock_class *class)
+{
+ int i;
+
+ /*
+ * Remove all dependencies this lock is
+ * involved in:
+ */
+ for (i = 0; i < nr_list_entries; i++) {
+ if (list_entries[i].class == class)
+ list_del_rcu(&list_entries[i].entry);
+ }
+ /*
+ * Unhash the class and remove it from the all_lock_classes list:
+ */
+ list_del_rcu(&class->hash_entry);
+ list_del_rcu(&class->lock_entry);
+
+}
+
+static inline int within(void *addr, void *start, unsigned long size)
+{
+ return addr >= start && addr < start + size;
+}
+
+void lockdep_free_key_range(void *start, unsigned long size)
+{
+ struct lock_class *class, *next;
+ struct list_head *head;
+ unsigned long flags;
+ int i;
+
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&hash_lock);
+
+ /*
+ * Unhash all classes that were created by this module:
+ */
+ for (i = 0; i < CLASSHASH_SIZE; i++) {
+ head = classhash_table + i;
+ if (list_empty(head))
+ continue;
+ list_for_each_entry_safe(class, next, head, hash_entry)
+ if (within(class->key, start, size))
+ zap_class(class);
+ }
+
+ __raw_spin_unlock(&hash_lock);
+ raw_local_irq_restore(flags);
+}
+
+void lockdep_reset_lock(struct lockdep_map *lock)
+{
+ struct lock_class *class, *next, *entry;
+ struct list_head *head;
+ unsigned long flags;
+ int i, j;
+
+ raw_local_irq_save(flags);
+ __raw_spin_lock(&hash_lock);
+
+ /*
+ * Remove all classes this lock has:
+ */
+ for (i = 0; i < CLASSHASH_SIZE; i++) {
+ head = classhash_table + i;
+ if (list_empty(head))
+ continue;
+ list_for_each_entry_safe(class, next, head, hash_entry) {
+ for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
+ entry = lock->class[j];
+ if (class == entry) {
+ zap_class(class);
+ lock->class[j] = NULL;
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * Debug check: in the end all mapped classes should
+ * be gone.
+ */
+ for (j = 0; j < MAX_LOCKDEP_SUBCLASSES; j++) {
+ entry = lock->class[j];
+ if (!entry)
+ continue;
+ __raw_spin_unlock(&hash_lock);
+ DEBUG_LOCKS_WARN_ON(1);
+ raw_local_irq_restore(flags);
+ return;
+ }
+
+ __raw_spin_unlock(&hash_lock);
+ raw_local_irq_restore(flags);
+}
+
+void __init lockdep_init(void)
+{
+ int i;
+
+ /*
+ * Some architectures have their own start_kernel()
+ * code which calls lockdep_init(), while we also
+ * call lockdep_init() from the start_kernel() itself,
+ * and we want to initialize the hashes only once:
+ */
+ if (lockdep_initialized)
+ return;
+
+ for (i = 0; i < CLASSHASH_SIZE; i++)
+ INIT_LIST_HEAD(classhash_table + i);
+
+ for (i = 0; i < CHAINHASH_SIZE; i++)
+ INIT_LIST_HEAD(chainhash_table + i);
+
+ lockdep_initialized = 1;
+}
+
+void __init lockdep_info(void)
+{
+ printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
+
+ printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES);
+ printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH);
+ printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS);
+ printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE);
+ printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES);
+ printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS);
+ printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE);
+
+ printk(" memory used by lock dependency info: %lu kB\n",
+ (sizeof(struct lock_class) * MAX_LOCKDEP_KEYS +
+ sizeof(struct list_head) * CLASSHASH_SIZE +
+ sizeof(struct lock_list) * MAX_LOCKDEP_ENTRIES +
+ sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS +
+ sizeof(struct list_head) * CHAINHASH_SIZE) / 1024);
+
+ printk(" per task-struct memory footprint: %lu bytes\n",
+ sizeof(struct held_lock) * MAX_LOCK_DEPTH);
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+ if (lockdep_init_error)
+ printk("WARNING: lockdep init error! Arch code didnt call lockdep_init() early enough?\n");
+#endif
+}
+
+static inline int in_range(const void *start, const void *addr, const void *end)
+{
+ return addr >= start && addr <= end;
+}
+
+static void
+print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
+ const void *mem_to)
+{
+ if (!debug_locks_off())
+ return;
+ if (debug_locks_silent)
+ return;
+
+ printk("\n=========================\n");
+ printk( "[ BUG: held lock freed! ]\n");
+ printk( "-------------------------\n");
+ printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
+ curr->comm, curr->pid, mem_from, mem_to-1);
+ lockdep_print_held_locks(curr);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+}
+
+/*
+ * Called when kernel memory is freed (or unmapped), or if a lock
+ * is destroyed or reinitialized - this code checks whether there is
+ * any held lock in the memory range of <from> to <to>:
+ */
+void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
+{
+ const void *mem_to = mem_from + mem_len, *lock_from, *lock_to;
+ struct task_struct *curr = current;
+ struct held_lock *hlock;
+ unsigned long flags;
+ int i;
+
+ if (unlikely(!debug_locks))
+ return;
+
+ local_irq_save(flags);
+ for (i = 0; i < curr->lockdep_depth; i++) {
+ hlock = curr->held_locks + i;
+
+ lock_from = (void *)hlock->instance;
+ lock_to = (void *)(hlock->instance + 1);
+
+ if (!in_range(mem_from, lock_from, mem_to) &&
+ !in_range(mem_from, lock_to, mem_to))
+ continue;
+
+ print_freed_lock_bug(curr, mem_from, mem_to);
+ break;
+ }
+ local_irq_restore(flags);
+}
+
+static void print_held_locks_bug(struct task_struct *curr)
+{
+ if (!debug_locks_off())
+ return;
+ if (debug_locks_silent)
+ return;
+
+ printk("\n=====================================\n");
+ printk( "[ BUG: lock held at task exit time! ]\n");
+ printk( "-------------------------------------\n");
+ printk("%s/%d is exiting with locks still held!\n",
+ curr->comm, curr->pid);
+ lockdep_print_held_locks(curr);
+
+ printk("\nstack backtrace:\n");
+ dump_stack();
+}
+
+void debug_check_no_locks_held(struct task_struct *task)
+{
+ if (unlikely(task->lockdep_depth > 0))
+ print_held_locks_bug(task);
+}
+
+void debug_show_all_locks(void)
+{
+ struct task_struct *g, *p;
+ int count = 10;
+ int unlock = 1;
+
+ printk("\nShowing all locks held in the system:\n");
+
+ /*
+ * Here we try to get the tasklist_lock as hard as possible,
+ * if not successful after 2 seconds we ignore it (but keep
+ * trying). This is to enable a debug printout even if a
+ * tasklist_lock-holding task deadlocks or crashes.
+ */
+retry:
+ if (!read_trylock(&tasklist_lock)) {
+ if (count == 10)
+ printk("hm, tasklist_lock locked, retrying... ");
+ if (count) {
+ count--;
+ printk(" #%d", 10-count);
+ mdelay(200);
+ goto retry;
+ }
+ printk(" ignoring it.\n");
+ unlock = 0;
+ }
+ if (count != 10)
+ printk(" locked it.\n");
+
+ do_each_thread(g, p) {
+ if (p->lockdep_depth)
+ lockdep_print_held_locks(p);
+ if (!unlock)
+ if (read_trylock(&tasklist_lock))
+ unlock = 1;
+ } while_each_thread(g, p);
+
+ printk("\n");
+ printk("=============================================\n\n");
+
+ if (unlock)
+ read_unlock(&tasklist_lock);
+}
+
+EXPORT_SYMBOL_GPL(debug_show_all_locks);
+
+void debug_show_held_locks(struct task_struct *task)
+{
+ lockdep_print_held_locks(task);
+}
+
+EXPORT_SYMBOL_GPL(debug_show_held_locks);
+
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
new file mode 100644
index 00000000000..0d355f24fe0
--- /dev/null
+++ b/kernel/lockdep_internals.h
@@ -0,0 +1,78 @@
+/*
+ * kernel/lockdep_internals.h
+ *
+ * Runtime locking correctness validator
+ *
+ * lockdep subsystem internal functions and variables.
+ */
+
+/*
+ * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies
+ * we track.
+ *
+ * We use the per-lock dependency maps in two ways: we grow it by adding
+ * every to-be-taken lock to all currently held lock's own dependency
+ * table (if it's not there yet), and we check it for lock order
+ * conflicts and deadlocks.
+ */
+#define MAX_LOCKDEP_ENTRIES 8192UL
+
+#define MAX_LOCKDEP_KEYS_BITS 11
+#define MAX_LOCKDEP_KEYS (1UL << MAX_LOCKDEP_KEYS_BITS)
+
+#define MAX_LOCKDEP_CHAINS_BITS 13
+#define MAX_LOCKDEP_CHAINS (1UL << MAX_LOCKDEP_CHAINS_BITS)
+
+/*
+ * Stack-trace: tightly packed array of stack backtrace
+ * addresses. Protected by the hash_lock.
+ */
+#define MAX_STACK_TRACE_ENTRIES 131072UL
+
+extern struct list_head all_lock_classes;
+
+extern void
+get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4);
+
+extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str);
+
+extern unsigned long nr_lock_classes;
+extern unsigned long nr_list_entries;
+extern unsigned long nr_lock_chains;
+extern unsigned long nr_stack_trace_entries;
+
+extern unsigned int nr_hardirq_chains;
+extern unsigned int nr_softirq_chains;
+extern unsigned int nr_process_chains;
+extern unsigned int max_lockdep_depth;
+extern unsigned int max_recursion_depth;
+
+#ifdef CONFIG_DEBUG_LOCKDEP
+/*
+ * Various lockdep statistics:
+ */
+extern atomic_t chain_lookup_hits;
+extern atomic_t chain_lookup_misses;
+extern atomic_t hardirqs_on_events;
+extern atomic_t hardirqs_off_events;
+extern atomic_t redundant_hardirqs_on;
+extern atomic_t redundant_hardirqs_off;
+extern atomic_t softirqs_on_events;
+extern atomic_t softirqs_off_events;
+extern atomic_t redundant_softirqs_on;
+extern atomic_t redundant_softirqs_off;
+extern atomic_t nr_unused_locks;
+extern atomic_t nr_cyclic_checks;
+extern atomic_t nr_cyclic_check_recursions;
+extern atomic_t nr_find_usage_forwards_checks;
+extern atomic_t nr_find_usage_forwards_recursions;
+extern atomic_t nr_find_usage_backwards_checks;
+extern atomic_t nr_find_usage_backwards_recursions;
+# define debug_atomic_inc(ptr) atomic_inc(ptr)
+# define debug_atomic_dec(ptr) atomic_dec(ptr)
+# define debug_atomic_read(ptr) atomic_read(ptr)
+#else
+# define debug_atomic_inc(ptr) do { } while (0)
+# define debug_atomic_dec(ptr) do { } while (0)
+# define debug_atomic_read(ptr) 0
+#endif
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
new file mode 100644
index 00000000000..f6e72eaab3f
--- /dev/null
+++ b/kernel/lockdep_proc.c
@@ -0,0 +1,345 @@
+/*
+ * kernel/lockdep_proc.c
+ *
+ * Runtime locking correctness validator
+ *
+ * Started by Ingo Molnar:
+ *
+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Code for /proc/lockdep and /proc/lockdep_stats:
+ *
+ */
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/kallsyms.h>
+#include <linux/debug_locks.h>
+
+#include "lockdep_internals.h"
+
+static void *l_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct lock_class *class = v;
+
+ (*pos)++;
+
+ if (class->lock_entry.next != &all_lock_classes)
+ class = list_entry(class->lock_entry.next, struct lock_class,
+ lock_entry);
+ else
+ class = NULL;
+ m->private = class;
+
+ return class;
+}
+
+static void *l_start(struct seq_file *m, loff_t *pos)
+{
+ struct lock_class *class = m->private;
+
+ if (&class->lock_entry == all_lock_classes.next)
+ seq_printf(m, "all lock classes:\n");
+
+ return class;
+}
+
+static void l_stop(struct seq_file *m, void *v)
+{
+}
+
+static unsigned long count_forward_deps(struct lock_class *class)
+{
+ struct lock_list *entry;
+ unsigned long ret = 1;
+
+ /*
+ * Recurse this class's dependency list:
+ */
+ list_for_each_entry(entry, &class->locks_after, entry)
+ ret += count_forward_deps(entry->class);
+
+ return ret;
+}
+
+static unsigned long count_backward_deps(struct lock_class *class)
+{
+ struct lock_list *entry;
+ unsigned long ret = 1;
+
+ /*
+ * Recurse this class's dependency list:
+ */
+ list_for_each_entry(entry, &class->locks_before, entry)
+ ret += count_backward_deps(entry->class);
+
+ return ret;
+}
+
+static int l_show(struct seq_file *m, void *v)
+{
+ unsigned long nr_forward_deps, nr_backward_deps;
+ struct lock_class *class = m->private;
+ char str[128], c1, c2, c3, c4;
+ const char *name;
+
+ seq_printf(m, "%p", class->key);
+#ifdef CONFIG_DEBUG_LOCKDEP
+ seq_printf(m, " OPS:%8ld", class->ops);
+#endif
+ nr_forward_deps = count_forward_deps(class);
+ seq_printf(m, " FD:%5ld", nr_forward_deps);
+
+ nr_backward_deps = count_backward_deps(class);
+ seq_printf(m, " BD:%5ld", nr_backward_deps);
+
+ get_usage_chars(class, &c1, &c2, &c3, &c4);
+ seq_printf(m, " %c%c%c%c", c1, c2, c3, c4);
+
+ name = class->name;
+ if (!name) {
+ name = __get_key_name(class->key, str);
+ seq_printf(m, ": %s", name);
+ } else{
+ seq_printf(m, ": %s", name);
+ if (class->name_version > 1)
+ seq_printf(m, "#%d", class->name_version);
+ if (class->subclass)
+ seq_printf(m, "/%d", class->subclass);
+ }
+ seq_puts(m, "\n");
+
+ return 0;
+}
+
+static struct seq_operations lockdep_ops = {
+ .start = l_start,
+ .next = l_next,
+ .stop = l_stop,
+ .show = l_show,
+};
+
+static int lockdep_open(struct inode *inode, struct file *file)
+{
+ int res = seq_open(file, &lockdep_ops);
+ if (!res) {
+ struct seq_file *m = file->private_data;
+
+ if (!list_empty(&all_lock_classes))
+ m->private = list_entry(all_lock_classes.next,
+ struct lock_class, lock_entry);
+ else
+ m->private = NULL;
+ }
+ return res;
+}
+
+static struct file_operations proc_lockdep_operations = {
+ .open = lockdep_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static void lockdep_stats_debug_show(struct seq_file *m)
+{
+#ifdef CONFIG_DEBUG_LOCKDEP
+ unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
+ hi2 = debug_atomic_read(&hardirqs_off_events),
+ hr1 = debug_atomic_read(&redundant_hardirqs_on),
+ hr2 = debug_atomic_read(&redundant_hardirqs_off),
+ si1 = debug_atomic_read(&softirqs_on_events),
+ si2 = debug_atomic_read(&softirqs_off_events),
+ sr1 = debug_atomic_read(&redundant_softirqs_on),
+ sr2 = debug_atomic_read(&redundant_softirqs_off);
+
+ seq_printf(m, " chain lookup misses: %11u\n",
+ debug_atomic_read(&chain_lookup_misses));
+ seq_printf(m, " chain lookup hits: %11u\n",
+ debug_atomic_read(&chain_lookup_hits));
+ seq_printf(m, " cyclic checks: %11u\n",
+ debug_atomic_read(&nr_cyclic_checks));
+ seq_printf(m, " cyclic-check recursions: %11u\n",
+ debug_atomic_read(&nr_cyclic_check_recursions));
+ seq_printf(m, " find-mask forwards checks: %11u\n",
+ debug_atomic_read(&nr_find_usage_forwards_checks));
+ seq_printf(m, " find-mask forwards recursions: %11u\n",
+ debug_atomic_read(&nr_find_usage_forwards_recursions));
+ seq_printf(m, " find-mask backwards checks: %11u\n",
+ debug_atomic_read(&nr_find_usage_backwards_checks));
+ seq_printf(m, " find-mask backwards recursions:%11u\n",
+ debug_atomic_read(&nr_find_usage_backwards_recursions));
+
+ seq_printf(m, " hardirq on events: %11u\n", hi1);
+ seq_printf(m, " hardirq off events: %11u\n", hi2);
+ seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
+ seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
+ seq_printf(m, " softirq on events: %11u\n", si1);
+ seq_printf(m, " softirq off events: %11u\n", si2);
+ seq_printf(m, " redundant softirq ons: %11u\n", sr1);
+ seq_printf(m, " redundant softirq offs: %11u\n", sr2);
+#endif
+}
+
+static int lockdep_stats_show(struct seq_file *m, void *v)
+{
+ struct lock_class *class;
+ unsigned long nr_unused = 0, nr_uncategorized = 0,
+ nr_irq_safe = 0, nr_irq_unsafe = 0,
+ nr_softirq_safe = 0, nr_softirq_unsafe = 0,
+ nr_hardirq_safe = 0, nr_hardirq_unsafe = 0,
+ nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
+ nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
+ nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
+ sum_forward_deps = 0, factor = 0;
+
+ list_for_each_entry(class, &all_lock_classes, lock_entry) {
+
+ if (class->usage_mask == 0)
+ nr_unused++;
+ if (class->usage_mask == LOCKF_USED)
+ nr_uncategorized++;
+ if (class->usage_mask & LOCKF_USED_IN_IRQ)
+ nr_irq_safe++;
+ if (class->usage_mask & LOCKF_ENABLED_IRQS)
+ nr_irq_unsafe++;
+ if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ)
+ nr_softirq_safe++;
+ if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS)
+ nr_softirq_unsafe++;
+ if (class->usage_mask & LOCKF_USED_IN_HARDIRQ)
+ nr_hardirq_safe++;
+ if (class->usage_mask & LOCKF_ENABLED_HARDIRQS)
+ nr_hardirq_unsafe++;
+ if (class->usage_mask & LOCKF_USED_IN_IRQ_READ)
+ nr_irq_read_safe++;
+ if (class->usage_mask & LOCKF_ENABLED_IRQS_READ)
+ nr_irq_read_unsafe++;
+ if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ)
+ nr_softirq_read_safe++;
+ if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ)
+ nr_softirq_read_unsafe++;
+ if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ)
+ nr_hardirq_read_safe++;
+ if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ)
+ nr_hardirq_read_unsafe++;
+
+ sum_forward_deps += count_forward_deps(class);
+ }
+#ifdef CONFIG_LOCKDEP_DEBUG
+ DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
+#endif
+ seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
+ nr_lock_classes, MAX_LOCKDEP_KEYS);
+ seq_printf(m, " direct dependencies: %11lu [max: %lu]\n",
+ nr_list_entries, MAX_LOCKDEP_ENTRIES);
+ seq_printf(m, " indirect dependencies: %11lu\n",
+ sum_forward_deps);
+
+ /*
+ * Total number of dependencies:
+ *
+ * All irq-safe locks may nest inside irq-unsafe locks,
+ * plus all the other known dependencies:
+ */
+ seq_printf(m, " all direct dependencies: %11lu\n",
+ nr_irq_unsafe * nr_irq_safe +
+ nr_hardirq_unsafe * nr_hardirq_safe +
+ nr_list_entries);
+
+ /*
+ * Estimated factor between direct and indirect
+ * dependencies:
+ */
+ if (nr_list_entries)
+ factor = sum_forward_deps / nr_list_entries;
+
+ seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
+ nr_lock_chains, MAX_LOCKDEP_CHAINS);
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+ seq_printf(m, " in-hardirq chains: %11u\n",
+ nr_hardirq_chains);
+ seq_printf(m, " in-softirq chains: %11u\n",
+ nr_softirq_chains);
+#endif
+ seq_printf(m, " in-process chains: %11u\n",
+ nr_process_chains);
+ seq_printf(m, " stack-trace entries: %11lu [max: %lu]\n",
+ nr_stack_trace_entries, MAX_STACK_TRACE_ENTRIES);
+ seq_printf(m, " combined max dependencies: %11u\n",
+ (nr_hardirq_chains + 1) *
+ (nr_softirq_chains + 1) *
+ (nr_process_chains + 1)
+ );
+ seq_printf(m, " hardirq-safe locks: %11lu\n",
+ nr_hardirq_safe);
+ seq_printf(m, " hardirq-unsafe locks: %11lu\n",
+ nr_hardirq_unsafe);
+ seq_printf(m, " softirq-safe locks: %11lu\n",
+ nr_softirq_safe);
+ seq_printf(m, " softirq-unsafe locks: %11lu\n",
+ nr_softirq_unsafe);
+ seq_printf(m, " irq-safe locks: %11lu\n",
+ nr_irq_safe);
+ seq_printf(m, " irq-unsafe locks: %11lu\n",
+ nr_irq_unsafe);
+
+ seq_printf(m, " hardirq-read-safe locks: %11lu\n",
+ nr_hardirq_read_safe);
+ seq_printf(m, " hardirq-read-unsafe locks: %11lu\n",
+ nr_hardirq_read_unsafe);
+ seq_printf(m, " softirq-read-safe locks: %11lu\n",
+ nr_softirq_read_safe);
+ seq_printf(m, " softirq-read-unsafe locks: %11lu\n",
+ nr_softirq_read_unsafe);
+ seq_printf(m, " irq-read-safe locks: %11lu\n",
+ nr_irq_read_safe);
+ seq_printf(m, " irq-read-unsafe locks: %11lu\n",
+ nr_irq_read_unsafe);
+
+ seq_printf(m, " uncategorized locks: %11lu\n",
+ nr_uncategorized);
+ seq_printf(m, " unused locks: %11lu\n",
+ nr_unused);
+ seq_printf(m, " max locking depth: %11u\n",
+ max_lockdep_depth);
+ seq_printf(m, " max recursion depth: %11u\n",
+ max_recursion_depth);
+ lockdep_stats_debug_show(m);
+ seq_printf(m, " debug_locks: %11u\n",
+ debug_locks);
+
+ return 0;
+}
+
+static int lockdep_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lockdep_stats_show, NULL);
+}
+
+static struct file_operations proc_lockdep_stats_operations = {
+ .open = lockdep_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static int __init lockdep_proc_init(void)
+{
+ struct proc_dir_entry *entry;
+
+ entry = create_proc_entry("lockdep", S_IRUSR, NULL);
+ if (entry)
+ entry->proc_fops = &proc_lockdep_operations;
+
+ entry = create_proc_entry("lockdep_stats", S_IRUSR, NULL);
+ if (entry)
+ entry->proc_fops = &proc_lockdep_stats_operations;
+
+ return 0;
+}
+
+__initcall(lockdep_proc_init);
+
diff --git a/kernel/module.c b/kernel/module.c
index 10e5b872adf..35e1b1f859d 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1,4 +1,4 @@
-/* Rewritten by Rusty Russell, on the backs of many others...
+/*
Copyright (C) 2002 Richard Henderson
Copyright (C) 2001 Rusty Russell, 2002 Rusty Russell IBM.
@@ -16,7 +16,6 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <linux/init.h>
@@ -122,9 +121,17 @@ extern const struct kernel_symbol __start___ksymtab_gpl[];
extern const struct kernel_symbol __stop___ksymtab_gpl[];
extern const struct kernel_symbol __start___ksymtab_gpl_future[];
extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
+extern const struct kernel_symbol __start___ksymtab_unused[];
+extern const struct kernel_symbol __stop___ksymtab_unused[];
+extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
+extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
+extern const struct kernel_symbol __start___ksymtab_gpl_future[];
+extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
extern const unsigned long __start___kcrctab[];
extern const unsigned long __start___kcrctab_gpl[];
extern const unsigned long __start___kcrctab_gpl_future[];
+extern const unsigned long __start___kcrctab_unused[];
+extern const unsigned long __start___kcrctab_unused_gpl[];
#ifndef CONFIG_MODVERSIONS
#define symversion(base, idx) NULL
@@ -144,6 +151,17 @@ static const struct kernel_symbol *lookup_symbol(const char *name,
return NULL;
}
+static void printk_unused_warning(const char *name)
+{
+ printk(KERN_WARNING "Symbol %s is marked as UNUSED, "
+ "however this module is using it.\n", name);
+ printk(KERN_WARNING "This symbol will go away in the future.\n");
+ printk(KERN_WARNING "Please evalute if this is the right api to use, "
+ "and if it really is, submit a report the linux kernel "
+ "mailinglist together with submitting your code for "
+ "inclusion.\n");
+}
+
/* Find a symbol, return value, crc and module which owns it */
static unsigned long __find_symbol(const char *name,
struct module **owner,
@@ -186,6 +204,25 @@ static unsigned long __find_symbol(const char *name,
return ks->value;
}
+ ks = lookup_symbol(name, __start___ksymtab_unused,
+ __stop___ksymtab_unused);
+ if (ks) {
+ printk_unused_warning(name);
+ *crc = symversion(__start___kcrctab_unused,
+ (ks - __start___ksymtab_unused));
+ return ks->value;
+ }
+
+ if (gplok)
+ ks = lookup_symbol(name, __start___ksymtab_unused_gpl,
+ __stop___ksymtab_unused_gpl);
+ if (ks) {
+ printk_unused_warning(name);
+ *crc = symversion(__start___kcrctab_unused_gpl,
+ (ks - __start___ksymtab_unused_gpl));
+ return ks->value;
+ }
+
/* Now try modules. */
list_for_each_entry(mod, &modules, list) {
*owner = mod;
@@ -204,6 +241,23 @@ static unsigned long __find_symbol(const char *name,
return ks->value;
}
}
+ ks = lookup_symbol(name, mod->unused_syms, mod->unused_syms + mod->num_unused_syms);
+ if (ks) {
+ printk_unused_warning(name);
+ *crc = symversion(mod->unused_crcs, (ks - mod->unused_syms));
+ return ks->value;
+ }
+
+ if (gplok) {
+ ks = lookup_symbol(name, mod->unused_gpl_syms,
+ mod->unused_gpl_syms + mod->num_unused_gpl_syms);
+ if (ks) {
+ printk_unused_warning(name);
+ *crc = symversion(mod->unused_gpl_crcs,
+ (ks - mod->unused_gpl_syms));
+ return ks->value;
+ }
+ }
ks = lookup_symbol(name, mod->gpl_future_syms,
(mod->gpl_future_syms +
mod->num_gpl_future_syms));
@@ -1067,6 +1121,9 @@ static void free_module(struct module *mod)
if (mod->percpu)
percpu_modfree(mod->percpu);
+ /* Free lock-classes: */
+ lockdep_free_key_range(mod->module_core, mod->core_size);
+
/* Finally, free the core (containing the module structure) */
module_free(mod, mod->module_core);
}
@@ -1403,10 +1460,27 @@ static struct module *load_module(void __user *umod,
Elf_Ehdr *hdr;
Elf_Shdr *sechdrs;
char *secstrings, *args, *modmagic, *strtab = NULL;
- unsigned int i, symindex = 0, strindex = 0, setupindex, exindex,
- exportindex, modindex, obsparmindex, infoindex, gplindex,
- crcindex, gplcrcindex, versindex, pcpuindex, gplfutureindex,
- gplfuturecrcindex, unwindex = 0;
+ unsigned int i;
+ unsigned int symindex = 0;
+ unsigned int strindex = 0;
+ unsigned int setupindex;
+ unsigned int exindex;
+ unsigned int exportindex;
+ unsigned int modindex;
+ unsigned int obsparmindex;
+ unsigned int infoindex;
+ unsigned int gplindex;
+ unsigned int crcindex;
+ unsigned int gplcrcindex;
+ unsigned int versindex;
+ unsigned int pcpuindex;
+ unsigned int gplfutureindex;
+ unsigned int gplfuturecrcindex;
+ unsigned int unwindex = 0;
+ unsigned int unusedindex;
+ unsigned int unusedcrcindex;
+ unsigned int unusedgplindex;
+ unsigned int unusedgplcrcindex;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1487,9 +1561,13 @@ static struct module *load_module(void __user *umod,
exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
gplfutureindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl_future");
+ unusedindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused");
+ unusedgplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_unused_gpl");
crcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab");
gplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl");
gplfuturecrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_gpl_future");
+ unusedcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused");
+ unusedgplcrcindex = find_sec(hdr, sechdrs, secstrings, "__kcrctab_unused_gpl");
setupindex = find_sec(hdr, sechdrs, secstrings, "__param");
exindex = find_sec(hdr, sechdrs, secstrings, "__ex_table");
obsparmindex = find_sec(hdr, sechdrs, secstrings, "__obsparm");
@@ -1638,14 +1716,27 @@ static struct module *load_module(void __user *umod,
mod->gpl_crcs = (void *)sechdrs[gplcrcindex].sh_addr;
mod->num_gpl_future_syms = sechdrs[gplfutureindex].sh_size /
sizeof(*mod->gpl_future_syms);
+ mod->num_unused_syms = sechdrs[unusedindex].sh_size /
+ sizeof(*mod->unused_syms);
+ mod->num_unused_gpl_syms = sechdrs[unusedgplindex].sh_size /
+ sizeof(*mod->unused_gpl_syms);
mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
if (gplfuturecrcindex)
mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+ mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
+ if (unusedcrcindex)
+ mod->unused_crcs = (void *)sechdrs[unusedcrcindex].sh_addr;
+ mod->unused_gpl_syms = (void *)sechdrs[unusedgplindex].sh_addr;
+ if (unusedgplcrcindex)
+ mod->unused_crcs = (void *)sechdrs[unusedgplcrcindex].sh_addr;
+
#ifdef CONFIG_MODVERSIONS
if ((mod->num_syms && !crcindex) ||
(mod->num_gpl_syms && !gplcrcindex) ||
- (mod->num_gpl_future_syms && !gplfuturecrcindex)) {
+ (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
+ (mod->num_unused_syms && !unusedcrcindex) ||
+ (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
printk(KERN_WARNING "%s: No versions for exported symbols."
" Tainting kernel.\n", mod->name);
add_taint(TAINT_FORCED_MODULE);
@@ -2071,6 +2162,29 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
return e;
}
+/*
+ * Is this a valid module address?
+ */
+int is_module_address(unsigned long addr)
+{
+ unsigned long flags;
+ struct module *mod;
+
+ spin_lock_irqsave(&modlist_lock, flags);
+
+ list_for_each_entry(mod, &modules, list) {
+ if (within(addr, mod->module_core, mod->core_size)) {
+ spin_unlock_irqrestore(&modlist_lock, flags);
+ return 1;
+ }
+ }
+
+ spin_unlock_irqrestore(&modlist_lock, flags);
+
+ return 0;
+}
+
+
/* Is this a valid kernel address? We don't grab the lock: we are oopsing. */
struct module *__module_text_address(unsigned long addr)
{
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index e38e4bac97c..e3203c654dd 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -20,367 +20,19 @@
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
+#include <linux/debug_locks.h>
#include "mutex-debug.h"
/*
- * We need a global lock when we walk through the multi-process
- * lock tree. Only used in the deadlock-debugging case.
- */
-DEFINE_SPINLOCK(debug_mutex_lock);
-
-/*
- * All locks held by all tasks, in a single global list:
- */
-LIST_HEAD(debug_mutex_held_locks);
-
-/*
- * In the debug case we carry the caller's instruction pointer into
- * other functions, but we dont want the function argument overhead
- * in the nondebug case - hence these macros:
- */
-#define __IP_DECL__ , unsigned long ip
-#define __IP__ , ip
-#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
-
-/*
- * "mutex debugging enabled" flag. We turn it off when we detect
- * the first problem because we dont want to recurse back
- * into the tracing code when doing error printk or
- * executing a BUG():
- */
-int debug_mutex_on = 1;
-
-static void printk_task(struct task_struct *p)
-{
- if (p)
- printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
- else
- printk("<none>");
-}
-
-static void printk_ti(struct thread_info *ti)
-{
- if (ti)
- printk_task(ti->task);
- else
- printk("<none>");
-}
-
-static void printk_task_short(struct task_struct *p)
-{
- if (p)
- printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
- else
- printk("<none>");
-}
-
-static void printk_lock(struct mutex *lock, int print_owner)
-{
- printk(" [%p] {%s}\n", lock, lock->name);
-
- if (print_owner && lock->owner) {
- printk(".. held by: ");
- printk_ti(lock->owner);
- printk("\n");
- }
- if (lock->owner) {
- printk("... acquired at: ");
- print_symbol("%s\n", lock->acquire_ip);
- }
-}
-
-/*
- * printk locks held by a task:
- */
-static void show_task_locks(struct task_struct *p)
-{
- switch (p->state) {
- case TASK_RUNNING: printk("R"); break;
- case TASK_INTERRUPTIBLE: printk("S"); break;
- case TASK_UNINTERRUPTIBLE: printk("D"); break;
- case TASK_STOPPED: printk("T"); break;
- case EXIT_ZOMBIE: printk("Z"); break;
- case EXIT_DEAD: printk("X"); break;
- default: printk("?"); break;
- }
- printk_task(p);
- if (p->blocked_on) {
- struct mutex *lock = p->blocked_on->lock;
-
- printk(" blocked on mutex:");
- printk_lock(lock, 1);
- } else
- printk(" (not blocked on mutex)\n");
-}
-
-/*
- * printk all locks held in the system (if filter == NULL),
- * or all locks belonging to a single task (if filter != NULL):
- */
-void show_held_locks(struct task_struct *filter)
-{
- struct list_head *curr, *cursor = NULL;
- struct mutex *lock;
- struct thread_info *t;
- unsigned long flags;
- int count = 0;
-
- if (filter) {
- printk("------------------------------\n");
- printk("| showing all locks held by: | (");
- printk_task_short(filter);
- printk("):\n");
- printk("------------------------------\n");
- } else {
- printk("---------------------------\n");
- printk("| showing all locks held: |\n");
- printk("---------------------------\n");
- }
-
- /*
- * Play safe and acquire the global trace lock. We
- * cannot printk with that lock held so we iterate
- * very carefully:
- */
-next:
- debug_spin_lock_save(&debug_mutex_lock, flags);
- list_for_each(curr, &debug_mutex_held_locks) {
- if (cursor && curr != cursor)
- continue;
- lock = list_entry(curr, struct mutex, held_list);
- t = lock->owner;
- if (filter && (t != filter->thread_info))
- continue;
- count++;
- cursor = curr->next;
- debug_spin_unlock_restore(&debug_mutex_lock, flags);
-
- printk("\n#%03d: ", count);
- printk_lock(lock, filter ? 0 : 1);
- goto next;
- }
- debug_spin_unlock_restore(&debug_mutex_lock, flags);
- printk("\n");
-}
-
-void mutex_debug_show_all_locks(void)
-{
- struct task_struct *g, *p;
- int count = 10;
- int unlock = 1;
-
- printk("\nShowing all blocking locks in the system:\n");
-
- /*
- * Here we try to get the tasklist_lock as hard as possible,
- * if not successful after 2 seconds we ignore it (but keep
- * trying). This is to enable a debug printout even if a
- * tasklist_lock-holding task deadlocks or crashes.
- */
-retry:
- if (!read_trylock(&tasklist_lock)) {
- if (count == 10)
- printk("hm, tasklist_lock locked, retrying... ");
- if (count) {
- count--;
- printk(" #%d", 10-count);
- mdelay(200);
- goto retry;
- }
- printk(" ignoring it.\n");
- unlock = 0;
- }
- if (count != 10)
- printk(" locked it.\n");
-
- do_each_thread(g, p) {
- show_task_locks(p);
- if (!unlock)
- if (read_trylock(&tasklist_lock))
- unlock = 1;
- } while_each_thread(g, p);
-
- printk("\n");
- show_held_locks(NULL);
- printk("=============================================\n\n");
-
- if (unlock)
- read_unlock(&tasklist_lock);
-}
-
-static void report_deadlock(struct task_struct *task, struct mutex *lock,
- struct mutex *lockblk, unsigned long ip)
-{
- printk("\n%s/%d is trying to acquire this lock:\n",
- current->comm, current->pid);
- printk_lock(lock, 1);
- printk("... trying at: ");
- print_symbol("%s\n", ip);
- show_held_locks(current);
-
- if (lockblk) {
- printk("but %s/%d is deadlocking current task %s/%d!\n\n",
- task->comm, task->pid, current->comm, current->pid);
- printk("\n%s/%d is blocked on this lock:\n",
- task->comm, task->pid);
- printk_lock(lockblk, 1);
-
- show_held_locks(task);
-
- printk("\n%s/%d's [blocked] stackdump:\n\n",
- task->comm, task->pid);
- show_stack(task, NULL);
- }
-
- printk("\n%s/%d's [current] stackdump:\n\n",
- current->comm, current->pid);
- dump_stack();
- mutex_debug_show_all_locks();
- printk("[ turning off deadlock detection. Please report this. ]\n\n");
- local_irq_disable();
-}
-
-/*
- * Recursively check for mutex deadlocks:
- */
-static int check_deadlock(struct mutex *lock, int depth,
- struct thread_info *ti, unsigned long ip)
-{
- struct mutex *lockblk;
- struct task_struct *task;
-
- if (!debug_mutex_on)
- return 0;
-
- ti = lock->owner;
- if (!ti)
- return 0;
-
- task = ti->task;
- lockblk = NULL;
- if (task->blocked_on)
- lockblk = task->blocked_on->lock;
-
- /* Self-deadlock: */
- if (current == task) {
- DEBUG_OFF();
- if (depth)
- return 1;
- printk("\n==========================================\n");
- printk( "[ BUG: lock recursion deadlock detected! |\n");
- printk( "------------------------------------------\n");
- report_deadlock(task, lock, NULL, ip);
- return 0;
- }
-
- /* Ugh, something corrupted the lock data structure? */
- if (depth > 20) {
- DEBUG_OFF();
- printk("\n===========================================\n");
- printk( "[ BUG: infinite lock dependency detected!? |\n");
- printk( "-------------------------------------------\n");
- report_deadlock(task, lock, lockblk, ip);
- return 0;
- }
-
- /* Recursively check for dependencies: */
- if (lockblk && check_deadlock(lockblk, depth+1, ti, ip)) {
- printk("\n============================================\n");
- printk( "[ BUG: circular locking deadlock detected! ]\n");
- printk( "--------------------------------------------\n");
- report_deadlock(task, lock, lockblk, ip);
- return 0;
- }
- return 0;
-}
-
-/*
- * Called when a task exits, this function checks whether the
- * task is holding any locks, and reports the first one if so:
- */
-void mutex_debug_check_no_locks_held(struct task_struct *task)
-{
- struct list_head *curr, *next;
- struct thread_info *t;
- unsigned long flags;
- struct mutex *lock;
-
- if (!debug_mutex_on)
- return;
-
- debug_spin_lock_save(&debug_mutex_lock, flags);
- list_for_each_safe(curr, next, &debug_mutex_held_locks) {
- lock = list_entry(curr, struct mutex, held_list);
- t = lock->owner;
- if (t != task->thread_info)
- continue;
- list_del_init(curr);
- DEBUG_OFF();
- debug_spin_unlock_restore(&debug_mutex_lock, flags);
-
- printk("BUG: %s/%d, lock held at task exit time!\n",
- task->comm, task->pid);
- printk_lock(lock, 1);
- if (lock->owner != task->thread_info)
- printk("exiting task is not even the owner??\n");
- return;
- }
- debug_spin_unlock_restore(&debug_mutex_lock, flags);
-}
-
-/*
- * Called when kernel memory is freed (or unmapped), or if a mutex
- * is destroyed or reinitialized - this code checks whether there is
- * any held lock in the memory range of <from> to <to>:
- */
-void mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
-{
- struct list_head *curr, *next;
- const void *to = from + len;
- unsigned long flags;
- struct mutex *lock;
- void *lock_addr;
-
- if (!debug_mutex_on)
- return;
-
- debug_spin_lock_save(&debug_mutex_lock, flags);
- list_for_each_safe(curr, next, &debug_mutex_held_locks) {
- lock = list_entry(curr, struct mutex, held_list);
- lock_addr = lock;
- if (lock_addr < from || lock_addr >= to)
- continue;
- list_del_init(curr);
- DEBUG_OFF();
- debug_spin_unlock_restore(&debug_mutex_lock, flags);
-
- printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
- current->comm, current->pid, lock, from, to);
- dump_stack();
- printk_lock(lock, 1);
- if (lock->owner != current_thread_info())
- printk("freeing task is not even the owner??\n");
- return;
- }
- debug_spin_unlock_restore(&debug_mutex_lock, flags);
-}
-
-/*
* Must be called with lock->wait_lock held.
*/
-void debug_mutex_set_owner(struct mutex *lock,
- struct thread_info *new_owner __IP_DECL__)
+void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner)
{
lock->owner = new_owner;
- DEBUG_WARN_ON(!list_empty(&lock->held_list));
- if (debug_mutex_on) {
- list_add_tail(&lock->held_list, &debug_mutex_held_locks);
- lock->acquire_ip = ip;
- }
}
-void debug_mutex_init_waiter(struct mutex_waiter *waiter)
+void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
{
memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter));
waiter->magic = waiter;
@@ -389,23 +41,23 @@ void debug_mutex_init_waiter(struct mutex_waiter *waiter)
void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter)
{
- SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
- DEBUG_WARN_ON(list_empty(&lock->wait_list));
- DEBUG_WARN_ON(waiter->magic != waiter);
- DEBUG_WARN_ON(list_empty(&waiter->list));
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
+ DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list));
+ DEBUG_LOCKS_WARN_ON(waiter->magic != waiter);
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
}
void debug_mutex_free_waiter(struct mutex_waiter *waiter)
{
- DEBUG_WARN_ON(!list_empty(&waiter->list));
+ DEBUG_LOCKS_WARN_ON(!list_empty(&waiter->list));
memset(waiter, MUTEX_DEBUG_FREE, sizeof(*waiter));
}
void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
- struct thread_info *ti __IP_DECL__)
+ struct thread_info *ti)
{
- SMP_DEBUG_WARN_ON(!spin_is_locked(&lock->wait_lock));
- check_deadlock(lock, 0, ti, ip);
+ SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock));
+
/* Mark the current thread as blocked on the lock: */
ti->task->blocked_on = waiter;
waiter->lock = lock;
@@ -414,9 +66,9 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti)
{
- DEBUG_WARN_ON(list_empty(&waiter->list));
- DEBUG_WARN_ON(waiter->task != ti->task);
- DEBUG_WARN_ON(ti->task->blocked_on != waiter);
+ DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
+ DEBUG_LOCKS_WARN_ON(waiter->task != ti->task);
+ DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter);
ti->task->blocked_on = NULL;
list_del_init(&waiter->list);
@@ -425,24 +77,23 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
void debug_mutex_unlock(struct mutex *lock)
{
- DEBUG_WARN_ON(lock->magic != lock);
- DEBUG_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
- DEBUG_WARN_ON(lock->owner != current_thread_info());
- if (debug_mutex_on) {
- DEBUG_WARN_ON(list_empty(&lock->held_list));
- list_del_init(&lock->held_list);
- }
+ DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
+ DEBUG_LOCKS_WARN_ON(lock->magic != lock);
+ DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
+ DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
}
-void debug_mutex_init(struct mutex *lock, const char *name)
+void debug_mutex_init(struct mutex *lock, const char *name,
+ struct lock_class_key *key)
{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
* Make sure we are not reinitializing a held lock:
*/
- mutex_debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key);
+#endif
lock->owner = NULL;
- INIT_LIST_HEAD(&lock->held_list);
- lock->name = name;
lock->magic = lock;
}
@@ -456,7 +107,7 @@ void debug_mutex_init(struct mutex *lock, const char *name)
*/
void fastcall mutex_destroy(struct mutex *lock)
{
- DEBUG_WARN_ON(mutex_is_locked(lock));
+ DEBUG_LOCKS_WARN_ON(mutex_is_locked(lock));
lock->magic = NULL;
}
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index a5196c36a5f..babfbdfc534 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -10,110 +10,44 @@
* More details are in kernel/mutex-debug.c.
*/
-extern spinlock_t debug_mutex_lock;
-extern struct list_head debug_mutex_held_locks;
-extern int debug_mutex_on;
-
-/*
- * In the debug case we carry the caller's instruction pointer into
- * other functions, but we dont want the function argument overhead
- * in the nondebug case - hence these macros:
- */
-#define __IP_DECL__ , unsigned long ip
-#define __IP__ , ip
-#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
-
/*
* This must be called with lock->wait_lock held.
*/
-extern void debug_mutex_set_owner(struct mutex *lock,
- struct thread_info *new_owner __IP_DECL__);
+extern void
+debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner);
static inline void debug_mutex_clear_owner(struct mutex *lock)
{
lock->owner = NULL;
}
-extern void debug_mutex_init_waiter(struct mutex_waiter *waiter);
+extern void debug_mutex_lock_common(struct mutex *lock,
+ struct mutex_waiter *waiter);
extern void debug_mutex_wake_waiter(struct mutex *lock,
struct mutex_waiter *waiter);
extern void debug_mutex_free_waiter(struct mutex_waiter *waiter);
extern void debug_mutex_add_waiter(struct mutex *lock,
struct mutex_waiter *waiter,
- struct thread_info *ti __IP_DECL__);
+ struct thread_info *ti);
extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
struct thread_info *ti);
extern void debug_mutex_unlock(struct mutex *lock);
-extern void debug_mutex_init(struct mutex *lock, const char *name);
-
-#define debug_spin_lock_save(lock, flags) \
- do { \
- local_irq_save(flags); \
- if (debug_mutex_on) \
- spin_lock(lock); \
- } while (0)
-
-#define debug_spin_unlock_restore(lock, flags) \
- do { \
- if (debug_mutex_on) \
- spin_unlock(lock); \
- local_irq_restore(flags); \
- preempt_check_resched(); \
- } while (0)
+extern void debug_mutex_init(struct mutex *lock, const char *name,
+ struct lock_class_key *key);
#define spin_lock_mutex(lock, flags) \
do { \
struct mutex *l = container_of(lock, struct mutex, wait_lock); \
\
- DEBUG_WARN_ON(in_interrupt()); \
- debug_spin_lock_save(&debug_mutex_lock, flags); \
- spin_lock(lock); \
- DEBUG_WARN_ON(l->magic != l); \
+ DEBUG_LOCKS_WARN_ON(in_interrupt()); \
+ local_irq_save(flags); \
+ __raw_spin_lock(&(lock)->raw_lock); \
+ DEBUG_LOCKS_WARN_ON(l->magic != l); \
} while (0)
#define spin_unlock_mutex(lock, flags) \
do { \
- spin_unlock(lock); \
- debug_spin_unlock_restore(&debug_mutex_lock, flags); \
+ __raw_spin_unlock(&(lock)->raw_lock); \
+ local_irq_restore(flags); \
+ preempt_check_resched(); \
} while (0)
-
-#define DEBUG_OFF() \
-do { \
- if (debug_mutex_on) { \
- debug_mutex_on = 0; \
- console_verbose(); \
- if (spin_is_locked(&debug_mutex_lock)) \
- spin_unlock(&debug_mutex_lock); \
- } \
-} while (0)
-
-#define DEBUG_BUG() \
-do { \
- if (debug_mutex_on) { \
- DEBUG_OFF(); \
- BUG(); \
- } \
-} while (0)
-
-#define DEBUG_WARN_ON(c) \
-do { \
- if (unlikely(c && debug_mutex_on)) { \
- DEBUG_OFF(); \
- WARN_ON(1); \
- } \
-} while (0)
-
-# define DEBUG_BUG_ON(c) \
-do { \
- if (unlikely(c)) \
- DEBUG_BUG(); \
-} while (0)
-
-#ifdef CONFIG_SMP
-# define SMP_DEBUG_WARN_ON(c) DEBUG_WARN_ON(c)
-# define SMP_DEBUG_BUG_ON(c) DEBUG_BUG_ON(c)
-#else
-# define SMP_DEBUG_WARN_ON(c) do { } while (0)
-# define SMP_DEBUG_BUG_ON(c) do { } while (0)
-#endif
-
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 7043db21bbc..8c71cf72a49 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
+#include <linux/debug_locks.h>
/*
* In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -38,13 +39,14 @@
*
* It is not allowed to initialize an already locked mutex.
*/
-void fastcall __mutex_init(struct mutex *lock, const char *name)
+void
+__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
{
atomic_set(&lock->count, 1);
spin_lock_init(&lock->wait_lock);
INIT_LIST_HEAD(&lock->wait_list);
- debug_mutex_init(lock, name);
+ debug_mutex_init(lock, name, key);
}
EXPORT_SYMBOL(__mutex_init);
@@ -56,7 +58,7 @@ EXPORT_SYMBOL(__mutex_init);
* branch is predicted by the CPU as default-untaken.
*/
static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
+__mutex_lock_slowpath(atomic_t *lock_count);
/***
* mutex_lock - acquire the mutex
@@ -79,7 +81,7 @@ __mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__);
*
* This function is similar to (but not equivalent to) down().
*/
-void fastcall __sched mutex_lock(struct mutex *lock)
+void inline fastcall __sched mutex_lock(struct mutex *lock)
{
might_sleep();
/*
@@ -92,7 +94,7 @@ void fastcall __sched mutex_lock(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock);
static void fastcall noinline __sched
-__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__);
+__mutex_unlock_slowpath(atomic_t *lock_count);
/***
* mutex_unlock - release the mutex
@@ -120,18 +122,18 @@ EXPORT_SYMBOL(mutex_unlock);
* Lock a mutex (possibly interruptible), slowpath:
*/
static inline int __sched
-__mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
+__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
{
struct task_struct *task = current;
struct mutex_waiter waiter;
unsigned int old_val;
unsigned long flags;
- debug_mutex_init_waiter(&waiter);
-
spin_lock_mutex(&lock->wait_lock, flags);
- debug_mutex_add_waiter(lock, &waiter, task->thread_info, ip);
+ debug_mutex_lock_common(lock, &waiter);
+ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ debug_mutex_add_waiter(lock, &waiter, task->thread_info);
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
@@ -158,6 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
if (unlikely(state == TASK_INTERRUPTIBLE &&
signal_pending(task))) {
mutex_remove_waiter(lock, &waiter, task->thread_info);
+ mutex_release(&lock->dep_map, 1, _RET_IP_);
spin_unlock_mutex(&lock->wait_lock, flags);
debug_mutex_free_waiter(&waiter);
@@ -173,7 +176,7 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
/* got the lock - rejoice! */
mutex_remove_waiter(lock, &waiter, task->thread_info);
- debug_mutex_set_owner(lock, task->thread_info __IP__);
+ debug_mutex_set_owner(lock, task->thread_info);
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
@@ -183,32 +186,40 @@ __mutex_lock_common(struct mutex *lock, long state __IP_DECL__)
debug_mutex_free_waiter(&waiter);
- DEBUG_WARN_ON(list_empty(&lock->held_list));
- DEBUG_WARN_ON(lock->owner != task->thread_info);
-
return 0;
}
static void fastcall noinline __sched
-__mutex_lock_slowpath(atomic_t *lock_count __IP_DECL__)
+__mutex_lock_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
- __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE __IP__);
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0);
+}
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void __sched
+mutex_lock_nested(struct mutex *lock, unsigned int subclass)
+{
+ might_sleep();
+ __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass);
}
+EXPORT_SYMBOL_GPL(mutex_lock_nested);
+#endif
+
/*
* Release the lock, slowpath:
*/
-static fastcall noinline void
-__mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
+static fastcall inline void
+__mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
unsigned long flags;
- DEBUG_WARN_ON(lock->owner != current_thread_info());
-
spin_lock_mutex(&lock->wait_lock, flags);
+ mutex_release(&lock->dep_map, nested, _RET_IP_);
+ debug_mutex_unlock(lock);
/*
* some architectures leave the lock unlocked in the fastpath failure
@@ -218,8 +229,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
if (__mutex_slowpath_needs_to_unlock())
atomic_set(&lock->count, 1);
- debug_mutex_unlock(lock);
-
if (!list_empty(&lock->wait_list)) {
/* get the first entry from the wait-list: */
struct mutex_waiter *waiter =
@@ -237,11 +246,20 @@ __mutex_unlock_slowpath(atomic_t *lock_count __IP_DECL__)
}
/*
+ * Release the lock, slowpath:
+ */
+static fastcall noinline void
+__mutex_unlock_slowpath(atomic_t *lock_count)
+{
+ __mutex_unlock_common_slowpath(lock_count, 1);
+}
+
+/*
* Here come the less common (and hence less performance-critical) APIs:
* mutex_lock_interruptible() and mutex_trylock().
*/
static int fastcall noinline __sched
-__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__);
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count);
/***
* mutex_lock_interruptible - acquire the mutex, interruptable
@@ -264,11 +282,11 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock)
EXPORT_SYMBOL(mutex_lock_interruptible);
static int fastcall noinline __sched
-__mutex_lock_interruptible_slowpath(atomic_t *lock_count __IP_DECL__)
+__mutex_lock_interruptible_slowpath(atomic_t *lock_count)
{
struct mutex *lock = container_of(lock_count, struct mutex, count);
- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE __IP__);
+ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0);
}
/*
@@ -284,8 +302,10 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
spin_lock_mutex(&lock->wait_lock, flags);
prev = atomic_xchg(&lock->count, -1);
- if (likely(prev == 1))
- debug_mutex_set_owner(lock, current_thread_info() __RET_IP__);
+ if (likely(prev == 1)) {
+ debug_mutex_set_owner(lock, current_thread_info());
+ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
+ }
/* Set it back to 0 if there are no waiters: */
if (likely(list_empty(&lock->wait_list)))
atomic_set(&lock->count, 0);
@@ -309,7 +329,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
* This function must not be used in interrupt context. The
* mutex must be released by the same task that acquired it.
*/
-int fastcall mutex_trylock(struct mutex *lock)
+int fastcall __sched mutex_trylock(struct mutex *lock)
{
return __mutex_fastpath_trylock(&lock->count,
__mutex_trylock_slowpath);
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 06918994725..a075dafbb29 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -16,22 +16,15 @@
#define mutex_remove_waiter(lock, waiter, ti) \
__list_del((waiter)->list.prev, (waiter)->list.next)
-#define DEBUG_WARN_ON(c) do { } while (0)
#define debug_mutex_set_owner(lock, new_owner) do { } while (0)
#define debug_mutex_clear_owner(lock) do { } while (0)
-#define debug_mutex_init_waiter(waiter) do { } while (0)
#define debug_mutex_wake_waiter(lock, waiter) do { } while (0)
#define debug_mutex_free_waiter(waiter) do { } while (0)
-#define debug_mutex_add_waiter(lock, waiter, ti, ip) do { } while (0)
+#define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0)
#define debug_mutex_unlock(lock) do { } while (0)
-#define debug_mutex_init(lock, name) do { } while (0)
-
-/*
- * Return-address parameters/declarations. They are very useful for
- * debugging, but add overhead in the !DEBUG case - so we go the
- * trouble of using this not too elegant but zero-cost solution:
- */
-#define __IP_DECL__
-#define __IP__
-#define __RET_IP__
+#define debug_mutex_init(lock, name, key) do { } while (0)
+static inline void
+debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter)
+{
+}
diff --git a/kernel/panic.c b/kernel/panic.c
index cc2a4c9c36a..ab13f0f668b 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -8,7 +8,6 @@
* This function is used through-out the kernel (including mm and fs)
* to indicate a major problem.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/delay.h>
diff --git a/kernel/params.c b/kernel/params.c
index af43ecdc8d9..91aea7aa532 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -15,7 +15,6 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <linux/config.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/kernel/pid.c b/kernel/pid.c
index eeb836b65ca..93e212f2067 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -218,7 +218,7 @@ struct pid * fastcall find_pid(int nr)
return NULL;
}
-int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
+int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
{
struct pid_link *link;
struct pid *pid;
@@ -233,7 +233,7 @@ int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
return 0;
}
-void fastcall detach_pid(task_t *task, enum pid_type type)
+void fastcall detach_pid(struct task_struct *task, enum pid_type type)
{
struct pid_link *link;
struct pid *pid;
@@ -267,7 +267,7 @@ struct task_struct * fastcall pid_task(struct pid *pid, enum pid_type type)
/*
* Must be called under rcu_read_lock() or with tasklist_lock read-held.
*/
-task_t *find_task_by_pid_type(int type, int nr)
+struct task_struct *find_task_by_pid_type(int type, int nr)
{
return pid_task(find_pid(nr), type);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 857b4fa0912..ae44a70aae8 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,18 +100,6 @@ config PM_STD_PARTITION
suspended image to. It will simply pick the first available swap
device.
-config SWSUSP_ENCRYPT
- bool "Encrypt suspend image"
- depends on SOFTWARE_SUSPEND && CRYPTO=y && (CRYPTO_AES=y || CRYPTO_AES_586=y || CRYPTO_AES_X86_64=y)
- default ""
- ---help---
- To prevent data gathering from swap after resume you can encrypt
- the suspend image with a temporary key that is deleted on
- resume.
-
- Note that the temporary key is stored unencrypted on disk while the
- system is suspended.
-
config SUSPEND_SMP
bool
depends on HOTPLUG_CPU && X86 && PM
diff --git a/kernel/printk.c b/kernel/printk.c
index 95b7fe17f12..bdba5d80496 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -26,7 +26,6 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h> /* For in_interrupt() */
-#include <linux/config.h>
#include <linux/delay.h>
#include <linux/smp.h>
#include <linux/security.h>
@@ -519,7 +518,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
zap_locks();
/* This stops the holder of console_sem just where we want him */
- spin_lock_irqsave(&logbuf_lock, flags);
+ local_irq_save(flags);
+ lockdep_off();
+ spin_lock(&logbuf_lock);
printk_cpu = smp_processor_id();
/* Emit the output into the temporary buffer */
@@ -589,7 +590,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
*/
console_locked = 1;
printk_cpu = UINT_MAX;
- spin_unlock_irqrestore(&logbuf_lock, flags);
+ spin_unlock(&logbuf_lock);
/*
* Console drivers may assume that per-cpu resources have
@@ -605,6 +606,8 @@ asmlinkage int vprintk(const char *fmt, va_list args)
console_locked = 0;
up(&console_sem);
}
+ lockdep_on();
+ local_irq_restore(flags);
} else {
/*
* Someone else owns the drivers. We drop the spinlock, which
@@ -612,7 +615,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
* console drivers with the output which we just produced.
*/
printk_cpu = UINT_MAX;
- spin_unlock_irqrestore(&logbuf_lock, flags);
+ spin_unlock(&logbuf_lock);
+ lockdep_on();
+ local_irq_restore(flags);
}
preempt_enable();
@@ -810,8 +815,15 @@ void release_console_sem(void)
console_may_schedule = 0;
up(&console_sem);
spin_unlock_irqrestore(&logbuf_lock, flags);
- if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait))
- wake_up_interruptible(&log_wait);
+ if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
+ /*
+ * If we printk from within the lock dependency code,
+ * from within the scheduler code, then do not lock
+ * up due to self-recursion:
+ */
+ if (!lockdep_internal())
+ wake_up_interruptible(&log_wait);
+ }
}
EXPORT_SYMBOL(release_console_sem);
diff --git a/kernel/profile.c b/kernel/profile.c
index 5a730fdb1a2..d5bd75e7501 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -13,7 +13,6 @@
* to resolve timer interrupt livelocks, William Irwin, Oracle, 2004
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 335c5b932e1..9a111f70145 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -28,7 +28,7 @@
*
* Must be called with the tasklist lock write-held.
*/
-void __ptrace_link(task_t *child, task_t *new_parent)
+void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
BUG_ON(!list_empty(&child->ptrace_list));
if (child->parent == new_parent)
@@ -46,7 +46,7 @@ void __ptrace_link(task_t *child, task_t *new_parent)
* TASK_TRACED, resume it now.
* Requires that irqs be disabled.
*/
-void ptrace_untrace(task_t *child)
+void ptrace_untrace(struct task_struct *child)
{
spin_lock(&child->sighand->siglock);
if (child->state == TASK_TRACED) {
@@ -65,7 +65,7 @@ void ptrace_untrace(task_t *child)
*
* Must be called with the tasklist lock write-held.
*/
-void __ptrace_unlink(task_t *child)
+void __ptrace_unlink(struct task_struct *child)
{
BUG_ON(!child->ptrace);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index f464f5ae3f1..759805c9859 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -53,13 +53,13 @@
static struct rcu_ctrlblk rcu_ctrlblk = {
.cur = -300,
.completed = -300,
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
.cpumask = CPU_MASK_NONE,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.cur = -300,
.completed = -300,
- .lock = SPIN_LOCK_UNLOCKED,
+ .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
.cpumask = CPU_MASK_NONE,
};
diff --git a/kernel/resource.c b/kernel/resource.c
index 2404f9b0bc4..129cf046e56 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -7,7 +7,6 @@
* Arbitrary resource management.
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
@@ -23,20 +22,18 @@
struct resource ioport_resource = {
.name = "PCI IO",
- .start = 0x0000,
+ .start = 0,
.end = IO_SPACE_LIMIT,
.flags = IORESOURCE_IO,
};
-
EXPORT_SYMBOL(ioport_resource);
struct resource iomem_resource = {
.name = "PCI mem",
- .start = 0UL,
- .end = ~0UL,
+ .start = 0,
+ .end = -1,
.flags = IORESOURCE_MEM,
};
-
EXPORT_SYMBOL(iomem_resource);
static DEFINE_RWLOCK(resource_lock);
@@ -83,10 +80,10 @@ static int r_show(struct seq_file *m, void *v)
for (depth = 0, p = r; depth < MAX_IORES_LEVEL; depth++, p = p->parent)
if (p->parent == root)
break;
- seq_printf(m, "%*s%0*lx-%0*lx : %s\n",
+ seq_printf(m, "%*s%0*llx-%0*llx : %s\n",
depth * 2, "",
- width, r->start,
- width, r->end,
+ width, (unsigned long long) r->start,
+ width, (unsigned long long) r->end,
r->name ? r->name : "<BAD>");
return 0;
}
@@ -151,8 +148,8 @@ __initcall(ioresources_init);
/* Return the conflict entry if you can't request it */
static struct resource * __request_resource(struct resource *root, struct resource *new)
{
- unsigned long start = new->start;
- unsigned long end = new->end;
+ resource_size_t start = new->start;
+ resource_size_t end = new->end;
struct resource *tmp, **p;
if (end < start)
@@ -274,11 +271,10 @@ int find_next_system_ram(struct resource *res)
* Find empty slot in the resource tree given range and alignment.
*/
static int find_resource(struct resource *root, struct resource *new,
- unsigned long size,
- unsigned long min, unsigned long max,
- unsigned long align,
+ resource_size_t size, resource_size_t min,
+ resource_size_t max, resource_size_t align,
void (*alignf)(void *, struct resource *,
- unsigned long, unsigned long),
+ resource_size_t, resource_size_t),
void *alignf_data)
{
struct resource *this = root->child;
@@ -320,11 +316,10 @@ static int find_resource(struct resource *root, struct resource *new,
* Allocate empty slot in the resource tree given range and alignment.
*/
int allocate_resource(struct resource *root, struct resource *new,
- unsigned long size,
- unsigned long min, unsigned long max,
- unsigned long align,
+ resource_size_t size, resource_size_t min,
+ resource_size_t max, resource_size_t align,
void (*alignf)(void *, struct resource *,
- unsigned long, unsigned long),
+ resource_size_t, resource_size_t),
void *alignf_data)
{
int err;
@@ -416,10 +411,10 @@ EXPORT_SYMBOL(insert_resource);
* arguments. Returns -EBUSY if it can't fit. Existing children of
* the resource are assumed to be immutable.
*/
-int adjust_resource(struct resource *res, unsigned long start, unsigned long size)
+int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size)
{
struct resource *tmp, *parent = res->parent;
- unsigned long end = start + size - 1;
+ resource_size_t end = start + size - 1;
int result = -EBUSY;
write_lock(&resource_lock);
@@ -466,7 +461,9 @@ EXPORT_SYMBOL(adjust_resource);
*
* Release-region releases a matching busy region.
*/
-struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
+struct resource * __request_region(struct resource *parent,
+ resource_size_t start, resource_size_t n,
+ const char *name)
{
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
@@ -502,7 +499,8 @@ struct resource * __request_region(struct resource *parent, unsigned long start,
EXPORT_SYMBOL(__request_region);
-int __check_region(struct resource *parent, unsigned long start, unsigned long n)
+int __check_region(struct resource *parent, resource_size_t start,
+ resource_size_t n)
{
struct resource * res;
@@ -517,10 +515,11 @@ int __check_region(struct resource *parent, unsigned long start, unsigned long n
EXPORT_SYMBOL(__check_region);
-void __release_region(struct resource *parent, unsigned long start, unsigned long n)
+void __release_region(struct resource *parent, resource_size_t start,
+ resource_size_t n)
{
struct resource **p;
- unsigned long end;
+ resource_size_t end;
p = &parent->child;
end = start + n - 1;
@@ -549,7 +548,9 @@ void __release_region(struct resource *parent, unsigned long start, unsigned lon
write_unlock(&resource_lock);
- printk(KERN_WARNING "Trying to free nonexistent resource <%08lx-%08lx>\n", start, end);
+ printk(KERN_WARNING "Trying to free nonexistent resource "
+ "<%016llx-%016llx>\n", (unsigned long long)start,
+ (unsigned long long)end);
}
EXPORT_SYMBOL(__release_region);
diff --git a/kernel/rtmutex-debug.c b/kernel/rtmutex-debug.c
index 4aa8a2c9f45..0c1faa950af 100644
--- a/kernel/rtmutex-debug.c
+++ b/kernel/rtmutex-debug.c
@@ -26,6 +26,7 @@
#include <linux/interrupt.h>
#include <linux/plist.h>
#include <linux/fs.h>
+#include <linux/debug_locks.h>
#include "rtmutex_common.h"
@@ -45,8 +46,6 @@ do { \
console_verbose(); \
if (spin_is_locked(&current->pi_lock)) \
spin_unlock(&current->pi_lock); \
- if (spin_is_locked(&current->held_list_lock)) \
- spin_unlock(&current->held_list_lock); \
} \
} while (0)
@@ -97,7 +96,7 @@ void deadlock_trace_off(void)
rt_trace_on = 0;
}
-static void printk_task(task_t *p)
+static void printk_task(struct task_struct *p)
{
if (p)
printk("%16s:%5d [%p, %3d]", p->comm, p->pid, p, p->prio);
@@ -105,14 +104,6 @@ static void printk_task(task_t *p)
printk("<none>");
}
-static void printk_task_short(task_t *p)
-{
- if (p)
- printk("%s/%d [%p, %3d]", p->comm, p->pid, p, p->prio);
- else
- printk("<none>");
-}
-
static void printk_lock(struct rt_mutex *lock, int print_owner)
{
if (lock->name)
@@ -128,222 +119,6 @@ static void printk_lock(struct rt_mutex *lock, int print_owner)
printk_task(rt_mutex_owner(lock));
printk("\n");
}
- if (rt_mutex_owner(lock)) {
- printk("... acquired at: ");
- print_symbol("%s\n", lock->acquire_ip);
- }
-}
-
-static void printk_waiter(struct rt_mutex_waiter *w)
-{
- printk("-------------------------\n");
- printk("| waiter struct %p:\n", w);
- printk("| w->list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
- w->list_entry.plist.prio_list.prev, w->list_entry.plist.prio_list.next,
- w->list_entry.plist.node_list.prev, w->list_entry.plist.node_list.next,
- w->list_entry.prio);
- printk("| w->pi_list_entry: [DP:%p/%p|SP:%p/%p|PRI:%d]\n",
- w->pi_list_entry.plist.prio_list.prev, w->pi_list_entry.plist.prio_list.next,
- w->pi_list_entry.plist.node_list.prev, w->pi_list_entry.plist.node_list.next,
- w->pi_list_entry.prio);
- printk("\n| lock:\n");
- printk_lock(w->lock, 1);
- printk("| w->ti->task:\n");
- printk_task(w->task);
- printk("| blocked at: ");
- print_symbol("%s\n", w->ip);
- printk("-------------------------\n");
-}
-
-static void show_task_locks(task_t *p)
-{
- switch (p->state) {
- case TASK_RUNNING: printk("R"); break;
- case TASK_INTERRUPTIBLE: printk("S"); break;
- case TASK_UNINTERRUPTIBLE: printk("D"); break;
- case TASK_STOPPED: printk("T"); break;
- case EXIT_ZOMBIE: printk("Z"); break;
- case EXIT_DEAD: printk("X"); break;
- default: printk("?"); break;
- }
- printk_task(p);
- if (p->pi_blocked_on) {
- struct rt_mutex *lock = p->pi_blocked_on->lock;
-
- printk(" blocked on:");
- printk_lock(lock, 1);
- } else
- printk(" (not blocked)\n");
-}
-
-void rt_mutex_show_held_locks(task_t *task, int verbose)
-{
- struct list_head *curr, *cursor = NULL;
- struct rt_mutex *lock;
- task_t *t;
- unsigned long flags;
- int count = 0;
-
- if (!rt_trace_on)
- return;
-
- if (verbose) {
- printk("------------------------------\n");
- printk("| showing all locks held by: | (");
- printk_task_short(task);
- printk("):\n");
- printk("------------------------------\n");
- }
-
-next:
- spin_lock_irqsave(&task->held_list_lock, flags);
- list_for_each(curr, &task->held_list_head) {
- if (cursor && curr != cursor)
- continue;
- lock = list_entry(curr, struct rt_mutex, held_list_entry);
- t = rt_mutex_owner(lock);
- WARN_ON(t != task);
- count++;
- cursor = curr->next;
- spin_unlock_irqrestore(&task->held_list_lock, flags);
-
- printk("\n#%03d: ", count);
- printk_lock(lock, 0);
- goto next;
- }
- spin_unlock_irqrestore(&task->held_list_lock, flags);
-
- printk("\n");
-}
-
-void rt_mutex_show_all_locks(void)
-{
- task_t *g, *p;
- int count = 10;
- int unlock = 1;
-
- printk("\n");
- printk("----------------------\n");
- printk("| showing all tasks: |\n");
- printk("----------------------\n");
-
- /*
- * Here we try to get the tasklist_lock as hard as possible,
- * if not successful after 2 seconds we ignore it (but keep
- * trying). This is to enable a debug printout even if a
- * tasklist_lock-holding task deadlocks or crashes.
- */
-retry:
- if (!read_trylock(&tasklist_lock)) {
- if (count == 10)
- printk("hm, tasklist_lock locked, retrying... ");
- if (count) {
- count--;
- printk(" #%d", 10-count);
- mdelay(200);
- goto retry;
- }
- printk(" ignoring it.\n");
- unlock = 0;
- }
- if (count != 10)
- printk(" locked it.\n");
-
- do_each_thread(g, p) {
- show_task_locks(p);
- if (!unlock)
- if (read_trylock(&tasklist_lock))
- unlock = 1;
- } while_each_thread(g, p);
-
- printk("\n");
-
- printk("-----------------------------------------\n");
- printk("| showing all locks held in the system: |\n");
- printk("-----------------------------------------\n");
-
- do_each_thread(g, p) {
- rt_mutex_show_held_locks(p, 0);
- if (!unlock)
- if (read_trylock(&tasklist_lock))
- unlock = 1;
- } while_each_thread(g, p);
-
-
- printk("=============================================\n\n");
-
- if (unlock)
- read_unlock(&tasklist_lock);
-}
-
-void rt_mutex_debug_check_no_locks_held(task_t *task)
-{
- struct rt_mutex_waiter *w;
- struct list_head *curr;
- struct rt_mutex *lock;
-
- if (!rt_trace_on)
- return;
- if (!rt_prio(task->normal_prio) && rt_prio(task->prio)) {
- printk("BUG: PI priority boost leaked!\n");
- printk_task(task);
- printk("\n");
- }
- if (list_empty(&task->held_list_head))
- return;
-
- spin_lock(&task->pi_lock);
- plist_for_each_entry(w, &task->pi_waiters, pi_list_entry) {
- TRACE_OFF();
-
- printk("hm, PI interest held at exit time? Task:\n");
- printk_task(task);
- printk_waiter(w);
- return;
- }
- spin_unlock(&task->pi_lock);
-
- list_for_each(curr, &task->held_list_head) {
- lock = list_entry(curr, struct rt_mutex, held_list_entry);
-
- printk("BUG: %s/%d, lock held at task exit time!\n",
- task->comm, task->pid);
- printk_lock(lock, 1);
- if (rt_mutex_owner(lock) != task)
- printk("exiting task is not even the owner??\n");
- }
-}
-
-int rt_mutex_debug_check_no_locks_freed(const void *from, unsigned long len)
-{
- const void *to = from + len;
- struct list_head *curr;
- struct rt_mutex *lock;
- unsigned long flags;
- void *lock_addr;
-
- if (!rt_trace_on)
- return 0;
-
- spin_lock_irqsave(&current->held_list_lock, flags);
- list_for_each(curr, &current->held_list_head) {
- lock = list_entry(curr, struct rt_mutex, held_list_entry);
- lock_addr = lock;
- if (lock_addr < from || lock_addr >= to)
- continue;
- TRACE_OFF();
-
- printk("BUG: %s/%d, active lock [%p(%p-%p)] freed!\n",
- current->comm, current->pid, lock, from, to);
- dump_stack();
- printk_lock(lock, 1);
- if (rt_mutex_owner(lock) != current)
- printk("freeing task is not even the owner??\n");
- return 1;
- }
- spin_unlock_irqrestore(&current->held_list_lock, flags);
-
- return 0;
}
void rt_mutex_debug_task_free(struct task_struct *task)
@@ -395,85 +170,41 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter)
current->comm, current->pid);
printk_lock(waiter->lock, 1);
- printk("... trying at: ");
- print_symbol("%s\n", waiter->ip);
-
printk("\n2) %s/%d is blocked on this lock:\n", task->comm, task->pid);
printk_lock(waiter->deadlock_lock, 1);
- rt_mutex_show_held_locks(current, 1);
- rt_mutex_show_held_locks(task, 1);
+ debug_show_held_locks(current);
+ debug_show_held_locks(task);
printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task->pid);
show_stack(task, NULL);
printk("\n%s/%d's [current] stackdump:\n\n",
current->comm, current->pid);
dump_stack();
- rt_mutex_show_all_locks();
+ debug_show_all_locks();
+
printk("[ turning off deadlock detection."
"Please report this trace. ]\n\n");
local_irq_disable();
}
-void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__)
+void debug_rt_mutex_lock(struct rt_mutex *lock)
{
- unsigned long flags;
-
- if (rt_trace_on) {
- TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
-
- spin_lock_irqsave(&current->held_list_lock, flags);
- list_add_tail(&lock->held_list_entry, &current->held_list_head);
- spin_unlock_irqrestore(&current->held_list_lock, flags);
-
- lock->acquire_ip = ip;
- }
}
void debug_rt_mutex_unlock(struct rt_mutex *lock)
{
- unsigned long flags;
-
- if (rt_trace_on) {
- TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
- TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
-
- spin_lock_irqsave(&current->held_list_lock, flags);
- list_del_init(&lock->held_list_entry);
- spin_unlock_irqrestore(&current->held_list_lock, flags);
- }
+ TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current);
}
-void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
- struct task_struct *powner __IP_DECL__)
+void
+debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner)
{
- unsigned long flags;
-
- if (rt_trace_on) {
- TRACE_WARN_ON_LOCKED(!list_empty(&lock->held_list_entry));
-
- spin_lock_irqsave(&powner->held_list_lock, flags);
- list_add_tail(&lock->held_list_entry, &powner->held_list_head);
- spin_unlock_irqrestore(&powner->held_list_lock, flags);
-
- lock->acquire_ip = ip;
- }
}
void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
- unsigned long flags;
-
- if (rt_trace_on) {
- struct task_struct *owner = rt_mutex_owner(lock);
-
- TRACE_WARN_ON_LOCKED(!owner);
- TRACE_WARN_ON_LOCKED(list_empty(&lock->held_list_entry));
-
- spin_lock_irqsave(&owner->held_list_lock, flags);
- list_del_init(&lock->held_list_entry);
- spin_unlock_irqrestore(&owner->held_list_lock, flags);
- }
+ TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock));
}
void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
@@ -493,17 +224,15 @@ void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
void debug_rt_mutex_init(struct rt_mutex *lock, const char *name)
{
- void *addr = lock;
-
- if (rt_trace_on) {
- rt_mutex_debug_check_no_locks_freed(addr,
- sizeof(struct rt_mutex));
- INIT_LIST_HEAD(&lock->held_list_entry);
- lock->name = name;
- }
+ /*
+ * Make sure we are not reinitializing a held lock:
+ */
+ debug_check_no_locks_freed((void *)lock, sizeof(*lock));
+ lock->name = name;
}
-void rt_mutex_deadlock_account_lock(struct rt_mutex *lock, task_t *task)
+void
+rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task)
{
}
diff --git a/kernel/rtmutex-debug.h b/kernel/rtmutex-debug.h
index 7612fbc62d7..14193d596d7 100644
--- a/kernel/rtmutex-debug.h
+++ b/kernel/rtmutex-debug.h
@@ -9,20 +9,16 @@
* This file contains macros used solely by rtmutex.c. Debug version.
*/
-#define __IP_DECL__ , unsigned long ip
-#define __IP__ , ip
-#define __RET_IP__ , (unsigned long)__builtin_return_address(0)
-
extern void
rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task);
extern void rt_mutex_deadlock_account_unlock(struct task_struct *task);
extern void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter);
extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name);
-extern void debug_rt_mutex_lock(struct rt_mutex *lock __IP_DECL__);
+extern void debug_rt_mutex_lock(struct rt_mutex *lock);
extern void debug_rt_mutex_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock,
- struct task_struct *powner __IP_DECL__);
+ struct task_struct *powner);
extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter,
struct rt_mutex *lock);
diff --git a/kernel/rtmutex-tester.c b/kernel/rtmutex-tester.c
index e82c2f84824..494dac872a1 100644
--- a/kernel/rtmutex-tester.c
+++ b/kernel/rtmutex-tester.c
@@ -33,7 +33,7 @@ struct test_thread_data {
};
static struct test_thread_data thread_data[MAX_RT_TEST_THREADS];
-static task_t *threads[MAX_RT_TEST_THREADS];
+static struct task_struct *threads[MAX_RT_TEST_THREADS];
static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES];
enum test_opcodes {
@@ -361,8 +361,8 @@ static ssize_t sysfs_test_command(struct sys_device *dev, const char *buf,
static ssize_t sysfs_test_status(struct sys_device *dev, char *buf)
{
struct test_thread_data *td;
+ struct task_struct *tsk;
char *curr = buf;
- task_t *tsk;
int i;
td = container_of(dev, struct test_thread_data, sysdev);
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 45d61016da5..d2ef13b485e 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -157,12 +157,11 @@ int max_lock_depth = 1024;
* Decreases task's usage by one - may thus free the task.
* Returns 0 or -EDEADLK.
*/
-static int rt_mutex_adjust_prio_chain(task_t *task,
+static int rt_mutex_adjust_prio_chain(struct task_struct *task,
int deadlock_detect,
struct rt_mutex *orig_lock,
struct rt_mutex_waiter *orig_waiter,
- struct task_struct *top_task
- __IP_DECL__)
+ struct task_struct *top_task)
{
struct rt_mutex *lock;
struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter;
@@ -283,6 +282,7 @@ static int rt_mutex_adjust_prio_chain(task_t *task,
spin_unlock_irqrestore(&task->pi_lock, flags);
out_put_task:
put_task_struct(task);
+
return ret;
}
@@ -357,7 +357,7 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
*
* Must be called with lock->wait_lock held.
*/
-static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
+static int try_to_take_rt_mutex(struct rt_mutex *lock)
{
/*
* We have to be careful here if the atomic speedups are
@@ -384,7 +384,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
return 0;
/* We got the lock. */
- debug_rt_mutex_lock(lock __IP__);
+ debug_rt_mutex_lock(lock);
rt_mutex_set_owner(lock, current, 0);
@@ -402,13 +402,12 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock __IP_DECL__)
*/
static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
- int detect_deadlock
- __IP_DECL__)
+ int detect_deadlock)
{
+ struct task_struct *owner = rt_mutex_owner(lock);
struct rt_mutex_waiter *top_waiter = waiter;
- task_t *owner = rt_mutex_owner(lock);
- int boost = 0, res;
unsigned long flags;
+ int boost = 0, res;
spin_lock_irqsave(&current->pi_lock, flags);
__rt_mutex_adjust_prio(current);
@@ -454,7 +453,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
spin_unlock(&lock->wait_lock);
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
- current __IP__);
+ current);
spin_lock(&lock->wait_lock);
@@ -526,12 +525,12 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
* Must be called with lock->wait_lock held
*/
static void remove_waiter(struct rt_mutex *lock,
- struct rt_mutex_waiter *waiter __IP_DECL__)
+ struct rt_mutex_waiter *waiter)
{
int first = (waiter == rt_mutex_top_waiter(lock));
- int boost = 0;
- task_t *owner = rt_mutex_owner(lock);
+ struct task_struct *owner = rt_mutex_owner(lock);
unsigned long flags;
+ int boost = 0;
spin_lock_irqsave(&current->pi_lock, flags);
plist_del(&waiter->list_entry, &lock->wait_list);
@@ -568,7 +567,7 @@ static void remove_waiter(struct rt_mutex *lock,
spin_unlock(&lock->wait_lock);
- rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current __IP__);
+ rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
spin_lock(&lock->wait_lock);
}
@@ -595,7 +594,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
get_task_struct(task);
spin_unlock_irqrestore(&task->pi_lock, flags);
- rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task __RET_IP__);
+ rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
}
/*
@@ -604,7 +603,7 @@ void rt_mutex_adjust_pi(struct task_struct *task)
static int __sched
rt_mutex_slowlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- int detect_deadlock __IP_DECL__)
+ int detect_deadlock)
{
struct rt_mutex_waiter waiter;
int ret = 0;
@@ -615,7 +614,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
spin_lock(&lock->wait_lock);
/* Try to acquire the lock again: */
- if (try_to_take_rt_mutex(lock __IP__)) {
+ if (try_to_take_rt_mutex(lock)) {
spin_unlock(&lock->wait_lock);
return 0;
}
@@ -629,7 +628,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
for (;;) {
/* Try to acquire the lock: */
- if (try_to_take_rt_mutex(lock __IP__))
+ if (try_to_take_rt_mutex(lock))
break;
/*
@@ -653,7 +652,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
*/
if (!waiter.task) {
ret = task_blocks_on_rt_mutex(lock, &waiter,
- detect_deadlock __IP__);
+ detect_deadlock);
/*
* If we got woken up by the owner then start loop
* all over without going into schedule to try
@@ -680,7 +679,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
set_current_state(TASK_RUNNING);
if (unlikely(waiter.task))
- remove_waiter(lock, &waiter __IP__);
+ remove_waiter(lock, &waiter);
/*
* try_to_take_rt_mutex() sets the waiter bit
@@ -711,7 +710,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
* Slow path try-lock function:
*/
static inline int
-rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
+rt_mutex_slowtrylock(struct rt_mutex *lock)
{
int ret = 0;
@@ -719,7 +718,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lock __IP_DECL__)
if (likely(rt_mutex_owner(lock) != current)) {
- ret = try_to_take_rt_mutex(lock __IP__);
+ ret = try_to_take_rt_mutex(lock);
/*
* try_to_take_rt_mutex() sets the lock waiters
* bit unconditionally. Clean this up.
@@ -769,13 +768,13 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
int detect_deadlock,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- int detect_deadlock __IP_DECL__))
+ int detect_deadlock))
{
if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
- return slowfn(lock, state, NULL, detect_deadlock __RET_IP__);
+ return slowfn(lock, state, NULL, detect_deadlock);
}
static inline int
@@ -783,24 +782,24 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout, int detect_deadlock,
int (*slowfn)(struct rt_mutex *lock, int state,
struct hrtimer_sleeper *timeout,
- int detect_deadlock __IP_DECL__))
+ int detect_deadlock))
{
if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 0;
} else
- return slowfn(lock, state, timeout, detect_deadlock __RET_IP__);
+ return slowfn(lock, state, timeout, detect_deadlock);
}
static inline int
rt_mutex_fasttrylock(struct rt_mutex *lock,
- int (*slowfn)(struct rt_mutex *lock __IP_DECL__))
+ int (*slowfn)(struct rt_mutex *lock))
{
if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
rt_mutex_deadlock_account_lock(lock, current);
return 1;
}
- return slowfn(lock __RET_IP__);
+ return slowfn(lock);
}
static inline void
@@ -948,7 +947,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner)
{
__rt_mutex_init(lock, NULL);
- debug_rt_mutex_proxy_lock(lock, proxy_owner __RET_IP__);
+ debug_rt_mutex_proxy_lock(lock, proxy_owner);
rt_mutex_set_owner(lock, proxy_owner, 0);
rt_mutex_deadlock_account_lock(lock, proxy_owner);
}
diff --git a/kernel/rtmutex.h b/kernel/rtmutex.h
index 1e0fca13ff7..a1a1dd06421 100644
--- a/kernel/rtmutex.h
+++ b/kernel/rtmutex.h
@@ -10,9 +10,6 @@
* Non-debug version.
*/
-#define __IP_DECL__
-#define __IP__
-#define __RET_IP__
#define rt_mutex_deadlock_check(l) (0)
#define rt_mutex_deadlock_account_lock(m, t) do { } while (0)
#define rt_mutex_deadlock_account_unlock(l) do { } while (0)
diff --git a/kernel/rwsem.c b/kernel/rwsem.c
new file mode 100644
index 00000000000..291ded556aa
--- /dev/null
+++ b/kernel/rwsem.c
@@ -0,0 +1,147 @@
+/* kernel/rwsem.c: R/W semaphores, public implementation
+ *
+ * Written by David Howells (dhowells@redhat.com).
+ * Derived from asm-i386/semaphore.h
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rwsem.h>
+
+#include <asm/system.h>
+#include <asm/atomic.h>
+
+/*
+ * lock for reading
+ */
+void down_read(struct rw_semaphore *sem)
+{
+ might_sleep();
+ rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
+
+ __down_read(sem);
+}
+
+EXPORT_SYMBOL(down_read);
+
+/*
+ * trylock for reading -- returns 1 if successful, 0 if contention
+ */
+int down_read_trylock(struct rw_semaphore *sem)
+{
+ int ret = __down_read_trylock(sem);
+
+ if (ret == 1)
+ rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
+ return ret;
+}
+
+EXPORT_SYMBOL(down_read_trylock);
+
+/*
+ * lock for writing
+ */
+void down_write(struct rw_semaphore *sem)
+{
+ might_sleep();
+ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+
+ __down_write(sem);
+}
+
+EXPORT_SYMBOL(down_write);
+
+/*
+ * trylock for writing -- returns 1 if successful, 0 if contention
+ */
+int down_write_trylock(struct rw_semaphore *sem)
+{
+ int ret = __down_write_trylock(sem);
+
+ if (ret == 1)
+ rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
+ return ret;
+}
+
+EXPORT_SYMBOL(down_write_trylock);
+
+/*
+ * release a read lock
+ */
+void up_read(struct rw_semaphore *sem)
+{
+ rwsem_release(&sem->dep_map, 1, _RET_IP_);
+
+ __up_read(sem);
+}
+
+EXPORT_SYMBOL(up_read);
+
+/*
+ * release a write lock
+ */
+void up_write(struct rw_semaphore *sem)
+{
+ rwsem_release(&sem->dep_map, 1, _RET_IP_);
+
+ __up_write(sem);
+}
+
+EXPORT_SYMBOL(up_write);
+
+/*
+ * downgrade write lock to read lock
+ */
+void downgrade_write(struct rw_semaphore *sem)
+{
+ /*
+ * lockdep: a downgraded write will live on as a write
+ * dependency.
+ */
+ __downgrade_write(sem);
+}
+
+EXPORT_SYMBOL(downgrade_write);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+void down_read_nested(struct rw_semaphore *sem, int subclass)
+{
+ might_sleep();
+ rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
+
+ __down_read(sem);
+}
+
+EXPORT_SYMBOL(down_read_nested);
+
+void down_read_non_owner(struct rw_semaphore *sem)
+{
+ might_sleep();
+
+ __down_read(sem);
+}
+
+EXPORT_SYMBOL(down_read_non_owner);
+
+void down_write_nested(struct rw_semaphore *sem, int subclass)
+{
+ might_sleep();
+ rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
+
+ __down_write_nested(sem, subclass);
+}
+
+EXPORT_SYMBOL(down_write_nested);
+
+void up_read_non_owner(struct rw_semaphore *sem)
+{
+ __up_read(sem);
+}
+
+EXPORT_SYMBOL(up_read_non_owner);
+
+#endif
+
+
diff --git a/kernel/sched.c b/kernel/sched.c
index 2629c1711fd..4ee400f9d56 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -30,6 +30,7 @@
#include <linux/capability.h>
#include <linux/completion.h>
#include <linux/kernel_stat.h>
+#include <linux/debug_locks.h>
#include <linux/security.h>
#include <linux/notifier.h>
#include <linux/profile.h>
@@ -178,20 +179,15 @@ static unsigned int static_prio_timeslice(int static_prio)
return SCALE_PRIO(DEF_TIMESLICE, static_prio);
}
-static inline unsigned int task_timeslice(task_t *p)
+static inline unsigned int task_timeslice(struct task_struct *p)
{
return static_prio_timeslice(p->static_prio);
}
-#define task_hot(p, now, sd) ((long long) ((now) - (p)->last_ran) \
- < (long long) (sd)->cache_hot_time)
-
/*
* These are the runqueue data structures:
*/
-typedef struct runqueue runqueue_t;
-
struct prio_array {
unsigned int nr_active;
DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
@@ -205,7 +201,7 @@ struct prio_array {
* (such as the load balancing or the thread migration code), lock
* acquire operations must be ordered by ascending &runqueue.
*/
-struct runqueue {
+struct rq {
spinlock_t lock;
/*
@@ -229,9 +225,9 @@ struct runqueue {
unsigned long expired_timestamp;
unsigned long long timestamp_last_tick;
- task_t *curr, *idle;
+ struct task_struct *curr, *idle;
struct mm_struct *prev_mm;
- prio_array_t *active, *expired, arrays[2];
+ struct prio_array *active, *expired, arrays[2];
int best_expired_prio;
atomic_t nr_iowait;
@@ -242,7 +238,7 @@ struct runqueue {
int active_balance;
int push_cpu;
- task_t *migration_thread;
+ struct task_struct *migration_thread;
struct list_head migration_queue;
#endif
@@ -265,9 +261,10 @@ struct runqueue {
unsigned long ttwu_cnt;
unsigned long ttwu_local;
#endif
+ struct lock_class_key rq_lock_key;
};
-static DEFINE_PER_CPU(struct runqueue, runqueues);
+static DEFINE_PER_CPU(struct rq, runqueues);
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
@@ -276,8 +273,8 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
* The domain tree of any CPU may only be accessed from within
* preempt-disabled sections.
*/
-#define for_each_domain(cpu, domain) \
-for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
+#define for_each_domain(cpu, __sd) \
+ for (__sd = rcu_dereference(cpu_rq(cpu)->sd); __sd; __sd = __sd->parent)
#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
#define this_rq() (&__get_cpu_var(runqueues))
@@ -292,26 +289,33 @@ for (domain = rcu_dereference(cpu_rq(cpu)->sd); domain; domain = domain->parent)
#endif
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
-static inline int task_running(runqueue_t *rq, task_t *p)
+static inline int task_running(struct rq *rq, struct task_struct *p)
{
return rq->curr == p;
}
-static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
}
-static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_DEBUG_SPINLOCK
/* this is a valid case when another task releases the spinlock */
rq->lock.owner = current;
#endif
+ /*
+ * If we are tracking spinlock dependencies then we have to
+ * fix up the runqueue lock - which gets 'carried over' from
+ * prev into current:
+ */
+ spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_);
+
spin_unlock_irq(&rq->lock);
}
#else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline int task_running(runqueue_t *rq, task_t *p)
+static inline int task_running(struct rq *rq, struct task_struct *p)
{
#ifdef CONFIG_SMP
return p->oncpu;
@@ -320,7 +324,7 @@ static inline int task_running(runqueue_t *rq, task_t *p)
#endif
}
-static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
{
#ifdef CONFIG_SMP
/*
@@ -337,7 +341,7 @@ static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
#endif
}
-static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
{
#ifdef CONFIG_SMP
/*
@@ -358,10 +362,10 @@ static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
* __task_rq_lock - lock the runqueue a given task resides on.
* Must be called interrupts disabled.
*/
-static inline runqueue_t *__task_rq_lock(task_t *p)
+static inline struct rq *__task_rq_lock(struct task_struct *p)
__acquires(rq->lock)
{
- struct runqueue *rq;
+ struct rq *rq;
repeat_lock_task:
rq = task_rq(p);
@@ -378,10 +382,10 @@ repeat_lock_task:
* interrupts. Note the ordering: we can safely lookup the task_rq without
* explicitly disabling preemption.
*/
-static runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
+static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
__acquires(rq->lock)
{
- struct runqueue *rq;
+ struct rq *rq;
repeat_lock_task:
local_irq_save(*flags);
@@ -394,13 +398,13 @@ repeat_lock_task:
return rq;
}
-static inline void __task_rq_unlock(runqueue_t *rq)
+static inline void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
spin_unlock(&rq->lock);
}
-static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
+static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
__releases(rq->lock)
{
spin_unlock_irqrestore(&rq->lock, *flags);
@@ -420,7 +424,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
seq_printf(seq, "timestamp %lu\n", jiffies);
for_each_online_cpu(cpu) {
- runqueue_t *rq = cpu_rq(cpu);
+ struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
struct sched_domain *sd;
int dcnt = 0;
@@ -507,10 +511,10 @@ struct file_operations proc_schedstat_operations = {
/*
* rq_lock - lock a given runqueue and disable interrupts.
*/
-static inline runqueue_t *this_rq_lock(void)
+static inline struct rq *this_rq_lock(void)
__acquires(rq->lock)
{
- runqueue_t *rq;
+ struct rq *rq;
local_irq_disable();
rq = this_rq();
@@ -535,7 +539,7 @@ static inline runqueue_t *this_rq_lock(void)
* long it was from the *first* time it was queued to the time that it
* finally hit a cpu.
*/
-static inline void sched_info_dequeued(task_t *t)
+static inline void sched_info_dequeued(struct task_struct *t)
{
t->sched_info.last_queued = 0;
}
@@ -545,10 +549,10 @@ static inline void sched_info_dequeued(task_t *t)
* long it was waiting to run. We also note when it began so that we
* can keep stats on how long its timeslice is.
*/
-static void sched_info_arrive(task_t *t)
+static void sched_info_arrive(struct task_struct *t)
{
unsigned long now = jiffies, diff = 0;
- struct runqueue *rq = task_rq(t);
+ struct rq *rq = task_rq(t);
if (t->sched_info.last_queued)
diff = now - t->sched_info.last_queued;
@@ -579,7 +583,7 @@ static void sched_info_arrive(task_t *t)
* the timestamp if it is already not set. It's assumed that
* sched_info_dequeued() will clear that stamp when appropriate.
*/
-static inline void sched_info_queued(task_t *t)
+static inline void sched_info_queued(struct task_struct *t)
{
if (!t->sched_info.last_queued)
t->sched_info.last_queued = jiffies;
@@ -589,9 +593,9 @@ static inline void sched_info_queued(task_t *t)
* Called when a process ceases being the active-running process, either
* voluntarily or involuntarily. Now we can calculate how long we ran.
*/
-static inline void sched_info_depart(task_t *t)
+static inline void sched_info_depart(struct task_struct *t)
{
- struct runqueue *rq = task_rq(t);
+ struct rq *rq = task_rq(t);
unsigned long diff = jiffies - t->sched_info.last_arrival;
t->sched_info.cpu_time += diff;
@@ -605,9 +609,10 @@ static inline void sched_info_depart(task_t *t)
* their time slice. (This may also be called when switching to or from
* the idle task.) We are only called when prev != next.
*/
-static inline void sched_info_switch(task_t *prev, task_t *next)
+static inline void
+sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
- struct runqueue *rq = task_rq(prev);
+ struct rq *rq = task_rq(prev);
/*
* prev now departs the cpu. It's not interesting to record
@@ -628,7 +633,7 @@ static inline void sched_info_switch(task_t *prev, task_t *next)
/*
* Adding/removing a task to/from a priority array:
*/
-static void dequeue_task(struct task_struct *p, prio_array_t *array)
+static void dequeue_task(struct task_struct *p, struct prio_array *array)
{
array->nr_active--;
list_del(&p->run_list);
@@ -636,7 +641,7 @@ static void dequeue_task(struct task_struct *p, prio_array_t *array)
__clear_bit(p->prio, array->bitmap);
}
-static void enqueue_task(struct task_struct *p, prio_array_t *array)
+static void enqueue_task(struct task_struct *p, struct prio_array *array)
{
sched_info_queued(p);
list_add_tail(&p->run_list, array->queue + p->prio);
@@ -649,12 +654,13 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
* Put task to the end of the run list without the overhead of dequeue
* followed by enqueue.
*/
-static void requeue_task(struct task_struct *p, prio_array_t *array)
+static void requeue_task(struct task_struct *p, struct prio_array *array)
{
list_move_tail(&p->run_list, array->queue + p->prio);
}
-static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
+static inline void
+enqueue_task_head(struct task_struct *p, struct prio_array *array)
{
list_add(&p->run_list, array->queue + p->prio);
__set_bit(p->prio, array->bitmap);
@@ -677,7 +683,7 @@ static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
* Both properties are important to certain workloads.
*/
-static inline int __normal_prio(task_t *p)
+static inline int __normal_prio(struct task_struct *p)
{
int bonus, prio;
@@ -713,7 +719,7 @@ static inline int __normal_prio(task_t *p)
#define RTPRIO_TO_LOAD_WEIGHT(rp) \
(PRIO_TO_LOAD_WEIGHT(MAX_RT_PRIO) + LOAD_WEIGHT(rp))
-static void set_load_weight(task_t *p)
+static void set_load_weight(struct task_struct *p)
{
if (has_rt_policy(p)) {
#ifdef CONFIG_SMP
@@ -731,23 +737,25 @@ static void set_load_weight(task_t *p)
p->load_weight = PRIO_TO_LOAD_WEIGHT(p->static_prio);
}
-static inline void inc_raw_weighted_load(runqueue_t *rq, const task_t *p)
+static inline void
+inc_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
rq->raw_weighted_load += p->load_weight;
}
-static inline void dec_raw_weighted_load(runqueue_t *rq, const task_t *p)
+static inline void
+dec_raw_weighted_load(struct rq *rq, const struct task_struct *p)
{
rq->raw_weighted_load -= p->load_weight;
}
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+static inline void inc_nr_running(struct task_struct *p, struct rq *rq)
{
rq->nr_running++;
inc_raw_weighted_load(rq, p);
}
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+static inline void dec_nr_running(struct task_struct *p, struct rq *rq)
{
rq->nr_running--;
dec_raw_weighted_load(rq, p);
@@ -760,7 +768,7 @@ static inline void dec_nr_running(task_t *p, runqueue_t *rq)
* setprio syscalls, and whenever the interactivity
* estimator recalculates.
*/
-static inline int normal_prio(task_t *p)
+static inline int normal_prio(struct task_struct *p)
{
int prio;
@@ -778,7 +786,7 @@ static inline int normal_prio(task_t *p)
* interactivity modifiers. Will be RT if the task got
* RT-boosted. If not then it returns p->normal_prio.
*/
-static int effective_prio(task_t *p)
+static int effective_prio(struct task_struct *p)
{
p->normal_prio = normal_prio(p);
/*
@@ -794,9 +802,9 @@ static int effective_prio(task_t *p)
/*
* __activate_task - move a task to the runqueue.
*/
-static void __activate_task(task_t *p, runqueue_t *rq)
+static void __activate_task(struct task_struct *p, struct rq *rq)
{
- prio_array_t *target = rq->active;
+ struct prio_array *target = rq->active;
if (batch_task(p))
target = rq->expired;
@@ -807,7 +815,7 @@ static void __activate_task(task_t *p, runqueue_t *rq)
/*
* __activate_idle_task - move idle task to the _front_ of runqueue.
*/
-static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
+static inline void __activate_idle_task(struct task_struct *p, struct rq *rq)
{
enqueue_task_head(p, rq->active);
inc_nr_running(p, rq);
@@ -817,7 +825,7 @@ static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
* Recalculate p->normal_prio and p->prio after having slept,
* updating the sleep-average too:
*/
-static int recalc_task_prio(task_t *p, unsigned long long now)
+static int recalc_task_prio(struct task_struct *p, unsigned long long now)
{
/* Caller must always ensure 'now >= p->timestamp' */
unsigned long sleep_time = now - p->timestamp;
@@ -889,7 +897,7 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
* Update all the scheduling statistics stuff. (sleep average
* calculation, priority modifiers, etc.)
*/
-static void activate_task(task_t *p, runqueue_t *rq, int local)
+static void activate_task(struct task_struct *p, struct rq *rq, int local)
{
unsigned long long now;
@@ -897,7 +905,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
#ifdef CONFIG_SMP
if (!local) {
/* Compensate for drifting sched_clock */
- runqueue_t *this_rq = this_rq();
+ struct rq *this_rq = this_rq();
now = (now - this_rq->timestamp_last_tick)
+ rq->timestamp_last_tick;
}
@@ -936,7 +944,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
/*
* deactivate_task - remove a task from the runqueue.
*/
-static void deactivate_task(struct task_struct *p, runqueue_t *rq)
+static void deactivate_task(struct task_struct *p, struct rq *rq)
{
dec_nr_running(p, rq);
dequeue_task(p, p->array);
@@ -956,7 +964,7 @@ static void deactivate_task(struct task_struct *p, runqueue_t *rq)
#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
#endif
-static void resched_task(task_t *p)
+static void resched_task(struct task_struct *p)
{
int cpu;
@@ -977,7 +985,7 @@ static void resched_task(task_t *p)
smp_send_reschedule(cpu);
}
#else
-static inline void resched_task(task_t *p)
+static inline void resched_task(struct task_struct *p)
{
assert_spin_locked(&task_rq(p)->lock);
set_tsk_need_resched(p);
@@ -988,7 +996,7 @@ static inline void resched_task(task_t *p)
* task_curr - is this task currently executing on a CPU?
* @p: the task in question.
*/
-inline int task_curr(const task_t *p)
+inline int task_curr(const struct task_struct *p)
{
return cpu_curr(task_cpu(p)) == p;
}
@@ -1000,22 +1008,23 @@ unsigned long weighted_cpuload(const int cpu)
}
#ifdef CONFIG_SMP
-typedef struct {
+struct migration_req {
struct list_head list;
- task_t *task;
+ struct task_struct *task;
int dest_cpu;
struct completion done;
-} migration_req_t;
+};
/*
* The task's runqueue lock must be held.
* Returns true if you have to wait for migration thread.
*/
-static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
+static int
+migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
{
- runqueue_t *rq = task_rq(p);
+ struct rq *rq = task_rq(p);
/*
* If the task is not on a runqueue (and not running), then
@@ -1030,6 +1039,7 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
req->task = p;
req->dest_cpu = dest_cpu;
list_add(&req->list, &rq->migration_queue);
+
return 1;
}
@@ -1042,10 +1052,10 @@ static int migrate_task(task_t *p, int dest_cpu, migration_req_t *req)
* smp_call_function() if an IPI is sent by the same process we are
* waiting to become inactive.
*/
-void wait_task_inactive(task_t *p)
+void wait_task_inactive(struct task_struct *p)
{
unsigned long flags;
- runqueue_t *rq;
+ struct rq *rq;
int preempted;
repeat:
@@ -1076,7 +1086,7 @@ repeat:
* to another CPU then no harm is done and the purpose has been
* achieved as well.
*/
-void kick_process(task_t *p)
+void kick_process(struct task_struct *p)
{
int cpu;
@@ -1096,7 +1106,7 @@ void kick_process(task_t *p)
*/
static inline unsigned long source_load(int cpu, int type)
{
- runqueue_t *rq = cpu_rq(cpu);
+ struct rq *rq = cpu_rq(cpu);
if (type == 0)
return rq->raw_weighted_load;
@@ -1110,7 +1120,7 @@ static inline unsigned long source_load(int cpu, int type)
*/
static inline unsigned long target_load(int cpu, int type)
{
- runqueue_t *rq = cpu_rq(cpu);
+ struct rq *rq = cpu_rq(cpu);
if (type == 0)
return rq->raw_weighted_load;
@@ -1123,10 +1133,10 @@ static inline unsigned long target_load(int cpu, int type)
*/
static inline unsigned long cpu_avg_load_per_task(int cpu)
{
- runqueue_t *rq = cpu_rq(cpu);
+ struct rq *rq = cpu_rq(cpu);
unsigned long n = rq->nr_running;
- return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
+ return n ? rq->raw_weighted_load / n : SCHED_LOAD_SCALE;
}
/*
@@ -1279,7 +1289,7 @@ nextlevel:
* Returns the CPU we should wake onto.
*/
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-static int wake_idle(int cpu, task_t *p)
+static int wake_idle(int cpu, struct task_struct *p)
{
cpumask_t tmp;
struct sched_domain *sd;
@@ -1302,7 +1312,7 @@ static int wake_idle(int cpu, task_t *p)
return cpu;
}
#else
-static inline int wake_idle(int cpu, task_t *p)
+static inline int wake_idle(int cpu, struct task_struct *p)
{
return cpu;
}
@@ -1322,15 +1332,15 @@ static inline int wake_idle(int cpu, task_t *p)
*
* returns failure only if the task is already active.
*/
-static int try_to_wake_up(task_t *p, unsigned int state, int sync)
+static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
{
int cpu, this_cpu, success = 0;
unsigned long flags;
long old_state;
- runqueue_t *rq;
+ struct rq *rq;
#ifdef CONFIG_SMP
- unsigned long load, this_load;
struct sched_domain *sd, *this_sd = NULL;
+ unsigned long load, this_load;
int new_cpu;
#endif
@@ -1480,15 +1490,14 @@ out:
return success;
}
-int fastcall wake_up_process(task_t *p)
+int fastcall wake_up_process(struct task_struct *p)
{
return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
}
-
EXPORT_SYMBOL(wake_up_process);
-int fastcall wake_up_state(task_t *p, unsigned int state)
+int fastcall wake_up_state(struct task_struct *p, unsigned int state)
{
return try_to_wake_up(p, state, 0);
}
@@ -1497,7 +1506,7 @@ int fastcall wake_up_state(task_t *p, unsigned int state)
* Perform scheduler related setup for a newly forked process p.
* p is forked by current.
*/
-void fastcall sched_fork(task_t *p, int clone_flags)
+void fastcall sched_fork(struct task_struct *p, int clone_flags)
{
int cpu = get_cpu();
@@ -1565,11 +1574,11 @@ void fastcall sched_fork(task_t *p, int clone_flags)
* that must be done for every newly created context, then puts the task
* on the runqueue and wakes it.
*/
-void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
+void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
{
+ struct rq *rq, *this_rq;
unsigned long flags;
int this_cpu, cpu;
- runqueue_t *rq, *this_rq;
rq = task_rq_lock(p, &flags);
BUG_ON(p->state != TASK_RUNNING);
@@ -1649,10 +1658,10 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
* artificially, because any timeslice recovered here
* was given away by the parent in the first place.)
*/
-void fastcall sched_exit(task_t *p)
+void fastcall sched_exit(struct task_struct *p)
{
unsigned long flags;
- runqueue_t *rq;
+ struct rq *rq;
/*
* If the child was a (relative-) CPU hog then decrease
@@ -1683,7 +1692,7 @@ void fastcall sched_exit(task_t *p)
* prepare_task_switch sets up locking and calls architecture specific
* hooks.
*/
-static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
+static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
{
prepare_lock_switch(rq, next);
prepare_arch_switch(next);
@@ -1704,7 +1713,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
* with the lock held can cause deadlocks; see schedule() for
* details.)
*/
-static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
+static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
__releases(rq->lock)
{
struct mm_struct *mm = rq->prev_mm;
@@ -1742,10 +1751,11 @@ static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
* schedule_tail - first thing a freshly forked thread must call.
* @prev: the thread we just switched away from.
*/
-asmlinkage void schedule_tail(task_t *prev)
+asmlinkage void schedule_tail(struct task_struct *prev)
__releases(rq->lock)
{
- runqueue_t *rq = this_rq();
+ struct rq *rq = this_rq();
+
finish_task_switch(rq, prev);
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
/* In this case, finish_task_switch does not reenable preemption */
@@ -1759,8 +1769,9 @@ asmlinkage void schedule_tail(task_t *prev)
* context_switch - switch to the new MM and the new
* thread's register state.
*/
-static inline
-task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
+static inline struct task_struct *
+context_switch(struct rq *rq, struct task_struct *prev,
+ struct task_struct *next)
{
struct mm_struct *mm = next->mm;
struct mm_struct *oldmm = prev->active_mm;
@@ -1777,6 +1788,7 @@ task_t * context_switch(runqueue_t *rq, task_t *prev, task_t *next)
WARN_ON(rq->prev_mm);
rq->prev_mm = oldmm;
}
+ spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);
@@ -1857,12 +1869,21 @@ unsigned long nr_active(void)
#ifdef CONFIG_SMP
/*
+ * Is this task likely cache-hot:
+ */
+static inline int
+task_hot(struct task_struct *p, unsigned long long now, struct sched_domain *sd)
+{
+ return (long long)(now - p->last_ran) < (long long)sd->cache_hot_time;
+}
+
+/*
* double_rq_lock - safely lock two runqueues
*
* Note this does not disable interrupts like task_rq_lock,
* you need to do so manually before calling.
*/
-static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
+static void double_rq_lock(struct rq *rq1, struct rq *rq2)
__acquires(rq1->lock)
__acquires(rq2->lock)
{
@@ -1886,7 +1907,7 @@ static void double_rq_lock(runqueue_t *rq1, runqueue_t *rq2)
* Note this does not restore interrupts like task_rq_unlock,
* you need to do so manually after calling.
*/
-static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
+static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
__releases(rq1->lock)
__releases(rq2->lock)
{
@@ -1900,7 +1921,7 @@ static void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
/*
* double_lock_balance - lock the busiest runqueue, this_rq is locked already.
*/
-static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
+static void double_lock_balance(struct rq *this_rq, struct rq *busiest)
__releases(this_rq->lock)
__acquires(busiest->lock)
__acquires(this_rq->lock)
@@ -1921,11 +1942,11 @@ static void double_lock_balance(runqueue_t *this_rq, runqueue_t *busiest)
* allow dest_cpu, which will force the cpu onto dest_cpu. Then
* the cpu_allowed mask is restored.
*/
-static void sched_migrate_task(task_t *p, int dest_cpu)
+static void sched_migrate_task(struct task_struct *p, int dest_cpu)
{
- migration_req_t req;
- runqueue_t *rq;
+ struct migration_req req;
unsigned long flags;
+ struct rq *rq;
rq = task_rq_lock(p, &flags);
if (!cpu_isset(dest_cpu, p->cpus_allowed)
@@ -1936,11 +1957,13 @@ static void sched_migrate_task(task_t *p, int dest_cpu)
if (migrate_task(p, dest_cpu, &req)) {
/* Need to wait for migration thread (might exit: take ref). */
struct task_struct *mt = rq->migration_thread;
+
get_task_struct(mt);
task_rq_unlock(rq, &flags);
wake_up_process(mt);
put_task_struct(mt);
wait_for_completion(&req.done);
+
return;
}
out:
@@ -1964,9 +1987,9 @@ void sched_exec(void)
* pull_task - move a task from a remote runqueue to the local runqueue.
* Both runqueues must be locked.
*/
-static
-void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
- runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
+static void pull_task(struct rq *src_rq, struct prio_array *src_array,
+ struct task_struct *p, struct rq *this_rq,
+ struct prio_array *this_array, int this_cpu)
{
dequeue_task(p, src_array);
dec_nr_running(p, src_rq);
@@ -1987,7 +2010,7 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
* can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
*/
static
-int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
+int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
struct sched_domain *sd, enum idle_type idle,
int *all_pinned)
{
@@ -2019,6 +2042,7 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
}
#define rq_best_prio(rq) min((rq)->curr->prio, (rq)->best_expired_prio)
+
/*
* move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
* load from busiest to this_rq, as part of a balancing operation within
@@ -2026,18 +2050,17 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
*
* Called with both runqueues locked.
*/
-static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
+static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum idle_type idle,
int *all_pinned)
{
- prio_array_t *array, *dst_array;
+ int idx, pulled = 0, pinned = 0, this_best_prio, best_prio,
+ best_prio_seen, skip_for_load;
+ struct prio_array *array, *dst_array;
struct list_head *head, *curr;
- int idx, pulled = 0, pinned = 0, this_best_prio, busiest_best_prio;
- int busiest_best_prio_seen;
- int skip_for_load; /* skip the task based on weighted load issues */
+ struct task_struct *tmp;
long rem_load_move;
- task_t *tmp;
if (max_nr_move == 0 || max_load_move == 0)
goto out;
@@ -2045,15 +2068,15 @@ static int move_tasks(runqueue_t *this_rq, int this_cpu, runqueue_t *busiest,
rem_load_move = max_load_move;
pinned = 1;
this_best_prio = rq_best_prio(this_rq);
- busiest_best_prio = rq_best_prio(busiest);
+ best_prio = rq_best_prio(busiest);
/*
* Enable handling of the case where there is more than one task
* with the best priority. If the current running task is one
- * of those with prio==busiest_best_prio we know it won't be moved
+ * of those with prio==best_prio we know it won't be moved
* and therefore it's safe to override the skip (based on load) of
* any task we find with that prio.
*/
- busiest_best_prio_seen = busiest_best_prio == busiest->curr->prio;
+ best_prio_seen = best_prio == busiest->curr->prio;
/*
* We first consider expired tasks. Those will likely not be
@@ -2089,7 +2112,7 @@ skip_bitmap:
head = array->queue + idx;
curr = head->prev;
skip_queue:
- tmp = list_entry(curr, task_t, run_list);
+ tmp = list_entry(curr, struct task_struct, run_list);
curr = curr->prev;
@@ -2100,10 +2123,11 @@ skip_queue:
*/
skip_for_load = tmp->load_weight > rem_load_move;
if (skip_for_load && idx < this_best_prio)
- skip_for_load = !busiest_best_prio_seen && idx == busiest_best_prio;
+ skip_for_load = !best_prio_seen && idx == best_prio;
if (skip_for_load ||
!can_migrate_task(tmp, busiest, this_cpu, sd, idle, &pinned)) {
- busiest_best_prio_seen |= idx == busiest_best_prio;
+
+ best_prio_seen |= idx == best_prio;
if (curr != head)
goto skip_queue;
idx++;
@@ -2146,8 +2170,8 @@ out:
/*
* find_busiest_group finds and returns the busiest CPU group within the
- * domain. It calculates and returns the amount of weighted load which should be
- * moved to restore balance via the imbalance parameter.
+ * domain. It calculates and returns the amount of weighted load which
+ * should be moved to restore balance via the imbalance parameter.
*/
static struct sched_group *
find_busiest_group(struct sched_domain *sd, int this_cpu,
@@ -2188,7 +2212,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
sum_weighted_load = sum_nr_running = avg_load = 0;
for_each_cpu_mask(i, group->cpumask) {
- runqueue_t *rq = cpu_rq(i);
+ struct rq *rq = cpu_rq(i);
if (*sd_idle && !idle_cpu(i))
*sd_idle = 0;
@@ -2269,7 +2293,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
* capacity but still has some space to pick up some load
* from other group and save more power
*/
- if (sum_nr_running <= group_capacity - 1)
+ if (sum_nr_running <= group_capacity - 1) {
if (sum_nr_running > leader_nr_running ||
(sum_nr_running == leader_nr_running &&
first_cpu(group->cpumask) >
@@ -2277,7 +2301,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
group_leader = group;
leader_nr_running = sum_nr_running;
}
-
+ }
group_next:
#endif
group = group->next;
@@ -2332,8 +2356,7 @@ group_next:
* moved
*/
if (*imbalance < busiest_load_per_task) {
- unsigned long pwr_now, pwr_move;
- unsigned long tmp;
+ unsigned long tmp, pwr_now, pwr_move;
unsigned int imbn;
small_imbalance:
@@ -2405,22 +2428,23 @@ ret:
/*
* find_busiest_queue - find the busiest runqueue among the cpus in group.
*/
-static runqueue_t *find_busiest_queue(struct sched_group *group,
- enum idle_type idle, unsigned long imbalance)
+static struct rq *
+find_busiest_queue(struct sched_group *group, enum idle_type idle,
+ unsigned long imbalance)
{
+ struct rq *busiest = NULL, *rq;
unsigned long max_load = 0;
- runqueue_t *busiest = NULL, *rqi;
int i;
for_each_cpu_mask(i, group->cpumask) {
- rqi = cpu_rq(i);
+ rq = cpu_rq(i);
- if (rqi->nr_running == 1 && rqi->raw_weighted_load > imbalance)
+ if (rq->nr_running == 1 && rq->raw_weighted_load > imbalance)
continue;
- if (rqi->raw_weighted_load > max_load) {
- max_load = rqi->raw_weighted_load;
- busiest = rqi;
+ if (rq->raw_weighted_load > max_load) {
+ max_load = rq->raw_weighted_load;
+ busiest = rq;
}
}
@@ -2433,22 +2457,24 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
*/
#define MAX_PINNED_INTERVAL 512
-#define minus_1_or_zero(n) ((n) > 0 ? (n) - 1 : 0)
+static inline unsigned long minus_1_or_zero(unsigned long n)
+{
+ return n > 0 ? n - 1 : 0;
+}
+
/*
* Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance.
*
* Called with this_rq unlocked.
*/
-static int load_balance(int this_cpu, runqueue_t *this_rq,
+static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum idle_type idle)
{
+ int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group;
- runqueue_t *busiest;
unsigned long imbalance;
- int nr_moved, all_pinned = 0;
- int active_balance = 0;
- int sd_idle = 0;
+ struct rq *busiest;
if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
!sched_smt_power_savings)
@@ -2482,8 +2508,8 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
*/
double_rq_lock(this_rq, busiest);
nr_moved = move_tasks(this_rq, this_cpu, busiest,
- minus_1_or_zero(busiest->nr_running),
- imbalance, sd, idle, &all_pinned);
+ minus_1_or_zero(busiest->nr_running),
+ imbalance, sd, idle, &all_pinned);
double_rq_unlock(this_rq, busiest);
/* All tasks on this runqueue were pinned by CPU affinity */
@@ -2556,7 +2582,8 @@ out_one_pinned:
(sd->balance_interval < sd->max_interval))
sd->balance_interval *= 2;
- if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+ if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+ !sched_smt_power_savings)
return -1;
return 0;
}
@@ -2568,11 +2595,11 @@ out_one_pinned:
* Called from schedule when this_rq is about to become idle (NEWLY_IDLE).
* this_rq is locked.
*/
-static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
- struct sched_domain *sd)
+static int
+load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
{
struct sched_group *group;
- runqueue_t *busiest = NULL;
+ struct rq *busiest = NULL;
unsigned long imbalance;
int nr_moved = 0;
int sd_idle = 0;
@@ -2618,9 +2645,11 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
out_balanced:
schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
- if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings)
+ if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+ !sched_smt_power_savings)
return -1;
sd->nr_balance_failed = 0;
+
return 0;
}
@@ -2628,16 +2657,15 @@ out_balanced:
* idle_balance is called by schedule() if this_cpu is about to become
* idle. Attempts to pull tasks from other CPUs.
*/
-static void idle_balance(int this_cpu, runqueue_t *this_rq)
+static void idle_balance(int this_cpu, struct rq *this_rq)
{
struct sched_domain *sd;
for_each_domain(this_cpu, sd) {
if (sd->flags & SD_BALANCE_NEWIDLE) {
- if (load_balance_newidle(this_cpu, this_rq, sd)) {
- /* We've pulled tasks over so stop searching */
+ /* If we've pulled tasks over stop searching: */
+ if (load_balance_newidle(this_cpu, this_rq, sd))
break;
- }
}
}
}
@@ -2650,14 +2678,14 @@ static void idle_balance(int this_cpu, runqueue_t *this_rq)
*
* Called with busiest_rq locked.
*/
-static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
+static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
{
- struct sched_domain *sd;
- runqueue_t *target_rq;
int target_cpu = busiest_rq->push_cpu;
+ struct sched_domain *sd;
+ struct rq *target_rq;
+ /* Is there any task to move? */
if (busiest_rq->nr_running <= 1)
- /* no task to move */
return;
target_rq = cpu_rq(target_cpu);
@@ -2675,21 +2703,20 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
/* Search for an sd spanning us and the target CPU. */
for_each_domain(target_cpu, sd) {
if ((sd->flags & SD_LOAD_BALANCE) &&
- cpu_isset(busiest_cpu, sd->span))
+ cpu_isset(busiest_cpu, sd->span))
break;
}
- if (unlikely(sd == NULL))
- goto out;
-
- schedstat_inc(sd, alb_cnt);
+ if (likely(sd)) {
+ schedstat_inc(sd, alb_cnt);
- if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
- RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE, NULL))
- schedstat_inc(sd, alb_pushed);
- else
- schedstat_inc(sd, alb_failed);
-out:
+ if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
+ RTPRIO_TO_LOAD_WEIGHT(100), sd, SCHED_IDLE,
+ NULL))
+ schedstat_inc(sd, alb_pushed);
+ else
+ schedstat_inc(sd, alb_failed);
+ }
spin_unlock(&target_rq->lock);
}
@@ -2702,23 +2729,27 @@ out:
* Balancing parameters are set up in arch_init_sched_domains.
*/
-/* Don't have all balancing operations going off at once */
-#define CPU_OFFSET(cpu) (HZ * cpu / NR_CPUS)
+/* Don't have all balancing operations going off at once: */
+static inline unsigned long cpu_offset(int cpu)
+{
+ return jiffies + cpu * HZ / NR_CPUS;
+}
-static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
- enum idle_type idle)
+static void
+rebalance_tick(int this_cpu, struct rq *this_rq, enum idle_type idle)
{
- unsigned long old_load, this_load;
- unsigned long j = jiffies + CPU_OFFSET(this_cpu);
+ unsigned long this_load, interval, j = cpu_offset(this_cpu);
struct sched_domain *sd;
- int i;
+ int i, scale;
this_load = this_rq->raw_weighted_load;
- /* Update our load */
- for (i = 0; i < 3; i++) {
- unsigned long new_load = this_load;
- int scale = 1 << i;
+
+ /* Update our load: */
+ for (i = 0, scale = 1; i < 3; i++, scale <<= 1) {
+ unsigned long old_load, new_load;
+
old_load = this_rq->cpu_load[i];
+ new_load = this_load;
/*
* Round up the averaging division if load is increasing. This
* prevents us from getting stuck on 9 if the load is 10, for
@@ -2730,8 +2761,6 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
}
for_each_domain(this_cpu, sd) {
- unsigned long interval;
-
if (!(sd->flags & SD_LOAD_BALANCE))
continue;
@@ -2761,17 +2790,18 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
/*
* on UP we do not need to balance between CPUs:
*/
-static inline void rebalance_tick(int cpu, runqueue_t *rq, enum idle_type idle)
+static inline void rebalance_tick(int cpu, struct rq *rq, enum idle_type idle)
{
}
-static inline void idle_balance(int cpu, runqueue_t *rq)
+static inline void idle_balance(int cpu, struct rq *rq)
{
}
#endif
-static inline int wake_priority_sleeper(runqueue_t *rq)
+static inline int wake_priority_sleeper(struct rq *rq)
{
int ret = 0;
+
#ifdef CONFIG_SCHED_SMT
spin_lock(&rq->lock);
/*
@@ -2795,25 +2825,26 @@ EXPORT_PER_CPU_SYMBOL(kstat);
* This is called on clock ticks and on context switches.
* Bank in p->sched_time the ns elapsed since the last tick or switch.
*/
-static inline void update_cpu_clock(task_t *p, runqueue_t *rq,
- unsigned long long now)
+static inline void
+update_cpu_clock(struct task_struct *p, struct rq *rq, unsigned long long now)
{
- unsigned long long last = max(p->timestamp, rq->timestamp_last_tick);
- p->sched_time += now - last;
+ p->sched_time += now - max(p->timestamp, rq->timestamp_last_tick);
}
/*
* Return current->sched_time plus any more ns on the sched_clock
* that have not yet been banked.
*/
-unsigned long long current_sched_time(const task_t *tsk)
+unsigned long long current_sched_time(const struct task_struct *p)
{
unsigned long long ns;
unsigned long flags;
+
local_irq_save(flags);
- ns = max(tsk->timestamp, task_rq(tsk)->timestamp_last_tick);
- ns = tsk->sched_time + (sched_clock() - ns);
+ ns = max(p->timestamp, task_rq(p)->timestamp_last_tick);
+ ns = p->sched_time + sched_clock() - ns;
local_irq_restore(flags);
+
return ns;
}
@@ -2827,11 +2858,16 @@ unsigned long long current_sched_time(const task_t *tsk)
* increasing number of running tasks. We also ignore the interactivity
* if a better static_prio task has expired:
*/
-#define EXPIRED_STARVING(rq) \
- ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
- (jiffies - (rq)->expired_timestamp >= \
- STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
- ((rq)->curr->static_prio > (rq)->best_expired_prio))
+static inline int expired_starving(struct rq *rq)
+{
+ if (rq->curr->static_prio > rq->best_expired_prio)
+ return 1;
+ if (!STARVATION_LIMIT || !rq->expired_timestamp)
+ return 0;
+ if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
+ return 1;
+ return 0;
+}
/*
* Account user cpu time to a process.
@@ -2864,7 +2900,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
- runqueue_t *rq = this_rq();
+ struct rq *rq = this_rq();
cputime64_t tmp;
p->stime = cputime_add(p->stime, cputime);
@@ -2894,7 +2930,7 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
{
struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
cputime64_t tmp = cputime_to_cputime64(steal);
- runqueue_t *rq = this_rq();
+ struct rq *rq = this_rq();
if (p == rq->idle) {
p->stime = cputime_add(p->stime, steal);
@@ -2915,10 +2951,10 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
*/
void scheduler_tick(void)
{
- int cpu = smp_processor_id();
- runqueue_t *rq = this_rq();
- task_t *p = current;
unsigned long long now = sched_clock();
+ struct task_struct *p = current;
+ int cpu = smp_processor_id();
+ struct rq *rq = cpu_rq(cpu);
update_cpu_clock(p, rq, now);
@@ -2968,7 +3004,7 @@ void scheduler_tick(void)
if (!rq->expired_timestamp)
rq->expired_timestamp = jiffies;
- if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
+ if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
enqueue_task(p, rq->expired);
if (p->static_prio < rq->best_expired_prio)
rq->best_expired_prio = p->static_prio;
@@ -3007,7 +3043,7 @@ out:
}
#ifdef CONFIG_SCHED_SMT
-static inline void wakeup_busy_runqueue(runqueue_t *rq)
+static inline void wakeup_busy_runqueue(struct rq *rq)
{
/* If an SMT runqueue is sleeping due to priority reasons wake it up */
if (rq->curr == rq->idle && rq->nr_running)
@@ -3033,7 +3069,7 @@ static void wake_sleeping_dependent(int this_cpu)
return;
for_each_cpu_mask(i, sd->span) {
- runqueue_t *smt_rq = cpu_rq(i);
+ struct rq *smt_rq = cpu_rq(i);
if (i == this_cpu)
continue;
@@ -3050,7 +3086,8 @@ static void wake_sleeping_dependent(int this_cpu)
* utilize, if another task runs on a sibling. This models the
* slowdown effect of other tasks running on siblings:
*/
-static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
+static inline unsigned long
+smt_slice(struct task_struct *p, struct sched_domain *sd)
{
return p->time_slice * (100 - sd->per_cpu_gain) / 100;
}
@@ -3061,7 +3098,8 @@ static inline unsigned long smt_slice(task_t *p, struct sched_domain *sd)
* acquire their lock. As we only trylock the normal locking order does not
* need to be obeyed.
*/
-static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
+static int
+dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
{
struct sched_domain *tmp, *sd = NULL;
int ret = 0, i;
@@ -3081,8 +3119,8 @@ static int dependent_sleeper(int this_cpu, runqueue_t *this_rq, task_t *p)
return 0;
for_each_cpu_mask(i, sd->span) {
- runqueue_t *smt_rq;
- task_t *smt_curr;
+ struct task_struct *smt_curr;
+ struct rq *smt_rq;
if (i == this_cpu)
continue;
@@ -3127,9 +3165,8 @@ unlock:
static inline void wake_sleeping_dependent(int this_cpu)
{
}
-
-static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq,
- task_t *p)
+static inline int
+dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
{
return 0;
}
@@ -3142,12 +3179,13 @@ void fastcall add_preempt_count(int val)
/*
* Underflow?
*/
- BUG_ON((preempt_count() < 0));
+ if (DEBUG_LOCKS_WARN_ON((preempt_count() < 0)))
+ return;
preempt_count() += val;
/*
* Spinlock count overflowing soon?
*/
- BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+ DEBUG_LOCKS_WARN_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
}
EXPORT_SYMBOL(add_preempt_count);
@@ -3156,11 +3194,15 @@ void fastcall sub_preempt_count(int val)
/*
* Underflow?
*/
- BUG_ON(val > preempt_count());
+ if (DEBUG_LOCKS_WARN_ON(val > preempt_count()))
+ return;
/*
* Is the spinlock portion underflowing?
*/
- BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK));
+ if (DEBUG_LOCKS_WARN_ON((val < PREEMPT_MASK) &&
+ !(preempt_count() & PREEMPT_MASK)))
+ return;
+
preempt_count() -= val;
}
EXPORT_SYMBOL(sub_preempt_count);
@@ -3178,14 +3220,14 @@ static inline int interactive_sleep(enum sleep_type sleep_type)
*/
asmlinkage void __sched schedule(void)
{
- long *switch_count;
- task_t *prev, *next;
- runqueue_t *rq;
- prio_array_t *array;
+ struct task_struct *prev, *next;
+ struct prio_array *array;
struct list_head *queue;
unsigned long long now;
unsigned long run_time;
int cpu, idx, new_prio;
+ long *switch_count;
+ struct rq *rq;
/*
* Test if we are atomic. Since do_exit() needs to call into
@@ -3275,7 +3317,7 @@ need_resched_nonpreemptible:
idx = sched_find_first_bit(array->bitmap);
queue = array->queue + idx;
- next = list_entry(queue->next, task_t, run_list);
+ next = list_entry(queue->next, struct task_struct, run_list);
if (!rt_task(next) && interactive_sleep(next->sleep_type)) {
unsigned long long delta = now - next->timestamp;
@@ -3338,7 +3380,6 @@ switch_tasks:
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
}
-
EXPORT_SYMBOL(schedule);
#ifdef CONFIG_PREEMPT
@@ -3383,7 +3424,6 @@ need_resched:
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;
}
-
EXPORT_SYMBOL(preempt_schedule);
/*
@@ -3432,10 +3472,8 @@ need_resched:
int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
void *key)
{
- task_t *p = curr->private;
- return try_to_wake_up(p, mode, sync);
+ return try_to_wake_up(curr->private, mode, sync);
}
-
EXPORT_SYMBOL(default_wake_function);
/*
@@ -3453,13 +3491,11 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
struct list_head *tmp, *next;
list_for_each_safe(tmp, next, &q->task_list) {
- wait_queue_t *curr;
- unsigned flags;
- curr = list_entry(tmp, wait_queue_t, task_list);
- flags = curr->flags;
+ wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
+ unsigned flags = curr->flags;
+
if (curr->func(curr, mode, sync, key) &&
- (flags & WQ_FLAG_EXCLUSIVE) &&
- !--nr_exclusive)
+ (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
break;
}
}
@@ -3480,7 +3516,6 @@ void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
__wake_up_common(q, mode, nr_exclusive, 0, key);
spin_unlock_irqrestore(&q->lock, flags);
}
-
EXPORT_SYMBOL(__wake_up);
/*
@@ -3549,6 +3584,7 @@ EXPORT_SYMBOL(complete_all);
void fastcall __sched wait_for_completion(struct completion *x)
{
might_sleep();
+
spin_lock_irq(&x->wait.lock);
if (!x->done) {
DECLARE_WAITQUEUE(wait, current);
@@ -3693,7 +3729,6 @@ void fastcall __sched interruptible_sleep_on(wait_queue_head_t *q)
schedule();
SLEEP_ON_TAIL
}
-
EXPORT_SYMBOL(interruptible_sleep_on);
long fastcall __sched
@@ -3709,7 +3744,6 @@ interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
return timeout;
}
-
EXPORT_SYMBOL(interruptible_sleep_on_timeout);
void fastcall __sched sleep_on(wait_queue_head_t *q)
@@ -3722,7 +3756,6 @@ void fastcall __sched sleep_on(wait_queue_head_t *q)
schedule();
SLEEP_ON_TAIL
}
-
EXPORT_SYMBOL(sleep_on);
long fastcall __sched sleep_on_timeout(wait_queue_head_t *q, long timeout)
@@ -3752,11 +3785,11 @@ EXPORT_SYMBOL(sleep_on_timeout);
*
* Used by the rt_mutex code to implement priority inheritance logic.
*/
-void rt_mutex_setprio(task_t *p, int prio)
+void rt_mutex_setprio(struct task_struct *p, int prio)
{
+ struct prio_array *array;
unsigned long flags;
- prio_array_t *array;
- runqueue_t *rq;
+ struct rq *rq;
int oldprio;
BUG_ON(prio < 0 || prio > MAX_PRIO);
@@ -3793,12 +3826,12 @@ void rt_mutex_setprio(task_t *p, int prio)
#endif
-void set_user_nice(task_t *p, long nice)
+void set_user_nice(struct task_struct *p, long nice)
{
- unsigned long flags;
- prio_array_t *array;
- runqueue_t *rq;
+ struct prio_array *array;
int old_prio, delta;
+ unsigned long flags;
+ struct rq *rq;
if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
return;
@@ -3849,10 +3882,11 @@ EXPORT_SYMBOL(set_user_nice);
* @p: task
* @nice: nice value
*/
-int can_nice(const task_t *p, const int nice)
+int can_nice(const struct task_struct *p, const int nice)
{
/* convert nice value [19,-20] to rlimit style value [1,40] */
int nice_rlim = 20 - nice;
+
return (nice_rlim <= p->signal->rlim[RLIMIT_NICE].rlim_cur ||
capable(CAP_SYS_NICE));
}
@@ -3868,8 +3902,7 @@ int can_nice(const task_t *p, const int nice)
*/
asmlinkage long sys_nice(int increment)
{
- int retval;
- long nice;
+ long nice, retval;
/*
* Setpriority might change our priority at the same moment.
@@ -3908,7 +3941,7 @@ asmlinkage long sys_nice(int increment)
* RT tasks are offset by -200. Normal tasks are centered
* around 0, value goes from -16 to +15.
*/
-int task_prio(const task_t *p)
+int task_prio(const struct task_struct *p)
{
return p->prio - MAX_RT_PRIO;
}
@@ -3917,7 +3950,7 @@ int task_prio(const task_t *p)
* task_nice - return the nice value of a given task.
* @p: the task in question.
*/
-int task_nice(const task_t *p)
+int task_nice(const struct task_struct *p)
{
return TASK_NICE(p);
}
@@ -3936,7 +3969,7 @@ int idle_cpu(int cpu)
* idle_task - return the idle task for a given cpu.
* @cpu: the processor in question.
*/
-task_t *idle_task(int cpu)
+struct task_struct *idle_task(int cpu)
{
return cpu_rq(cpu)->idle;
}
@@ -3945,7 +3978,7 @@ task_t *idle_task(int cpu)
* find_process_by_pid - find a process with a matching PID value.
* @pid: the pid in question.
*/
-static inline task_t *find_process_by_pid(pid_t pid)
+static inline struct task_struct *find_process_by_pid(pid_t pid)
{
return pid ? find_task_by_pid(pid) : current;
}
@@ -3954,6 +3987,7 @@ static inline task_t *find_process_by_pid(pid_t pid)
static void __setscheduler(struct task_struct *p, int policy, int prio)
{
BUG_ON(p->array);
+
p->policy = policy;
p->rt_priority = prio;
p->normal_prio = normal_prio(p);
@@ -3977,11 +4011,10 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
int sched_setscheduler(struct task_struct *p, int policy,
struct sched_param *param)
{
- int retval;
- int oldprio, oldpolicy = -1;
- prio_array_t *array;
+ int retval, oldprio, oldpolicy = -1;
+ struct prio_array *array;
unsigned long flags;
- runqueue_t *rq;
+ struct rq *rq;
/* may grab non-irq protected spin_locks */
BUG_ON(in_interrupt());
@@ -4079,9 +4112,9 @@ EXPORT_SYMBOL_GPL(sched_setscheduler);
static int
do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
{
- int retval;
struct sched_param lparam;
struct task_struct *p;
+ int retval;
if (!param || pid < 0)
return -EINVAL;
@@ -4097,6 +4130,7 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
read_unlock_irq(&tasklist_lock);
retval = sched_setscheduler(p, policy, &lparam);
put_task_struct(p);
+
return retval;
}
@@ -4132,8 +4166,8 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
*/
asmlinkage long sys_sched_getscheduler(pid_t pid)
{
+ struct task_struct *p;
int retval = -EINVAL;
- task_t *p;
if (pid < 0)
goto out_nounlock;
@@ -4160,8 +4194,8 @@ out_nounlock:
asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param)
{
struct sched_param lp;
+ struct task_struct *p;
int retval = -EINVAL;
- task_t *p;
if (!param || pid < 0)
goto out_nounlock;
@@ -4194,9 +4228,9 @@ out_unlock:
long sched_setaffinity(pid_t pid, cpumask_t new_mask)
{
- task_t *p;
- int retval;
cpumask_t cpus_allowed;
+ struct task_struct *p;
+ int retval;
lock_cpu_hotplug();
read_lock(&tasklist_lock);
@@ -4282,8 +4316,8 @@ cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
long sched_getaffinity(pid_t pid, cpumask_t *mask)
{
+ struct task_struct *p;
int retval;
- task_t *p;
lock_cpu_hotplug();
read_lock(&tasklist_lock);
@@ -4342,9 +4376,8 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
*/
asmlinkage long sys_sched_yield(void)
{
- runqueue_t *rq = this_rq_lock();
- prio_array_t *array = current->array;
- prio_array_t *target = rq->expired;
+ struct rq *rq = this_rq_lock();
+ struct prio_array *array = current->array, *target = rq->expired;
schedstat_inc(rq, yld_cnt);
/*
@@ -4378,6 +4411,7 @@ asmlinkage long sys_sched_yield(void)
* no need to preempt or enable interrupts:
*/
__release(rq->lock);
+ spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
_raw_spin_unlock(&rq->lock);
preempt_enable_no_resched();
@@ -4386,7 +4420,16 @@ asmlinkage long sys_sched_yield(void)
return 0;
}
-static inline void __cond_resched(void)
+static inline int __resched_legal(void)
+{
+ if (unlikely(preempt_count()))
+ return 0;
+ if (unlikely(system_state != SYSTEM_RUNNING))
+ return 0;
+ return 1;
+}
+
+static void __cond_resched(void)
{
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
__might_sleep(__FILE__, __LINE__);
@@ -4396,10 +4439,6 @@ static inline void __cond_resched(void)
* PREEMPT_ACTIVE, which could trigger a second
* cond_resched() call.
*/
- if (unlikely(preempt_count()))
- return;
- if (unlikely(system_state != SYSTEM_RUNNING))
- return;
do {
add_preempt_count(PREEMPT_ACTIVE);
schedule();
@@ -4409,13 +4448,12 @@ static inline void __cond_resched(void)
int __sched cond_resched(void)
{
- if (need_resched()) {
+ if (need_resched() && __resched_legal()) {
__cond_resched();
return 1;
}
return 0;
}
-
EXPORT_SYMBOL(cond_resched);
/*
@@ -4436,7 +4474,8 @@ int cond_resched_lock(spinlock_t *lock)
ret = 1;
spin_lock(lock);
}
- if (need_resched()) {
+ if (need_resched() && __resched_legal()) {
+ spin_release(&lock->dep_map, 1, _THIS_IP_);
_raw_spin_unlock(lock);
preempt_enable_no_resched();
__cond_resched();
@@ -4445,25 +4484,24 @@ int cond_resched_lock(spinlock_t *lock)
}
return ret;
}
-
EXPORT_SYMBOL(cond_resched_lock);
int __sched cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
- if (need_resched()) {
- __local_bh_enable();
+ if (need_resched() && __resched_legal()) {
+ raw_local_irq_disable();
+ _local_bh_enable();
+ raw_local_irq_enable();
__cond_resched();
local_bh_disable();
return 1;
}
return 0;
}
-
EXPORT_SYMBOL(cond_resched_softirq);
-
/**
* yield - yield the current processor to other threads.
*
@@ -4475,7 +4513,6 @@ void __sched yield(void)
set_current_state(TASK_RUNNING);
sys_sched_yield();
}
-
EXPORT_SYMBOL(yield);
/*
@@ -4487,18 +4524,17 @@ EXPORT_SYMBOL(yield);
*/
void __sched io_schedule(void)
{
- struct runqueue *rq = &__raw_get_cpu_var(runqueues);
+ struct rq *rq = &__raw_get_cpu_var(runqueues);
atomic_inc(&rq->nr_iowait);
schedule();
atomic_dec(&rq->nr_iowait);
}
-
EXPORT_SYMBOL(io_schedule);
long __sched io_schedule_timeout(long timeout)
{
- struct runqueue *rq = &__raw_get_cpu_var(runqueues);
+ struct rq *rq = &__raw_get_cpu_var(runqueues);
long ret;
atomic_inc(&rq->nr_iowait);
@@ -4565,9 +4601,9 @@ asmlinkage long sys_sched_get_priority_min(int policy)
asmlinkage
long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval)
{
+ struct task_struct *p;
int retval = -EINVAL;
struct timespec t;
- task_t *p;
if (pid < 0)
goto out_nounlock;
@@ -4595,28 +4631,32 @@ out_unlock:
static inline struct task_struct *eldest_child(struct task_struct *p)
{
- if (list_empty(&p->children)) return NULL;
+ if (list_empty(&p->children))
+ return NULL;
return list_entry(p->children.next,struct task_struct,sibling);
}
static inline struct task_struct *older_sibling(struct task_struct *p)
{
- if (p->sibling.prev==&p->parent->children) return NULL;
+ if (p->sibling.prev==&p->parent->children)
+ return NULL;
return list_entry(p->sibling.prev,struct task_struct,sibling);
}
static inline struct task_struct *younger_sibling(struct task_struct *p)
{
- if (p->sibling.next==&p->parent->children) return NULL;
+ if (p->sibling.next==&p->parent->children)
+ return NULL;
return list_entry(p->sibling.next,struct task_struct,sibling);
}
-static void show_task(task_t *p)
+static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
+
+static void show_task(struct task_struct *p)
{
- task_t *relative;
- unsigned state;
+ struct task_struct *relative;
unsigned long free = 0;
- static const char *stat_nam[] = { "R", "S", "D", "T", "t", "Z", "X" };
+ unsigned state;
printk("%-13.13s ", p->comm);
state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4667,7 +4707,7 @@ static void show_task(task_t *p)
void show_state(void)
{
- task_t *g, *p;
+ struct task_struct *g, *p;
#if (BITS_PER_LONG == 32)
printk("\n"
@@ -4689,7 +4729,7 @@ void show_state(void)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
- mutex_debug_show_all_locks();
+ debug_show_all_locks();
}
/**
@@ -4700,9 +4740,9 @@ void show_state(void)
* NOTE: this function does not set the idle thread's NEED_RESCHED
* flag, to make booting more robust.
*/
-void __devinit init_idle(task_t *idle, int cpu)
+void __devinit init_idle(struct task_struct *idle, int cpu)
{
- runqueue_t *rq = cpu_rq(cpu);
+ struct rq *rq = cpu_rq(cpu);
unsigned long flags;
idle->timestamp = sched_clock();
@@ -4741,7 +4781,7 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
/*
* This is how migration works:
*
- * 1) we queue a migration_req_t structure in the source CPU's
+ * 1) we queue a struct migration_req structure in the source CPU's
* runqueue and wake up that CPU's migration thread.
* 2) we down() the locked semaphore => thread blocks.
* 3) migration thread wakes up (implicitly it forces the migrated
@@ -4763,12 +4803,12 @@ cpumask_t nohz_cpu_mask = CPU_MASK_NONE;
* task must not exit() & deallocate itself prematurely. The
* call is not atomic; no spinlocks may be held.
*/
-int set_cpus_allowed(task_t *p, cpumask_t new_mask)
+int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
{
+ struct migration_req req;
unsigned long flags;
+ struct rq *rq;
int ret = 0;
- migration_req_t req;
- runqueue_t *rq;
rq = task_rq_lock(p, &flags);
if (!cpus_intersects(new_mask, cpu_online_map)) {
@@ -4791,9 +4831,9 @@ int set_cpus_allowed(task_t *p, cpumask_t new_mask)
}
out:
task_rq_unlock(rq, &flags);
+
return ret;
}
-
EXPORT_SYMBOL_GPL(set_cpus_allowed);
/*
@@ -4809,7 +4849,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed);
*/
static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
{
- runqueue_t *rq_dest, *rq_src;
+ struct rq *rq_dest, *rq_src;
int ret = 0;
if (unlikely(cpu_is_offline(dest_cpu)))
@@ -4854,16 +4894,16 @@ out:
*/
static int migration_thread(void *data)
{
- runqueue_t *rq;
int cpu = (long)data;
+ struct rq *rq;
rq = cpu_rq(cpu);
BUG_ON(rq->migration_thread != current);
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
+ struct migration_req *req;
struct list_head *head;
- migration_req_t *req;
try_to_freeze();
@@ -4887,7 +4927,7 @@ static int migration_thread(void *data)
set_current_state(TASK_INTERRUPTIBLE);
continue;
}
- req = list_entry(head->next, migration_req_t, list);
+ req = list_entry(head->next, struct migration_req, list);
list_del_init(head->next);
spin_unlock(&rq->lock);
@@ -4912,28 +4952,28 @@ wait_to_die:
#ifdef CONFIG_HOTPLUG_CPU
/* Figure out where task on dead CPU should go, use force if neccessary. */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *tsk)
+static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
{
- runqueue_t *rq;
unsigned long flags;
- int dest_cpu;
cpumask_t mask;
+ struct rq *rq;
+ int dest_cpu;
restart:
/* On same node? */
mask = node_to_cpumask(cpu_to_node(dead_cpu));
- cpus_and(mask, mask, tsk->cpus_allowed);
+ cpus_and(mask, mask, p->cpus_allowed);
dest_cpu = any_online_cpu(mask);
/* On any allowed CPU? */
if (dest_cpu == NR_CPUS)
- dest_cpu = any_online_cpu(tsk->cpus_allowed);
+ dest_cpu = any_online_cpu(p->cpus_allowed);
/* No more Mr. Nice Guy. */
if (dest_cpu == NR_CPUS) {
- rq = task_rq_lock(tsk, &flags);
- cpus_setall(tsk->cpus_allowed);
- dest_cpu = any_online_cpu(tsk->cpus_allowed);
+ rq = task_rq_lock(p, &flags);
+ cpus_setall(p->cpus_allowed);
+ dest_cpu = any_online_cpu(p->cpus_allowed);
task_rq_unlock(rq, &flags);
/*
@@ -4941,12 +4981,12 @@ restart:
* kernel threads (both mm NULL), since they never
* leave kernel.
*/
- if (tsk->mm && printk_ratelimit())
+ if (p->mm && printk_ratelimit())
printk(KERN_INFO "process %d (%s) no "
"longer affine to cpu%d\n",
- tsk->pid, tsk->comm, dead_cpu);
+ p->pid, p->comm, dead_cpu);
}
- if (!__migrate_task(tsk, dead_cpu, dest_cpu))
+ if (!__migrate_task(p, dead_cpu, dest_cpu))
goto restart;
}
@@ -4957,9 +4997,9 @@ restart:
* their home CPUs. So we just add the counter to another CPU's counter,
* to keep the global sum constant after CPU-down:
*/
-static void migrate_nr_uninterruptible(runqueue_t *rq_src)
+static void migrate_nr_uninterruptible(struct rq *rq_src)
{
- runqueue_t *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
+ struct rq *rq_dest = cpu_rq(any_online_cpu(CPU_MASK_ALL));
unsigned long flags;
local_irq_save(flags);
@@ -4973,48 +5013,51 @@ static void migrate_nr_uninterruptible(runqueue_t *rq_src)
/* Run through task list and migrate tasks from the dead cpu. */
static void migrate_live_tasks(int src_cpu)
{
- struct task_struct *tsk, *t;
+ struct task_struct *p, *t;
write_lock_irq(&tasklist_lock);
- do_each_thread(t, tsk) {
- if (tsk == current)
+ do_each_thread(t, p) {
+ if (p == current)
continue;
- if (task_cpu(tsk) == src_cpu)
- move_task_off_dead_cpu(src_cpu, tsk);
- } while_each_thread(t, tsk);
+ if (task_cpu(p) == src_cpu)
+ move_task_off_dead_cpu(src_cpu, p);
+ } while_each_thread(t, p);
write_unlock_irq(&tasklist_lock);
}
/* Schedules idle task to be the next runnable task on current CPU.
* It does so by boosting its priority to highest possible and adding it to
- * the _front_ of runqueue. Used by CPU offline code.
+ * the _front_ of the runqueue. Used by CPU offline code.
*/
void sched_idle_next(void)
{
- int cpu = smp_processor_id();
- runqueue_t *rq = this_rq();
+ int this_cpu = smp_processor_id();
+ struct rq *rq = cpu_rq(this_cpu);
struct task_struct *p = rq->idle;
unsigned long flags;
/* cpu has to be offline */
- BUG_ON(cpu_online(cpu));
+ BUG_ON(cpu_online(this_cpu));
- /* Strictly not necessary since rest of the CPUs are stopped by now
- * and interrupts disabled on current cpu.
+ /*
+ * Strictly not necessary since rest of the CPUs are stopped by now
+ * and interrupts disabled on the current cpu.
*/
spin_lock_irqsave(&rq->lock, flags);
__setscheduler(p, SCHED_FIFO, MAX_RT_PRIO-1);
- /* Add idle task to _front_ of it's priority queue */
+
+ /* Add idle task to the _front_ of its priority queue: */
__activate_idle_task(p, rq);
spin_unlock_irqrestore(&rq->lock, flags);
}
-/* Ensures that the idle task is using init_mm right before its cpu goes
+/*
+ * Ensures that the idle task is using init_mm right before its cpu goes
* offline.
*/
void idle_task_exit(void)
@@ -5028,17 +5071,17 @@ void idle_task_exit(void)
mmdrop(mm);
}
-static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
+static void migrate_dead(unsigned int dead_cpu, struct task_struct *p)
{
- struct runqueue *rq = cpu_rq(dead_cpu);
+ struct rq *rq = cpu_rq(dead_cpu);
/* Must be exiting, otherwise would be on tasklist. */
- BUG_ON(tsk->exit_state != EXIT_ZOMBIE && tsk->exit_state != EXIT_DEAD);
+ BUG_ON(p->exit_state != EXIT_ZOMBIE && p->exit_state != EXIT_DEAD);
/* Cannot have done final schedule yet: would have vanished. */
- BUG_ON(tsk->flags & PF_DEAD);
+ BUG_ON(p->flags & PF_DEAD);
- get_task_struct(tsk);
+ get_task_struct(p);
/*
* Drop lock around migration; if someone else moves it,
@@ -5046,25 +5089,25 @@ static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
* fine.
*/
spin_unlock_irq(&rq->lock);
- move_task_off_dead_cpu(dead_cpu, tsk);
+ move_task_off_dead_cpu(dead_cpu, p);
spin_lock_irq(&rq->lock);
- put_task_struct(tsk);
+ put_task_struct(p);
}
/* release_task() removes task from tasklist, so we won't find dead tasks. */
static void migrate_dead_tasks(unsigned int dead_cpu)
{
- unsigned arr, i;
- struct runqueue *rq = cpu_rq(dead_cpu);
+ struct rq *rq = cpu_rq(dead_cpu);
+ unsigned int arr, i;
for (arr = 0; arr < 2; arr++) {
for (i = 0; i < MAX_PRIO; i++) {
struct list_head *list = &rq->arrays[arr].queue[i];
+
while (!list_empty(list))
- migrate_dead(dead_cpu,
- list_entry(list->next, task_t,
- run_list));
+ migrate_dead(dead_cpu, list_entry(list->next,
+ struct task_struct, run_list));
}
}
}
@@ -5074,14 +5117,13 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
* migration_call - callback that gets triggered when a CPU is added.
* Here we can start up the necessary migration thread for the new CPU.
*/
-static int __cpuinit migration_call(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+static int __cpuinit
+migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
- int cpu = (long)hcpu;
struct task_struct *p;
- struct runqueue *rq;
+ int cpu = (long)hcpu;
unsigned long flags;
+ struct rq *rq;
switch (action) {
case CPU_UP_PREPARE:
@@ -5096,10 +5138,12 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
task_rq_unlock(rq, &flags);
cpu_rq(cpu)->migration_thread = p;
break;
+
case CPU_ONLINE:
/* Strictly unneccessary, as first user will wake it. */
wake_up_process(cpu_rq(cpu)->migration_thread);
break;
+
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
if (!cpu_rq(cpu)->migration_thread)
@@ -5110,6 +5154,7 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
kthread_stop(cpu_rq(cpu)->migration_thread);
cpu_rq(cpu)->migration_thread = NULL;
break;
+
case CPU_DEAD:
migrate_live_tasks(cpu);
rq = cpu_rq(cpu);
@@ -5130,9 +5175,10 @@ static int __cpuinit migration_call(struct notifier_block *nfb,
* the requestors. */
spin_lock_irq(&rq->lock);
while (!list_empty(&rq->migration_queue)) {
- migration_req_t *req;
+ struct migration_req *req;
+
req = list_entry(rq->migration_queue.next,
- migration_req_t, list);
+ struct migration_req, list);
list_del_init(&req->list);
complete(&req->done);
}
@@ -5154,10 +5200,12 @@ static struct notifier_block __cpuinitdata migration_notifier = {
int __init migration_init(void)
{
void *cpu = (void *)(long)smp_processor_id();
- /* Start one for boot CPU. */
+
+ /* Start one for the boot CPU: */
migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
migration_call(&migration_notifier, CPU_ONLINE, cpu);
register_cpu_notifier(&migration_notifier);
+
return 0;
}
#endif
@@ -5253,7 +5301,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
} while (sd);
}
#else
-#define sched_domain_debug(sd, cpu) {}
+# define sched_domain_debug(sd, cpu) do { } while (0)
#endif
static int sd_degenerate(struct sched_domain *sd)
@@ -5279,8 +5327,8 @@ static int sd_degenerate(struct sched_domain *sd)
return 1;
}
-static int sd_parent_degenerate(struct sched_domain *sd,
- struct sched_domain *parent)
+static int
+sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
{
unsigned long cflags = sd->flags, pflags = parent->flags;
@@ -5313,7 +5361,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
*/
static void cpu_attach_domain(struct sched_domain *sd, int cpu)
{
- runqueue_t *rq = cpu_rq(cpu);
+ struct rq *rq = cpu_rq(cpu);
struct sched_domain *tmp;
/* Remove the sched domains which do not contribute to scheduling. */
@@ -5575,8 +5623,8 @@ static void touch_cache(void *__cache, unsigned long __size)
/*
* Measure the cache-cost of one task migration. Returns in units of nsec.
*/
-static unsigned long long measure_one(void *cache, unsigned long size,
- int source, int target)
+static unsigned long long
+measure_one(void *cache, unsigned long size, int source, int target)
{
cpumask_t mask, saved_mask;
unsigned long long t0, t1, t2, t3, cost;
@@ -5926,9 +5974,9 @@ static int find_next_best_node(int node, unsigned long *used_nodes)
*/
static cpumask_t sched_domain_node_span(int node)
{
- int i;
- cpumask_t span, nodemask;
DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+ cpumask_t span, nodemask;
+ int i;
cpus_clear(span);
bitmap_zero(used_nodes, MAX_NUMNODES);
@@ -5939,6 +5987,7 @@ static cpumask_t sched_domain_node_span(int node)
for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
int next_node = find_next_best_node(node, used_nodes);
+
nodemask = node_to_cpumask(next_node);
cpus_or(span, span, nodemask);
}
@@ -5948,19 +5997,23 @@ static cpumask_t sched_domain_node_span(int node)
#endif
int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
+
/*
- * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
- * can switch it on easily if needed.
+ * SMT sched-domains:
*/
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
+
static int cpu_to_cpu_group(int cpu)
{
return cpu;
}
#endif
+/*
+ * multi-core sched-domains:
+ */
#ifdef CONFIG_SCHED_MC
static DEFINE_PER_CPU(struct sched_domain, core_domains);
static struct sched_group *sched_group_core_bycpu[NR_CPUS];
@@ -5980,9 +6033,10 @@ static int cpu_to_core_group(int cpu)
static DEFINE_PER_CPU(struct sched_domain, phys_domains);
static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
+
static int cpu_to_phys_group(int cpu)
{
-#if defined(CONFIG_SCHED_MC)
+#ifdef CONFIG_SCHED_MC
cpumask_t mask = cpu_coregroup_map(cpu);
return first_cpu(mask);
#elif defined(CONFIG_SCHED_SMT)
@@ -6528,6 +6582,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
{
int err = 0;
+
#ifdef CONFIG_SCHED_SMT
if (smt_capable())
err = sysfs_create_file(&cls->kset.kobj,
@@ -6547,7 +6602,8 @@ static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
{
return sprintf(page, "%u\n", sched_mc_power_savings);
}
-static ssize_t sched_mc_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
+static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
+ const char *buf, size_t count)
{
return sched_power_savings_store(buf, count, 0);
}
@@ -6560,7 +6616,8 @@ static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
{
return sprintf(page, "%u\n", sched_smt_power_savings);
}
-static ssize_t sched_smt_power_savings_store(struct sys_device *dev, const char *buf, size_t count)
+static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
+ const char *buf, size_t count)
{
return sched_power_savings_store(buf, count, 1);
}
@@ -6622,6 +6679,7 @@ int in_sched_functions(unsigned long addr)
{
/* Linker adds these: start and end of __sched functions */
extern char __sched_text_start[], __sched_text_end[];
+
return in_lock_functions(addr) ||
(addr >= (unsigned long)__sched_text_start
&& addr < (unsigned long)__sched_text_end);
@@ -6629,14 +6687,15 @@ int in_sched_functions(unsigned long addr)
void __init sched_init(void)
{
- runqueue_t *rq;
int i, j, k;
for_each_possible_cpu(i) {
- prio_array_t *array;
+ struct prio_array *array;
+ struct rq *rq;
rq = cpu_rq(i);
spin_lock_init(&rq->lock);
+ lockdep_set_class(&rq->lock, &rq->rq_lock_key);
rq->nr_running = 0;
rq->active = rq->arrays;
rq->expired = rq->arrays + 1;
@@ -6683,7 +6742,7 @@ void __init sched_init(void)
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
void __might_sleep(char *file, int line)
{
-#if defined(in_atomic)
+#ifdef in_atomic
static unsigned long prev_jiffy; /* ratelimiting */
if ((in_atomic() || irqs_disabled()) &&
@@ -6705,10 +6764,10 @@ EXPORT_SYMBOL(__might_sleep);
#ifdef CONFIG_MAGIC_SYSRQ
void normalize_rt_tasks(void)
{
+ struct prio_array *array;
struct task_struct *p;
- prio_array_t *array;
unsigned long flags;
- runqueue_t *rq;
+ struct rq *rq;
read_lock_irq(&tasklist_lock);
for_each_process(p) {
@@ -6752,7 +6811,7 @@ void normalize_rt_tasks(void)
*
* ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
*/
-task_t *curr_task(int cpu)
+struct task_struct *curr_task(int cpu)
{
return cpu_curr(cpu);
}
@@ -6772,7 +6831,7 @@ task_t *curr_task(int cpu)
*
* ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
*/
-void set_curr_task(int cpu, task_t *p)
+void set_curr_task(int cpu, struct task_struct *p)
{
cpu_curr(cpu) = p;
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 52adf53929f..7fe874d12fa 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -10,7 +10,6 @@
* to allow signals to be sent reliably.
*/
-#include <linux/config.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/smp_lock.h>
@@ -584,7 +583,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
&& !capable(CAP_KILL))
return error;
- error = security_task_kill(t, info, sig);
+ error = security_task_kill(t, info, sig, 0);
if (!error)
audit_signal_info(sig, t); /* Let audit system see the signal */
return error;
@@ -1107,7 +1106,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
/* like kill_proc_info(), but doesn't use uid/euid of "current" */
int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
- uid_t uid, uid_t euid)
+ uid_t uid, uid_t euid, u32 secid)
{
int ret = -EINVAL;
struct task_struct *p;
@@ -1127,6 +1126,9 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
ret = -EPERM;
goto out_unlock;
}
+ ret = security_task_kill(p, info, sig, secid);
+ if (ret)
+ goto out_unlock;
if (sig && p->sighand) {
unsigned long flags;
spin_lock_irqsave(&p->sighand->siglock, flags);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8f03e3b89b5..215541e26c1 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -62,6 +62,119 @@ static inline void wakeup_softirqd(void)
}
/*
+ * This one is for softirq.c-internal use,
+ * where hardirqs are disabled legitimately:
+ */
+static void __local_bh_disable(unsigned long ip)
+{
+ unsigned long flags;
+
+ WARN_ON_ONCE(in_irq());
+
+ raw_local_irq_save(flags);
+ add_preempt_count(SOFTIRQ_OFFSET);
+ /*
+ * Were softirqs turned off above:
+ */
+ if (softirq_count() == SOFTIRQ_OFFSET)
+ trace_softirqs_off(ip);
+ raw_local_irq_restore(flags);
+}
+
+void local_bh_disable(void)
+{
+ __local_bh_disable((unsigned long)__builtin_return_address(0));
+}
+
+EXPORT_SYMBOL(local_bh_disable);
+
+void __local_bh_enable(void)
+{
+ WARN_ON_ONCE(in_irq());
+
+ /*
+ * softirqs should never be enabled by __local_bh_enable(),
+ * it always nests inside local_bh_enable() sections:
+ */
+ WARN_ON_ONCE(softirq_count() == SOFTIRQ_OFFSET);
+
+ sub_preempt_count(SOFTIRQ_OFFSET);
+}
+EXPORT_SYMBOL_GPL(__local_bh_enable);
+
+/*
+ * Special-case - softirqs can safely be enabled in
+ * cond_resched_softirq(), or by __do_softirq(),
+ * without processing still-pending softirqs:
+ */
+void _local_bh_enable(void)
+{
+ WARN_ON_ONCE(in_irq());
+ WARN_ON_ONCE(!irqs_disabled());
+
+ if (softirq_count() == SOFTIRQ_OFFSET)
+ trace_softirqs_on((unsigned long)__builtin_return_address(0));
+ sub_preempt_count(SOFTIRQ_OFFSET);
+}
+
+EXPORT_SYMBOL(_local_bh_enable);
+
+void local_bh_enable(void)
+{
+ unsigned long flags;
+
+ WARN_ON_ONCE(in_irq());
+ WARN_ON_ONCE(irqs_disabled());
+
+ local_irq_save(flags);
+ /*
+ * Are softirqs going to be turned on now:
+ */
+ if (softirq_count() == SOFTIRQ_OFFSET)
+ trace_softirqs_on((unsigned long)__builtin_return_address(0));
+ /*
+ * Keep preemption disabled until we are done with
+ * softirq processing:
+ */
+ sub_preempt_count(SOFTIRQ_OFFSET - 1);
+
+ if (unlikely(!in_interrupt() && local_softirq_pending()))
+ do_softirq();
+
+ dec_preempt_count();
+ local_irq_restore(flags);
+ preempt_check_resched();
+}
+EXPORT_SYMBOL(local_bh_enable);
+
+void local_bh_enable_ip(unsigned long ip)
+{
+ unsigned long flags;
+
+ WARN_ON_ONCE(in_irq());
+
+ local_irq_save(flags);
+ /*
+ * Are softirqs going to be turned on now:
+ */
+ if (softirq_count() == SOFTIRQ_OFFSET)
+ trace_softirqs_on(ip);
+ /*
+ * Keep preemption disabled until we are done with
+ * softirq processing:
+ */
+ sub_preempt_count(SOFTIRQ_OFFSET - 1);
+
+ if (unlikely(!in_interrupt() && local_softirq_pending()))
+ do_softirq();
+
+ dec_preempt_count();
+ local_irq_restore(flags);
+ preempt_check_resched();
+}
+EXPORT_SYMBOL(local_bh_enable_ip);
+
+/*
* We restart softirq processing MAX_SOFTIRQ_RESTART times,
* and we fall back to softirqd after that.
*
@@ -80,8 +193,11 @@ asmlinkage void __do_softirq(void)
int cpu;
pending = local_softirq_pending();
+ account_system_vtime(current);
+
+ __local_bh_disable((unsigned long)__builtin_return_address(0));
+ trace_softirq_enter();
- local_bh_disable();
cpu = smp_processor_id();
restart:
/* Reset the pending bitmask before enabling irqs */
@@ -109,7 +225,10 @@ restart:
if (pending)
wakeup_softirqd();
- __local_bh_enable();
+ trace_softirq_exit();
+
+ account_system_vtime(current);
+ _local_bh_enable();
}
#ifndef __ARCH_HAS_DO_SOFTIRQ
@@ -136,23 +255,6 @@ EXPORT_SYMBOL(do_softirq);
#endif
-void local_bh_enable(void)
-{
- WARN_ON(irqs_disabled());
- /*
- * Keep preemption disabled until we are done with
- * softirq processing:
- */
- sub_preempt_count(SOFTIRQ_OFFSET - 1);
-
- if (unlikely(!in_interrupt() && local_softirq_pending()))
- do_softirq();
-
- dec_preempt_count();
- preempt_check_resched();
-}
-EXPORT_SYMBOL(local_bh_enable);
-
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq() __do_softirq()
#else
@@ -165,6 +267,7 @@ EXPORT_SYMBOL(local_bh_enable);
void irq_exit(void)
{
account_system_vtime(current);
+ trace_hardirq_exit();
sub_preempt_count(IRQ_EXIT_OFFSET);
if (!in_interrupt() && local_softirq_pending())
invoke_softirq();
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index d1b810782bc..bfd6ad9c033 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -9,11 +9,11 @@
* SMP and the DEBUG_SPINLOCK cases. (UP-nondebug inlines them)
*/
-#include <linux/config.h>
#include <linux/linkage.h>
#include <linux/preempt.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
+#include <linux/debug_locks.h>
#include <linux/module.h>
/*
@@ -30,8 +30,10 @@ EXPORT_SYMBOL(generic__raw_read_trylock);
int __lockfunc _spin_trylock(spinlock_t *lock)
{
preempt_disable();
- if (_raw_spin_trylock(lock))
+ if (_raw_spin_trylock(lock)) {
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
return 1;
+ }
preempt_enable();
return 0;
@@ -41,8 +43,10 @@ EXPORT_SYMBOL(_spin_trylock);
int __lockfunc _read_trylock(rwlock_t *lock)
{
preempt_disable();
- if (_raw_read_trylock(lock))
+ if (_raw_read_trylock(lock)) {
+ rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_);
return 1;
+ }
preempt_enable();
return 0;
@@ -52,19 +56,28 @@ EXPORT_SYMBOL(_read_trylock);
int __lockfunc _write_trylock(rwlock_t *lock)
{
preempt_disable();
- if (_raw_write_trylock(lock))
+ if (_raw_write_trylock(lock)) {
+ rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_);
return 1;
+ }
preempt_enable();
return 0;
}
EXPORT_SYMBOL(_write_trylock);
-#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP)
+/*
+ * If lockdep is enabled then we use the non-preemption spin-ops
+ * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are
+ * not re-enabled during lock-acquire (which the preempt-spin-ops do):
+ */
+#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \
+ defined(CONFIG_PROVE_LOCKING)
void __lockfunc _read_lock(rwlock_t *lock)
{
preempt_disable();
+ rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock);
@@ -75,7 +88,17 @@ unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
local_irq_save(flags);
preempt_disable();
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
+ /*
+ * On lockdep we dont want the hand-coded irq-enable of
+ * _raw_spin_lock_flags() code, because lockdep assumes
+ * that interrupts are not re-enabled during lock-acquire:
+ */
+#ifdef CONFIG_PROVE_LOCKING
+ _raw_spin_lock(lock);
+#else
_raw_spin_lock_flags(lock, &flags);
+#endif
return flags;
}
EXPORT_SYMBOL(_spin_lock_irqsave);
@@ -84,6 +107,7 @@ void __lockfunc _spin_lock_irq(spinlock_t *lock)
{
local_irq_disable();
preempt_disable();
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
_raw_spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock_irq);
@@ -92,6 +116,7 @@ void __lockfunc _spin_lock_bh(spinlock_t *lock)
{
local_bh_disable();
preempt_disable();
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
_raw_spin_lock(lock);
}
EXPORT_SYMBOL(_spin_lock_bh);
@@ -102,6 +127,7 @@ unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock)
local_irq_save(flags);
preempt_disable();
+ rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
_raw_read_lock(lock);
return flags;
}
@@ -111,6 +137,7 @@ void __lockfunc _read_lock_irq(rwlock_t *lock)
{
local_irq_disable();
preempt_disable();
+ rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock_irq);
@@ -119,6 +146,7 @@ void __lockfunc _read_lock_bh(rwlock_t *lock)
{
local_bh_disable();
preempt_disable();
+ rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_);
_raw_read_lock(lock);
}
EXPORT_SYMBOL(_read_lock_bh);
@@ -129,6 +157,7 @@ unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock)
local_irq_save(flags);
preempt_disable();
+ rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
_raw_write_lock(lock);
return flags;
}
@@ -138,6 +167,7 @@ void __lockfunc _write_lock_irq(rwlock_t *lock)
{
local_irq_disable();
preempt_disable();
+ rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
_raw_write_lock(lock);
}
EXPORT_SYMBOL(_write_lock_irq);
@@ -146,6 +176,7 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
{
local_bh_disable();
preempt_disable();
+ rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
_raw_write_lock(lock);
}
EXPORT_SYMBOL(_write_lock_bh);
@@ -153,6 +184,7 @@ EXPORT_SYMBOL(_write_lock_bh);
void __lockfunc _spin_lock(spinlock_t *lock)
{
preempt_disable();
+ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
_raw_spin_lock(lock);
}
@@ -161,6 +193,7 @@ EXPORT_SYMBOL(_spin_lock);
void __lockfunc _write_lock(rwlock_t *lock)
{
preempt_disable();
+ rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_);
_raw_write_lock(lock);
}
@@ -256,8 +289,22 @@ BUILD_LOCK_OPS(write, rwlock);
#endif /* CONFIG_PREEMPT */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass)
+{
+ preempt_disable();
+ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
+ _raw_spin_lock(lock);
+}
+
+EXPORT_SYMBOL(_spin_lock_nested);
+
+#endif
+
void __lockfunc _spin_unlock(spinlock_t *lock)
{
+ spin_release(&lock->dep_map, 1, _RET_IP_);
_raw_spin_unlock(lock);
preempt_enable();
}
@@ -265,6 +312,7 @@ EXPORT_SYMBOL(_spin_unlock);
void __lockfunc _write_unlock(rwlock_t *lock)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_write_unlock(lock);
preempt_enable();
}
@@ -272,6 +320,7 @@ EXPORT_SYMBOL(_write_unlock);
void __lockfunc _read_unlock(rwlock_t *lock)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_read_unlock(lock);
preempt_enable();
}
@@ -279,6 +328,7 @@ EXPORT_SYMBOL(_read_unlock);
void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
+ spin_release(&lock->dep_map, 1, _RET_IP_);
_raw_spin_unlock(lock);
local_irq_restore(flags);
preempt_enable();
@@ -287,6 +337,7 @@ EXPORT_SYMBOL(_spin_unlock_irqrestore);
void __lockfunc _spin_unlock_irq(spinlock_t *lock)
{
+ spin_release(&lock->dep_map, 1, _RET_IP_);
_raw_spin_unlock(lock);
local_irq_enable();
preempt_enable();
@@ -295,14 +346,16 @@ EXPORT_SYMBOL(_spin_unlock_irq);
void __lockfunc _spin_unlock_bh(spinlock_t *lock)
{
+ spin_release(&lock->dep_map, 1, _RET_IP_);
_raw_spin_unlock(lock);
preempt_enable_no_resched();
- local_bh_enable();
+ local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(_spin_unlock_bh);
void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_read_unlock(lock);
local_irq_restore(flags);
preempt_enable();
@@ -311,6 +364,7 @@ EXPORT_SYMBOL(_read_unlock_irqrestore);
void __lockfunc _read_unlock_irq(rwlock_t *lock)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_read_unlock(lock);
local_irq_enable();
preempt_enable();
@@ -319,14 +373,16 @@ EXPORT_SYMBOL(_read_unlock_irq);
void __lockfunc _read_unlock_bh(rwlock_t *lock)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_read_unlock(lock);
preempt_enable_no_resched();
- local_bh_enable();
+ local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(_read_unlock_bh);
void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_write_unlock(lock);
local_irq_restore(flags);
preempt_enable();
@@ -335,6 +391,7 @@ EXPORT_SYMBOL(_write_unlock_irqrestore);
void __lockfunc _write_unlock_irq(rwlock_t *lock)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_write_unlock(lock);
local_irq_enable();
preempt_enable();
@@ -343,9 +400,10 @@ EXPORT_SYMBOL(_write_unlock_irq);
void __lockfunc _write_unlock_bh(rwlock_t *lock)
{
+ rwlock_release(&lock->dep_map, 1, _RET_IP_);
_raw_write_unlock(lock);
preempt_enable_no_resched();
- local_bh_enable();
+ local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(_write_unlock_bh);
@@ -353,11 +411,13 @@ int __lockfunc _spin_trylock_bh(spinlock_t *lock)
{
local_bh_disable();
preempt_disable();
- if (_raw_spin_trylock(lock))
+ if (_raw_spin_trylock(lock)) {
+ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_);
return 1;
+ }
preempt_enable_no_resched();
- local_bh_enable();
+ local_bh_enable_ip((unsigned long)__builtin_return_address(0));
return 0;
}
EXPORT_SYMBOL(_spin_trylock_bh);
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
new file mode 100644
index 00000000000..b71816e47a3
--- /dev/null
+++ b/kernel/stacktrace.c
@@ -0,0 +1,24 @@
+/*
+ * kernel/stacktrace.c
+ *
+ * Stack trace management functions
+ *
+ * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ */
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <linux/stacktrace.h>
+
+void print_stack_trace(struct stack_trace *trace, int spaces)
+{
+ int i, j;
+
+ for (i = 0; i < trace->nr_entries; i++) {
+ unsigned long ip = trace->entries[i];
+
+ for (j = 0; j < spaces + 1; j++)
+ printk(" ");
+ print_ip_sym(ip);
+ }
+}
+
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2c0aacc37c5..dcfb5d73146 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -4,7 +4,6 @@
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/syscalls.h>
-#include <linux/kthread.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
#include <asm/uaccess.h>
@@ -26,11 +25,13 @@ static unsigned int stopmachine_num_threads;
static atomic_t stopmachine_thread_ack;
static DECLARE_MUTEX(stopmachine_mutex);
-static int stopmachine(void *unused)
+static int stopmachine(void *cpu)
{
int irqs_disabled = 0;
int prepared = 0;
+ set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
+
/* Ack: we are alive */
smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
atomic_inc(&stopmachine_thread_ack);
@@ -84,8 +85,7 @@ static void stopmachine_set_state(enum stopmachine_state state)
static int stop_machine(void)
{
- int ret = 0;
- unsigned int i;
+ int i, ret = 0;
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
/* One high-prio thread per cpu. We'll do this one. */
@@ -96,16 +96,11 @@ static int stop_machine(void)
stopmachine_state = STOPMACHINE_WAIT;
for_each_online_cpu(i) {
- struct task_struct *tsk;
if (i == raw_smp_processor_id())
continue;
- tsk = kthread_create(stopmachine, NULL, "stopmachine");
- if (IS_ERR(tsk)) {
- ret = PTR_ERR(tsk);
+ ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
+ if (ret < 0)
break;
- }
- kthread_bind(tsk, i);
- wake_up_process(tsk);
stopmachine_num_threads++;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 2d5179c67ce..dbb3b9c7ea6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -4,7 +4,6 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 93a2c539864..362a0cc3713 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -18,7 +18,6 @@
* Removed it and replaced it with older style, 03/23/00, Bill Wendling
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/swap.h>
@@ -934,13 +933,15 @@ static ctl_table vm_table[] = {
.extra1 = &zero,
},
{
- .ctl_name = VM_ZONE_RECLAIM_INTERVAL,
- .procname = "zone_reclaim_interval",
- .data = &zone_reclaim_interval,
- .maxlen = sizeof(zone_reclaim_interval),
+ .ctl_name = VM_MIN_UNMAPPED,
+ .procname = "min_unmapped_ratio",
+ .data = &sysctl_min_unmapped_ratio,
+ .maxlen = sizeof(sysctl_min_unmapped_ratio),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- .strategy = &sysctl_jiffies,
+ .proc_handler = &sysctl_min_unmapped_ratio_sysctl_handler,
+ .strategy = &sysctl_intvec,
+ .extra1 = &zero,
+ .extra2 = &one_hundred,
},
#endif
#ifdef CONFIG_X86_32
diff --git a/kernel/timer.c b/kernel/timer.c
index 5a896025306..396a3c024c2 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1208,7 +1208,7 @@ unsigned long wall_jiffies = INITIAL_JIFFIES;
* playing with xtime and avenrun.
*/
#ifndef ARCH_HAVE_XTIME_LOCK
-seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
EXPORT_SYMBOL(xtime_lock);
#endif
@@ -1368,7 +1368,7 @@ asmlinkage long sys_getegid(void)
static void process_timeout(unsigned long __data)
{
- wake_up_process((task_t *)__data);
+ wake_up_process((struct task_struct *)__data);
}
/**
@@ -1559,6 +1559,13 @@ asmlinkage long sys_sysinfo(struct sysinfo __user *info)
return 0;
}
+/*
+ * lockdep: we want to track each per-CPU base as a separate lock-class,
+ * but timer-bases are kmalloc()-ed, so we need to attach separate
+ * keys to them:
+ */
+static struct lock_class_key base_lock_keys[NR_CPUS];
+
static int __devinit init_timers_cpu(int cpu)
{
int j;
@@ -1594,6 +1601,8 @@ static int __devinit init_timers_cpu(int cpu)
}
spin_lock_init(&base->lock);
+ lockdep_set_class(&base->lock, base_lock_keys + cpu);
+
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
INIT_LIST_HEAD(base->tv4.vec + j);
diff --git a/kernel/wait.c b/kernel/wait.c
index 791681cfea9..a1d57aeb7f7 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -3,7 +3,6 @@
*
* (C) 2004 William Irwin, Oracle
*/
-#include <linux/config.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h>
@@ -11,6 +10,10 @@
#include <linux/wait.h>
#include <linux/hash.h>
+struct lock_class_key waitqueue_lock_key;
+
+EXPORT_SYMBOL(waitqueue_lock_key);
+
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
unsigned long flags;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 59f0b42bd89..90d2c600165 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -51,7 +51,7 @@ struct cpu_workqueue_struct {
wait_queue_head_t work_done;
struct workqueue_struct *wq;
- task_t *thread;
+ struct task_struct *thread;
int run_depth; /* Detect run_workqueue() recursion depth */
} ____cacheline_aligned;