Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile              1
-rw-r--r--  kernel/audit.c             160
-rw-r--r--  kernel/audit.h              10
-rw-r--r--  kernel/auditfilter.c       289
-rw-r--r--  kernel/auditsc.c           269
-rw-r--r--  kernel/cpuset.c             25
-rw-r--r--  kernel/exit.c               11
-rw-r--r--  kernel/extable.c             2
-rw-r--r--  kernel/fork.c                2
-rw-r--r--  kernel/hrtimer.c            10
-rw-r--r--  kernel/intermodule.c       184
-rw-r--r--  kernel/irq/manage.c          6
-rw-r--r--  kernel/module.c             12
-rw-r--r--  kernel/posix-cpu-timers.c   48
-rw-r--r--  kernel/power/main.c          4
-rw-r--r--  kernel/printk.c             28
-rw-r--r--  kernel/profile.c             2
-rw-r--r--  kernel/ptrace.c             57
-rw-r--r--  kernel/rcupdate.c           23
-rw-r--r--  kernel/sched.c              64
-rw-r--r--  kernel/softirq.c             4
-rw-r--r--  kernel/softlockup.c          4
-rw-r--r--  kernel/timer.c              20
-rw-r--r--  kernel/workqueue.c           2
24 files changed, 726 insertions, 511 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 58908f9d156..f6ef00f4f90 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_SMP) += cpu.o spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
-obj-$(CONFIG_OBSOLETE_INTERMODULE) += intermodule.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
diff --git a/kernel/audit.c b/kernel/audit.c
index c8ccbd09048..df57b493e1c 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -55,6 +55,9 @@
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
+#include <linux/selinux.h>
+
+#include "audit.h"
/* No auditing will take place until audit_initialized != 0.
* (Initialization happens after skb_init is called.) */
@@ -227,49 +230,103 @@ void audit_log_lost(const char *message)
}
}
-static int audit_set_rate_limit(int limit, uid_t loginuid)
+static int audit_set_rate_limit(int limit, uid_t loginuid, u32 sid)
{
- int old = audit_rate_limit;
- audit_rate_limit = limit;
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ int old = audit_rate_limit;
+
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ int rc;
+ if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ return rc;
+ else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "audit_rate_limit=%d old=%d by auid=%u subj=%s",
+ limit, old, loginuid, ctx);
+ kfree(ctx);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
"audit_rate_limit=%d old=%d by auid=%u",
- audit_rate_limit, old, loginuid);
+ limit, old, loginuid);
+ audit_rate_limit = limit;
return old;
}
-static int audit_set_backlog_limit(int limit, uid_t loginuid)
+static int audit_set_backlog_limit(int limit, uid_t loginuid, u32 sid)
{
- int old = audit_backlog_limit;
- audit_backlog_limit = limit;
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ int old = audit_backlog_limit;
+
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ int rc;
+ if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ return rc;
+ else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "audit_backlog_limit=%d old=%d by auid=%u subj=%s",
+ limit, old, loginuid, ctx);
+ kfree(ctx);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
"audit_backlog_limit=%d old=%d by auid=%u",
- audit_backlog_limit, old, loginuid);
+ limit, old, loginuid);
+ audit_backlog_limit = limit;
return old;
}
-static int audit_set_enabled(int state, uid_t loginuid)
+static int audit_set_enabled(int state, uid_t loginuid, u32 sid)
{
- int old = audit_enabled;
+ int old = audit_enabled;
+
if (state != 0 && state != 1)
return -EINVAL;
- audit_enabled = state;
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ int rc;
+ if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ return rc;
+ else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "audit_enabled=%d old=%d by auid=%u subj=%s",
+ state, old, loginuid, ctx);
+ kfree(ctx);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
"audit_enabled=%d old=%d by auid=%u",
- audit_enabled, old, loginuid);
+ state, old, loginuid);
+ audit_enabled = state;
return old;
}
-static int audit_set_failure(int state, uid_t loginuid)
+static int audit_set_failure(int state, uid_t loginuid, u32 sid)
{
- int old = audit_failure;
+ int old = audit_failure;
+
if (state != AUDIT_FAIL_SILENT
&& state != AUDIT_FAIL_PRINTK
&& state != AUDIT_FAIL_PANIC)
return -EINVAL;
- audit_failure = state;
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ int rc;
+ if ((rc = selinux_ctxid_to_string(sid, &ctx, &len)))
+ return rc;
+ else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "audit_failure=%d old=%d by auid=%u subj=%s",
+ state, old, loginuid, ctx);
+ kfree(ctx);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
"audit_failure=%d old=%d by auid=%u",
- audit_failure, old, loginuid);
+ state, old, loginuid);
+ audit_failure = state;
return old;
}
@@ -387,7 +444,7 @@ static int audit_netlink_ok(kernel_cap_t eff_cap, u16 msg_type)
static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- u32 uid, pid, seq;
+ u32 uid, pid, seq, sid;
void *data;
struct audit_status *status_get, status_set;
int err;
@@ -413,6 +470,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
pid = NETLINK_CREDS(skb)->pid;
uid = NETLINK_CREDS(skb)->uid;
loginuid = NETLINK_CB(skb).loginuid;
+ sid = NETLINK_CB(skb).sid;
seq = nlh->nlmsg_seq;
data = NLMSG_DATA(nlh);
@@ -433,25 +491,43 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
status_get = (struct audit_status *)data;
if (status_get->mask & AUDIT_STATUS_ENABLED) {
- err = audit_set_enabled(status_get->enabled, loginuid);
+ err = audit_set_enabled(status_get->enabled,
+ loginuid, sid);
if (err < 0) return err;
}
if (status_get->mask & AUDIT_STATUS_FAILURE) {
- err = audit_set_failure(status_get->failure, loginuid);
+ err = audit_set_failure(status_get->failure,
+ loginuid, sid);
if (err < 0) return err;
}
if (status_get->mask & AUDIT_STATUS_PID) {
int old = audit_pid;
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ int rc;
+ if ((rc = selinux_ctxid_to_string(
+ sid, &ctx, &len)))
+ return rc;
+ else
+ audit_log(NULL, GFP_KERNEL,
+ AUDIT_CONFIG_CHANGE,
+ "audit_pid=%d old=%d by auid=%u subj=%s",
+ status_get->pid, old,
+ loginuid, ctx);
+ kfree(ctx);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "audit_pid=%d old=%d by auid=%u",
+ status_get->pid, old, loginuid);
audit_pid = status_get->pid;
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "audit_pid=%d old=%d by auid=%u",
- audit_pid, old, loginuid);
}
if (status_get->mask & AUDIT_STATUS_RATE_LIMIT)
- audit_set_rate_limit(status_get->rate_limit, loginuid);
+ audit_set_rate_limit(status_get->rate_limit,
+ loginuid, sid);
if (status_get->mask & AUDIT_STATUS_BACKLOG_LIMIT)
audit_set_backlog_limit(status_get->backlog_limit,
- loginuid);
+ loginuid, sid);
break;
case AUDIT_USER:
case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG:
@@ -465,8 +541,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
if (ab) {
audit_log_format(ab,
- "user pid=%d uid=%u auid=%u msg='%.1024s'",
- pid, uid, loginuid, (char *)data);
+ "user pid=%d uid=%u auid=%u",
+ pid, uid, loginuid);
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ if (selinux_ctxid_to_string(
+ sid, &ctx, &len)) {
+ audit_log_format(ab,
+ " ssid=%u", sid);
+ /* Maybe call audit_panic? */
+ } else
+ audit_log_format(ab,
+ " subj=%s", ctx);
+ kfree(ctx);
+ }
+ audit_log_format(ab, " msg='%.1024s'",
+ (char *)data);
audit_set_pid(ab, pid);
audit_log_end(ab);
}
@@ -480,7 +571,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_LIST:
err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
uid, seq, data, nlmsg_len(nlh),
- loginuid);
+ loginuid, sid);
break;
case AUDIT_ADD_RULE:
case AUDIT_DEL_RULE:
@@ -490,7 +581,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_LIST_RULES:
err = audit_receive_filter(nlh->nlmsg_type, NETLINK_CB(skb).pid,
uid, seq, data, nlmsg_len(nlh),
- loginuid);
+ loginuid, sid);
break;
case AUDIT_SIGNAL_INFO:
sig_data.uid = audit_sig_uid;
@@ -564,6 +655,11 @@ static int __init audit_init(void)
skb_queue_head_init(&audit_skb_queue);
audit_initialized = 1;
audit_enabled = audit_default;
+
+ /* Register the callback with selinux. This callback will be invoked
+ * when a new policy is loaded. */
+ selinux_audit_set_callback(&selinux_audit_rule_update);
+
audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized");
return 0;
}
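The four setters above (rate limit, backlog limit, enabled, failure) repeat one logging shape: resolve the sender's SELinux sid to a context string, emit a config-change record with subj=, and only then commit the new value, so a failed sid-to-string conversion leaves the old setting in place. As a reading aid, that shape factored into a hypothetical helper (not part of the patch; selinux_ctxid_to_string() and audit_log() are used exactly as in the hunks above):

#include <linux/audit.h>
#include <linux/selinux.h>
#include <linux/slab.h>

static int audit_log_config_change(const char *what, int new_val, int old_val,
				   uid_t loginuid, u32 sid)
{
	if (sid) {
		char *ctx = NULL;
		u32 len;
		int rc;

		rc = selinux_ctxid_to_string(sid, &ctx, &len);
		if (rc)
			return rc;	/* propagate, as the setters do */
		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
			  "%s=%d old=%d by auid=%u subj=%s",
			  what, new_val, old_val, loginuid, ctx);
		kfree(ctx);
	} else
		audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
			  "%s=%d old=%d by auid=%u",
			  what, new_val, old_val, loginuid);
	return 0;
}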
diff --git a/kernel/audit.h b/kernel/audit.h
index bc5392076e2..6f733920fd3 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -54,9 +54,11 @@ enum audit_state {
/* Rule lists */
struct audit_field {
- u32 type;
- u32 val;
- u32 op;
+ u32 type;
+ u32 val;
+ u32 op;
+ char *se_str;
+ struct selinux_audit_rule *se_rule;
};
struct audit_krule {
@@ -86,3 +88,5 @@ extern void audit_send_reply(int pid, int seq, int type,
extern void audit_log_lost(const char *message);
extern void audit_panic(const char *message);
extern struct mutex audit_netlink_mutex;
+
+extern int selinux_audit_rule_update(void);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d3a8539f3a8..7c134906d68 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -23,6 +23,7 @@
#include <linux/audit.h>
#include <linux/kthread.h>
#include <linux/netlink.h>
+#include <linux/selinux.h>
#include "audit.h"
/* There are three lists of rules -- one to search at task creation
@@ -42,6 +43,13 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
static inline void audit_free_rule(struct audit_entry *e)
{
+ int i;
+ if (e->rule.fields)
+ for (i = 0; i < e->rule.field_count; i++) {
+ struct audit_field *f = &e->rule.fields[i];
+ kfree(f->se_str);
+ selinux_audit_rule_free(f->se_rule);
+ }
kfree(e->rule.fields);
kfree(e);
}
@@ -52,9 +60,29 @@ static inline void audit_free_rule_rcu(struct rcu_head *head)
audit_free_rule(e);
}
+/* Initialize an audit filterlist entry. */
+static inline struct audit_entry *audit_init_entry(u32 field_count)
+{
+ struct audit_entry *entry;
+ struct audit_field *fields;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (unlikely(!entry))
+ return NULL;
+
+ fields = kzalloc(sizeof(*fields) * field_count, GFP_KERNEL);
+ if (unlikely(!fields)) {
+ kfree(entry);
+ return NULL;
+ }
+ entry->rule.fields = fields;
+
+ return entry;
+}
+
/* Unpack a filter field's string representation from user-space
* buffer. */
-static __attribute__((unused)) char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
+static char *audit_unpack_string(void **bufp, size_t *remain, size_t len)
{
char *str;
@@ -84,7 +112,6 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
{
unsigned listnr;
struct audit_entry *entry;
- struct audit_field *fields;
int i, err;
err = -EINVAL;
@@ -108,23 +135,14 @@ static inline struct audit_entry *audit_to_entry_common(struct audit_rule *rule)
goto exit_err;
err = -ENOMEM;
- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
- if (unlikely(!entry))
- goto exit_err;
- fields = kmalloc(sizeof(*fields) * rule->field_count, GFP_KERNEL);
- if (unlikely(!fields)) {
- kfree(entry);
+ entry = audit_init_entry(rule->field_count);
+ if (!entry)
goto exit_err;
- }
-
- memset(&entry->rule, 0, sizeof(struct audit_krule));
- memset(fields, 0, sizeof(struct audit_field));
entry->rule.flags = rule->flags & AUDIT_FILTER_PREPEND;
entry->rule.listnr = listnr;
entry->rule.action = rule->action;
entry->rule.field_count = rule->field_count;
- entry->rule.fields = fields;
for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
entry->rule.mask[i] = rule->mask[i];
@@ -150,15 +168,20 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
for (i = 0; i < rule->field_count; i++) {
struct audit_field *f = &entry->rule.fields[i];
- if (rule->fields[i] & AUDIT_UNUSED_BITS) {
- err = -EINVAL;
- goto exit_free;
- }
-
f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
f->val = rule->values[i];
+ if (f->type & AUDIT_UNUSED_BITS ||
+ f->type == AUDIT_SE_USER ||
+ f->type == AUDIT_SE_ROLE ||
+ f->type == AUDIT_SE_TYPE ||
+ f->type == AUDIT_SE_SEN ||
+ f->type == AUDIT_SE_CLR) {
+ err = -EINVAL;
+ goto exit_free;
+ }
+
entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
/* Support for legacy operators where
@@ -188,8 +211,9 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
int err = 0;
struct audit_entry *entry;
void *bufp;
- /* size_t remain = datasz - sizeof(struct audit_rule_data); */
+ size_t remain = datasz - sizeof(struct audit_rule_data);
int i;
+ char *str;
entry = audit_to_entry_common((struct audit_rule *)data);
if (IS_ERR(entry))
@@ -207,10 +231,35 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
f->op = data->fieldflags[i] & AUDIT_OPERATORS;
f->type = data->fields[i];
+ f->val = data->values[i];
+ f->se_str = NULL;
+ f->se_rule = NULL;
switch(f->type) {
- /* call type-specific conversion routines here */
- default:
- f->val = data->values[i];
+ case AUDIT_SE_USER:
+ case AUDIT_SE_ROLE:
+ case AUDIT_SE_TYPE:
+ case AUDIT_SE_SEN:
+ case AUDIT_SE_CLR:
+ str = audit_unpack_string(&bufp, &remain, f->val);
+ if (IS_ERR(str))
+ goto exit_free;
+ entry->rule.buflen += f->val;
+
+ err = selinux_audit_rule_init(f->type, f->op, str,
+ &f->se_rule);
+ /* Keep currently invalid fields around in case they
+ * become valid after a policy reload. */
+ if (err == -EINVAL) {
+ printk(KERN_WARNING "audit rule for selinux "
+ "\'%s\' is invalid\n", str);
+ err = 0;
+ }
+ if (err) {
+ kfree(str);
+ goto exit_free;
+ } else
+ f->se_str = str;
+ break;
}
}
@@ -286,7 +335,14 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->fields[i] = f->type;
data->fieldflags[i] = f->op;
switch(f->type) {
- /* call type-specific conversion routines here */
+ case AUDIT_SE_USER:
+ case AUDIT_SE_ROLE:
+ case AUDIT_SE_TYPE:
+ case AUDIT_SE_SEN:
+ case AUDIT_SE_CLR:
+ data->buflen += data->values[i] =
+ audit_pack_string(&bufp, f->se_str);
+ break;
default:
data->values[i] = f->val;
}
@@ -314,7 +370,14 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
return 1;
switch(a->fields[i].type) {
- /* call type-specific comparison routines here */
+ case AUDIT_SE_USER:
+ case AUDIT_SE_ROLE:
+ case AUDIT_SE_TYPE:
+ case AUDIT_SE_SEN:
+ case AUDIT_SE_CLR:
+ if (strcmp(a->fields[i].se_str, b->fields[i].se_str))
+ return 1;
+ break;
default:
if (a->fields[i].val != b->fields[i].val)
return 1;
@@ -328,6 +391,81 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
return 0;
}
+/* Duplicate selinux field information. The se_rule is opaque, so must be
+ * re-initialized. */
+static inline int audit_dupe_selinux_field(struct audit_field *df,
+ struct audit_field *sf)
+{
+ int ret = 0;
+ char *se_str;
+
+ /* our own copy of se_str */
+ se_str = kstrdup(sf->se_str, GFP_KERNEL);
+ if (unlikely(IS_ERR(se_str)))
+ return -ENOMEM;
+ df->se_str = se_str;
+
+ /* our own (refreshed) copy of se_rule */
+ ret = selinux_audit_rule_init(df->type, df->op, df->se_str,
+ &df->se_rule);
+ /* Keep currently invalid fields around in case they
+ * become valid after a policy reload. */
+ if (ret == -EINVAL) {
+ printk(KERN_WARNING "audit rule for selinux \'%s\' is "
+ "invalid\n", df->se_str);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/* Duplicate an audit rule. This will be a deep copy with the exception
+ * of the watch - that pointer is carried over. The selinux specific fields
+ * will be updated in the copy. The point is to be able to replace the old
+ * rule with the new rule in the filterlist, then free the old rule. */
+static struct audit_entry *audit_dupe_rule(struct audit_krule *old)
+{
+ u32 fcount = old->field_count;
+ struct audit_entry *entry;
+ struct audit_krule *new;
+ int i, err = 0;
+
+ entry = audit_init_entry(fcount);
+ if (unlikely(!entry))
+ return ERR_PTR(-ENOMEM);
+
+ new = &entry->rule;
+ new->vers_ops = old->vers_ops;
+ new->flags = old->flags;
+ new->listnr = old->listnr;
+ new->action = old->action;
+ for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
+ new->mask[i] = old->mask[i];
+ new->buflen = old->buflen;
+ new->field_count = old->field_count;
+ memcpy(new->fields, old->fields, sizeof(struct audit_field) * fcount);
+
+ /* deep copy this information, updating the se_rule fields, because
+ * the originals will all be freed when the old rule is freed. */
+ for (i = 0; i < fcount; i++) {
+ switch (new->fields[i].type) {
+ case AUDIT_SE_USER:
+ case AUDIT_SE_ROLE:
+ case AUDIT_SE_TYPE:
+ case AUDIT_SE_SEN:
+ case AUDIT_SE_CLR:
+ err = audit_dupe_selinux_field(&new->fields[i],
+ &old->fields[i]);
+ }
+ if (err) {
+ audit_free_rule(entry);
+ return ERR_PTR(err);
+ }
+ }
+
+ return entry;
+}
+
/* Add rule to given filterlist if not a duplicate. Protected by
* audit_netlink_mutex. */
static inline int audit_add_rule(struct audit_entry *entry,
@@ -448,9 +586,10 @@ static int audit_list_rules(void *_dest)
* @data: payload data
* @datasz: size of payload data
* @loginuid: loginuid of sender
+ * @sid: SE Linux Security ID of sender
*/
int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
- size_t datasz, uid_t loginuid)
+ size_t datasz, uid_t loginuid, u32 sid)
{
struct task_struct *tsk;
int *dest;
@@ -493,9 +632,23 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
err = audit_add_rule(entry,
&audit_filter_list[entry->rule.listnr]);
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u add rule to list=%d res=%d\n",
- loginuid, entry->rule.listnr, !err);
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ if (selinux_ctxid_to_string(sid, &ctx, &len)) {
+ /* Maybe call audit_panic? */
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "auid=%u ssid=%u add rule to list=%d res=%d",
+ loginuid, sid, entry->rule.listnr, !err);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "auid=%u subj=%s add rule to list=%d res=%d",
+ loginuid, ctx, entry->rule.listnr, !err);
+ kfree(ctx);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "auid=%u add rule to list=%d res=%d",
+ loginuid, entry->rule.listnr, !err);
if (err)
audit_free_rule(entry);
@@ -511,9 +664,24 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
err = audit_del_rule(entry,
&audit_filter_list[entry->rule.listnr]);
- audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
- "auid=%u remove rule from list=%d res=%d\n",
- loginuid, entry->rule.listnr, !err);
+
+ if (sid) {
+ char *ctx = NULL;
+ u32 len;
+ if (selinux_ctxid_to_string(sid, &ctx, &len)) {
+ /* Maybe call audit_panic? */
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "auid=%u ssid=%u remove rule from list=%d res=%d",
+ loginuid, sid, entry->rule.listnr, !err);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "auid=%u subj=%s remove rule from list=%d res=%d",
+ loginuid, ctx, entry->rule.listnr, !err);
+ kfree(ctx);
+ } else
+ audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE,
+ "auid=%u remove rule from list=%d res=%d",
+ loginuid, entry->rule.listnr, !err);
audit_free_rule(entry);
break;
@@ -628,3 +796,62 @@ unlock_and_return:
rcu_read_unlock();
return result;
}
+
+/* Check to see if the rule contains any selinux fields. Returns 1 if there
+ are selinux fields specified in the rule, 0 otherwise. */
+static inline int audit_rule_has_selinux(struct audit_krule *rule)
+{
+ int i;
+
+ for (i = 0; i < rule->field_count; i++) {
+ struct audit_field *f = &rule->fields[i];
+ switch (f->type) {
+ case AUDIT_SE_USER:
+ case AUDIT_SE_ROLE:
+ case AUDIT_SE_TYPE:
+ case AUDIT_SE_SEN:
+ case AUDIT_SE_CLR:
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/* This function will re-initialize the se_rule field of all applicable rules.
+ * It will traverse the filter lists searching for rules that contain selinux
+ * specific filter fields. When such a rule is found, it is copied, the
+ * selinux field is re-initialized, and the old rule is replaced with the
+ * updated rule. */
+int selinux_audit_rule_update(void)
+{
+ struct audit_entry *entry, *n, *nentry;
+ int i, err = 0;
+
+ /* audit_netlink_mutex synchronizes the writers */
+ mutex_lock(&audit_netlink_mutex);
+
+ for (i = 0; i < AUDIT_NR_FILTERS; i++) {
+ list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) {
+ if (!audit_rule_has_selinux(&entry->rule))
+ continue;
+
+ nentry = audit_dupe_rule(&entry->rule);
+ if (unlikely(IS_ERR(nentry))) {
+ /* save the first error encountered for the
+ * return value */
+ if (!err)
+ err = PTR_ERR(nentry);
+ audit_panic("error updating selinux filters");
+ list_del_rcu(&entry->list);
+ } else {
+ list_replace_rcu(&entry->list, &nentry->list);
+ }
+ call_rcu(&entry->rcu, audit_free_rule_rcu);
+ }
+ }
+
+ mutex_unlock(&audit_netlink_mutex);
+
+ return err;
+}
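selinux_audit_rule_update() above relies on the standard RCU list-replacement idiom: publish the new entry in place of the old one (or unlink the old one on error) and defer the free past a grace period, so readers walking the filter lists under rcu_read_lock() never see a freed rule. A minimal self-contained sketch of that idiom, with a hypothetical struct rather than audit code:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct item {
	struct list_head list;
	struct rcu_head rcu;
};

static void item_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct item, rcu));
}

/* Writers are serialized externally (above: audit_netlink_mutex);
 * readers always see either the old entry or the new one, never a
 * half-updated or freed one. */
static void replace_or_remove(struct item *old, struct item *new)
{
	if (new)
		list_replace_rcu(&old->list, &new->list);
	else
		list_del_rcu(&old->list);
	call_rcu(&old->rcu, item_free_rcu);	/* free after grace period */
}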
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7f160df21a2..1c03a4ed1b2 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -58,6 +58,7 @@
#include <linux/security.h>
#include <linux/list.h>
#include <linux/tty.h>
+#include <linux/selinux.h>
#include "audit.h"
@@ -89,7 +90,7 @@ struct audit_names {
uid_t uid;
gid_t gid;
dev_t rdev;
- char *ctx;
+ u32 osid;
};
struct audit_aux_data {
@@ -106,7 +107,7 @@ struct audit_aux_data_ipcctl {
uid_t uid;
gid_t gid;
mode_t mode;
- char *ctx;
+ u32 osid;
};
struct audit_aux_data_socketcall {
@@ -167,7 +168,8 @@ static int audit_filter_rules(struct task_struct *tsk,
struct audit_context *ctx,
enum audit_state *state)
{
- int i, j;
+ int i, j, need_sid = 1;
+ u32 sid;
for (i = 0; i < rule->field_count; i++) {
struct audit_field *f = &rule->fields[i];
@@ -257,6 +259,27 @@ static int audit_filter_rules(struct task_struct *tsk,
if (ctx)
result = audit_comparator(ctx->loginuid, f->op, f->val);
break;
+ case AUDIT_SE_USER:
+ case AUDIT_SE_ROLE:
+ case AUDIT_SE_TYPE:
+ case AUDIT_SE_SEN:
+ case AUDIT_SE_CLR:
+ /* NOTE: this may return negative values indicating
+ a temporary error. We simply treat this as a
+ match for now to avoid losing information that
+ may be wanted. An error message will also be
+ logged upon error */
+ if (f->se_rule) {
+ if (need_sid) {
+ selinux_task_ctxid(tsk, &sid);
+ need_sid = 0;
+ }
+ result = selinux_audit_rule_match(sid, f->type,
+ f->op,
+ f->se_rule,
+ ctx);
+ }
+ break;
case AUDIT_ARG0:
case AUDIT_ARG1:
case AUDIT_ARG2:
@@ -329,7 +352,6 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
return AUDIT_BUILD_CONTEXT;
}
-/* This should be called with task_lock() held. */
static inline struct audit_context *audit_get_context(struct task_struct *tsk,
int return_valid,
int return_code)
@@ -391,9 +413,6 @@ static inline void audit_free_names(struct audit_context *context)
#endif
for (i = 0; i < context->name_count; i++) {
- char *p = context->names[i].ctx;
- context->names[i].ctx = NULL;
- kfree(p);
if (context->names[i].name)
__putname(context->names[i].name);
}
@@ -416,11 +435,6 @@ static inline void audit_free_aux(struct audit_context *context)
dput(axi->dentry);
mntput(axi->mnt);
}
- if ( aux->type == AUDIT_IPC ) {
- struct audit_aux_data_ipcctl *axi = (void *)aux;
- if (axi->ctx)
- kfree(axi->ctx);
- }
context->aux = aux->next;
kfree(aux);
@@ -506,7 +520,7 @@ static inline void audit_free_context(struct audit_context *context)
printk(KERN_ERR "audit: freed %d contexts\n", count);
}
-static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask)
+static void audit_log_task_context(struct audit_buffer *ab)
{
char *ctx = NULL;
ssize_t len = 0;
@@ -518,7 +532,7 @@ static void audit_log_task_context(struct audit_buffer *ab, gfp_t gfp_mask)
return;
}
- ctx = kmalloc(len, gfp_mask);
+ ctx = kmalloc(len, GFP_KERNEL);
if (!ctx)
goto error_path;
@@ -536,47 +550,46 @@ error_path:
return;
}
-static void audit_log_task_info(struct audit_buffer *ab, gfp_t gfp_mask)
+static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk)
{
- char name[sizeof(current->comm)];
- struct mm_struct *mm = current->mm;
+ char name[sizeof(tsk->comm)];
+ struct mm_struct *mm = tsk->mm;
struct vm_area_struct *vma;
- get_task_comm(name, current);
+ /* tsk == current */
+
+ get_task_comm(name, tsk);
audit_log_format(ab, " comm=");
audit_log_untrustedstring(ab, name);
- if (!mm)
- return;
-
- /*
- * this is brittle; all callers that pass GFP_ATOMIC will have
- * NULL current->mm and we won't get here.
- */
- down_read(&mm->mmap_sem);
- vma = mm->mmap;
- while (vma) {
- if ((vma->vm_flags & VM_EXECUTABLE) &&
- vma->vm_file) {
- audit_log_d_path(ab, "exe=",
- vma->vm_file->f_dentry,
- vma->vm_file->f_vfsmnt);
- break;
+ if (mm) {
+ down_read(&mm->mmap_sem);
+ vma = mm->mmap;
+ while (vma) {
+ if ((vma->vm_flags & VM_EXECUTABLE) &&
+ vma->vm_file) {
+ audit_log_d_path(ab, "exe=",
+ vma->vm_file->f_dentry,
+ vma->vm_file->f_vfsmnt);
+ break;
+ }
+ vma = vma->vm_next;
}
- vma = vma->vm_next;
+ up_read(&mm->mmap_sem);
}
- up_read(&mm->mmap_sem);
- audit_log_task_context(ab, gfp_mask);
+ audit_log_task_context(ab);
}
-static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
+static void audit_log_exit(struct audit_context *context, struct task_struct *tsk)
{
- int i;
+ int i, call_panic = 0;
struct audit_buffer *ab;
struct audit_aux_data *aux;
const char *tty;
- ab = audit_log_start(context, gfp_mask, AUDIT_SYSCALL);
+ /* tsk == current */
+
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
if (!ab)
return; /* audit_panic has been called */
audit_log_format(ab, "arch=%x syscall=%d",
@@ -587,8 +600,8 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
audit_log_format(ab, " success=%s exit=%ld",
(context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
context->return_code);
- if (current->signal->tty && current->signal->tty->name)
- tty = current->signal->tty->name;
+ if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
+ tty = tsk->signal->tty->name;
else
tty = "(none)";
audit_log_format(ab,
@@ -607,12 +620,12 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
context->gid,
context->euid, context->suid, context->fsuid,
context->egid, context->sgid, context->fsgid, tty);
- audit_log_task_info(ab, gfp_mask);
+ audit_log_task_info(ab, tsk);
audit_log_end(ab);
for (aux = context->aux; aux; aux = aux->next) {
- ab = audit_log_start(context, gfp_mask, aux->type);
+ ab = audit_log_start(context, GFP_KERNEL, aux->type);
if (!ab)
continue; /* audit_panic has been called */
@@ -620,8 +633,39 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
case AUDIT_IPC: {
struct audit_aux_data_ipcctl *axi = (void *)aux;
audit_log_format(ab,
- " qbytes=%lx iuid=%u igid=%u mode=%x obj=%s",
- axi->qbytes, axi->uid, axi->gid, axi->mode, axi->ctx);
+ " qbytes=%lx iuid=%u igid=%u mode=%x",
+ axi->qbytes, axi->uid, axi->gid, axi->mode);
+ if (axi->osid != 0) {
+ char *ctx = NULL;
+ u32 len;
+ if (selinux_ctxid_to_string(
+ axi->osid, &ctx, &len)) {
+ audit_log_format(ab, " osid=%u",
+ axi->osid);
+ call_panic = 1;
+ } else
+ audit_log_format(ab, " obj=%s", ctx);
+ kfree(ctx);
+ }
+ break; }
+
+ case AUDIT_IPC_SET_PERM: {
+ struct audit_aux_data_ipcctl *axi = (void *)aux;
+ audit_log_format(ab,
+ " new qbytes=%lx new iuid=%u new igid=%u new mode=%x",
+ axi->qbytes, axi->uid, axi->gid, axi->mode);
+ if (axi->osid != 0) {
+ char *ctx = NULL;
+ u32 len;
+ if (selinux_ctxid_to_string(
+ axi->osid, &ctx, &len)) {
+ audit_log_format(ab, " osid=%u",
+ axi->osid);
+ call_panic = 1;
+ } else
+ audit_log_format(ab, " obj=%s", ctx);
+ kfree(ctx);
+ }
break; }
case AUDIT_SOCKETCALL: {
@@ -649,7 +693,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
}
if (context->pwd && context->pwdmnt) {
- ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
if (ab) {
audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
audit_log_end(ab);
@@ -659,7 +703,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
unsigned long ino = context->names[i].ino;
unsigned long pino = context->names[i].pino;
- ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
if (!ab)
continue; /* audit_panic has been called */
@@ -685,32 +729,35 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
context->names[i].gid,
MAJOR(context->names[i].rdev),
MINOR(context->names[i].rdev));
- if (context->names[i].ctx) {
- audit_log_format(ab, " obj=%s",
- context->names[i].ctx);
+ if (context->names[i].osid != 0) {
+ char *ctx = NULL;
+ u32 len;
+ if (selinux_ctxid_to_string(
+ context->names[i].osid, &ctx, &len)) {
+ audit_log_format(ab, " osid=%u",
+ context->names[i].osid);
+ call_panic = 2;
+ } else
+ audit_log_format(ab, " obj=%s", ctx);
+ kfree(ctx);
}
audit_log_end(ab);
}
+ if (call_panic)
+ audit_panic("error converting sid to string");
}
/**
* audit_free - free a per-task audit context
* @tsk: task whose audit context block to free
*
- * Called from copy_process and __put_task_struct.
+ * Called from copy_process and do_exit
*/
void audit_free(struct task_struct *tsk)
{
struct audit_context *context;
- /*
- * No need to lock the task - when we execute audit_free()
- * then the task has no external references anymore, and
- * we are tearing it down. (The locking also confuses
- * DEBUG_LOCKDEP - this freeing may occur in softirq
- * contexts as well, via RCU.)
- */
context = audit_get_context(tsk, 0, 0);
if (likely(!context))
return;
@@ -719,8 +766,9 @@ void audit_free(struct task_struct *tsk)
* function (e.g., exit_group), then free context block.
* We use GFP_ATOMIC here because we might be doing this
* in the context of the idle thread */
+ /* that can happen only if we are called from do_exit() */
if (context->in_syscall && context->auditable)
- audit_log_exit(context, GFP_ATOMIC);
+ audit_log_exit(context, tsk);
audit_free_context(context);
}
@@ -743,10 +791,11 @@ void audit_free(struct task_struct *tsk)
* will only be written if another part of the kernel requests that it
* be written).
*/
-void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
+void audit_syscall_entry(int arch, int major,
unsigned long a1, unsigned long a2,
unsigned long a3, unsigned long a4)
{
+ struct task_struct *tsk = current;
struct audit_context *context = tsk->audit_context;
enum audit_state state;
@@ -824,22 +873,18 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major,
* message), then write out the syscall information. In call cases,
* free the names stored from getname().
*/
-void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
+void audit_syscall_exit(int valid, long return_code)
{
+ struct task_struct *tsk = current;
struct audit_context *context;
- get_task_struct(tsk);
- task_lock(tsk);
context = audit_get_context(tsk, valid, return_code);
- task_unlock(tsk);
- /* Not having a context here is ok, since the parent may have
- * called __put_task_struct. */
if (likely(!context))
- goto out;
+ return;
if (context->in_syscall && context->auditable)
- audit_log_exit(context, GFP_KERNEL);
+ audit_log_exit(context, tsk);
context->in_syscall = 0;
context->auditable = 0;
@@ -854,8 +899,6 @@ void audit_syscall_exit(struct task_struct *tsk, int valid, long return_code)
audit_free_aux(context);
tsk->audit_context = context;
}
- out:
- put_task_struct(tsk);
}
/**
@@ -936,40 +979,11 @@ void audit_putname(const char *name)
#endif
}
-void audit_inode_context(int idx, const struct inode *inode)
+static void audit_inode_context(int idx, const struct inode *inode)
{
struct audit_context *context = current->audit_context;
- const char *suffix = security_inode_xattr_getsuffix();
- char *ctx = NULL;
- int len = 0;
-
- if (!suffix)
- goto ret;
-
- len = security_inode_getsecurity(inode, suffix, NULL, 0, 0);
- if (len == -EOPNOTSUPP)
- goto ret;
- if (len < 0)
- goto error_path;
-
- ctx = kmalloc(len, GFP_KERNEL);
- if (!ctx)
- goto error_path;
- len = security_inode_getsecurity(inode, suffix, ctx, len, 0);
- if (len < 0)
- goto error_path;
-
- kfree(context->names[idx].ctx);
- context->names[idx].ctx = ctx;
- goto ret;
-
-error_path:
- if (ctx)
- kfree(ctx);
- audit_panic("error in audit_inode_context");
-ret:
- return;
+ selinux_get_inode_sid(inode, &context->names[idx].osid);
}
@@ -1155,40 +1169,37 @@ uid_t audit_get_loginuid(struct audit_context *ctx)
return ctx ? ctx->loginuid : -1;
}
-static char *audit_ipc_context(struct kern_ipc_perm *ipcp)
+/**
+ * audit_ipc_obj - record audit data for ipc object
+ * @ipcp: ipc permissions
+ *
+ * Returns 0 on success or when there is no audit context; < 0 on error.
+ */
+int audit_ipc_obj(struct kern_ipc_perm *ipcp)
{
+ struct audit_aux_data_ipcctl *ax;
struct audit_context *context = current->audit_context;
- char *ctx = NULL;
- int len = 0;
if (likely(!context))
- return NULL;
-
- len = security_ipc_getsecurity(ipcp, NULL, 0);
- if (len == -EOPNOTSUPP)
- goto ret;
- if (len < 0)
- goto error_path;
-
- ctx = kmalloc(len, GFP_ATOMIC);
- if (!ctx)
- goto error_path;
+ return 0;
- len = security_ipc_getsecurity(ipcp, ctx, len);
- if (len < 0)
- goto error_path;
+ ax = kmalloc(sizeof(*ax), GFP_ATOMIC);
+ if (!ax)
+ return -ENOMEM;
- return ctx;
+ ax->uid = ipcp->uid;
+ ax->gid = ipcp->gid;
+ ax->mode = ipcp->mode;
+ selinux_get_ipc_sid(ipcp, &ax->osid);
-error_path:
- kfree(ctx);
- audit_panic("error in audit_ipc_context");
-ret:
- return NULL;
+ ax->d.type = AUDIT_IPC;
+ ax->d.next = context->aux;
+ context->aux = (void *)ax;
+ return 0;
}
/**
- * audit_ipc_perms - record audit data for ipc
+ * audit_ipc_set_perm - record audit data for new ipc permissions
* @qbytes: msgq bytes
* @uid: msgq user id
* @gid: msgq group id
@@ -1196,7 +1207,7 @@ ret:
*
 * Returns 0 on success or when there is no audit context; < 0 on error.
*/
-int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
+int audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, struct kern_ipc_perm *ipcp)
{
struct audit_aux_data_ipcctl *ax;
struct audit_context *context = current->audit_context;
@@ -1212,9 +1223,9 @@ int audit_ipc_perms(unsigned long qbytes, uid_t uid, gid_t gid, mode_t mode, str
ax->uid = uid;
ax->gid = gid;
ax->mode = mode;
- ax->ctx = audit_ipc_context(ipcp);
+ selinux_get_ipc_sid(ipcp, &ax->osid);
- ax->d.type = AUDIT_IPC;
+ ax->d.type = AUDIT_IPC_SET_PERM;
ax->d.next = context->aux;
context->aux = (void *)ax;
return 0;
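The old audit_ipc_perms() is split in two: audit_ipc_obj() records the object's current owner, mode and SELinux sid on every audited access, while audit_ipc_set_perm() records the permissions being requested by IPC_SET as a separate AUDIT_IPC_SET_PERM record. A hypothetical caller, standing in for the real ipc/ call sites, would look roughly like:

#include <linux/audit.h>
#include <linux/ipc.h>

static int example_ipcctl(struct kern_ipc_perm *ipcp, int cmd,
			  struct ipc64_perm *setbuf, unsigned long qbytes)
{
	int err;

	err = audit_ipc_obj(ipcp);	/* current state -> AUDIT_IPC record */
	if (err)
		return err;

	if (cmd == IPC_SET) {
		/* requested state -> AUDIT_IPC_SET_PERM record */
		err = audit_ipc_set_perm(qbytes, setbuf->uid, setbuf->gid,
					 setbuf->mode, ipcp);
		if (err)
			return err;
	}
	return 0;
}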
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 72248d1b9e3..ab81fdd4572 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2231,19 +2231,25 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
* So only GFP_KERNEL allocations, if all nodes in the cpuset are
* short of memory, might require taking the callback_mutex mutex.
*
- * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
- * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
- * hardwall cpusets - no allocation on a node outside the cpuset is
- * allowed (unless in interrupt, of course).
- *
- * The second loop doesn't even call here for GFP_ATOMIC requests
- * (if the __alloc_pages() local variable 'wait' is set). That check
- * and the checks below have the combined affect in the second loop of
- * the __alloc_pages() routine that:
+ * The first call here from mm/page_alloc:get_page_from_freelist()
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
+ * no allocation on a node outside the cpuset is allowed (unless in
+ * interrupt, of course).
+ *
+ * The second pass through get_page_from_freelist() doesn't even call
+ * here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
+ * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
+ * in alloc_flags. That logic and the checks below have the combined
+ * effect that:
* in_interrupt - any node ok (current task context irrelevant)
* GFP_ATOMIC - any node ok
* GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok
* GFP_USER - only nodes in current tasks mems allowed ok.
+ *
+ * Rule:
+ * Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
+ * the code that might scan up ancestor cpusets and sleep.
**/
int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
@@ -2255,6 +2261,7 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
if (in_interrupt())
return 1;
node = z->zone_pgdat->node_id;
+ might_sleep_if(!(gfp_mask & __GFP_HARDWALL));
if (node_isset(node, current->mems_allowed))
return 1;
if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */
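The new might_sleep_if() enforces the rule spelled out in the comment above: a caller that cannot sleep must pass __GFP_HARDWALL so the ancestor-cpuset scan (which can take callback_mutex) is skipped. A small sketch of a conforming caller, as a hypothetical wrapper:

#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>

static int zone_ok_for_alloc(struct zone *z, gfp_t gfp_mask, int can_sleep)
{
	if (!can_sleep)
		/* hardwall check only: no cpuset-hierarchy walk, no sleep */
		return cpuset_zone_allowed(z, gfp_mask | __GFP_HARDWALL);

	/* full check; may take callback_mutex and sleep */
	return cpuset_zone_allowed(z, gfp_mask);
}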
diff --git a/kernel/exit.c b/kernel/exit.c
index f86434d7b3d..e06d0c10a24 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -35,6 +35,7 @@
#include <linux/futex.h>
#include <linux/compat.h>
#include <linux/pipe_fs_i.h>
+#include <linux/audit.h> /* for audit_free() */
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -880,14 +881,6 @@ fastcall NORET_TYPE void do_exit(long code)
tsk->flags |= PF_EXITING;
- /*
- * Make sure we don't try to process any timer firings
- * while we are already exiting.
- */
- tsk->it_virt_expires = cputime_zero;
- tsk->it_prof_expires = cputime_zero;
- tsk->it_sched_expires = 0;
-
if (unlikely(in_atomic()))
printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
current->comm, current->pid,
@@ -910,6 +903,8 @@ fastcall NORET_TYPE void do_exit(long code)
if (unlikely(tsk->compat_robust_list))
compat_exit_robust_list(tsk);
#endif
+ if (unlikely(tsk->audit_context))
+ audit_free(tsk);
exit_mm(tsk);
exit_sem(tsk);
diff --git a/kernel/extable.c b/kernel/extable.c
index 7501b531cee..7fe26285531 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -40,7 +40,7 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
return e;
}
-static int core_kernel_text(unsigned long addr)
+int core_kernel_text(unsigned long addr)
{
if (addr >= (unsigned long)_stext &&
addr <= (unsigned long)_etext)
diff --git a/kernel/fork.c b/kernel/fork.c
index d2fa57d480d..ac8100e3088 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -114,8 +114,6 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(atomic_read(&tsk->usage));
WARN_ON(tsk == current);
- if (unlikely(tsk->audit_context))
- audit_free(tsk);
security_task_free(tsk);
free_uid(tsk->user);
put_group_info(tsk->group_info);
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 04ab27ddfd9..18324305724 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -456,6 +456,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
return ret;
}
+EXPORT_SYMBOL_GPL(hrtimer_start);
/**
* hrtimer_try_to_cancel - try to deactivate a timer
@@ -484,6 +485,7 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
return ret;
}
+EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
/**
* hrtimer_cancel - cancel a timer and wait for the handler to finish.
@@ -504,6 +506,7 @@ int hrtimer_cancel(struct hrtimer *timer)
cpu_relax();
}
}
+EXPORT_SYMBOL_GPL(hrtimer_cancel);
/**
* hrtimer_get_remaining - get remaining time for the timer
@@ -522,6 +525,7 @@ ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
return rem;
}
+EXPORT_SYMBOL_GPL(hrtimer_get_remaining);
#ifdef CONFIG_NO_IDLE_HZ
/**
@@ -580,6 +584,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
timer->base = &bases[clock_id];
rb_set_parent(&timer->node, &timer->node);
}
+EXPORT_SYMBOL_GPL(hrtimer_init);
/**
* hrtimer_get_res - get the timer resolution for a clock
@@ -599,6 +604,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp)
return 0;
}
+EXPORT_SYMBOL_GPL(hrtimer_get_res);
/*
* Expire the per base hrtimer-queue:
@@ -836,7 +842,7 @@ static void migrate_hrtimers(int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
+static int hrtimer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -860,7 +866,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __devinitdata hrtimers_nb = {
+static struct notifier_block hrtimers_nb = {
.notifier_call = hrtimer_cpu_notify,
};
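With hrtimer_init(), hrtimer_start(), hrtimer_cancel() and friends now exported GPL-only, modules can use high-resolution timers directly. A minimal module sketch, assuming the 2.6.17-era callback convention (an int (*function)(void *) member plus a separate ->data pointer, returning HRTIMER_NORESTART or HRTIMER_RESTART; check include/linux/hrtimer.h if your tree differs):

#include <linux/module.h>
#include <linux/init.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct hrtimer demo_timer;

/* assumed era signature; see lead-in above */
static int demo_timer_fn(void *data)
{
	printk(KERN_INFO "hrtimer demo fired\n");
	return HRTIMER_NORESTART;
}

static int __init demo_init(void)
{
	hrtimer_init(&demo_timer, CLOCK_MONOTONIC, HRTIMER_REL);
	demo_timer.function = demo_timer_fn;
	demo_timer.data = NULL;		/* assumed ->data member */
	/* fire 100ms from now, relative mode */
	hrtimer_start(&demo_timer, ktime_set(0, 100 * 1000 * 1000),
		      HRTIMER_REL);
	return 0;
}

static void __exit demo_exit(void)
{
	hrtimer_cancel(&demo_timer);	/* waits for a running handler */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");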
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
deleted file mode 100644
index 55b1e5b85db..00000000000
--- a/kernel/intermodule.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/* Deprecated, do not use. Moved from module.c to here. --RR */
-
-/* Written by Keith Owens <kaos@ocs.com.au> Oct 2000 */
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-
-/* inter_module functions are always available, even when the kernel is
- * compiled without modules. Consumers of inter_module_xxx routines
- * will always work, even when both are built into the kernel, this
- * approach removes lots of #ifdefs in mainline code.
- */
-
-static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
-static DEFINE_SPINLOCK(ime_lock);
-static int kmalloc_failed;
-
-struct inter_module_entry {
- struct list_head list;
- const char *im_name;
- struct module *owner;
- const void *userdata;
-};
-
-/**
- * inter_module_register - register a new set of inter module data.
- * @im_name: an arbitrary string to identify the data, must be unique
- * @owner: module that is registering the data, always use THIS_MODULE
- * @userdata: pointer to arbitrary userdata to be registered
- *
- * Description: Check that the im_name has not already been registered,
- * complain if it has. For new data, add it to the inter_module_entry
- * list.
- */
-void inter_module_register(const char *im_name, struct module *owner, const void *userdata)
-{
- struct list_head *tmp;
- struct inter_module_entry *ime, *ime_new;
-
- if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) {
- /* Overloaded kernel, not fatal */
- printk(KERN_ERR
- "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
- im_name);
- kmalloc_failed = 1;
- return;
- }
- ime_new->im_name = im_name;
- ime_new->owner = owner;
- ime_new->userdata = userdata;
-
- spin_lock(&ime_lock);
- list_for_each(tmp, &ime_list) {
- ime = list_entry(tmp, struct inter_module_entry, list);
- if (strcmp(ime->im_name, im_name) == 0) {
- spin_unlock(&ime_lock);
- kfree(ime_new);
- /* Program logic error, fatal */
- printk(KERN_ERR "inter_module_register: duplicate im_name '%s'", im_name);
- BUG();
- }
- }
- list_add(&(ime_new->list), &ime_list);
- spin_unlock(&ime_lock);
-}
-
-/**
- * inter_module_unregister - unregister a set of inter module data.
- * @im_name: an arbitrary string to identify the data, must be unique
- *
- * Description: Check that the im_name has been registered, complain if
- * it has not. For existing data, remove it from the
- * inter_module_entry list.
- */
-void inter_module_unregister(const char *im_name)
-{
- struct list_head *tmp;
- struct inter_module_entry *ime;
-
- spin_lock(&ime_lock);
- list_for_each(tmp, &ime_list) {
- ime = list_entry(tmp, struct inter_module_entry, list);
- if (strcmp(ime->im_name, im_name) == 0) {
- list_del(&(ime->list));
- spin_unlock(&ime_lock);
- kfree(ime);
- return;
- }
- }
- spin_unlock(&ime_lock);
- if (kmalloc_failed) {
- printk(KERN_ERR
- "inter_module_unregister: no entry for '%s', "
- "probably caused by previous kmalloc failure\n",
- im_name);
- return;
- }
- else {
- /* Program logic error, fatal */
- printk(KERN_ERR "inter_module_unregister: no entry for '%s'", im_name);
- BUG();
- }
-}
-
-/**
- * inter_module_get - return arbitrary userdata from another module.
- * @im_name: an arbitrary string to identify the data, must be unique
- *
- * Description: If the im_name has not been registered, return NULL.
- * Try to increment the use count on the owning module, if that fails
- * then return NULL. Otherwise return the userdata.
- */
-static const void *inter_module_get(const char *im_name)
-{
- struct list_head *tmp;
- struct inter_module_entry *ime;
- const void *result = NULL;
-
- spin_lock(&ime_lock);
- list_for_each(tmp, &ime_list) {
- ime = list_entry(tmp, struct inter_module_entry, list);
- if (strcmp(ime->im_name, im_name) == 0) {
- if (try_module_get(ime->owner))
- result = ime->userdata;
- break;
- }
- }
- spin_unlock(&ime_lock);
- return(result);
-}
-
-/**
- * inter_module_get_request - im get with automatic request_module.
- * @im_name: an arbitrary string to identify the data, must be unique
- * @modname: module that is expected to register im_name
- *
- * Description: If inter_module_get fails, do request_module then retry.
- */
-const void *inter_module_get_request(const char *im_name, const char *modname)
-{
- const void *result = inter_module_get(im_name);
- if (!result) {
- request_module("%s", modname);
- result = inter_module_get(im_name);
- }
- return(result);
-}
-
-/**
- * inter_module_put - release use of data from another module.
- * @im_name: an arbitrary string to identify the data, must be unique
- *
- * Description: If the im_name has not been registered, complain,
- * otherwise decrement the use count on the owning module.
- */
-void inter_module_put(const char *im_name)
-{
- struct list_head *tmp;
- struct inter_module_entry *ime;
-
- spin_lock(&ime_lock);
- list_for_each(tmp, &ime_list) {
- ime = list_entry(tmp, struct inter_module_entry, list);
- if (strcmp(ime->im_name, im_name) == 0) {
- if (ime->owner)
- module_put(ime->owner);
- spin_unlock(&ime_lock);
- return;
- }
- }
- spin_unlock(&ime_lock);
- printk(KERN_ERR "inter_module_put: no entry for '%s'", im_name);
- BUG();
-}
-
-EXPORT_SYMBOL(inter_module_register);
-EXPORT_SYMBOL(inter_module_unregister);
-EXPORT_SYMBOL(inter_module_get_request);
-EXPORT_SYMBOL(inter_module_put);
-
-MODULE_LICENSE("GPL");
-
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ac766ad573e..1279e349953 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -246,8 +246,10 @@ int setup_irq(unsigned int irq, struct irqaction * new)
mismatch:
spin_unlock_irqrestore(&desc->lock, flags);
- printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
- dump_stack();
+ if (!(new->flags & SA_PROBEIRQ)) {
+ printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
+ dump_stack();
+ }
return -EBUSY;
}
diff --git a/kernel/module.c b/kernel/module.c
index d24deb0dbbc..bbe04862e1b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -705,14 +705,14 @@ EXPORT_SYMBOL(__symbol_put);
void symbol_put_addr(void *addr)
{
- unsigned long flags;
+ struct module *modaddr;
- spin_lock_irqsave(&modlist_lock, flags);
- if (!kernel_text_address((unsigned long)addr))
- BUG();
+ if (core_kernel_text((unsigned long)addr))
+ return;
- module_put(module_text_address((unsigned long)addr));
- spin_unlock_irqrestore(&modlist_lock, flags);
+ if (!(modaddr = module_text_address((unsigned long)addr)))
+ BUG();
+ module_put(modaddr);
}
EXPORT_SYMBOL_GPL(symbol_put_addr);
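The rewrite above makes symbol_put_addr() tolerate core kernel text (it simply returns) and drops the stale modlist_lock; only a module-text address with no owning module is a BUG. Typical pairing with symbol_get(), using an invented symbol name purely for illustration:

#include <linux/module.h>

/* "other_module_hook" is hypothetical; any exported symbol works */
extern void other_module_hook(int arg);

static void call_hook_if_present(void)
{
	void (*hook)(int) = symbol_get(other_module_hook);

	if (!hook)
		return;			/* provider module not loaded */

	hook(42);
	/* drop the module reference by address; with the fix above,
	 * an address inside the core kernel is silently tolerated */
	symbol_put_addr(hook);
}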
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 520f6c59948..d38d9ec3276 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -555,9 +555,6 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now)
struct cpu_timer_list *next;
unsigned long i;
- if (CPUCLOCK_PERTHREAD(timer->it_clock) && (p->flags & PF_EXITING))
- return;
-
head = (CPUCLOCK_PERTHREAD(timer->it_clock) ?
p->cpu_timers : p->signal->cpu_timers);
head += CPUCLOCK_WHICH(timer->it_clock);
@@ -1173,6 +1170,9 @@ static void check_process_timers(struct task_struct *tsk,
}
t = tsk;
do {
+ if (unlikely(t->flags & PF_EXITING))
+ continue;
+
ticks = cputime_add(cputime_add(t->utime, t->stime),
prof_left);
if (!cputime_eq(prof_expires, cputime_zero) &&
@@ -1193,11 +1193,7 @@ static void check_process_timers(struct task_struct *tsk,
t->it_sched_expires > sched)) {
t->it_sched_expires = sched;
}
-
- do {
- t = next_thread(t);
- } while (unlikely(t->flags & PF_EXITING));
- } while (t != tsk);
+ } while ((t = next_thread(t)) != tsk);
}
}
@@ -1289,30 +1285,30 @@ void run_posix_cpu_timers(struct task_struct *tsk)
#undef UNEXPIRED
- BUG_ON(tsk->exit_state);
-
/*
* Double-check with locks held.
*/
read_lock(&tasklist_lock);
- spin_lock(&tsk->sighand->siglock);
+ if (likely(tsk->signal != NULL)) {
+ spin_lock(&tsk->sighand->siglock);
- /*
- * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
- * all the timers that are firing, and put them on the firing list.
- */
- check_thread_timers(tsk, &firing);
- check_process_timers(tsk, &firing);
+ /*
+ * Here we take off tsk->cpu_timers[N] and tsk->signal->cpu_timers[N]
+ * all the timers that are firing, and put them on the firing list.
+ */
+ check_thread_timers(tsk, &firing);
+ check_process_timers(tsk, &firing);
- /*
- * We must release these locks before taking any timer's lock.
- * There is a potential race with timer deletion here, as the
- * siglock now protects our private firing list. We have set
- * the firing flag in each timer, so that a deletion attempt
- * that gets the timer lock before we do will give it up and
- * spin until we've taken care of that timer below.
- */
- spin_unlock(&tsk->sighand->siglock);
+ /*
+ * We must release these locks before taking any timer's lock.
+ * There is a potential race with timer deletion here, as the
+ * siglock now protects our private firing list. We have set
+ * the firing flag in each timer, so that a deletion attempt
+ * that gets the timer lock before we do will give it up and
+ * spin until we've taken care of that timer below.
+ */
+ spin_unlock(&tsk->sighand->siglock);
+ }
read_unlock(&tasklist_lock);
/*
diff --git a/kernel/power/main.c b/kernel/power/main.c
index ee371f50cca..0a907f0dc56 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -86,6 +86,7 @@ static int suspend_prepare(suspend_state_t state)
goto Thaw;
}
+ suspend_console();
if ((error = device_suspend(PMSG_SUSPEND))) {
printk(KERN_ERR "Some devices failed to suspend\n");
goto Finish;
@@ -133,6 +134,7 @@ int suspend_enter(suspend_state_t state)
static void suspend_finish(suspend_state_t state)
{
device_resume();
+ resume_console();
thaw_processes();
enable_nonboot_cpus();
if (pm_ops && pm_ops->finish)
@@ -272,7 +274,7 @@ static ssize_t state_store(struct subsystem * subsys, const char * buf, size_t n
if (*s && !strncmp(buf, *s, len))
break;
}
- if (*s)
+ if (state < PM_SUSPEND_MAX && *s)
error = enter_state(state);
else
error = -EINVAL;
diff --git a/kernel/printk.c b/kernel/printk.c
index c056f332443..19a95561929 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -67,6 +67,7 @@ EXPORT_SYMBOL(oops_in_progress);
* driver system.
*/
static DECLARE_MUTEX(console_sem);
+static DECLARE_MUTEX(secondary_console_sem);
struct console *console_drivers;
/*
* This is used for debugging the mess that is the VT code by
@@ -76,7 +77,7 @@ struct console *console_drivers;
* path in the console code where we end up in places I want
 * locked without the console semaphore held
*/
-static int console_locked;
+static int console_locked, console_suspended;
/*
* logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
@@ -698,6 +699,23 @@ int __init add_preferred_console(char *name, int idx, char *options)
}
/**
+ * suspend_console - suspend the console subsystem
+ *
+ * This disables printk() while we go into suspend states
+ */
+void suspend_console(void)
+{
+ acquire_console_sem();
+ console_suspended = 1;
+}
+
+void resume_console(void)
+{
+ console_suspended = 0;
+ release_console_sem();
+}
+
+/**
* acquire_console_sem - lock the console system for exclusive use.
*
* Acquires a semaphore which guarantees that the caller has
@@ -708,6 +726,10 @@ int __init add_preferred_console(char *name, int idx, char *options)
void acquire_console_sem(void)
{
BUG_ON(in_interrupt());
+ if (console_suspended) {
+ down(&secondary_console_sem);
+ return;
+ }
down(&console_sem);
console_locked = 1;
console_may_schedule = 1;
@@ -750,6 +772,10 @@ void release_console_sem(void)
unsigned long _con_start, _log_end;
unsigned long wake_klogd = 0;
+ if (console_suspended) {
+ up(&secondary_console_sem);
+ return;
+ }
for ( ; ; ) {
spin_lock_irqsave(&logbuf_lock, flags);
wake_klogd |= log_start - log_end;
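Together with the kernel/power/main.c hunks earlier in this patch, the intended bracketing is: park the console before devices suspend, release it after they resume. printk() keeps logging into logbuf throughout; because release_console_sem() bails out while console_suspended is set, the parked output drains on the first console unlock after resume. An illustrative (not verbatim) suspend path:

#include <linux/console.h>
#include <linux/device.h>
#include <linux/pm.h>

static int example_enter_suspend(void)
{
	int error;

	suspend_console();	/* holds console_sem; output is parked */

	error = device_suspend(PMSG_SUSPEND);
	if (error) {
		resume_console();
		return error;
	}

	/* ... platform enters and leaves the sleep state here ... */

	device_resume();
	resume_console();	/* parked messages drain on the next unlock */
	return 0;
}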
diff --git a/kernel/profile.c b/kernel/profile.c
index 5a730fdb1a2..68afe121e50 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -299,7 +299,7 @@ out:
}
#ifdef CONFIG_HOTPLUG_CPU
-static int __devinit profile_cpu_callback(struct notifier_block *info,
+static int profile_cpu_callback(struct notifier_block *info,
unsigned long action, void *__cpu)
{
int node, cpu = (unsigned long)__cpu;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4e0f0ec003f..921c22ad16e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -148,12 +148,34 @@ int ptrace_may_attach(struct task_struct *task)
int ptrace_attach(struct task_struct *task)
{
int retval;
- task_lock(task);
+
retval = -EPERM;
if (task->pid <= 1)
- goto bad;
+ goto out;
if (task->tgid == current->tgid)
- goto bad;
+ goto out;
+
+repeat:
+ /*
+ * Nasty, nasty.
+ *
+ * We want to hold both the task-lock and the
+ * tasklist_lock for writing at the same time.
+ * But that's against the rules (tasklist_lock
+ * is taken for reading by interrupts on other
+ * cpu's that may have task_lock).
+ */
+ task_lock(task);
+ local_irq_disable();
+ if (!write_trylock(&tasklist_lock)) {
+ local_irq_enable();
+ task_unlock(task);
+ do {
+ cpu_relax();
+ } while (!write_can_lock(&tasklist_lock));
+ goto repeat;
+ }
+
/* the same process cannot be attached many times */
if (task->ptrace & PT_PTRACED)
goto bad;
@@ -166,17 +188,15 @@ int ptrace_attach(struct task_struct *task)
? PT_ATTACHED : 0);
if (capable(CAP_SYS_PTRACE))
task->ptrace |= PT_PTRACE_CAP;
- task_unlock(task);
- write_lock_irq(&tasklist_lock);
__ptrace_link(task, current);
- write_unlock_irq(&tasklist_lock);
force_sig_specific(SIGSTOP, task);
- return 0;
bad:
+ write_unlock_irq(&tasklist_lock);
task_unlock(task);
+out:
return retval;
}
@@ -417,21 +437,22 @@ int ptrace_request(struct task_struct *child, long request,
*/
int ptrace_traceme(void)
{
- int ret;
+ int ret = -EPERM;
/*
* Are we already being traced?
*/
- if (current->ptrace & PT_PTRACED)
- return -EPERM;
- ret = security_ptrace(current->parent, current);
- if (ret)
- return -EPERM;
- /*
- * Set the ptrace bit in the process ptrace flags.
- */
- current->ptrace |= PT_PTRACED;
- return 0;
+ task_lock(current);
+ if (!(current->ptrace & PT_PTRACED)) {
+ ret = security_ptrace(current->parent, current);
+ /*
+ * Set the ptrace bit in the process ptrace flags.
+ */
+ if (!ret)
+ current->ptrace |= PT_PTRACED;
+ }
+ task_unlock(current);
+ return ret;
}
/**
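The attach path now holds task_lock() and takes tasklist_lock for writing on top of it, which is against the documented ordering; the trylock-and-back-off loop is what makes it safe against interrupt-context readers of tasklist_lock on other CPUs. The same pattern in isolation, as a generic sketch rather than new API:

#include <linux/sched.h>
#include <linux/spinlock.h>

static void lock_task_and_tasklist(struct task_struct *task)
{
repeat:
	task_lock(task);		/* the "inner" lock */
	local_irq_disable();
	if (!write_trylock(&tasklist_lock)) {
		/* back off completely so we never spin while holding
		 * task_lock, which irq-context readers may need */
		local_irq_enable();
		task_unlock(task);
		do {
			cpu_relax();
		} while (!write_can_lock(&tasklist_lock));
		goto repeat;		/* looks free; try again for real */
	}
	/* both held; release with write_unlock_irq(&tasklist_lock)
	 * followed by task_unlock(task) */
}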
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 13458bbaa1b..2058f88c7bb 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -479,12 +479,31 @@ static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
return 0;
}
+/*
+ * Check to see if there is any immediate RCU-related work to be done
+ * by the current CPU, returning 1 if so. This function is part of the
+ * RCU implementation; it is -not- an exported member of the RCU API.
+ */
int rcu_pending(int cpu)
{
return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
__rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
}
+/*
+ * Check to see if any future RCU-related work will need to be done
+ * by the current CPU, even if none need be done immediately, returning
+ * 1 if so. This function is part of the RCU implementation; it is -not-
+ * an exported member of the RCU API.
+ */
+int rcu_needs_cpu(int cpu)
+{
+ struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+ struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
+
+ return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
+}
+
void rcu_check_callbacks(int cpu, int user)
{
if (user ||
@@ -520,7 +539,7 @@ static void __devinit rcu_online_cpu(int cpu)
tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
}
-static int __devinit rcu_cpu_notify(struct notifier_block *self,
+static int rcu_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -537,7 +556,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __devinitdata rcu_nb = {
+static struct notifier_block rcu_nb = {
.notifier_call = rcu_cpu_notify,
};
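rcu_needs_cpu() exists so a dynamic-tick idle path can ask whether this CPU still has queued callbacks (curlist on either flavor) or other RCU work pending before stopping its tick. A hypothetical consumer of the new function:

#include <linux/rcupdate.h>
#include <linux/smp.h>

/* hypothetical helper a NO_HZ idle loop might call */
static int cpu_can_stop_tick(void)
{
	int cpu = smp_processor_id();

	/* ticks are still required to advance grace periods */
	if (rcu_needs_cpu(cpu))
		return 0;

	return 1;
}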
diff --git a/kernel/sched.c b/kernel/sched.c
index 365f0b90b4d..c13f1bd2df7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -665,55 +665,13 @@ static int effective_prio(task_t *p)
}
/*
- * We place interactive tasks back into the active array, if possible.
- *
- * To guarantee that this does not starve expired tasks we ignore the
- * interactivity of a task if the first expired task had to wait more
- * than a 'reasonable' amount of time. This deadline timeout is
- * load-dependent, as the frequency of array switched decreases with
- * increasing number of running tasks. We also ignore the interactivity
- * if a better static_prio task has expired, and switch periodically
- * regardless, to ensure that highly interactive tasks do not starve
- * the less fortunate for unreasonably long periods.
- */
-static inline int expired_starving(runqueue_t *rq)
-{
- int limit;
-
- /*
- * Arrays were recently switched, all is well
- */
- if (!rq->expired_timestamp)
- return 0;
-
- limit = STARVATION_LIMIT * rq->nr_running;
-
- /*
- * It's time to switch arrays
- */
- if (jiffies - rq->expired_timestamp >= limit)
- return 1;
-
- /*
- * There's a better selection in the expired array
- */
- if (rq->curr->static_prio > rq->best_expired_prio)
- return 1;
-
- /*
- * All is well
- */
- return 0;
-}
-
-/*
* __activate_task - move a task to the runqueue.
*/
static void __activate_task(task_t *p, runqueue_t *rq)
{
prio_array_t *target = rq->active;
- if (unlikely(batch_task(p) || (expired_starving(rq) && !rt_task(p))))
+ if (batch_task(p))
target = rq->expired;
enqueue_task(p, target);
rq->nr_running++;
@@ -2532,6 +2490,22 @@ unsigned long long current_sched_time(const task_t *tsk)
}
/*
+ * We place interactive tasks back into the active array, if possible.
+ *
+ * To guarantee that this does not starve expired tasks we ignore the
+ * interactivity of a task if the first expired task had to wait more
+ * than a 'reasonable' amount of time. This deadline timeout is
+ * load-dependent, as the frequency of array switched decreases with
+ * increasing number of running tasks. We also ignore the interactivity
+ * if a better static_prio task has expired:
+ */
+#define EXPIRED_STARVING(rq) \
+ ((STARVATION_LIMIT && ((rq)->expired_timestamp && \
+ (jiffies - (rq)->expired_timestamp >= \
+ STARVATION_LIMIT * ((rq)->nr_running) + 1))) || \
+ ((rq)->curr->static_prio > (rq)->best_expired_prio))
+
+/*
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
@@ -2666,7 +2640,7 @@ void scheduler_tick(void)
if (!rq->expired_timestamp)
rq->expired_timestamp = jiffies;
- if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
+ if (!TASK_INTERACTIVE(p) || EXPIRED_STARVING(rq)) {
enqueue_task(p, rq->expired);
if (p->static_prio < rq->best_expired_prio)
rq->best_expired_prio = p->static_prio;
@@ -4814,7 +4788,7 @@ static int migration_call(struct notifier_block *nfb, unsigned long action,
/* Register at highest priority so that task migration (migrate_all_tasks)
* happens before everything else.
*/
-static struct notifier_block __devinitdata migration_notifier = {
+static struct notifier_block migration_notifier = {
.notifier_call = migration_call,
.priority = 10
};
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ec8fed42a86..336f92d64e2 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -446,7 +446,7 @@ static void takeover_tasklets(unsigned int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static int __devinit cpu_callback(struct notifier_block *nfb,
+static int cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -484,7 +484,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static struct notifier_block __devinitdata cpu_nfb = {
+static struct notifier_block cpu_nfb = {
.notifier_call = cpu_callback
};
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index ced91e1ff56..14c7faf0290 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu)
/*
* Create/destroy watchdog threads as CPUs come and go:
*/
-static int __devinit
+static int
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
@@ -140,7 +140,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
-static struct notifier_block __devinitdata cpu_nfb = {
+static struct notifier_block cpu_nfb = {
.notifier_call = cpu_callback
};
diff --git a/kernel/timer.c b/kernel/timer.c
index 88377378883..9e49deed468 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -541,6 +541,22 @@ found:
}
spin_unlock(&base->lock);
+ /*
+ * It can happen that other CPUs service timer IRQs and increment
+ * jiffies, but we have not yet got a local timer tick to process
+ * the timer wheels. In that case, the expiry time can be before
+ * jiffies, but since the high-resolution timer here is relative to
+ * jiffies, the default expression when high-resolution timers are
+ * not active,
+ *
+ * time_before(MAX_JIFFY_OFFSET + jiffies, expires)
+ *
+ * would falsely evaluate to true. If that is the case, just
+ * return jiffies so that we can immediately fire the local timer
+ */
+ if (time_before(expires, jiffies))
+ return jiffies;
+
if (time_before(hr_expires, expires))
return hr_expires;
@@ -1314,7 +1330,7 @@ static void __devinit migrate_timers(int cpu)
}
#endif /* CONFIG_HOTPLUG_CPU */
-static int __devinit timer_cpu_notify(struct notifier_block *self,
+static int timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1334,7 +1350,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}
-static struct notifier_block __devinitdata timers_nb = {
+static struct notifier_block timers_nb = {
.notifier_call = timer_cpu_notify,
};
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e9e464a9037..880fb415a8f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -547,7 +547,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
}
/* We're holding the cpucontrol mutex here */
-static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
+static int workqueue_cpu_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{