aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bounds.c23
-rw-r--r--kernel/cpuset.c25
-rw-r--r--kernel/exit.c8
-rw-r--r--kernel/fork.c66
-rw-r--r--kernel/hrtimer.c34
-rw-r--r--kernel/kexec.c3
-rw-r--r--kernel/kprobes.c349
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/power/Kconfig10
-rw-r--r--kernel/power/Makefile1
-rw-r--r--kernel/power/console.c27
-rw-r--r--kernel/power/pm.c205
-rw-r--r--kernel/ptrace.c7
-rw-r--r--kernel/sched.c54
-rw-r--r--kernel/sys.c27
-rw-r--r--kernel/time/tick-sched.c1
16 files changed, 414 insertions, 428 deletions
diff --git a/kernel/bounds.c b/kernel/bounds.c
new file mode 100644
index 00000000000..c3c55544db2
--- /dev/null
+++ b/kernel/bounds.c
@@ -0,0 +1,23 @@
+/*
+ * Generate definitions needed by the preprocessor.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#define __GENERATING_BOUNDS_H
+/* Include headers that define the enum constants of interest */
+#include <linux/page-flags.h>
+#include <linux/mmzone.h>
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+void foo(void)
+{
+ /* The enum constants to put into include/linux/bounds.h */
+ DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
+ DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+ /* End of constants */
+}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8b35fbd8292..48a976c52cf 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -941,7 +941,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
cs->mems_generation = cpuset_mems_generation++;
mutex_unlock(&callback_mutex);
- cpuset_being_rebound = cs; /* causes mpol_copy() rebind */
+ cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
fudge = 10; /* spare mmarray[] slots */
fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
@@ -992,7 +992,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
* rebind the vma mempolicies of each mm in mmarray[] to their
* new cpuset, and release that mm. The mpol_rebind_mm()
* call takes mmap_sem, which we couldn't take while holding
- * tasklist_lock. Forks can happen again now - the mpol_copy()
+ * tasklist_lock. Forks can happen again now - the mpol_dup()
* cpuset_being_rebound check will catch such forks, and rebind
* their vma mempolicies too. Because we still hold the global
* cgroup_mutex, we know that no other rebind effort will
@@ -1265,7 +1265,8 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
return -E2BIG;
/* +1 for nul-terminator */
- if ((buffer = kmalloc(nbytes + 1, GFP_KERNEL)) == 0)
+ buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+ if (!buffer)
return -ENOMEM;
if (copy_from_user(buffer, userbuf, nbytes)) {
@@ -1958,22 +1959,14 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
}
/**
- * cpuset_zonelist_valid_mems_allowed - check zonelist vs. curremt mems_allowed
- * @zl: the zonelist to be checked
+ * cpuset_nodemask_valid_mems_allowed - check nodemask vs. curremt mems_allowed
+ * @nodemask: the nodemask to be checked
*
- * Are any of the nodes on zonelist zl allowed in current->mems_allowed?
+ * Are any of the nodes in the nodemask allowed in current->mems_allowed?
*/
-int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
+int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
{
- int i;
-
- for (i = 0; zl->zones[i]; i++) {
- int nid = zone_to_nid(zl->zones[i]);
-
- if (node_isset(nid, current->mems_allowed))
- return 1;
- }
- return 0;
+ return nodes_intersects(*nodemask, current->mems_allowed);
}
/*
diff --git a/kernel/exit.c b/kernel/exit.c
index cece89f80ab..2a9d98c641a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -507,10 +507,9 @@ void put_files_struct(struct files_struct *files)
}
}
-EXPORT_SYMBOL(put_files_struct);
-
-void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
+void reset_files_struct(struct files_struct *files)
{
+ struct task_struct *tsk = current;
struct files_struct *old;
old = tsk->files;
@@ -519,7 +518,6 @@ void reset_files_struct(struct task_struct *tsk, struct files_struct *files)
task_unlock(tsk);
put_files_struct(old);
}
-EXPORT_SYMBOL(reset_files_struct);
void exit_files(struct task_struct *tsk)
{
@@ -969,7 +967,7 @@ NORET_TYPE void do_exit(long code)
proc_exit_connector(tsk);
exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA
- mpol_free(tsk->mempolicy);
+ mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
diff --git a/kernel/fork.c b/kernel/fork.c
index 89fe414645e..6067e429f28 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -279,7 +279,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (!tmp)
goto fail_nomem;
*tmp = *mpnt;
- pol = mpol_copy(vma_policy(mpnt));
+ pol = mpol_dup(vma_policy(mpnt));
retval = PTR_ERR(pol);
if (IS_ERR(pol))
goto fail_nomem_policy;
@@ -521,7 +521,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
* Allocate a new mm structure and copy contents from the
* mm structure of the passed in task structure.
*/
-static struct mm_struct *dup_mm(struct task_struct *tsk)
+struct mm_struct *dup_mm(struct task_struct *tsk)
{
struct mm_struct *mm, *oldmm = current->mm;
int err;
@@ -805,12 +805,6 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
goto out;
}
- /*
- * Note: we may be using current for both targets (See exec.c)
- * This works because we cache current->files (old) as oldf. Don't
- * break this.
- */
- tsk->files = NULL;
newf = dup_fd(oldf, &error);
if (!newf)
goto out;
@@ -846,34 +840,6 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
return 0;
}
-/*
- * Helper to unshare the files of the current task.
- * We don't want to expose copy_files internals to
- * the exec layer of the kernel.
- */
-
-int unshare_files(void)
-{
- struct files_struct *files = current->files;
- int rc;
-
- BUG_ON(!files);
-
- /* This can race but the race causes us to copy when we don't
- need to and drop the copy */
- if(atomic_read(&files->count) == 1)
- {
- atomic_inc(&files->count);
- return 0;
- }
- rc = copy_files(0, current);
- if(rc)
- current->files = files;
- return rc;
-}
-
-EXPORT_SYMBOL(unshare_files);
-
static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
{
struct sighand_struct *sig;
@@ -1150,7 +1116,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->audit_context = NULL;
cgroup_fork(p);
#ifdef CONFIG_NUMA
- p->mempolicy = mpol_copy(p->mempolicy);
+ p->mempolicy = mpol_dup(p->mempolicy);
if (IS_ERR(p->mempolicy)) {
retval = PTR_ERR(p->mempolicy);
p->mempolicy = NULL;
@@ -1408,7 +1374,7 @@ bad_fork_cleanup_security:
security_task_free(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
- mpol_free(p->mempolicy);
+ mpol_put(p->mempolicy);
bad_fork_cleanup_cgroup:
#endif
cgroup_exit(p, cgroup_callbacks_done);
@@ -1811,3 +1777,27 @@ bad_unshare_cleanup_thread:
bad_unshare_out:
return err;
}
+
+/*
+ * Helper to unshare the files of the current task.
+ * We don't want to expose copy_files internals to
+ * the exec layer of the kernel.
+ */
+
+int unshare_files(struct files_struct **displaced)
+{
+ struct task_struct *task = current;
+ struct files_struct *copy = NULL;
+ int error;
+
+ error = unshare_fd(CLONE_FILES, &copy);
+ if (error || !copy) {
+ *displaced = NULL;
+ return error;
+ }
+ *displaced = task->files;
+ task_lock(task);
+ task->files = copy;
+ task_unlock(task);
+ return 0;
+}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index f78777abe76..dea4c9124ac 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -590,7 +590,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
list_add_tail(&timer->cb_entry,
&base->cpu_base->cb_pending);
timer->state = HRTIMER_STATE_PENDING;
- raise_softirq(HRTIMER_SOFTIRQ);
return 1;
default:
BUG();
@@ -633,6 +632,11 @@ static int hrtimer_switch_to_hres(void)
return 1;
}
+static inline void hrtimer_raise_softirq(void)
+{
+ raise_softirq(HRTIMER_SOFTIRQ);
+}
+
#else
static inline int hrtimer_hres_active(void) { return 0; }
@@ -651,6 +655,7 @@ static inline int hrtimer_reprogram(struct hrtimer *timer,
{
return 0;
}
+static inline void hrtimer_raise_softirq(void) { }
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -850,7 +855,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
{
struct hrtimer_clock_base *base, *new_base;
unsigned long flags;
- int ret;
+ int ret, raise;
base = lock_hrtimer_base(timer, &flags);
@@ -884,8 +889,18 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
enqueue_hrtimer(timer, new_base,
new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
+ /*
+ * The timer may be expired and moved to the cb_pending
+ * list. We can not raise the softirq with base lock held due
+ * to a possible deadlock with runqueue lock.
+ */
+ raise = timer->state == HRTIMER_STATE_PENDING;
+
unlock_hrtimer_base(timer, &flags);
+ if (raise)
+ hrtimer_raise_softirq();
+
return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_start);
@@ -1080,8 +1095,19 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
* If the timer was rearmed on another CPU, reprogram
* the event device.
*/
- if (timer->base->first == &timer->node)
- hrtimer_reprogram(timer, timer->base);
+ struct hrtimer_clock_base *base = timer->base;
+
+ if (base->first == &timer->node &&
+ hrtimer_reprogram(timer, base)) {
+ /*
+ * Timer is expired. Thus move it from tree to
+ * pending list again.
+ */
+ __remove_hrtimer(timer, base,
+ HRTIMER_STATE_PENDING, 0);
+ list_add_tail(&timer->cb_entry,
+ &base->cpu_base->cb_pending);
+ }
}
}
spin_unlock_irq(&cpu_base->lock);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6782dce93d0..cb85c79989b 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1405,6 +1405,9 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
VMCOREINFO_NUMBER(NR_FREE_PAGES);
+ VMCOREINFO_NUMBER(PG_lru);
+ VMCOREINFO_NUMBER(PG_private);
+ VMCOREINFO_NUMBER(PG_swapcache);
arch_crash_save_vmcoreinfo();
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index fcfb580c3af..1e0250cb948 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -72,6 +72,18 @@ DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
DEFINE_SPINLOCK(kretprobe_lock); /* Protects kretprobe_inst_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+/*
+ * Normally, functions that we'd want to prohibit kprobes in, are marked
+ * __kprobes. But, there are cases where such functions already belong to
+ * a different section (__sched for preempt_schedule)
+ *
+ * For such cases, we now have a blacklist
+ */
+struct kprobe_blackpoint kprobe_blacklist[] = {
+ {"preempt_schedule",},
+ {NULL} /* Terminator */
+};
+
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
/*
* kprobe->ainsn.insn points to the copy of the instruction to be
@@ -417,6 +429,21 @@ static inline void free_rp_inst(struct kretprobe *rp)
}
}
+static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
+{
+ unsigned long flags;
+ struct kretprobe_instance *ri;
+ struct hlist_node *pos, *next;
+ /* No race here */
+ spin_lock_irqsave(&kretprobe_lock, flags);
+ hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
+ ri->rp = NULL;
+ hlist_del(&ri->uflist);
+ }
+ spin_unlock_irqrestore(&kretprobe_lock, flags);
+ free_rp_inst(rp);
+}
+
/*
* Keep all fields in the kprobe consistent
*/
@@ -492,9 +519,22 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
static int __kprobes in_kprobes_functions(unsigned long addr)
{
+ struct kprobe_blackpoint *kb;
+
if (addr >= (unsigned long)__kprobes_text_start &&
addr < (unsigned long)__kprobes_text_end)
return -EINVAL;
+ /*
+ * If there exists a kprobe_blacklist, verify and
+ * fail any probe registration in the prohibited area
+ */
+ for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
+ if (kb->start_addr) {
+ if (addr >= kb->start_addr &&
+ addr < (kb->start_addr + kb->range))
+ return -EINVAL;
+ }
+ }
return 0;
}
@@ -555,6 +595,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
}
p->nmissed = 0;
+ INIT_LIST_HEAD(&p->list);
mutex_lock(&kprobe_mutex);
old_p = get_kprobe(p->addr);
if (old_p) {
@@ -581,35 +622,28 @@ out:
return ret;
}
-int __kprobes register_kprobe(struct kprobe *p)
-{
- return __register_kprobe(p, (unsigned long)__builtin_return_address(0));
-}
-
-void __kprobes unregister_kprobe(struct kprobe *p)
+/*
+ * Unregister a kprobe without a scheduler synchronization.
+ */
+static int __kprobes __unregister_kprobe_top(struct kprobe *p)
{
- struct module *mod;
struct kprobe *old_p, *list_p;
- int cleanup_p;
- mutex_lock(&kprobe_mutex);
old_p = get_kprobe(p->addr);
- if (unlikely(!old_p)) {
- mutex_unlock(&kprobe_mutex);
- return;
- }
+ if (unlikely(!old_p))
+ return -EINVAL;
+
if (p != old_p) {
list_for_each_entry_rcu(list_p, &old_p->list, list)
if (list_p == p)
/* kprobe p is a valid probe */
goto valid_p;
- mutex_unlock(&kprobe_mutex);
- return;
+ return -EINVAL;
}
valid_p:
if (old_p == p ||
(old_p->pre_handler == aggr_pre_handler &&
- p->list.next == &old_p->list && p->list.prev == &old_p->list)) {
+ list_is_singular(&old_p->list))) {
/*
* Only probe on the hash list. Disarm only if kprobes are
* enabled - otherwise, the breakpoint would already have
@@ -618,43 +652,97 @@ valid_p:
if (kprobe_enabled)
arch_disarm_kprobe(p);
hlist_del_rcu(&old_p->hlist);
- cleanup_p = 1;
} else {
+ if (p->break_handler)
+ old_p->break_handler = NULL;
+ if (p->post_handler) {
+ list_for_each_entry_rcu(list_p, &old_p->list, list) {
+ if ((list_p != p) && (list_p->post_handler))
+ goto noclean;
+ }
+ old_p->post_handler = NULL;
+ }
+noclean:
list_del_rcu(&p->list);
- cleanup_p = 0;
}
+ return 0;
+}
- mutex_unlock(&kprobe_mutex);
+static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
+{
+ struct module *mod;
+ struct kprobe *old_p;
- synchronize_sched();
if (p->mod_refcounted) {
mod = module_text_address((unsigned long)p->addr);
if (mod)
module_put(mod);
}
- if (cleanup_p) {
- if (p != old_p) {
- list_del_rcu(&p->list);
+ if (list_empty(&p->list) || list_is_singular(&p->list)) {
+ if (!list_empty(&p->list)) {
+ /* "p" is the last child of an aggr_kprobe */
+ old_p = list_entry(p->list.next, struct kprobe, list);
+ list_del(&p->list);
kfree(old_p);
}
arch_remove_kprobe(p);
- } else {
- mutex_lock(&kprobe_mutex);
- if (p->break_handler)
- old_p->break_handler = NULL;
- if (p->post_handler){
- list_for_each_entry_rcu(list_p, &old_p->list, list){
- if (list_p->post_handler){
- cleanup_p = 2;
- break;
- }
- }
- if (cleanup_p == 0)
- old_p->post_handler = NULL;
+ }
+}
+
+static int __register_kprobes(struct kprobe **kps, int num,
+ unsigned long called_from)
+{
+ int i, ret = 0;
+
+ if (num <= 0)
+ return -EINVAL;
+ for (i = 0; i < num; i++) {
+ ret = __register_kprobe(kps[i], called_from);
+ if (ret < 0 && i > 0) {
+ unregister_kprobes(kps, i);
+ break;
}
- mutex_unlock(&kprobe_mutex);
}
+ return ret;
+}
+
+/*
+ * Registration and unregistration functions for kprobe.
+ */
+int __kprobes register_kprobe(struct kprobe *p)
+{
+ return __register_kprobes(&p, 1,
+ (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kprobe(struct kprobe *p)
+{
+ unregister_kprobes(&p, 1);
+}
+
+int __kprobes register_kprobes(struct kprobe **kps, int num)
+{
+ return __register_kprobes(kps, num,
+ (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kprobes(struct kprobe **kps, int num)
+{
+ int i;
+
+ if (num <= 0)
+ return;
+ mutex_lock(&kprobe_mutex);
+ for (i = 0; i < num; i++)
+ if (__unregister_kprobe_top(kps[i]) < 0)
+ kps[i]->addr = NULL;
+ mutex_unlock(&kprobe_mutex);
+
+ synchronize_sched();
+ for (i = 0; i < num; i++)
+ if (kps[i]->addr)
+ __unregister_kprobe_bottom(kps[i]);
}
static struct notifier_block kprobe_exceptions_nb = {
@@ -667,24 +755,69 @@ unsigned long __weak arch_deref_entry_point(void *entry)
return (unsigned long)entry;
}
-int __kprobes register_jprobe(struct jprobe *jp)
+static int __register_jprobes(struct jprobe **jps, int num,
+ unsigned long called_from)
{
- unsigned long addr = arch_deref_entry_point(jp->entry);
+ struct jprobe *jp;
+ int ret = 0, i;
- if (!kernel_text_address(addr))
+ if (num <= 0)
return -EINVAL;
+ for (i = 0; i < num; i++) {
+ unsigned long addr;
+ jp = jps[i];
+ addr = arch_deref_entry_point(jp->entry);
+
+ if (!kernel_text_address(addr))
+ ret = -EINVAL;
+ else {
+ /* Todo: Verify probepoint is a function entry point */
+ jp->kp.pre_handler = setjmp_pre_handler;
+ jp->kp.break_handler = longjmp_break_handler;
+ ret = __register_kprobe(&jp->kp, called_from);
+ }
+ if (ret < 0 && i > 0) {
+ unregister_jprobes(jps, i);
+ break;
+ }
+ }
+ return ret;
+}
- /* Todo: Verify probepoint is a function entry point */
- jp->kp.pre_handler = setjmp_pre_handler;
- jp->kp.break_handler = longjmp_break_handler;
-
- return __register_kprobe(&jp->kp,
+int __kprobes register_jprobe(struct jprobe *jp)
+{
+ return __register_jprobes(&jp, 1,
(unsigned long)__builtin_return_address(0));
}
void __kprobes unregister_jprobe(struct jprobe *jp)
{
- unregister_kprobe(&jp->kp);
+ unregister_jprobes(&jp, 1);
+}
+
+int __kprobes register_jprobes(struct jprobe **jps, int num)
+{
+ return __register_jprobes(jps, num,
+ (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_jprobes(struct jprobe **jps, int num)
+{
+ int i;
+
+ if (num <= 0)
+ return;
+ mutex_lock(&kprobe_mutex);
+ for (i = 0; i < num; i++)
+ if (__unregister_kprobe_top(&jps[i]->kp) < 0)
+ jps[i]->kp.addr = NULL;
+ mutex_unlock(&kprobe_mutex);
+
+ synchronize_sched();
+ for (i = 0; i < num; i++) {
+ if (jps[i]->kp.addr)
+ __unregister_kprobe_bottom(&jps[i]->kp);
+ }
}
#ifdef CONFIG_KRETPROBES
@@ -725,7 +858,8 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
return 0;
}
-int __kprobes register_kretprobe(struct kretprobe *rp)
+static int __kprobes __register_kretprobe(struct kretprobe *rp,
+ unsigned long called_from)
{
int ret = 0;
struct kretprobe_instance *inst;
@@ -771,46 +905,101 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
rp->nmissed = 0;
/* Establish function entry probe point */
- if ((ret = __register_kprobe(&rp->kp,
- (unsigned long)__builtin_return_address(0))) != 0)
+ ret = __register_kprobe(&rp->kp, called_from);
+ if (ret != 0)
free_rp_inst(rp);
return ret;
}
+static int __register_kretprobes(struct kretprobe **rps, int num,
+ unsigned long called_from)
+{
+ int ret = 0, i;
+
+ if (num <= 0)
+ return -EINVAL;
+ for (i = 0; i < num; i++) {
+ ret = __register_kretprobe(rps[i], called_from);
+ if (ret < 0 && i > 0) {
+ unregister_kretprobes(rps, i);
+ break;
+ }
+ }
+ return ret;
+}
+
+int __kprobes register_kretprobe(struct kretprobe *rp)
+{
+ return __register_kretprobes(&rp, 1,
+ (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kretprobe(struct kretprobe *rp)
+{
+ unregister_kretprobes(&rp, 1);
+}
+
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
+{
+ return __register_kretprobes(rps, num,
+ (unsigned long)__builtin_return_address(0));
+}
+
+void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+{
+ int i;
+
+ if (num <= 0)
+ return;
+ mutex_lock(&kprobe_mutex);
+ for (i = 0; i < num; i++)
+ if (__unregister_kprobe_top(&rps[i]->kp) < 0)
+ rps[i]->kp.addr = NULL;
+ mutex_unlock(&kprobe_mutex);
+
+ synchronize_sched();
+ for (i = 0; i < num; i++) {
+ if (rps[i]->kp.addr) {
+ __unregister_kprobe_bottom(&rps[i]->kp);
+ cleanup_rp_inst(rps[i]);
+ }
+ }
+}
+
#else /* CONFIG_KRETPROBES */
int __kprobes register_kretprobe(struct kretprobe *rp)
{
return -ENOSYS;
}
-static int __kprobes pre_handler_kretprobe(struct kprobe *p,
- struct pt_regs *regs)
+int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
- return 0;
+ return -ENOSYS;
}
-#endif /* CONFIG_KRETPROBES */
-
void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
- unsigned long flags;
- struct kretprobe_instance *ri;
- struct hlist_node *pos, *next;
+}
- unregister_kprobe(&rp->kp);
+void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
+{
+}
- /* No race here */
- spin_lock_irqsave(&kretprobe_lock, flags);
- hlist_for_each_entry_safe(ri, pos, next, &rp->used_instances, uflist) {
- ri->rp = NULL;
- hlist_del(&ri->uflist);
- }
- spin_unlock_irqrestore(&kretprobe_lock, flags);
- free_rp_inst(rp);
+static int __kprobes pre_handler_kretprobe(struct kprobe *p,
+ struct pt_regs *regs)
+{
+ return 0;
}
+#endif /* CONFIG_KRETPROBES */
+
static int __init init_kprobes(void)
{
int i, err = 0;
+ unsigned long offset = 0, size = 0;
+ char *modname, namebuf[128];
+ const char *symbol_name;
+ void *addr;
+ struct kprobe_blackpoint *kb;
/* FIXME allocate the probe table, currently defined statically */
/* initialize all list heads */
@@ -819,6 +1008,28 @@ static int __init init_kprobes(void)
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
}
+ /*
+ * Lookup and populate the kprobe_blacklist.
+ *
+ * Unlike the kretprobe blacklist, we'll need to determine
+ * the range of addresses that belong to the said functions,
+ * since a kprobe need not necessarily be at the beginning
+ * of a function.
+ */
+ for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
+ kprobe_lookup_name(kb->name, addr);
+ if (!addr)
+ continue;
+
+ kb->start_addr = (unsigned long)addr;
+ symbol_name = kallsyms_lookup(kb->start_addr,
+ &size, &offset, &modname, namebuf);
+ if (!symbol_name)
+ kb->range = 0;
+ else
+ kb->range = size;
+ }
+
if (kretprobe_blacklist_size) {
/* lookup the function address from its name */
for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
@@ -1066,8 +1277,12 @@ module_init(init_kprobes);
EXPORT_SYMBOL_GPL(register_kprobe);
EXPORT_SYMBOL_GPL(unregister_kprobe);
+EXPORT_SYMBOL_GPL(register_kprobes);
+EXPORT_SYMBOL_GPL(unregister_kprobes);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
+EXPORT_SYMBOL_GPL(register_jprobes);
+EXPORT_SYMBOL_GPL(unregister_jprobes);
#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(jprobe_return);
#endif
@@ -1075,4 +1290,6 @@ EXPORT_SYMBOL_GPL(jprobe_return);
#ifdef CONFIG_KPROBES
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
+EXPORT_SYMBOL_GPL(register_kretprobes);
+EXPORT_SYMBOL_GPL(unregister_kretprobes);
#endif
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 6d792b66d85..5ca37fa50be 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -92,7 +92,7 @@ static struct pid_namespace *create_pid_namespace(int level)
atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
for (i = 1; i < PIDMAP_ENTRIES; i++) {
- ns->pidmap[i].page = 0;
+ ns->pidmap[i].page = NULL;
atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6233f3b4ae6..b45da40e8d2 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -19,16 +19,6 @@ config PM
will issue the hlt instruction if nothing is to be done, thereby
sending the processor to sleep and saving power.
-config PM_LEGACY
- bool "Legacy Power Management API (DEPRECATED)"
- depends on PM
- default n
- ---help---
- Support for pm_register() and friends. This old API is obsoleted
- by the driver model.
-
- If unsure, say N.
-
config PM_DEBUG
bool "Power Management Debug Support"
depends on PM
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index f7dfff28ecd..597823b5b70 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -4,7 +4,6 @@ EXTRA_CFLAGS += -DDEBUG
endif
obj-y := main.o
-obj-$(CONFIG_PM_LEGACY) += pm.o
obj-$(CONFIG_PM_SLEEP) += process.o console.o
obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 89bcf4973ee..b8628be2a46 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -7,17 +7,39 @@
#include <linux/vt_kern.h>
#include <linux/kbd_kern.h>
#include <linux/console.h>
+#include <linux/module.h>
#include "power.h"
#if defined(CONFIG_VT) && defined(CONFIG_VT_CONSOLE)
#define SUSPEND_CONSOLE (MAX_NR_CONSOLES-1)
static int orig_fgconsole, orig_kmsg;
+static int disable_vt_switch;
+
+/*
+ * Normally during a suspend, we allocate a new console and switch to it.
+ * When we resume, we switch back to the original console. This switch
+ * can be slow, so on systems where the framebuffer can handle restoration
+ * of video registers anyways, there's little point in doing the console
+ * switch. This function allows you to disable it by passing it '0'.
+ */
+void pm_set_vt_switch(int do_switch)
+{
+ acquire_console_sem();
+ disable_vt_switch = !do_switch;
+ release_console_sem();
+}
+EXPORT_SYMBOL(pm_set_vt_switch);
int pm_prepare_console(void)
{
acquire_console_sem();
+ if (disable_vt_switch) {
+ release_console_sem();
+ return 0;
+ }
+
orig_fgconsole = fg_console;
if (vc_allocate(SUSPEND_CONSOLE)) {
@@ -50,9 +72,12 @@ int pm_prepare_console(void)
void pm_restore_console(void)
{
acquire_console_sem();
+ if (disable_vt_switch) {
+ release_console_sem();
+ return;
+ }
set_console(orig_fgconsole);
release_console_sem();
kmsg_redirect = orig_kmsg;
- return;
}
#endif
diff --git a/kernel/power/pm.c b/kernel/power/pm.c
deleted file mode 100644
index 60c73fa670d..00000000000
--- a/kernel/power/pm.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * pm.c - Power management interface
- *
- * Copyright (C) 2000 Andrew Henroid
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/pm.h>
-#include <linux/pm_legacy.h>
-#include <linux/interrupt.h>
-#include <linux/mutex.h>
-
-/*
- * Locking notes:
- * pm_devs_lock can be a semaphore providing pm ops are not called
- * from an interrupt handler (already a bad idea so no change here). Each
- * change must be protected so that an unlink of an entry doesn't clash
- * with a pm send - which is permitted to sleep in the current architecture
- *
- * Module unloads clashing with pm events now work out safely, the module
- * unload path will block until the event has been sent. It may well block
- * until a resume but that will be fine.
- */
-
-static DEFINE_MUTEX(pm_devs_lock);
-static LIST_HEAD(pm_devs);
-
-/**
- * pm_register - register a device with power management
- * @type: device type
- * @id: device ID
- * @callback: callback function
- *
- * Add a device to the list of devices that wish to be notified about
- * power management events. A &pm_dev structure is returned on success,
- * on failure the return is %NULL.
- *
- * The callback function will be called in process context and
- * it may sleep.
- */
-
-struct pm_dev *pm_register(pm_dev_t type,
- unsigned long id,
- pm_callback callback)
-{
- struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL);
- if (dev) {
- dev->type = type;
- dev->id = id;
- dev->callback = callback;
-
- mutex_lock(&pm_devs_lock);
- list_add(&dev->entry, &pm_devs);
- mutex_unlock(&pm_devs_lock);
- }
- return dev;
-}
-
-/**
- * pm_send - send request to a single device
- * @dev: device to send to
- * @rqst: power management request
- * @data: data for the callback
- *
- * Issue a power management request to a given device. The
- * %PM_SUSPEND and %PM_RESUME events are handled specially. The
- * data field must hold the intended next state. No call is made
- * if the state matches.
- *
- * BUGS: what stops two power management requests occurring in parallel
- * and conflicting.
- *
- * WARNING: Calling pm_send directly is not generally recommended, in
- * particular there is no locking against the pm_dev going away. The
- * caller must maintain all needed locking or have 'inside knowledge'
- * on the safety. Also remember that this function is not locked against
- * pm_unregister. This means that you must handle SMP races on callback
- * execution and unload yourself.
- */
-
-static int pm_send(struct pm_dev *dev, pm_request_t rqst, void *data)
-{
- int status = 0;
- unsigned long prev_state, next_state;
-
- if (in_interrupt())
- BUG();
-
- switch (rqst) {
- case PM_SUSPEND:
- case PM_RESUME:
- prev_state = dev->state;
- next_state = (unsigned long) data;
- if (prev_state != next_state) {
- if (dev->callback)
- status = (*dev->callback)(dev, rqst, data);
- if (!status) {
- dev->state = next_state;
- dev->prev_state = prev_state;
- }
- }
- else {
- dev->prev_state = prev_state;
- }
- break;
- default:
- if (dev->callback)
- status = (*dev->callback)(dev, rqst, data);
- break;
- }
- return status;
-}
-
-/*
- * Undo incomplete request
- */
-static void pm_undo_all(struct pm_dev *last)
-{
- struct list_head *entry = last->entry.prev;
- while (entry != &pm_devs) {
- struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
- if (dev->state != dev->prev_state) {
- /* previous state was zero (running) resume or
- * previous state was non-zero (suspended) suspend
- */
- pm_request_t undo = (dev->prev_state
- ? PM_SUSPEND:PM_RESUME);
- pm_send(dev, undo, (void*) dev->prev_state);
- }
- entry = entry->prev;
- }
-}
-
-/**
- * pm_send_all - send request to all managed devices
- * @rqst: power management request
- * @data: data for the callback
- *
- * Issue a power management request to a all devices. The
- * %PM_SUSPEND events are handled specially. Any device is
- * permitted to fail a suspend by returning a non zero (error)
- * value from its callback function. If any device vetoes a
- * suspend request then all other devices that have suspended
- * during the processing of this request are restored to their
- * previous state.
- *
- * WARNING: This function takes the pm_devs_lock. The lock is not dropped until
- * the callbacks have completed. This prevents races against pm locking
- * functions, races against module unload pm_unregister code. It does
- * mean however that you must not issue pm_ functions within the callback
- * or you will deadlock and users will hate you.
- *
- * Zero is returned on success. If a suspend fails then the status
- * from the device that vetoes the suspend is returned.
- *
- * BUGS: what stops two power management requests occurring in parallel
- * and conflicting.
- */
-
-int pm_send_all(pm_request_t rqst, void *data)
-{
- struct list_head *entry;
-
- mutex_lock(&pm_devs_lock);
- entry = pm_devs.next;
- while (entry != &pm_devs) {
- struct pm_dev *dev = list_entry(entry, struct pm_dev, entry);
- if (dev->callback) {
- int status = pm_send(dev, rqst, data);
- if (status) {
- /* return devices to previous state on
- * failed suspend request
- */
- if (rqst == PM_SUSPEND)
- pm_undo_all(dev);
- mutex_unlock(&pm_devs_lock);
- return status;
- }
- }
- entry = entry->next;
- }
- mutex_unlock(&pm_devs_lock);
- return 0;
-}
-
-EXPORT_SYMBOL(pm_register);
-EXPORT_SYMBOL(pm_send_all);
-
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 67e392ed549..dac4b4e5729 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -612,7 +612,7 @@ int generic_ptrace_pokedata(struct task_struct *tsk, long addr, long data)
return (copied == sizeof(data)) ? 0 : -EIO;
}
-#ifdef CONFIG_COMPAT
+#if defined CONFIG_COMPAT && defined __ARCH_WANT_COMPAT_SYS_PTRACE
#include <linux/compat.h>
int compat_ptrace_request(struct task_struct *child, compat_long_t request,
@@ -667,7 +667,6 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
return ret;
}
-#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE
asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
compat_long_t addr, compat_long_t data)
{
@@ -710,6 +709,4 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
unlock_kernel();
return ret;
}
-#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */
-
-#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_COMPAT && __ARCH_WANT_COMPAT_SYS_PTRACE */
diff --git a/kernel/sched.c b/kernel/sched.c
index 0014b03adac..740fb409e5b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1657,42 +1657,6 @@ void aggregate_group_weight(struct task_group *tg, struct sched_domain *sd)
}
/*
- * Redistribute tg->shares amongst all tg->cfs_rq[]s.
- */
-static void __aggregate_redistribute_shares(struct task_group *tg)
-{
- int i, max_cpu = smp_processor_id();
- unsigned long rq_weight = 0;
- unsigned long shares, max_shares = 0, shares_rem = tg->shares;
-
- for_each_possible_cpu(i)
- rq_weight += tg->cfs_rq[i]->load.weight;
-
- for_each_possible_cpu(i) {
- /*
- * divide shares proportional to the rq_weights.
- */
- shares = tg->shares * tg->cfs_rq[i]->load.weight;
- shares /= rq_weight + 1;
-
- tg->cfs_rq[i]->shares = shares;
-
- if (shares > max_shares) {
- max_shares = shares;
- max_cpu = i;
- }
- shares_rem -= shares;
- }
-
- /*
- * Ensure it all adds up to tg->shares; we can loose a few
- * due to rounding down when computing the per-cpu shares.
- */
- if (shares_rem)
- tg->cfs_rq[max_cpu]->shares += shares_rem;
-}
-
-/*
* Compute the weight of this group on the given cpus.
*/
static
@@ -1701,18 +1665,11 @@ void aggregate_group_shares(struct task_group *tg, struct sched_domain *sd)
unsigned long shares = 0;
int i;
-again:
for_each_cpu_mask(i, sd->span)
shares += tg->cfs_rq[i]->shares;
- /*
- * When the span doesn't have any shares assigned, but does have
- * tasks to run do a machine wide rebalance (should be rare).
- */
- if (unlikely(!shares && aggregate(tg, sd)->rq_weight)) {
- __aggregate_redistribute_shares(tg);
- goto again;
- }
+ if ((!shares && aggregate(tg, sd)->rq_weight) || shares > tg->shares)
+ shares = tg->shares;
aggregate(tg, sd)->shares = shares;
}
@@ -7991,11 +7948,6 @@ void __init sched_init_smp(void)
#else
void __init sched_init_smp(void)
{
-#if defined(CONFIG_NUMA)
- sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
- GFP_KERNEL);
- BUG_ON(sched_group_nodes_bycpu == NULL);
-#endif
sched_init_granularity();
}
#endif /* CONFIG_SMP */
@@ -8128,7 +8080,7 @@ void __init sched_init(void)
* we use alloc_bootmem().
*/
if (alloc_size) {
- ptr = (unsigned long)alloc_bootmem_low(alloc_size);
+ ptr = (unsigned long)alloc_bootmem(alloc_size);
#ifdef CONFIG_FAIR_GROUP_SCHED
init_task_group.se = (struct sched_entity **)ptr;
diff --git a/kernel/sys.c b/kernel/sys.c
index 6a0cc71ee88..f2a45136695 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1632,10 +1632,9 @@ asmlinkage long sys_umask(int mask)
asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5)
{
- long error;
+ long uninitialized_var(error);
- error = security_task_prctl(option, arg2, arg3, arg4, arg5);
- if (error)
+ if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error))
return error;
switch (option) {
@@ -1688,17 +1687,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
error = -EINVAL;
break;
- case PR_GET_KEEPCAPS:
- if (current->keep_capabilities)
- error = 1;
- break;
- case PR_SET_KEEPCAPS:
- if (arg2 != 0 && arg2 != 1) {
- error = -EINVAL;
- break;
- }
- current->keep_capabilities = arg2;
- break;
case PR_SET_NAME: {
struct task_struct *me = current;
unsigned char ncomm[sizeof(me->comm)];
@@ -1732,17 +1720,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
case PR_SET_SECCOMP:
error = prctl_set_seccomp(arg2);
break;
-
- case PR_CAPBSET_READ:
- if (!cap_valid(arg2))
- return -EINVAL;
- return !!cap_raised(current->cap_bset, arg2);
- case PR_CAPBSET_DROP:
-#ifdef CONFIG_SECURITY_FILE_CAPABILITIES
- return cap_prctl_drop(arg2);
-#else
- return -EINVAL;
-#endif
case PR_GET_TSC:
error = GET_TSC_CTL(arg2);
break;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d358d4e3a95..b854a895591 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -393,6 +393,7 @@ void tick_nohz_restart_sched_tick(void)
sub_preempt_count(HARDIRQ_OFFSET);
}
+ touch_softlockup_watchdog();
/*
* Cancel the scheduled timer and restore the tick
*/