diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/cgroup.c | 2 | ||||
-rw-r--r-- | kernel/futex.c | 53 | ||||
-rw-r--r-- | kernel/kexec.c | 7 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 60 | ||||
-rw-r--r-- | kernel/power/Makefile | 2 | ||||
-rw-r--r-- | kernel/power/console.c | 6 | ||||
-rw-r--r-- | kernel/power/disk.c | 22 | ||||
-rw-r--r-- | kernel/power/main.c | 8 | ||||
-rw-r--r-- | kernel/power/swap.c | 5 | ||||
-rw-r--r-- | kernel/power/user.c | 8 | ||||
-rw-r--r-- | kernel/printk.c | 15 | ||||
-rw-r--r-- | kernel/sched.c | 15 | ||||
-rw-r--r-- | kernel/seccomp.c | 7 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 25 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_mmiotrace.c | 14 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 19 | ||||
-rw-r--r-- | kernel/user_namespace.c | 21 |
19 files changed, 210 insertions, 86 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 170a9213c1b..e4791b3ba55 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_PM) += power/ +obj-$(CONFIG_FREEZER) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e14db9c089b..9edb5c4b79b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1122,8 +1122,8 @@ static void cgroup_kill_sb(struct super_block *sb) { mutex_unlock(&cgroup_mutex); - kfree(root); kill_litter_super(sb); + kfree(root); } static struct file_system_type cgroup_fs_type = { diff --git a/kernel/futex.c b/kernel/futex.c index f89d373a9c6..438701adce2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1165,6 +1165,7 @@ static int futex_wait(u32 __user *uaddr, int fshared, u32 val, ktime_t *abs_time, u32 bitset, int clockrt) { struct task_struct *curr = current; + struct restart_block *restart; DECLARE_WAITQUEUE(wait, curr); struct futex_hash_bucket *hb; struct futex_q q; @@ -1216,11 +1217,13 @@ retry: if (!ret) goto retry; - return ret; + goto out; } ret = -EWOULDBLOCK; - if (uval != val) - goto out_unlock_put_key; + if (unlikely(uval != val)) { + queue_unlock(&q, hb); + goto out_put_key; + } /* Only actually queue if *uaddr contained val. */ queue_me(&q, hb); @@ -1284,38 +1287,38 @@ retry: */ /* If we were woken (and unqueued), we succeeded, whatever. */ + ret = 0; if (!unqueue_me(&q)) - return 0; + goto out_put_key; + ret = -ETIMEDOUT; if (rem) - return -ETIMEDOUT; + goto out_put_key; /* * We expect signal_pending(current), but another thread may * have handled it for us already. 
*/ + ret = -ERESTARTSYS; if (!abs_time) - return -ERESTARTSYS; - else { - struct restart_block *restart; - restart = &current_thread_info()->restart_block; - restart->fn = futex_wait_restart; - restart->futex.uaddr = (u32 *)uaddr; - restart->futex.val = val; - restart->futex.time = abs_time->tv64; - restart->futex.bitset = bitset; - restart->futex.flags = 0; - - if (fshared) - restart->futex.flags |= FLAGS_SHARED; - if (clockrt) - restart->futex.flags |= FLAGS_CLOCKRT; - return -ERESTART_RESTARTBLOCK; - } + goto out_put_key; -out_unlock_put_key: - queue_unlock(&q, hb); - put_futex_key(fshared, &q.key); + restart = &current_thread_info()->restart_block; + restart->fn = futex_wait_restart; + restart->futex.uaddr = (u32 *)uaddr; + restart->futex.val = val; + restart->futex.time = abs_time->tv64; + restart->futex.bitset = bitset; + restart->futex.flags = 0; + + if (fshared) + restart->futex.flags |= FLAGS_SHARED; + if (clockrt) + restart->futex.flags |= FLAGS_CLOCKRT; + ret = -ERESTART_RESTARTBLOCK; + +out_put_key: + put_futex_key(fshared, &q.key); out: return ret; } diff --git a/kernel/kexec.c b/kernel/kexec.c index 795e7b67a22..c7fd6692939 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1465,6 +1465,11 @@ int kernel_kexec(void) error = device_power_down(PMSG_FREEZE); if (error) goto Enable_irqs; + + /* Suspend system devices */ + error = sysdev_suspend(PMSG_FREEZE); + if (error) + goto Power_up_devices; } else #endif { @@ -1477,6 +1482,8 @@ int kernel_kexec(void) #ifdef CONFIG_KEXEC_JUMP if (kexec_image->preserve_context) { + sysdev_resume(); + Power_up_devices: device_power_up(PMSG_RESTORE); Enable_irqs: local_irq_enable(); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 2313a4cc14e..e976e505648 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -681,6 +681,33 @@ static void cpu_timer_fire(struct k_itimer *timer) } /* + * Sample a process (thread group) timer for the given group_leader task. 
+ * Must be called with tasklist_lock held for reading. + */ +static int cpu_timer_sample_group(const clockid_t which_clock, + struct task_struct *p, + union cpu_time_count *cpu) +{ + struct task_cputime cputime; + + thread_group_cputimer(p, &cputime); + switch (CPUCLOCK_WHICH(which_clock)) { + default: + return -EINVAL; + case CPUCLOCK_PROF: + cpu->cpu = cputime_add(cputime.utime, cputime.stime); + break; + case CPUCLOCK_VIRT: + cpu->cpu = cputime.utime; + break; + case CPUCLOCK_SCHED: + cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); + break; + } + return 0; +} + +/* * Guts of sys_timer_settime for CPU timers. * This is called with the timer locked and interrupts disabled. * If we return TIMER_RETRY, it's necessary to release the timer's lock @@ -741,7 +768,7 @@ int posix_cpu_timer_set(struct k_itimer *timer, int flags, if (CPUCLOCK_PERTHREAD(timer->it_clock)) { cpu_clock_sample(timer->it_clock, p, &val); } else { - cpu_clock_sample_group(timer->it_clock, p, &val); + cpu_timer_sample_group(timer->it_clock, p, &val); } if (old) { @@ -889,7 +916,7 @@ void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) read_unlock(&tasklist_lock); goto dead; } else { - cpu_clock_sample_group(timer->it_clock, p, &now); + cpu_timer_sample_group(timer->it_clock, p, &now); clear_dead = (unlikely(p->exit_state) && thread_group_empty(p)); } @@ -1244,7 +1271,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) clear_dead_task(timer, now); goto out_unlock; } - cpu_clock_sample_group(timer->it_clock, p, &now); + cpu_timer_sample_group(timer->it_clock, p, &now); bump_cpu_timer(timer, now); /* Leave the tasklist_lock locked for the call below. */ } @@ -1409,33 +1436,6 @@ void run_posix_cpu_timers(struct task_struct *tsk) } /* - * Sample a process (thread group) timer for the given group_leader task. - * Must be called with tasklist_lock held for reading. 
- */ -static int cpu_timer_sample_group(const clockid_t which_clock, - struct task_struct *p, - union cpu_time_count *cpu) -{ - struct task_cputime cputime; - - thread_group_cputimer(p, &cputime); - switch (CPUCLOCK_WHICH(which_clock)) { - default: - return -EINVAL; - case CPUCLOCK_PROF: - cpu->cpu = cputime_add(cputime.utime, cputime.stime); - break; - case CPUCLOCK_VIRT: - cpu->cpu = cputime.utime; - break; - case CPUCLOCK_SCHED: - cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); - break; - } - return 0; -} - -/* * Set one of the process-wide special case CPU timers. * The tsk->sighand->siglock must be held by the caller. * The *newval argument is relative and we update it to be absolute, *oldval diff --git a/kernel/power/Makefile b/kernel/power/Makefile index d7a10167a25..720ea4f781b 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -3,7 +3,7 @@ ifeq ($(CONFIG_PM_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -obj-y := main.o +obj-$(CONFIG_PM) += main.o obj-$(CONFIG_PM_SLEEP) += console.o obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o diff --git a/kernel/power/console.c b/kernel/power/console.c index b8628be2a46..a3961b205de 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -78,6 +78,12 @@ void pm_restore_console(void) } set_console(orig_fgconsole); release_console_sem(); + + if (vt_waitactive(orig_fgconsole)) { + pr_debug("Resume: Can't switch VCs."); + return; + } + kmsg_redirect = orig_kmsg; } #endif diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 432ee575c9e..4a4a206b197 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -227,6 +227,12 @@ static int create_image(int platform_mode) "aborting hibernation\n"); goto Enable_irqs; } + sysdev_suspend(PMSG_FREEZE); + if (error) { + printk(KERN_ERR "PM: Some devices failed to power down, " + "aborting hibernation\n"); + goto Power_up_devices; + } if (hibernation_test(TEST_CORE)) goto Power_up; @@ 
-242,9 +248,11 @@ static int create_image(int platform_mode) if (!in_suspend) platform_leave(platform_mode); Power_up: + sysdev_resume(); /* NOTE: device_power_up() is just a resume() for devices * that suspended with irqs off ... no overall powerup. */ + Power_up_devices: device_power_up(in_suspend ? (error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE); Enable_irqs: @@ -335,6 +343,7 @@ static int resume_target_kernel(void) "aborting resume\n"); goto Enable_irqs; } + sysdev_suspend(PMSG_QUIESCE); /* We'll ignore saved state, but this gets preempt count (etc) right */ save_processor_state(); error = restore_highmem(); @@ -357,6 +366,7 @@ static int resume_target_kernel(void) swsusp_free(); restore_processor_state(); touch_softlockup_watchdog(); + sysdev_resume(); device_power_up(PMSG_RECOVER); Enable_irqs: local_irq_enable(); @@ -440,6 +450,7 @@ int hibernation_platform_enter(void) local_irq_disable(); error = device_power_down(PMSG_HIBERNATE); if (!error) { + sysdev_suspend(PMSG_HIBERNATE); hibernation_ops->enter(); /* We should never get here */ while (1); @@ -595,6 +606,12 @@ static int software_resume(void) unsigned int flags; /* + * If the user said "noresume".. bail out early. + */ + if (noresume) + return 0; + + /* * name_to_dev_t() below takes a sysfs buffer mutex when sysfs * is configured into the kernel. Since the regular hibernate * trigger path is via sysfs which takes a buffer mutex before @@ -610,6 +627,11 @@ static int software_resume(void) mutex_unlock(&pm_mutex); return -ENOENT; } + /* + * Some device discovery might still be in progress; we need + * to wait for this to finish. 
+ */ + wait_for_device_probe(); swsusp_resume_device = name_to_dev_t(resume_file); pr_debug("PM: Resume from partition %s\n", resume_file); } else { diff --git a/kernel/power/main.c b/kernel/power/main.c index b4d219016b6..c9632f841f6 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -298,8 +298,12 @@ static int suspend_enter(suspend_state_t state) goto Done; } - if (!suspend_test(TEST_CORE)) - error = suspend_ops->enter(state); + error = sysdev_suspend(PMSG_SUSPEND); + if (!error) { + if (!suspend_test(TEST_CORE)) + error = suspend_ops->enter(state); + sysdev_resume(); + } device_power_up(PMSG_RESUME); Done: diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 6da14358537..505f319e489 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -60,6 +60,7 @@ static struct block_device *resume_bdev; static int submit(int rw, pgoff_t page_off, struct page *page, struct bio **bio_chain) { + const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); @@ -80,7 +81,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, bio_get(bio); if (bio_chain == NULL) { - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); wait_on_page_locked(page); if (rw == READ) bio_set_pages_dirty(bio); @@ -90,7 +91,7 @@ static int submit(int rw, pgoff_t page_off, struct page *page, get_page(page); /* These pages are freed later */ bio->bi_private = *bio_chain; *bio_chain = bio; - submit_bio(rw | (1 << BIO_RW_SYNC), bio); + submit_bio(bio_rw, bio); } return 0; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 005b93d839b..6c85359364f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -95,15 +95,15 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = swsusp_resume_device ? 
swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); if (error) - pm_notifier_call_chain(PM_POST_RESTORE); + pm_notifier_call_chain(PM_POST_HIBERNATION); } else { data->swap = -1; data->mode = O_WRONLY; - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + error = pm_notifier_call_chain(PM_RESTORE_PREPARE); if (error) - pm_notifier_call_chain(PM_POST_HIBERNATION); + pm_notifier_call_chain(PM_POST_RESTORE); } if (error) atomic_inc(&snapshot_device_available); diff --git a/kernel/printk.c b/kernel/printk.c index 69188f226a9..e3602d0755b 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -73,7 +73,6 @@ EXPORT_SYMBOL(oops_in_progress); * driver system. */ static DECLARE_MUTEX(console_sem); -static DECLARE_MUTEX(secondary_console_sem); struct console *console_drivers; EXPORT_SYMBOL_GPL(console_drivers); @@ -891,12 +890,14 @@ void suspend_console(void) printk("Suspending console(s) (use no_console_suspend to debug)\n"); acquire_console_sem(); console_suspended = 1; + up(&console_sem); } void resume_console(void) { if (!console_suspend_enabled) return; + down(&console_sem); console_suspended = 0; release_console_sem(); } @@ -912,11 +913,9 @@ void resume_console(void) void acquire_console_sem(void) { BUG_ON(in_interrupt()); - if (console_suspended) { - down(&secondary_console_sem); - return; - } down(&console_sem); + if (console_suspended) + return; console_locked = 1; console_may_schedule = 1; } @@ -926,6 +925,10 @@ int try_acquire_console_sem(void) { if (down_trylock(&console_sem)) return -1; + if (console_suspended) { + up(&console_sem); + return -1; + } console_locked = 1; console_may_schedule = 0; return 0; @@ -979,7 +982,7 @@ void release_console_sem(void) unsigned wake_klogd = 0; if (console_suspended) { - up(&secondary_console_sem); + up(&console_sem); return; } diff --git a/kernel/sched.c b/kernel/sched.c index 
61245b8d0f1..7d97ff7c447 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6939,20 +6939,26 @@ static void free_rootdomain(struct root_domain *rd) static void rq_attach_root(struct rq *rq, struct root_domain *rd) { + struct root_domain *old_rd = NULL; unsigned long flags; spin_lock_irqsave(&rq->lock, flags); if (rq->rd) { - struct root_domain *old_rd = rq->rd; + old_rd = rq->rd; if (cpumask_test_cpu(rq->cpu, old_rd->online)) set_rq_offline(rq); cpumask_clear_cpu(rq->cpu, old_rd->span); - if (atomic_dec_and_test(&old_rd->refcount)) - free_rootdomain(old_rd); + /* + * If we dont want to free the old_rt yet then + * set old_rd to NULL to skip the freeing later + * in this function: + */ + if (!atomic_dec_and_test(&old_rd->refcount)) + old_rd = NULL; } atomic_inc(&rd->refcount); @@ -6963,6 +6969,9 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) set_rq_online(rq); spin_unlock_irqrestore(&rq->lock, flags); + + if (old_rd) + free_rootdomain(old_rd); } static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index ad64fcb731f..57d4b13b631 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -8,6 +8,7 @@ #include <linux/seccomp.h> #include <linux/sched.h> +#include <linux/compat.h> /* #define SECCOMP_DEBUG 1 */ #define NR_SECCOMP_MODES 1 @@ -22,7 +23,7 @@ static int mode1_syscalls[] = { 0, /* null terminated */ }; -#ifdef TIF_32BIT +#ifdef CONFIG_COMPAT static int mode1_syscalls_32[] = { __NR_seccomp_read_32, __NR_seccomp_write_32, __NR_seccomp_exit_32, __NR_seccomp_sigreturn_32, 0, /* null terminated */ @@ -37,8 +38,8 @@ void __secure_computing(int this_syscall) switch (mode) { case 1: syscall = mode1_syscalls; -#ifdef TIF_32BIT - if (test_thread_flag(TIF_32BIT)) +#ifdef CONFIG_COMPAT + if (is_compat_task()) syscall = mode1_syscalls_32; #endif do { diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index e2a4ff6fc3a..34e707e5ab8 100644 --- a/kernel/trace/Kconfig +++ 
b/kernel/trace/Kconfig @@ -52,6 +52,7 @@ config FUNCTION_TRACER depends on HAVE_FUNCTION_TRACER depends on DEBUG_KERNEL select FRAME_POINTER + select KALLSYMS select TRACING select CONTEXT_SWITCH_TRACER help @@ -238,6 +239,7 @@ config STACK_TRACER depends on DEBUG_KERNEL select FUNCTION_TRACER select STACKTRACE + select KALLSYMS help This special tracer records the maximum stack footprint of the kernel and displays it in debugfs/tracing/stack_trace. @@ -302,4 +304,27 @@ config FTRACE_STARTUP_TEST functioning properly. It will do tests on all the configured tracers of ftrace. +config MMIOTRACE + bool "Memory mapped IO tracing" + depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI + select TRACING + help + Mmiotrace traces Memory Mapped I/O access and is meant for + debugging and reverse engineering. It is called from the ioremap + implementation and works via page faults. Tracing is disabled by + default and can be enabled at run-time. + + See Documentation/tracers/mmiotrace.txt. + If you are not helping to develop drivers, say N. + +config MMIOTRACE_TEST + tristate "Test module for mmiotrace" + depends on MMIOTRACE && m + help + This is a dumb module for testing mmiotrace. It is very dangerous + as it will write garbage to IO memory starting at a given address. + However, it should be safe to use on e.g. unused portion of VRAM. + + Say N, unless you absolutely know what you are doing. 
+ endmenu diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9a236ffe2aa..fdf913dfc7e 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2033,7 +2033,7 @@ free: static int start_graph_tracing(void) { struct ftrace_ret_stack **ret_stack_list; - int ret; + int ret, cpu; ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE * sizeof(struct ftrace_ret_stack *), @@ -2042,6 +2042,10 @@ static int start_graph_tracing(void) if (!ret_stack_list) return -ENOMEM; + /* The cpu_boot init_task->ret_stack will never be freed */ + for_each_online_cpu(cpu) + ftrace_graph_init_task(idle_task(cpu)); + do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index fffcb069f1d..80e503ef613 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/mmiotrace.h> #include <linux/pci.h> +#include <asm/atomic.h> #include "trace.h" @@ -19,6 +20,7 @@ struct header_iter { static struct trace_array *mmio_trace_array; static bool overrun_detected; static unsigned long prev_overruns; +static atomic_t dropped_count; static void mmio_reset_data(struct trace_array *tr) { @@ -121,11 +123,11 @@ static void mmio_close(struct trace_iterator *iter) static unsigned long count_overruns(struct trace_iterator *iter) { - unsigned long cnt = 0; + unsigned long cnt = atomic_xchg(&dropped_count, 0); unsigned long over = ring_buffer_overruns(iter->tr->buffer); if (over > prev_overruns) - cnt = over - prev_overruns; + cnt += over - prev_overruns; prev_overruns = over; return cnt; } @@ -310,8 +312,10 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); - if (!event) + if (!event) { + atomic_inc(&dropped_count); return; + } entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, preempt_count()); 
entry->ent.type = TRACE_MMIO_RW; @@ -338,8 +342,10 @@ static void __trace_mmiotrace_map(struct trace_array *tr, event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq_flags); - if (!event) + if (!event) { + atomic_inc(&dropped_count); return; + } entry = ring_buffer_event_data(event); tracing_generic_entry_update(&entry->ent, 0, preempt_count()); entry->ent.type = TRACE_MMIO_MAP; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 88c8eb70f54..bc8e80a86bc 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -23,10 +23,20 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu) { struct ring_buffer_event *event; struct trace_entry *entry; + unsigned int loops = 0; while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) { entry = ring_buffer_event_data(event); + /* + * The ring buffer is a size of trace_buf_size, if + * we loop more than the size, there's something wrong + * with the ring buffer. + */ + if (loops++ > trace_buf_size) { + printk(KERN_CONT ".. bad ring buffer "); + goto failed; + } if (!trace_valid_entry(entry)) { printk(KERN_CONT ".. invalid entry %d ", entry->type); @@ -57,11 +67,20 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) cnt = ring_buffer_entries(tr->buffer); + /* + * The trace_test_buffer_cpu runs a while loop to consume all data. + * If the calling tracer is broken, and is constantly filling + * the buffer, this will run forever, and hard lock the box. + * We disable the ring buffer while we do this test to prevent + * a hard lock up. 
+ */ + tracing_off(); for_each_possible_cpu(cpu) { ret = trace_test_buffer_cpu(tr, cpu); if (ret) break; } + tracing_on(); __raw_spin_unlock(&ftrace_max_lock); local_irq_restore(flags); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 79084311ee5..076c7c8215b 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -60,12 +60,25 @@ int create_user_ns(struct cred *new) return 0; } -void free_user_ns(struct kref *kref) +/* + * Deferred destructor for a user namespace. This is required because + * free_user_ns() may be called with uidhash_lock held, but we need to call + * back to free_uid() which will want to take the lock again. + */ +static void free_user_ns_work(struct work_struct *work) { - struct user_namespace *ns; - - ns = container_of(kref, struct user_namespace, kref); + struct user_namespace *ns = + container_of(work, struct user_namespace, destroyer); free_uid(ns->creator); kfree(ns); } + +void free_user_ns(struct kref *kref) +{ + struct user_namespace *ns = + container_of(kref, struct user_namespace, kref); + + INIT_WORK(&ns->destroyer, free_user_ns_work); + schedule_work(&ns->destroyer); +} EXPORT_SYMBOL(free_user_ns); |