From 52fdd08903a1d1162e184114837e232640191627 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sat, 3 Sep 2005 15:56:52 -0700 Subject: [PATCH] unify x86/x86-64 semaphore code This patch moves the common code in x86 and x86-64's semaphore.c into a single file in lib/semaphore-sleepers.c. The arch specific asm stubs are left in the arch tree (in semaphore.c for i386 and in the asm for x86-64). There should be no changes in code/functionality with this patch. Signed-off-by: Benjamin LaHaise Cc: Andi Kleen Signed-off-by: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 4 + arch/x86_64/kernel/Makefile | 2 +- arch/x86_64/kernel/semaphore.c | 180 ----------------------------------------- 3 files changed, 5 insertions(+), 181 deletions(-) delete mode 100644 arch/x86_64/kernel/semaphore.c (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 660a03a89e6..75e52c57f19 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -24,6 +24,10 @@ config X86 bool default y +config SEMAPHORE_SLEEPERS + bool + default y + config MMU bool default y diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 48f9e2c19cd..427501a336c 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -4,7 +4,7 @@ extra-y := head.o head64.o init_task.o vmlinux.lds EXTRA_AFLAGS := -traditional -obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ +obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o diff --git a/arch/x86_64/kernel/semaphore.c b/arch/x86_64/kernel/semaphore.c deleted file mode 100644 index 48f7c18172b..00000000000 --- a/arch/x86_64/kernel/semaphore.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * x86_64 semaphore implementation. - * - * (C) Copyright 1999 Linus Torvalds - * - * Portions Copyright 1999 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * rw semaphores implemented November 1999 by Benjamin LaHaise - */ -#include -#include -#include -#include - -#include - -/* - * Semaphores are implemented using a two-way counter: - * The "count" variable is decremented for each process - * that tries to acquire the semaphore, while the "sleeping" - * variable is a count of such acquires. - * - * Notably, the inline "up()" and "down()" functions can - * efficiently test if they need to do any extra work (up - * needs to do something only if count was negative before - * the increment operation. - * - * "sleeping" and the contention routine ordering is protected - * by the spinlock in the semaphore's waitqueue head. - * - * Note that these functions are only called when there is - * contention on the lock, and as such all this is the - * "non-critical" part of the whole semaphore business. The - * critical part is the inline stuff in - * where we want to avoid any extra jumps and calls. - */ - -/* - * Logic: - * - only on a boundary condition do we need to care. When we go - * from a negative count to a non-negative, we wake people up. - * - when we go from a non-negative count to a negative do we - * (a) synchronize with the "sleeper" count and (b) make sure - * that we're on the wakeup list before we synchronize so that - * we cannot lose wakeup events. - */ - -void __up(struct semaphore *sem) -{ - wake_up(&sem->wait); -} - -void __sched __down(struct semaphore * sem) -{ - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_UNINTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * the wait_queue_head. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_UNINTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - tsk->state = TASK_RUNNING; -} - -int __sched __down_interruptible(struct semaphore * sem) -{ - int retval = 0; - struct task_struct *tsk = current; - DECLARE_WAITQUEUE(wait, tsk); - unsigned long flags; - - tsk->state = TASK_INTERRUPTIBLE; - spin_lock_irqsave(&sem->wait.lock, flags); - add_wait_queue_exclusive_locked(&sem->wait, &wait); - - sem->sleepers++; - for (;;) { - int sleepers = sem->sleepers; - - /* - * With signals pending, this turns into - * the trylock failure case - we won't be - * sleeping, and we* can't get the lock as - * it has contention. Just correct the count - * and exit. - */ - if (signal_pending(current)) { - retval = -EINTR; - sem->sleepers = 0; - atomic_add(sleepers, &sem->count); - break; - } - - /* - * Add "everybody else" into it. They aren't - * playing, because we own the spinlock in - * wait_queue_head. The "-1" is because we're - * still hoping to get the semaphore. - */ - if (!atomic_add_negative(sleepers - 1, &sem->count)) { - sem->sleepers = 0; - break; - } - sem->sleepers = 1; /* us - see -1 above */ - spin_unlock_irqrestore(&sem->wait.lock, flags); - - schedule(); - - spin_lock_irqsave(&sem->wait.lock, flags); - tsk->state = TASK_INTERRUPTIBLE; - } - remove_wait_queue_locked(&sem->wait, &wait); - wake_up_locked(&sem->wait); - spin_unlock_irqrestore(&sem->wait.lock, flags); - - tsk->state = TASK_RUNNING; - return retval; -} - -/* - * Trylock failed - make sure we correct for - * having decremented the count. - * - * We could have done the trylock with a - * single "cmpxchg" without failure cases, - * but then it wouldn't work on a 386. - */ -int __down_trylock(struct semaphore * sem) -{ - int sleepers; - unsigned long flags; - - spin_lock_irqsave(&sem->wait.lock, flags); - sleepers = sem->sleepers + 1; - sem->sleepers = 0; - - /* - * Add "everybody else" and us into it. They aren't - * playing, because we own the spinlock in the - * wait_queue_head. - */ - if (!atomic_add_negative(sleepers, &sem->count)) { - wake_up_locked(&sem->wait); - } - - spin_unlock_irqrestore(&sem->wait.lock, flags); - return 1; -} - - -- cgit v1.2.3 From 7dc24db1757f950f8bd21b7191106d4bf5134be7 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 3 Sep 2005 15:56:55 -0700 Subject: [PATCH] ISA DMA suspend for x86_64 Reset the ISA DMA controller into a known state after a suspend. Primary concern was reenabling the cascading DMA channel (4). Signed-off-by: Pierre Ossman Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 427501a336c..c32e198d7b2 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -7,7 +7,7 @@ EXTRA_AFLAGS := -traditional obj-y := process.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ - setup64.o bootflag.o e820.o reboot.o quirks.o + setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o @@ -45,3 +45,4 @@ swiotlb-$(CONFIG_SWIOTLB) += ../../ia64/lib/swiotlb.o microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o quirks-y += ../../i386/kernel/quirks.o +i8237-y += ../../i386/kernel/i8237.o -- cgit v1.2.3 From 829ca9a30a2ddb727981d80fabdbff2ea86bc9ea Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 3 Sep 2005 15:56:56 -0700 Subject: [PATCH] swsusp: fix remaining u32 vs. pm_message_t confusion Fix remaining bits of u32 vs. pm_message confusion. Should not break anything. Signed-off-by: Pavel Machek Signed-off-by: Rafael J. Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/nmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 4e44d6e6b7e..64a8e05d581 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -290,7 +290,7 @@ void enable_timer_nmi_watchdog(void) static int nmi_pm_active; /* nmi_active before suspend */ -static int lapic_nmi_suspend(struct sys_device *dev, u32 state) +static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) { nmi_pm_active = nmi_active; disable_lapic_nmi_watchdog(); -- cgit v1.2.3 From 54d5d42404e7705cf3804593189e963350d470e5 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 6 Sep 2005 15:16:15 -0700 Subject: [PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity When handling writes to /proc/irq, current code is re-programming rte entries directly. This is not recommended and could potentially cause chipset's to lockup, or cause missing interrupts. CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the interrupt is pending. The same needs to be done for /proc/irq handling as well. Otherwise user space irq balancers are really not doing the right thing. - Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for lack of a generic name. - added move_irq out of IRQ_BALANCE, and added this same to X86_64 - Added new proc handler for write, so we can do deferred write at irq handling time. - Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead it now shows only active cpu masks, or exactly what was set. - Provided a common move_irq implementation, instead of duplicating when using generic irq framework. Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off. Tested UP builds as well. MSI testing: tbd: I have cards, need to look for a x-over cable, although I did test an earlier version of this patch. Will test in a couple days. Signed-off-by: Ashok Raj Acked-by: Zwane Mwaikambo Grudgingly-acked-by: Andi Kleen Signed-off-by: Coywolf Qi Hunt Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 5 +++ arch/x86_64/kernel/io_apic.c | 102 +++++++++++++++++++++++++++---------------- 2 files changed, 69 insertions(+), 38 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 75e52c57f19..251ce7cf1a3 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -441,6 +441,11 @@ config ISA_DMA_API bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + menu "Power management options" source kernel/power/Kconfig diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index d206d7e49cf..76bcc4e6979 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; #define vector_to_irq(vector) (vector) #endif +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#ifdef CONFIG_SMP +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(mask, tmp, CPU_MASK_ALL); + + dest = cpu_mask_to_apicid(mask); + + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = dest, ) + set_irq_info(irq, mask); + spin_unlock_irqrestore(&ioapic_lock, flags); +} +#endif + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ - \ - for (;;) { \ - unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ - break; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - if (!entry->next) \ - break; \ - entry = irq_2_pin + entry->next; \ - } \ - FINAL; \ -} #define DO_ACTION(name,R,ACTION, FINAL) \ \ @@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) */ static void ack_edge_ioapic_irq(unsigned int irq) { + move_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq) static void end_level_ioapic_irq (unsigned int irq) { + move_irq(irq); ack_APIC_irq(); } -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - unsigned long flags; - unsigned int dest; - - dest = cpu_mask_to_apicid(mask); - - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); - - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = dest, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} - #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); end_level_ioapic_irq(irq); } @@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } -#endif +#endif // CONFIG_SMP +#endif // CONFIG_PCI_MSI /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1432,7 +1451,9 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static struct hw_interrupt_type ioapic_level_type = { @@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; @@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void) } } +#endif -- cgit v1.2.3 From c078d3266e58a3edd698b9f143f78fc585679031 Mon Sep 17 00:00:00 2001 From: Zwane Mwaikambo Date: Tue, 6 Sep 2005 15:16:16 -0700 Subject: [PATCH] x86_64: print processor number in show_regs Up to date I've been using the GS value to determine the processor number in dumps from show_regs, however this can be cumbersome to do if you don't have the vmlinux to verify with the address of cpu_pda, how about the following? I considered using hard_smp_processor_id for robustness but we already dereference current so we're already relying on MSR_GS_BASE being sane. Signed-off-by: Zwane Mwaikambo Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/process.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7577f9d7a75..8661f82ac70 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -310,6 +310,7 @@ void __show_regs(struct pt_regs * regs) void show_regs(struct pt_regs *regs) { + printk("CPU %d:", smp_processor_id()); __show_regs(regs); show_trace(®s->rsp); } -- cgit v1.2.3 From 0c2b9d5c035f5a82c9c4ba717984d04838a0701e Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 6 Sep 2005 15:16:18 -0700 Subject: [PATCH] x86_64: Fix cluster mode send_IPI_allbutself to use get_cpu()/put_cpu() Need to ensure we dont get prempted when we clear ourself from mask when using clustered mode genapic code. Signed-off-by: Ashok Raj Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/genapic_cluster.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index 9703da7202e..f6523dd1bc0 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c @@ -72,10 +72,14 @@ static void cluster_send_IPI_mask(cpumask_t mask, int vector) static void cluster_send_IPI_allbutself(int vector) { cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); + int me = get_cpu(); /* Ensure we are not preempted when we clear */ + + cpu_clear(me, mask); if (!cpus_empty(mask)) cluster_send_IPI_mask(mask, vector); + + put_cpu(); } static void cluster_send_IPI_all(int vector) -- cgit v1.2.3 From 48c8b1134249432318c8e5d19adc37c45242c4b1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 6 Sep 2005 15:16:20 -0700 Subject: [PATCH] x86_64: Fix off by one in e820_mapped This allows a valid iommu placed immediately after memory to work, to be recognized as after the last byte of memory and not overlapping it. Signed-off-by: Eric W. Biederman Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/e820.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index b548dea4e5b..116ac5f53dc 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c @@ -85,7 +85,7 @@ int __init e820_mapped(unsigned long start, unsigned long end, unsigned type) struct e820entry *ei = &e820.map[i]; if (type && ei->type != type) continue; - if (ei->addr >= end || ei->addr + ei->size < start) + if (ei->addr >= end || ei->addr + ei->size <= start) continue; return 1; } -- cgit v1.2.3 From 8446f1d391f3d27e6bf9c43d4cbcdac0ca720417 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 6 Sep 2005 15:16:27 -0700 Subject: [PATCH] detect soft lockups This patch adds a new kernel debug feature: CONFIG_DETECT_SOFTLOCKUP. When enabled then per-CPU watchdog threads are started, which try to run once per second. If they get delayed for more than 10 seconds then a callback from the timer interrupt detects this condition and prints out a warning message and a stack dump (once per lockup incident). The feature is otherwise non-intrusive, it doesnt try to unlock the box in any way, it only gets the debug info out, automatically, and on all CPUs affected by the lockup. Signed-off-by: Ingo Molnar Signed-off-by: Nishanth Aravamudan Signed-Off-By: Matthias Urlichs Signed-off-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/nmi.c | 2 ++ arch/x86_64/kernel/time.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 64a8e05d581..84cae81fff8 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -463,6 +463,8 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) per_cpu(nmi_touch, i) = 1; + + touch_softlockup_watchdog(); } void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 66bf6ddeb0c..2b5d9da912a 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1041,6 +1041,7 @@ static int timer_resume(struct sys_device *dev) write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; + touch_softlockup_watchdog(); return 0; } -- cgit v1.2.3 From 19306059cd7fedaf96b4b0260a9a8a45e513c857 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 6 Sep 2005 15:16:35 -0700 Subject: [PATCH] NMI: Update NMI users of RCU to use new API Uses of RCU for dynamically changeable NMI handlers need to use the new rcu_dereference() and rcu_assign_pointer() facilities. This change makes it clear that these uses are safe from a memory-barrier viewpoint, but the main purpose is to document exactly what operations are being protected by RCU. This has been tested on x86 and x86-64, which are the only architectures affected by this change. Signed-off-by: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/nmi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 84cae81fff8..caf164959e1 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -524,14 +524,14 @@ asmlinkage void do_nmi(struct pt_regs * regs, long error_code) nmi_enter(); add_pda(__nmi_count,1); - if (!nmi_callback(regs, cpu)) + if (!rcu_dereference(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); } void set_nmi_callback(nmi_callback_t callback) { - nmi_callback = callback; + rcu_assign_pointer(nmi_callback, callback); } void unset_nmi_callback(void) -- cgit v1.2.3 From f8eeaaf4180334a8e5c3582fe62a5f8176a8c124 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 6 Sep 2005 15:17:24 -0700 Subject: [PATCH] Make the bzImage format self-terminating Signed-off-by: H. Peter Anvin Cc: Frank Sorenson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/boot/setup.S | 2 +- arch/x86_64/boot/tools/build.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S index ff58b2832b7..12ea0b6c52e 100644 --- a/arch/x86_64/boot/setup.S +++ b/arch/x86_64/boot/setup.S @@ -81,7 +81,7 @@ start: # This is the setup header, and it must start at %cs:2 (old 0x9020:2) .ascii "HdrS" # header signature - .word 0x0203 # header version number (>= 0x0105) + .word 0x0204 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) realmode_swtch: .word 0, 0 # default_switch, SETUPSEG start_sys_seg: .word SYSSEG diff --git a/arch/x86_64/boot/tools/build.c b/arch/x86_64/boot/tools/build.c index 18b5bac1c42..c44f5e2ec10 100644 --- a/arch/x86_64/boot/tools/build.c +++ b/arch/x86_64/boot/tools/build.c @@ -178,7 +178,9 @@ int main(int argc, char ** argv) die("Output: seek failed"); buf[0] = (sys_size & 0xff); buf[1] = ((sys_size >> 8) & 0xff); - if (write(1, buf, 2) != 2) + buf[2] = ((sys_size >> 16) & 0xff); + buf[3] = ((sys_size >> 24) & 0xff); + if (write(1, buf, 4) != 4) die("Write of image length failed"); return 0; /* Everything is OK */ -- cgit v1.2.3 From 6c231b7bab0aa6860cd9da2de8a064eddc34c146 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Tue, 6 Sep 2005 15:17:45 -0700 Subject: [PATCH] Additions to .data.read_mostly section Mark variables which are usually accessed for reads with __readmostly. Signed-off-by: Alok N Kataria Signed-off-by: Shai Fultheim Signed-off-by: Ravikiran Thirumalai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/genapic.c | 2 +- arch/x86_64/kernel/io_apic.c | 10 +++++----- arch/x86_64/kernel/setup.c | 2 +- arch/x86_64/kernel/setup64.c | 2 +- arch/x86_64/kernel/smpboot.c | 10 +++++----- arch/x86_64/mm/numa.c | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 30c843a5efd..f062aa03bab 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c @@ -25,7 +25,7 @@ #endif /* which logical CPU number maps to which CPU (physical APIC ID) */ -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(x86_cpu_to_apicid); u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 76bcc4e6979..ba1a744e9bf 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -70,7 +70,7 @@ static struct irq_pin_list { short apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -683,7 +683,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -1443,7 +1443,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * races. */ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -1456,7 +1456,7 @@ static struct hw_interrupt_type ioapic_edge_type = { #endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -1529,7 +1529,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 116a491e296..b356f8e6adf 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -65,7 +65,7 @@ * Machine setup.. */ -struct cpuinfo_x86 boot_cpu_data; +struct cpuinfo_x86 boot_cpu_data __read_mostly; unsigned long mmu_cr4_features; diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 34082c1cc41..e3ffcacc8c9 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -36,7 +36,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); -unsigned long __supported_pte_mask = ~0UL; +unsigned long __supported_pte_mask __read_mostly = ~0UL; static int do_not_nx __initdata = 0; /* noexec=on|off diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index fa25e39fe54..90aeccd1519 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -62,13 +62,13 @@ /* Number of siblings per CPU package */ int smp_num_siblings = 1; /* Package ID of each logical CPU */ -u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; EXPORT_SYMBOL(phys_proc_id); EXPORT_SYMBOL(cpu_core_id); /* Bitmask of currently online CPUs */ -cpumask_t cpu_online_map; +cpumask_t cpu_online_map __read_mostly; EXPORT_SYMBOL(cpu_online_map); @@ -88,8 +88,8 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; /* Set when the idlers are all forked */ int smp_threads_ready; -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; +cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); /* diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 6a156f5692a..04f7a33e144 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -22,14 +22,14 @@ #define Dprintk(x...) #endif -struct pglist_data *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; bootmem_data_t plat_node_bdata[MAX_NUMNODES]; int memnode_shift; u8 memnodemap[NODEMAPSIZE]; -unsigned char cpu_to_node[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; -cpumask_t node_to_cpumask[MAX_NUMNODES]; +unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = NUMA_NO_NODE }; +cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; int numa_off __initdata; -- cgit v1.2.3 From b149ee2233edf08fb59b11e879a2c5941929bcb8 Mon Sep 17 00:00:00 2001 From: john stultz Date: Tue, 6 Sep 2005 15:17:46 -0700 Subject: [PATCH] NTP: ntp-helper functions This patch cleans up a commonly repeated set of changes to the NTP state variables by adding two helper inline functions: ntp_clear(): Clears the ntp state variables ntp_synced(): Returns 1 if the system is synced with a time server. This was compile tested for alpha, arm, i386, x86-64, ppc64, s390, sparc, sparc64. Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 2b5d9da912a..7b6abe05825 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -176,10 +176,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -471,7 +468,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * off) isn't likely to go away much sooner anyway. */ - if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update && + if (ntp_synced() && xtime.tv_sec > rtc_update && abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) { set_rtc_mmss(xtime.tv_sec); rtc_update = xtime.tv_sec + 660; -- cgit v1.2.3 From e922efc342d565a38eed3af377ff403f52148864 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:25 -0700 Subject: [PATCH] remove duplicated sys_open32() code from 64bit archs 64 bit architectures all implement their own compatibility sys_open(), when in fact the difference is simply not forcing the O_LARGEFILE flag. So use the a common function instead. Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/ia32entry.S | 2 +- arch/x86_64/ia32/sys_ia32.c | 26 -------------------------- 2 files changed, 1 insertion(+), 27 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c45d6a05b98..f174083d556 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -307,7 +307,7 @@ ia32_sys_call_table: .quad stub32_fork .quad sys_read .quad sys_write - .quad sys32_open /* 5 */ + .quad compat_sys_open /* 5 */ .quad sys_close .quad sys32_waitpid .quad sys_creat diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index be996d1b691..04d80406ce4 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -969,32 +969,6 @@ long sys32_kill(int pid, int sig) return sys_kill(pid, sig); } -asmlinkage long sys32_open(const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - /* don't force O_LARGEFILE */ - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = error; - } else { - fsnotify_open(f->f_dentry); - fd_install(fd, f); - } - } - putname(tmp); - } - return fd; -} - extern asmlinkage long sys_timer_create(clockid_t which_clock, struct sigevent __user *timer_event_spec, -- cgit v1.2.3 From 0f2fbdcbb041f9087da42f8ac2e81f2817098d2a Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Tue, 6 Sep 2005 15:19:28 -0700 Subject: [PATCH] kprobes: prevent possible race conditions x86_64 changes This patch contains the x86_64 architecture specific changes to prevent the possible race conditions. Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/entry.S | 12 ++++++++---- arch/x86_64/kernel/kprobes.c | 35 ++++++++++++++++++----------------- arch/x86_64/kernel/traps.c | 14 +++++++++----- arch/x86_64/kernel/vmlinux.lds.S | 1 + arch/x86_64/mm/fault.c | 4 +++- 5 files changed, 39 insertions(+), 27 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 096d470e280..be51dbe1f75 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -784,8 +784,9 @@ ENTRY(execve) ret CFI_ENDPROC -ENTRY(page_fault) +KPROBE_ENTRY(page_fault) errorentry do_page_fault + .previous .text ENTRY(coprocessor_error) zeroentry do_coprocessor_error @@ -797,13 +798,14 @@ ENTRY(device_not_available) zeroentry math_state_restore /* runs on exception stack */ -ENTRY(debug) +KPROBE_ENTRY(debug) CFI_STARTPROC pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug jmp paranoid_exit CFI_ENDPROC + .previous .text /* runs on exception stack */ ENTRY(nmi) @@ -854,8 +856,9 @@ paranoid_schedule: jmp paranoid_userspace CFI_ENDPROC -ENTRY(int3) +KPROBE_ENTRY(int3) zeroentry do_int3 + .previous .text ENTRY(overflow) zeroentry do_overflow @@ -892,8 +895,9 @@ ENTRY(stack_segment) jmp paranoid_exit CFI_ENDPROC -ENTRY(general_protection) +KPROBE_ENTRY(general_protection) errorentry do_general_protection + .previous .text ENTRY(alignment_check) errorentry do_alignment_check diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 5c6dc705148..c21cceaea27 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -74,7 +74,7 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn) return 0; } -int arch_prepare_kprobe(struct kprobe *p) +int __kprobes arch_prepare_kprobe(struct kprobe *p) { /* insn: must be on special executable page on x86_64. */ up(&kprobe_mutex); @@ -189,7 +189,7 @@ static inline s32 *is_riprel(u8 *insn) return NULL; } -void arch_copy_kprobe(struct kprobe *p) +void __kprobes arch_copy_kprobe(struct kprobe *p) { s32 *ripdisp; memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); @@ -215,21 +215,21 @@ void arch_copy_kprobe(struct kprobe *p) p->opcode = *p->addr; } -void arch_arm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p) { up(&kprobe_mutex); free_insn_slot(p->ainsn.insn); @@ -261,7 +261,7 @@ static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) kprobe_saved_rflags &= ~IF_MASK; } -static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) { regs->eflags |= TF_MASK; regs->eflags &= ~IF_MASK; @@ -272,7 +272,8 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } -void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) { unsigned long *sara = (unsigned long *)regs->rsp; struct kretprobe_instance *ri; @@ -295,7 +296,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. */ -int kprobe_handler(struct pt_regs *regs) +int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -399,7 +400,7 @@ no_kprobe: /* * Called when we hit the probe point at kretprobe_trampoline */ -int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -478,7 +479,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { unsigned long *tos = (unsigned long *)regs->rsp; unsigned long next_rip = 0; @@ -536,7 +537,7 @@ static void resume_execution(struct kprobe *p, struct pt_regs *regs) * Interrupts are disabled on entry as trap1 is an interrupt gate and they * remain disabled thoroughout this function. And we hold kprobe lock. */ -int post_kprobe_handler(struct pt_regs *regs) +int __kprobes post_kprobe_handler(struct pt_regs *regs) { if (!kprobe_running()) return 0; @@ -571,7 +572,7 @@ out: } /* Interrupts disabled, kprobe_lock held. */ -int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { if (current_kprobe->fault_handler && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) @@ -590,8 +591,8 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine for handling exceptions. */ -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, - void *data) +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; switch (val) { @@ -619,7 +620,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, return NOTIFY_DONE; } -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -640,7 +641,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -void jprobe_return(void) +void __kprobes jprobe_return(void) { preempt_enable_no_resched(); asm volatile (" xchg %%rbx,%%rsp \n" @@ -651,7 +652,7 @@ void jprobe_return(void) (jprobe_saved_rsp):"memory"); } -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { u8 *addr = (u8 *) (regs->rip - 1); unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 6ead433a388..f238d6078a5 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -419,8 +420,9 @@ void die_nmi(char *str, struct pt_regs *regs) do_exit(SIGSEGV); } -static void do_trap(int trapnr, int signr, char *str, - struct pt_regs * regs, long error_code, siginfo_t *info) +static void __kprobes do_trap(int trapnr, int signr, char *str, + struct pt_regs * regs, long error_code, + siginfo_t *info) { conditional_sti(regs); @@ -504,7 +506,8 @@ DO_ERROR(18, SIGSEGV, "reserved", reserved) DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR( 8, SIGSEGV, "double fault", double_fault) -asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) +asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) { conditional_sti(regs); @@ -622,7 +625,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) io_check_error(reason, regs); } -asmlinkage void do_int3(struct pt_regs * regs, long error_code) +asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; @@ -653,7 +656,8 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. */ -asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) +asmlinkage void __kprobes do_debug(struct pt_regs * regs, + unsigned long error_code) { unsigned long condition; struct task_struct *tsk = current; diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 2a94f9b60b2..d4abb07af52 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -21,6 +21,7 @@ SECTIONS *(.text) SCHED_TEXT LOCK_TEXT + KPROBES_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index ca914c3bd49..816732d8858 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -23,6 +23,7 @@ #include /* For unblank_screen() */ #include #include +#include #include #include @@ -294,7 +295,8 @@ int exception_trace = 1; * bit 2 == 0 means kernel, 1 means user-mode * bit 3 == 1 means fault was an instruction fetch */ -asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code) +asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; -- cgit v1.2.3 From bce0649417d6e71f6df8ab7b11103d247913b142 Mon Sep 17 00:00:00 2001 From: Jim Keniston Date: Tue, 6 Sep 2005 15:19:34 -0700 Subject: [PATCH] kprobes: fix handling of simultaneous probe hit/unregister This patch fixes a bug in kprobes's handling of a corner case on i386 and x86_64. On an SMP system, if one CPU unregisters a kprobe just after another CPU hits that probepoint, kprobe_handler() on the latter CPU sees that the kprobe has been unregistered, and attempts to let the CPU continue as if the probepoint hadn't been hit. The bug is that on i386 and x86_64, we were neglecting to set the IP back to the beginning of the probed instruction. This could cause an oops or crash. This bug doesn't exist on ppc64 and ia64, where a breakpoint instruction leaves the IP pointing to the beginning of the instruction. I don't know about sparc64. (Dave, could you please advise?) This fix has been tested on i386 and x86_64 SMP systems. To reproduce the problem, set one CPU to work registering and unregistering a kprobe repeatedly, and another CPU pounding the probepoint in a tight loop. Acked-by: Prasanna S Panchamukhi Signed-off-by: Jim Keniston Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index c21cceaea27..2d7658fbbb2 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -361,7 +361,10 @@ int __kprobes kprobe_handler(struct pt_regs *regs) * either a probepoint or a debugger breakpoint * at this address. In either case, no further * handling of this interrupt is appropriate. + * Back up over the (now missing) int3 and run + * the original instruction. */ + regs->rip = (unsigned long)addr; ret = 1; } /* Not one of ours: let kernel handle it */ -- cgit v1.2.3 From deac66ae454cacf942c051b86d9232af546fb187 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Tue, 6 Sep 2005 15:19:35 -0700 Subject: [PATCH] kprobes: fix bug when probed on task and isr functions This patch fixes a race condition where in system used to hang or sometime crash within minutes when kprobes are inserted on ISR routine and a task routine. The fix has been stress tested on i386, ia64, pp64 and on x86_64. To reproduce the problem insert kprobes on schedule() and do_IRQ() functions and you should see hang or system crash. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/kprobes.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 2d7658fbbb2..df08c43276a 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -311,7 +311,8 @@ int __kprobes kprobe_handler(struct pt_regs *regs) Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS) { + if (kprobe_status == KPROBE_HIT_SS && + *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; regs->eflags |= kprobe_saved_rflags; unlock_kprobes(); -- cgit v1.2.3 From a08b6b7968e7a6afc75e365ac31830867275abdc Mon Sep 17 00:00:00 2001 From: "viro@ZenIV.linux.org.uk" Date: Tue, 6 Sep 2005 01:48:42 +0100 Subject: [PATCH] Kconfig fix (BLK_DEV_FD dependencies) Sanitized and fixed floppy dependencies: split the messy dependencies for BLK_DEV_FD by introducing a new symbol (ARCH_MAY_HAVE_PC_FDC), making BLK_DEV_FD depend on that one and taking declarations of ARCH_MAY_HAVE_PC_FDC to arch/*/Kconfig. While we are at it, fixed several obvious cases when BLK_DEV_FD should have been excluded (architectures lacking asm/floppy.h are *not* going to have floppy.c compile, let alone work). If you can come up with better name for that ("this architecture might have working PC-compatible floppy disk controller"), you are more than welcome - just s/ARCH_MAY_HAVE_PC_FDC/your_prefered_name/g in the patch below... Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 251ce7cf1a3..8f868b67ef0 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -65,6 +65,10 @@ config GENERIC_IOMAP bool default y +config ARCH_MAY_HAVE_PC_FDC + bool + default y + source "init/Kconfig" -- cgit v1.2.3