diff options
Diffstat (limited to 'arch/i386')
-rw-r--r-- | arch/i386/Kconfig | 5 | ||||
-rw-r--r-- | arch/i386/boot/setup.S | 2 | ||||
-rw-r--r-- | arch/i386/boot/tools/build.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/dmi_scan.c | 231 | ||||
-rw-r--r-- | arch/i386/kernel/entry.S | 13 | ||||
-rw-r--r-- | arch/i386/kernel/io_apic.c | 65 | ||||
-rw-r--r-- | arch/i386/kernel/kprobes.c | 35 | ||||
-rw-r--r-- | arch/i386/kernel/nmi.c | 5 | ||||
-rw-r--r-- | arch/i386/kernel/setup.c | 2 | ||||
-rw-r--r-- | arch/i386/kernel/time.c | 13 | ||||
-rw-r--r-- | arch/i386/kernel/timers/timer_hpet.c | 4 | ||||
-rw-r--r-- | arch/i386/kernel/traps.c | 16 | ||||
-rw-r--r-- | arch/i386/kernel/vmlinux.lds.S | 1 | ||||
-rw-r--r-- | arch/i386/mach-default/topology.c | 4 | ||||
-rw-r--r-- | arch/i386/mm/discontig.c | 8 | ||||
-rw-r--r-- | arch/i386/mm/fault.c | 4 | ||||
-rw-r--r-- | arch/i386/mm/init.c | 2 | ||||
-rw-r--r-- | arch/i386/oprofile/init.c | 12 | ||||
-rw-r--r-- | arch/i386/oprofile/nmi_int.c | 4 | ||||
-rw-r--r-- | arch/i386/oprofile/nmi_timer_int.c | 2 |
20 files changed, 258 insertions, 174 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 3b3b017e1c1..4b7de3e1e57 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1318,6 +1318,11 @@ config GENERIC_IRQ_PROBE bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + config X86_SMP bool depends on SMP && !X86_VOYAGER diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index 8cb420f40c5..ca668d9df16 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -82,7 +82,7 @@ start: # This is the setup header, and it must start at %cs:2 (old 0x9020:2) .ascii "HdrS" # header signature - .word 0x0203 # header version number (>= 0x0105) + .word 0x0204 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) realmode_swtch: .word 0, 0 # default_switch, SETUPSEG start_sys_seg: .word SYSSEG diff --git a/arch/i386/boot/tools/build.c b/arch/i386/boot/tools/build.c index 6835f6d47c3..05798419a6a 100644 --- a/arch/i386/boot/tools/build.c +++ b/arch/i386/boot/tools/build.c @@ -177,7 +177,9 @@ int main(int argc, char ** argv) die("Output: seek failed"); buf[0] = (sys_size & 0xff); buf[1] = ((sys_size >> 8) & 0xff); - if (write(1, buf, 2) != 2) + buf[2] = ((sys_size >> 16) & 0xff); + buf[3] = ((sys_size >> 24) & 0xff); + if (write(1, buf, 4) != 4) die("Write of image length failed"); return 0; /* Everything is OK */ diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index a3cdf894302..58516e2ac17 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -6,32 +6,28 @@ #include <linux/bootmem.h> -struct dmi_header { - u8 type; - u8 length; - u16 handle; -}; - -#undef DMI_DEBUG - -#ifdef DMI_DEBUG -#define dmi_printk(x) printk x -#else -#define dmi_printk(x) -#endif - static char * __init dmi_string(struct dmi_header *dm, u8 s) { u8 *bp = ((u8 *) dm) + dm->length; + char *str = ""; - if (!s) - return ""; - s--; - while (s > 0 && *bp) { - bp += strlen(bp) + 1; + if (s) { s--; - } - return bp; + while (s > 0 && *bp) { + bp += strlen(bp) + 1; + s--; + } + + if (*bp != 0) { + str = alloc_bootmem(strlen(bp) + 1); + if (str != NULL) + strcpy(str, bp); + else + printk(KERN_ERR "dmi_string: out of memory.\n"); + } + } + + return str; } /* @@ -84,69 +80,76 @@ static int __init dmi_checksum(u8 *buf) return sum == 0; } -static int __init dmi_iterate(void (*decode)(struct dmi_header *)) +static char *dmi_ident[DMI_STRING_MAX]; +static LIST_HEAD(dmi_devices); + +/* + * Save a DMI string + */ +static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) { - u8 buf[15]; - char __iomem *p, *q; + char *p, *d = (char*) dm; - /* - * no iounmap() for that ioremap(); it would be a no-op, but it's - * so early in setup that sucker gets confused into doing what - * it shouldn't if we actually call it. - */ - p = ioremap(0xF0000, 0x10000); + if (dmi_ident[slot]) + return; + + p = dmi_string(dm, d[string]); if (p == NULL) - return -1; + return; - for (q = p; q < p + 0x10000; q += 16) { - memcpy_fromio(buf, q, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { - u16 num = (buf[13] << 8) | buf[12]; - u16 len = (buf[7] << 8) | buf[6]; - u32 base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; + dmi_ident[slot] = p; +} - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); +static void __init dmi_save_devices(struct dmi_header *dm) +{ + int i, count = (dm->length - sizeof(struct dmi_header)) / 2; + struct dmi_device *dev; + + for (i = 0; i < count; i++) { + char *d = ((char *) dm) + (i * 2); - dmi_printk((KERN_INFO "%d structures occupying %d bytes.\n", - num, len)); - dmi_printk((KERN_INFO "DMI table at 0x%08X.\n", base)); + /* Skip disabled device */ + if ((*d & 0x80) == 0) + continue; - if (dmi_table(base,len, num, decode) == 0) - return 0; + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_devices: out of memory.\n"); + break; } + + dev->type = *d++ & 0x7f; + dev->name = dmi_string(dm, *d); + dev->device_data = NULL; + + list_add(&dev->list, &dmi_devices); } - return -1; } -static char *dmi_ident[DMI_STRING_MAX]; - -/* - * Save a DMI string - */ -static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) +static void __init dmi_save_ipmi_device(struct dmi_header *dm) { - char *d = (char*)dm; - char *p = dmi_string(dm, d[string]); + struct dmi_device *dev; + void * data; - if (p == NULL || *p == 0) + data = alloc_bootmem(dm->length); + if (data == NULL) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; - if (dmi_ident[slot]) + } + + memcpy(data, dm, dm->length); + + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); return; + } - dmi_ident[slot] = alloc_bootmem(strlen(p) + 1); - if(dmi_ident[slot]) - strcpy(dmi_ident[slot], p); - else - printk(KERN_ERR "dmi_save_ident: out of memory.\n"); + dev->type = DMI_DEV_TYPE_IPMI; + dev->name = "IPMI controller"; + dev->device_data = data; + + list_add(&dev->list, &dmi_devices); } /* @@ -156,42 +159,69 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) */ static void __init dmi_decode(struct dmi_header *dm) { - u8 *data __attribute__((__unused__)) = (u8 *)dm; - switch(dm->type) { - case 0: - dmi_printk(("BIOS Vendor: %s\n", dmi_string(dm, data[4]))); + case 0: /* BIOS Information */ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); - dmi_printk(("BIOS Version: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_BIOS_VERSION, 5); - dmi_printk(("BIOS Release: %s\n", dmi_string(dm, data[8]))); dmi_save_ident(dm, DMI_BIOS_DATE, 8); break; - case 1: - dmi_printk(("System Vendor: %s\n", dmi_string(dm, data[4]))); + case 1: /* System Information */ dmi_save_ident(dm, DMI_SYS_VENDOR, 4); - dmi_printk(("Product Name: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); - dmi_printk(("Version: %s\n", dmi_string(dm, data[6]))); dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); - dmi_printk(("Serial Number: %s\n", dmi_string(dm, data[7]))); dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); break; - case 2: - dmi_printk(("Board Vendor: %s\n", dmi_string(dm, data[4]))); + case 2: /* Base Board Information */ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); - dmi_printk(("Board Name: %s\n", dmi_string(dm, data[5]))); dmi_save_ident(dm, DMI_BOARD_NAME, 5); - dmi_printk(("Board Version: %s\n", dmi_string(dm, data[6]))); dmi_save_ident(dm, DMI_BOARD_VERSION, 6); break; + case 10: /* Onboard Devices Information */ + dmi_save_devices(dm); + break; + case 38: /* IPMI Device Information */ + dmi_save_ipmi_device(dm); } } void __init dmi_scan_machine(void) { - if (dmi_iterate(dmi_decode)) - printk(KERN_INFO "DMI not present.\n"); + u8 buf[15]; + char __iomem *p, *q; + + /* + * no iounmap() for that ioremap(); it would be a no-op, but it's + * so early in setup that sucker gets confused into doing what + * it shouldn't if we actually call it. + */ + p = ioremap(0xF0000, 0x10000); + if (p == NULL) + goto out; + + for (q = p; q < p + 0x10000; q += 16) { + memcpy_fromio(buf, q, 15); + if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + u16 num = (buf[13] << 8) | buf[12]; + u16 len = (buf[7] << 8) | buf[6]; + u32 base = (buf[11] << 24) | (buf[10] << 16) | + (buf[9] << 8) | buf[8]; + + /* + * DMI version 0.0 means that the real version is taken from + * the SMBIOS version, which we don't know at this point. + */ + if (buf[14] != 0) + printk(KERN_INFO "DMI %d.%d present.\n", + buf[14] >> 4, buf[14] & 0xF); + else + printk(KERN_INFO "DMI present.\n"); + + if (dmi_table(base,len, num, dmi_decode) == 0) + return; + } + } + +out: printk(KERN_INFO "DMI not present.\n"); } @@ -218,9 +248,9 @@ int dmi_check_system(struct dmi_system_id *list) /* No match */ goto fail; } + count++; if (d->callback && d->callback(d)) break; - count++; fail: d++; } @@ -240,3 +270,32 @@ char *dmi_get_system_info(int field) return dmi_ident[field]; } EXPORT_SYMBOL(dmi_get_system_info); + +/** + * dmi_find_device - find onboard device by type/name + * @type: device type or %DMI_DEV_TYPE_ANY to match all device types + * @desc: device name string or %NULL to match all + * @from: previous device found in search, or %NULL for new search. + * + * Iterates through the list of known onboard devices. If a device is + * found with a matching @vendor and @device, a pointer to its device + * structure is returned. Otherwise, %NULL is returned. + * A new search is initiated by passing %NULL to the @from argument. + * If @from is not %NULL, searches continue from next device. + */ +struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from) +{ + struct list_head *d, *head = from ? &from->list : &dmi_devices; + + for(d = head->next; d != &dmi_devices; d = d->next) { + struct dmi_device *dev = list_entry(d, struct dmi_device, list); + + if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) && + ((name == NULL) || (strcmp(dev->name, name) == 0))) + return dev; + } + + return NULL; +} +EXPORT_SYMBOL(dmi_find_device); diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index abb909793ef..3aad0383966 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -507,7 +507,7 @@ label: \ pushl $__KERNEL_CS; \ pushl $sysenter_past_esp -ENTRY(debug) +KPROBE_ENTRY(debug) cmpl $sysenter_entry,(%esp) jne debug_stack_correct FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) @@ -518,7 +518,7 @@ debug_stack_correct: movl %esp,%eax # pt_regs pointer call do_debug jmp ret_from_exception - + .previous .text /* * NMI is doubly nasty. It can happen _while_ we're handling * a debug fault, and the debug fault hasn't yet been able to @@ -591,13 +591,14 @@ nmi_16bit_stack: .long 1b,iret_exc .previous -ENTRY(int3) +KPROBE_ENTRY(int3) pushl $-1 # mark this as an int SAVE_ALL xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_int3 jmp ret_from_exception + .previous .text ENTRY(overflow) pushl $0 @@ -631,17 +632,19 @@ ENTRY(stack_segment) pushl $do_stack_segment jmp error_code -ENTRY(general_protection) +KPROBE_ENTRY(general_protection) pushl $do_general_protection jmp error_code + .previous .text ENTRY(alignment_check) pushl $do_alignment_check jmp error_code -ENTRY(page_fault) +KPROBE_ENTRY(page_fault) pushl $do_page_fault jmp error_code + .previous .text #ifdef CONFIG_X86_MCE ENTRY(machine_check) diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 6578f40bd50..0e727e6da5c 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -33,6 +33,7 @@ #include <linux/acpi.h> #include <linux/module.h> #include <linux/sysdev.h> + #include <asm/io.h> #include <asm/smp.h> #include <asm/desc.h> @@ -77,7 +78,7 @@ static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; -int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; +int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; #ifdef CONFIG_PCI_MSI #define vector_to_irq(vector) \ (platform_legacy_irq(vector) ? vector : vector_irq[vector]) @@ -222,13 +223,21 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { unsigned long flags; int pin; struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; + cpumask_t tmp; + cpus_and(tmp, cpumask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(cpumask, tmp, CPU_MASK_ALL); + apicid_value = cpu_mask_to_apicid(cpumask); /* Prepare to do the io_apic_write */ apicid_value = apicid_value << 24; @@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = irq_2_pin + entry->next; } + set_irq_info(irq, cpumask); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # define Dprintk(x...) # endif -cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; #define IRQBALANCE_CHECK_ARCH -999 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; @@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq) cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); if (cpu != new_cpu) { - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(irq, cpumask_of_cpu(new_cpu)); } } @@ -528,16 +532,12 @@ tryanotherirq: cpus_and(tmp, target_cpu_mask, allowed_mask); if (!cpus_empty(tmp)) { - irq_desc_t *desc = irq_desc + selected_irq; - unsigned long flags; Dprintk("irq = %d moved to cpu = %d\n", selected_irq, min_loaded); /* mark for change destination */ - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[selected_irq] = - cpumask_of_cpu(min_loaded); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); + /* Since we made a change, come back sooner to * check for more variation. */ @@ -568,7 +568,8 @@ static int balanced_irq(void *unused) /* push everything to CPU 0 to give us a starting point. */ for (i = 0 ; i < NR_IRQS ; i++) { - pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); + pending_irq_cpumask[i] = cpumask_of_cpu(0); + set_pending_irq(i, cpumask_of_cpu(0)); } for ( ; ; ) { @@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str) __setup("noirqbalance", irqbalance_disable); -static inline void move_irq(int irq) -{ - /* note - we hold the desc->lock */ - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { - set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); - cpus_clear(pending_irq_balance_cpumask[irq]); - } -} - late_initcall(balanced_irq_init); - -#else /* !CONFIG_IRQBALANCE */ -static inline void move_irq(int irq) { } #endif /* CONFIG_IRQBALANCE */ +#endif /* CONFIG_SMP */ #ifndef CONFIG_SMP void fastcall send_IPI_self(int vector) @@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void) } } +#endif /* * EISA Edge/Level control register, ELCR @@ -1127,7 +1119,7 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -u8 irq_vector[NR_IRQ_VECTORS] = { FIRST_DEVICE_VECTOR , 0 }; +u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; int assign_irq_vector(int irq) { @@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); end_level_ioapic_irq(irq); } @@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } #endif +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1992,7 +1990,7 @@ static void set_ioapic_affinity_vector (unsigned int vector, * edge-triggered handler, without risking IRQ storms and other ugly * races. */ -static struct hw_interrupt_type ioapic_edge_type = { +static struct hw_interrupt_type ioapic_edge_type __read_mostly = { .typename = "IO-APIC-edge", .startup = startup_edge_ioapic, .shutdown = shutdown_edge_ioapic, @@ -2000,10 +1998,12 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; -static struct hw_interrupt_type ioapic_level_type = { +static struct hw_interrupt_type ioapic_level_type __read_mostly = { .typename = "IO-APIC-level", .startup = startup_level_ioapic, .shutdown = shutdown_level_ioapic, @@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -2074,7 +2076,7 @@ static void ack_lapic_irq (unsigned int irq) static void end_lapic_irq (unsigned int i) { /* nothing */ } -static struct hw_interrupt_type lapic_irq_type = { +static struct hw_interrupt_type lapic_irq_type __read_mostly = { .typename = "local-APIC-edge", .startup = NULL, /* startup_irq() not used for IRQ0 */ .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ @@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index a6d8c45961d..6345b430b10 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -62,32 +62,32 @@ static inline int is_IF_modifier(kprobe_opcode_t opcode) return 0; } -int arch_prepare_kprobe(struct kprobe *p) +int __kprobes arch_prepare_kprobe(struct kprobe *p) { return 0; } -void arch_copy_kprobe(struct kprobe *p) +void __kprobes arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; } -void arch_arm_kprobe(struct kprobe *p) +void __kprobes arch_arm_kprobe(struct kprobe *p) { *p->addr = BREAKPOINT_INSTRUCTION; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_disarm_kprobe(struct kprobe *p) +void __kprobes arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; flush_icache_range((unsigned long) p->addr, (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -void arch_remove_kprobe(struct kprobe *p) +void __kprobes arch_remove_kprobe(struct kprobe *p) { } @@ -127,7 +127,8 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } -void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, + struct pt_regs *regs) { unsigned long *sara = (unsigned long *)®s->esp; struct kretprobe_instance *ri; @@ -150,7 +151,7 @@ void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. */ -static int kprobe_handler(struct pt_regs *regs) +static int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; @@ -176,7 +177,8 @@ static int kprobe_handler(struct pt_regs *regs) Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS) { + if (kprobe_status == KPROBE_HIT_SS && + *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; regs->eflags |= kprobe_saved_eflags; unlock_kprobes(); @@ -220,7 +222,10 @@ static int kprobe_handler(struct pt_regs *regs) * either a probepoint or a debugger breakpoint * at this address. In either case, no further * handling of this interrupt is appropriate. + * Back up over the (now missing) int3 and run + * the original instruction. */ + regs->eip -= sizeof(kprobe_opcode_t); ret = 1; } /* Not one of ours: let kernel handle it */ @@ -259,7 +264,7 @@ no_kprobe: /* * Called when we hit the probe point at kretprobe_trampoline */ -int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -338,7 +343,7 @@ int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) { unsigned long *tos = (unsigned long *)®s->esp; unsigned long next_eip = 0; @@ -444,8 +449,8 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) /* * Wrapper routine to for handling exceptions. */ -int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, - void *data) +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; switch (val) { @@ -473,7 +478,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, return NOTIFY_DONE; } -int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; @@ -495,7 +500,7 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } -void jprobe_return(void) +void __kprobes jprobe_return(void) { preempt_enable_no_resched(); asm volatile (" xchgl %%ebx,%%esp \n" @@ -506,7 +511,7 @@ void jprobe_return(void) (jprobe_saved_esp):"memory"); } -int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { u8 *addr = (u8 *) (regs->eip - 1); unsigned long stack_addr = (unsigned long)jprobe_saved_esp; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 8bbdbda07a2..0178457db72 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -478,6 +478,11 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); } extern void die_nmi(struct pt_regs *, const char *msg); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 294bcca985a..e29fd5aeaf8 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -82,7 +82,7 @@ EXPORT_SYMBOL(efi_enabled); /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 6f794a78ee1..eefea7c5500 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -194,10 +194,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; @@ -252,8 +249,7 @@ EXPORT_SYMBOL(profile_pc); * timer_interrupt() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -static inline void do_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static inline void do_timer_interrupt(int irq, struct pt_regs *regs) { #ifdef CONFIG_X86_IO_APIC if (timer_ack) { @@ -307,7 +303,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) cur_timer->mark_offset(); - do_timer_interrupt(irq, NULL, regs); + do_timer_interrupt(irq, regs); write_sequnlock(&xtime_lock); return IRQ_HANDLED; @@ -348,7 +344,7 @@ static void sync_cmos_clock(unsigned long dummy) * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... */ - if ((time_status & STA_UNSYNC) != 0) + if (!ntp_synced()) /* * Not synced, exit, do not restart a timer (if one is * running, let it run out). @@ -422,6 +418,7 @@ static int timer_resume(struct sys_device *dev) last_timer->resume(); cur_timer = last_timer; last_timer = NULL; + touch_softlockup_watchdog(); return 0; } diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index 001de97c9e4..d973a8b681f 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -18,8 +18,8 @@ #include "mach_timer.h" #include <asm/hpet.h> -static unsigned long __read_mostly hpet_usec_quotient; /* convert hpet clks to usec */ -static unsigned long tsc_hpet_quotient; /* convert tsc to hpet clks */ +static unsigned long hpet_usec_quotient __read_mostly; /* convert hpet clks to usec */ +static unsigned long tsc_hpet_quotient __read_mostly; /* convert tsc to hpet clks */ static unsigned long hpet_last; /* hpet counter value at last tick*/ static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */ static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */ diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 54629bb5893..09a58cb6daa 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -363,8 +363,9 @@ static inline void die_if_kernel(const char * str, struct pt_regs * regs, long e die(str, regs, err); } -static void do_trap(int trapnr, int signr, char *str, int vm86, - struct pt_regs * regs, long error_code, siginfo_t *info) +static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, + struct pt_regs * regs, long error_code, + siginfo_t *info) { struct task_struct *tsk = current; tsk->thread.error_code = error_code; @@ -460,7 +461,8 @@ DO_ERROR(12, SIGBUS, "stack segment", stack_segment) DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0) -fastcall void do_general_protection(struct pt_regs * regs, long error_code) +fastcall void __kprobes do_general_protection(struct pt_regs * regs, + long error_code) { int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); @@ -657,7 +659,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) ++nmi_count(cpu); - if (!nmi_callback(regs, cpu)) + if (!rcu_dereference(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); @@ -665,7 +667,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { - nmi_callback = callback; + rcu_assign_pointer(nmi_callback, callback); } EXPORT_SYMBOL_GPL(set_nmi_callback); @@ -676,7 +678,7 @@ void unset_nmi_callback(void) EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_KPROBES -fastcall void do_int3(struct pt_regs *regs, long error_code) +fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) @@ -710,7 +712,7 @@ fastcall void do_int3(struct pt_regs *regs, long error_code) * find every occurrence of the TF bit that could be saved away even * by user code) */ -fastcall void do_debug(struct pt_regs * regs, long error_code) +fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) { unsigned int condition; struct task_struct *tsk = current; diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 761972f8cb6..13b9c62cbbb 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -22,6 +22,7 @@ SECTIONS *(.text) SCHED_TEXT LOCK_TEXT + KPROBES_TEXT *(.fixup) *(.gnu.warning) } = 0x9090 diff --git a/arch/i386/mach-default/topology.c b/arch/i386/mach-default/topology.c index 23395fff35d..b64314069e7 100644 --- a/arch/i386/mach-default/topology.c +++ b/arch/i386/mach-default/topology.c @@ -76,7 +76,7 @@ static int __init topology_init(void) for_each_online_node(i) arch_register_node(i); - for_each_cpu(i) + for_each_present_cpu(i) arch_register_cpu(i); return 0; } @@ -87,7 +87,7 @@ static int __init topology_init(void) { int i; - for_each_cpu(i) + for_each_present_cpu(i) arch_register_cpu(i); return 0; } diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 6711ce3f691..244d8ec66be 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -37,7 +37,7 @@ #include <asm/mmzone.h> #include <bios_ebda.h> -struct pglist_data *node_data[MAX_NUMNODES]; +struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); bootmem_data_t node0_bdata; @@ -49,8 +49,8 @@ bootmem_data_t node0_bdata; * 2) node_start_pfn - the starting page frame number for a node * 3) node_end_pfn - the ending page fram number for a node */ -unsigned long node_start_pfn[MAX_NUMNODES]; -unsigned long node_end_pfn[MAX_NUMNODES]; +unsigned long node_start_pfn[MAX_NUMNODES] __read_mostly; +unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly; #ifdef CONFIG_DISCONTIGMEM @@ -66,7 +66,7 @@ unsigned long node_end_pfn[MAX_NUMNODES]; * physnode_map[4-7] = 1; * physnode_map[8- ] = -1; */ -s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1}; EXPORT_SYMBOL(physnode_map); void memory_present(int nid, unsigned long start, unsigned long end) diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 411b8500ad1..9edd4485b91 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -21,6 +21,7 @@ #include <linux/vt_kern.h> /* For unblank_screen() */ #include <linux/highmem.h> #include <linux/module.h> +#include <linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -223,7 +224,8 @@ fastcall void do_invalid_op(struct pt_regs *, unsigned long); * bit 1 == 0 means read, 1 means write * bit 2 == 0 means kernel, 1 means user-mode */ -fastcall void do_page_fault(struct pt_regs *regs, unsigned long error_code) +fastcall void __kprobes do_page_fault(struct pt_regs *regs, + unsigned long error_code) { struct task_struct *tsk; struct mm_struct *mm; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 9edfc058b89..2ebaf75f732 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -393,7 +393,7 @@ void zap_low_mappings (void) } static int disable_nx __initdata = 0; -u64 __supported_pte_mask = ~_PAGE_NX; +u64 __supported_pte_mask __read_mostly = ~_PAGE_NX; /* * noexec = on|off diff --git a/arch/i386/oprofile/init.c b/arch/i386/oprofile/init.c index c90332de582..5341d481d92 100644 --- a/arch/i386/oprofile/init.c +++ b/arch/i386/oprofile/init.c @@ -15,9 +15,9 @@ * with the NMI mode driver. */ -extern int nmi_init(struct oprofile_operations * ops); -extern int nmi_timer_init(struct oprofile_operations * ops); -extern void nmi_exit(void); +extern int op_nmi_init(struct oprofile_operations * ops); +extern int op_nmi_timer_init(struct oprofile_operations * ops); +extern void op_nmi_exit(void); extern void x86_backtrace(struct pt_regs * const regs, unsigned int depth); @@ -28,11 +28,11 @@ int __init oprofile_arch_init(struct oprofile_operations * ops) ret = -ENODEV; #ifdef CONFIG_X86_LOCAL_APIC - ret = nmi_init(ops); + ret = op_nmi_init(ops); #endif #ifdef CONFIG_X86_IO_APIC if (ret < 0) - ret = nmi_timer_init(ops); + ret = op_nmi_timer_init(ops); #endif ops->backtrace = x86_backtrace; @@ -43,6 +43,6 @@ int __init oprofile_arch_init(struct oprofile_operations * ops) void oprofile_arch_exit(void) { #ifdef CONFIG_X86_LOCAL_APIC - nmi_exit(); + op_nmi_exit(); #endif } diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 255e4702d18..0493e8b8ec4 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c @@ -355,7 +355,7 @@ static int __init ppro_init(char ** cpu_type) /* in order to get driverfs right */ static int using_nmi; -int __init nmi_init(struct oprofile_operations *ops) +int __init op_nmi_init(struct oprofile_operations *ops) { __u8 vendor = boot_cpu_data.x86_vendor; __u8 family = boot_cpu_data.x86; @@ -420,7 +420,7 @@ int __init nmi_init(struct oprofile_operations *ops) } -void nmi_exit(void) +void op_nmi_exit(void) { if (using_nmi) exit_driverfs(); diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index c58d0c14f27..ad93cdd55d6 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -40,7 +40,7 @@ static void timer_stop(void) } -int __init nmi_timer_init(struct oprofile_operations * ops) +int __init op_nmi_timer_init(struct oprofile_operations * ops) { extern int nmi_active; |