diff options
Diffstat (limited to 'arch')
150 files changed, 4091 insertions, 1296 deletions
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index f7c96635d3b..c5739d6309d 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -509,7 +509,7 @@ config NR_CPUS depends on SMP default "64" -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous Memory Support (EXPERIMENTAL)" depends on EXPERIMENTAL help @@ -518,6 +518,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config NUMA bool "NUMA Support (EXPERIMENTAL)" depends on DISCONTIGMEM diff --git a/arch/alpha/defconfig b/arch/alpha/defconfig index 5e39b7a7c8f..6da9c3dbde4 100644 --- a/arch/alpha/defconfig +++ b/arch/alpha/defconfig @@ -96,7 +96,7 @@ CONFIG_ALPHA_CORE_AGP=y CONFIG_ALPHA_BROKEN_IRQ_MASK=y CONFIG_EISA=y # CONFIG_SMP is not set -# CONFIG_DISCONTIGMEM is not set +# CONFIG_ARCH_DISCONTIGMEM_ENABLE is not set CONFIG_VERBOSE_MCHECK=y CONFIG_VERBOSE_MCHECK_ON=1 CONFIG_PCI_LEGACY_PROC=y diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c index ba81c4422aa..c7481d59b6d 100644 --- a/arch/alpha/mm/numa.c +++ b/arch/alpha/mm/numa.c @@ -327,8 +327,6 @@ void __init mem_init(void) extern char _text, _etext, _data, _edata; extern char __init_begin, __init_end; unsigned long nid, i; - struct page * lmem_map; - high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT); reservedpages = 0; @@ -338,10 +336,10 @@ void __init mem_init(void) */ totalram_pages += free_all_bootmem_node(NODE_DATA(nid)); - lmem_map = node_mem_map(nid); pfn = NODE_DATA(nid)->node_start_pfn; for (i = 0; i < node_spanned_pages(nid); i++, pfn++) - if (page_is_ram(pfn) && PageReserved(lmem_map+i)) + if (page_is_ram(pfn) && + PageReserved(nid_page_nr(nid, i))) reservedpages++; } @@ -373,18 +371,18 @@ show_mem(void) show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_online_node(nid) { - struct page * lmem_map = node_mem_map(nid); i = node_spanned_pages(nid); while (i-- > 0) { + struct page *page = nid_page_nr(nid, i); total++; - if (PageReserved(lmem_map+i)) + if (PageReserved(page)) reserved++; - else if (PageSwapCache(lmem_map+i)) + else if (PageSwapCache(page)) cached++; - else if (!page_count(lmem_map+i)) + else if (!page_count(page)) free++; else - shared += page_count(lmem_map + i) - 1; + shared += page_count(page) - 1; } } printk("%ld pages of RAM\n",total); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ee8a9ad7bbd..07ba77c19f6 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -346,7 +346,7 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool default (ARCH_LH7A40X && !LH7A40X_CONTIGMEM) help @@ -355,6 +355,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config LEDS bool "Timer and CPU usage LEDs" depends on ARCH_CDB89712 || ARCH_CO285 || ARCH_EBSA110 || \ diff --git a/arch/arm/boot/install.sh b/arch/arm/boot/install.sh index 935bb27369e..9f9bed20734 100644 --- a/arch/arm/boot/install.sh +++ b/arch/arm/boot/install.sh @@ -21,8 +21,8 @@ # # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi if [ "$(basename $2)" = "zImage" ]; then # Compressed install diff --git a/arch/arm26/Kconfig b/arch/arm26/Kconfig index 6caed90661f..dc0c1936969 100644 --- a/arch/arm26/Kconfig +++ b/arch/arm26/Kconfig @@ -179,6 +179,8 @@ config CMDLINE time by entering them here. As a minimum, you should specify the memory size and the root device (e.g., mem=64M root=/dev/nfs). +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" diff --git a/arch/arm26/boot/install.sh b/arch/arm26/boot/install.sh index c628328dd9e..8a8399b26cf 100644 --- a/arch/arm26/boot/install.sh +++ b/arch/arm26/boot/install.sh @@ -23,8 +23,8 @@ # User may have a custom install script -if [ -x /sbin/installkernel ]; then - exec /sbin/installkernel "$@" +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then + exec /sbin/${CROSS_COMPILE}installkernel "$@" fi if [ "$2" = "zImage" ]; then diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig index 4332ca348d5..f848e376149 100644 --- a/arch/cris/Kconfig +++ b/arch/cris/Kconfig @@ -74,6 +74,8 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +source mm/Kconfig + endmenu menu "Hardware setup" diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 2b19372767e..c93f95146cc 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -74,6 +74,8 @@ config HIGHPTE with a lot of RAM, this can be wasteful of precious low memory. Setting this option will put user-space page tables in high memory. +source "mm/Kconfig" + choice prompt "uClinux kernel load address" depends on !MMU diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index d9dd62a565a..a380167a13c 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -180,4 +180,7 @@ config CPU_H8S config PREEMPT bool "Preemptible Kernel" default n + +source "mm/Kconfig" + endmenu diff --git a/arch/h8300/platform/h8300h/ptrace_h8300h.c b/arch/h8300/platform/h8300h/ptrace_h8300h.c index 18e51a7167d..6ac93c05a1a 100644 --- a/arch/h8300/platform/h8300h/ptrace_h8300h.c +++ b/arch/h8300/platform/h8300h/ptrace_h8300h.c @@ -245,12 +245,12 @@ static unsigned short *getnextpc(struct task_struct *child, unsigned short *pc) addr = h8300_get_reg(child, regno-1+PT_ER1); return (unsigned short *)addr; case relb: - if ((inst = 0x55) || isbranch(child,inst & 0x0f)) + if (inst == 0x55 || isbranch(child,inst & 0x0f)) pc = (unsigned short *)((unsigned long)pc + ((signed char)(*fetch_p))); return pc+1; /* skip myself */ case relw: - if ((inst = 0x5c) || isbranch(child,(*fetch_p & 0xf0) >> 4)) + if (inst == 0x5c || isbranch(child,(*fetch_p & 0xf0) >> 4)) pc = (unsigned short *)((unsigned long)pc + ((signed short)(*(pc+1)))); return pc+2; /* skip myself */ diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index dfd904f6883..d4ae5f9ceae 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -68,7 +68,6 @@ config X86_VOYAGER config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - select DISCONTIGMEM select NUMA help This option is used for getting Linux to run on a (IBM/Sequent) NUMA @@ -783,25 +782,48 @@ comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) -config DISCONTIGMEM - bool - depends on NUMA - default y - config HAVE_ARCH_BOOTMEM_NODE bool depends on NUMA default y -config HAVE_MEMORY_PRESENT +config ARCH_HAVE_MEMORY_PRESENT bool depends on DISCONTIGMEM default y config NEED_NODE_MEMMAP_SIZE bool - depends on DISCONTIGMEM + depends on DISCONTIGMEM || SPARSEMEM + default y + +config HAVE_ARCH_ALLOC_REMAP + bool + depends on NUMA + default y + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on ARCH_SPARSEMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + bool default y + depends on NUMA config HIGHPTE bool "Allocate 3rd-level pagetables from highmem" @@ -939,6 +961,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source kernel/Kconfig.hz + endmenu diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 1c36ca332a9..bf7c9ba709f 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -17,6 +17,13 @@ # 20050320 Kianusch Sayah Karadji <kianusch@sk-tech.net> # Added support for GEODE CPU +HAS_BIARCH := $(call cc-option-yn, -m32) +ifeq ($(HAS_BIARCH),y) +AS := $(AS) --32 +LD := $(LD) -m elf_i386 +CC := $(CC) -m32 +endif + LDFLAGS := -m elf_i386 OBJCOPYFLAGS := -O binary -R .note -R .comment -S LDFLAGS_vmlinux := diff --git a/arch/i386/boot/install.sh b/arch/i386/boot/install.sh index 90f2452b3b9..f17b40dfc0f 100644 --- a/arch/i386/boot/install.sh +++ b/arch/i386/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index d509836b70c..8d993fa7175 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1133,7 +1133,7 @@ inline void smp_local_timer_interrupt(struct pt_regs * regs) } #ifdef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index d199e525680..b9954248d0a 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -635,7 +635,7 @@ void __init cpu_init (void) /* Clear all 6 debug registers: */ -#define CD(register) __asm__("movl %0,%%db" #register ::"r"(0) ); +#define CD(register) set_debugreg(0, register) CD(0); CD(1); CD(2); CD(3); /* no db4 and db5 */; CD(6); CD(7); diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index e1c2042b9b7..d66b09e0c82 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -375,6 +375,19 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; } +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + /** * mtrr_add - Add a memory type region * @base: Physical base address of region @@ -415,11 +428,8 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type, char increment) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } @@ -511,11 +521,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) int mtrr_del(int reg, unsigned long base, unsigned long size) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 7323c19f354..8bd77d948a8 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -86,7 +86,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if ( cpu_has(c, X86_FEATURE_TSC) ) { - seq_printf(m, "cpu MHz\t\t: %lu.%03lu\n", + seq_printf(m, "cpu MHz\t\t: %u.%03u\n", cpu_khz / 1000, (cpu_khz % 1000)); } diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 903190a4b3f..180f070d03c 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c @@ -1,97 +1,17 @@ #include <linux/config.h> #include <linux/module.h> -#include <linux/smp.h> -#include <linux/user.h> -#include <linux/elfcore.h> -#include <linux/mca.h> -#include <linux/sched.h> -#include <linux/in6.h> -#include <linux/interrupt.h> -#include <linux/smp_lock.h> -#include <linux/pm.h> -#include <linux/pci.h> -#include <linux/apm_bios.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/tty.h> -#include <linux/highmem.h> -#include <linux/time.h> - -#include <asm/semaphore.h> -#include <asm/processor.h> -#include <asm/i387.h> -#include <asm/uaccess.h> #include <asm/checksum.h> -#include <asm/io.h> -#include <asm/delay.h> -#include <asm/irq.h> -#include <asm/mmx.h> #include <asm/desc.h> -#include <asm/pgtable.h> -#include <asm/tlbflush.h> -#include <asm/nmi.h> -#include <asm/ist.h> -#include <asm/kdebug.h> - -extern void dump_thread(struct pt_regs *, struct user *); -extern spinlock_t rtc_lock; /* This is definitely a GPL-only symbol */ EXPORT_SYMBOL_GPL(cpu_gdt_table); -#if defined(CONFIG_APM_MODULE) -extern void machine_real_restart(unsigned char *, int); -EXPORT_SYMBOL(machine_real_restart); -extern void default_idle(void); -EXPORT_SYMBOL(default_idle); -#endif - -#ifdef CONFIG_SMP -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); -#endif - -#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) -extern struct drive_info_struct drive_info; -EXPORT_SYMBOL(drive_info); -#endif - -extern unsigned long cpu_khz; -extern unsigned long get_cmos_time(void); - -/* platform dependent support */ -EXPORT_SYMBOL(boot_cpu_data); -#ifdef CONFIG_DISCONTIGMEM -EXPORT_SYMBOL(node_data); -EXPORT_SYMBOL(physnode_map); -#endif -#ifdef CONFIG_X86_NUMAQ -EXPORT_SYMBOL(xquad_portio); -#endif -EXPORT_SYMBOL(dump_thread); -EXPORT_SYMBOL(dump_fpu); -EXPORT_SYMBOL_GPL(kernel_fpu_begin); -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(ioremap_nocache); -EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(kernel_thread); -EXPORT_SYMBOL(pm_idle); -EXPORT_SYMBOL(pm_power_off); -EXPORT_SYMBOL(get_cmos_time); -EXPORT_SYMBOL(cpu_khz); -EXPORT_SYMBOL(apm_info); - EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); EXPORT_SYMBOL(__down_failed_trylock); EXPORT_SYMBOL(__up_wakeup); /* Networking helper routines. */ EXPORT_SYMBOL(csum_partial_copy_generic); -/* Delay loops */ -EXPORT_SYMBOL(__ndelay); -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__delay); -EXPORT_SYMBOL(__const_udelay); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); @@ -105,87 +25,11 @@ EXPORT_SYMBOL(__put_user_8); EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(strstr); -EXPORT_SYMBOL(strncpy_from_user); -EXPORT_SYMBOL(__strncpy_from_user); -EXPORT_SYMBOL(clear_user); -EXPORT_SYMBOL(__clear_user); -EXPORT_SYMBOL(__copy_from_user_ll); -EXPORT_SYMBOL(__copy_to_user_ll); -EXPORT_SYMBOL(strnlen_user); - -EXPORT_SYMBOL(dma_alloc_coherent); -EXPORT_SYMBOL(dma_free_coherent); - -#ifdef CONFIG_PCI -EXPORT_SYMBOL(pci_mem_start); -#endif - -#ifdef CONFIG_PCI_BIOS -EXPORT_SYMBOL(pcibios_set_irq_routing); -EXPORT_SYMBOL(pcibios_get_irq_routing_table); -#endif - -#ifdef CONFIG_X86_USE_3DNOW -EXPORT_SYMBOL(_mmx_memcpy); -EXPORT_SYMBOL(mmx_clear_page); -EXPORT_SYMBOL(mmx_copy_page); -#endif - -#ifdef CONFIG_X86_HT -EXPORT_SYMBOL(smp_num_siblings); -EXPORT_SYMBOL(cpu_sibling_map); -#endif - #ifdef CONFIG_SMP -EXPORT_SYMBOL(cpu_data); -EXPORT_SYMBOL(cpu_online_map); -EXPORT_SYMBOL(cpu_callout_map); +extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); +extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); EXPORT_SYMBOL(__write_lock_failed); EXPORT_SYMBOL(__read_lock_failed); - -/* Global SMP stuff */ -EXPORT_SYMBOL(smp_call_function); - -/* TLB flushing */ -EXPORT_SYMBOL(flush_tlb_page); -#endif - -#ifdef CONFIG_X86_IO_APIC -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -#endif - -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif - -#ifdef CONFIG_VT -EXPORT_SYMBOL(screen_info); -#endif - -EXPORT_SYMBOL(get_wchan); - -EXPORT_SYMBOL(rtc_lock); - -EXPORT_SYMBOL_GPL(set_nmi_callback); -EXPORT_SYMBOL_GPL(unset_nmi_callback); - -EXPORT_SYMBOL(register_die_notifier); -#ifdef CONFIG_HAVE_DEC_LOCK -EXPORT_SYMBOL(_atomic_dec_and_lock); -#endif - -EXPORT_SYMBOL(__PAGE_KERNEL); - -#ifdef CONFIG_HIGHMEM -EXPORT_SYMBOL(kmap); -EXPORT_SYMBOL(kunmap); -EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); -EXPORT_SYMBOL(kmap_atomic_to_page); -#endif - -#if defined(CONFIG_X86_SPEEDSTEP_SMI) || defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) -EXPORT_SYMBOL(ist_info); #endif EXPORT_SYMBOL(csum_partial); diff --git a/arch/i386/kernel/i387.c b/arch/i386/kernel/i387.c index c55e037f08f..b817168d9c6 100644 --- a/arch/i386/kernel/i387.c +++ b/arch/i386/kernel/i387.c @@ -10,6 +10,7 @@ #include <linux/config.h> #include <linux/sched.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/i387.h> #include <asm/math_emu.h> @@ -79,6 +80,7 @@ void kernel_fpu_begin(void) } clts(); } +EXPORT_SYMBOL_GPL(kernel_fpu_begin); void restore_fpu( struct task_struct *tsk ) { @@ -526,6 +528,7 @@ int dump_fpu( struct pt_regs *regs, struct user_i387_struct *fpu ) return fpvalid; } +EXPORT_SYMBOL(dump_fpu); int dump_task_fpu(struct task_struct *tsk, struct user_i387_struct *fpu) { diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 7a324e8b86f..08540bc4ba3 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -31,7 +31,7 @@ #include <linux/mc146818rtc.h> #include <linux/compiler.h> #include <linux/acpi.h> - +#include <linux/module.h> #include <linux/sysdev.h> #include <asm/io.h> #include <asm/smp.h> @@ -812,6 +812,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } return best_guess; } +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); /* * This function currently is only a helper for the i386 smp boot process where @@ -1659,6 +1660,12 @@ static void __init setup_ioapic_ids_from_mpc(void) unsigned long flags; /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + return; + /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ @@ -1684,10 +1691,6 @@ static void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic].mpc_apicid = reg_00.bits.ID; } - /* Don't check I/O APIC IDs for some xAPIC systems. They have - * no meaning without the serial APIC bus. */ - if (NO_IOAPIC_CHECK) - continue; /* * Sanity check, is the ID really free? Every APIC in a * system must have a unique ID or we get lots of nice diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 59ff9b45506..3762f6b35ab 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -23,6 +23,9 @@ * Rusty Russell). * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes * interface to access function arguments. + * 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston + * <jkenisto@us.ibm.com> and Prasanna S Panchamukhi + * <prasanna@in.ibm.com> added function-return probes. */ #include <linux/config.h> @@ -30,15 +33,14 @@ #include <linux/ptrace.h> #include <linux/spinlock.h> #include <linux/preempt.h> +#include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/desc.h> -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_esp; /* copy of the kernel stack at the probe fire time */ @@ -68,16 +70,50 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->eip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_old_eflags_prev = kprobe_old_eflags; + kprobe_saved_eflags_prev = kprobe_saved_eflags; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_old_eflags = kprobe_old_eflags_prev; + kprobe_saved_eflags = kprobe_saved_eflags_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + current_kprobe = p; + kprobe_saved_eflags = kprobe_old_eflags + = (regs->eflags & (TF_MASK | IF_MASK)); + if (is_IF_modifier(p->opcode)) + kprobe_saved_eflags &= ~IF_MASK; } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -91,6 +127,50 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } +struct task_struct *arch_get_kprobe_task(void *ptr) +{ + return ((struct thread_info *) (((unsigned long) ptr) & + (~(THREAD_SIZE -1))))->task; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + unsigned long *sara = (unsigned long *)®s->esp; + struct kretprobe_instance *ri; + static void *orig_ret_addr; + + /* + * Save the return address when the return probe hits + * the first time, and use it to populate the (krprobe + * instance)->ret_addr for subsequent return probes at + * the same addrress since stack address would have + * the kretprobe_trampoline by then. + */ + if (((void*) *sara) != kretprobe_trampoline) + orig_ret_addr = (void*) *sara; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->stack_addr = sara; + ri->ret_addr = orig_ret_addr; + add_rp_inst(ri); + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; + } else { + rp->nmissed++; + } +} + +void arch_kprobe_flush_task(struct task_struct *tk) +{ + struct kretprobe_instance *ri; + while ((ri = get_rp_inst_tsk(tk)) != NULL) { + *((unsigned long *)(ri->stack_addr)) = + (unsigned long) ri->ret_addr; + recycle_rp_inst(ri); + } +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. @@ -127,8 +207,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -163,11 +253,7 @@ static int kprobe_handler(struct pt_regs *regs) } kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_eflags = kprobe_old_eflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->opcode)) - kprobe_saved_eflags &= ~IF_MASK; + set_current_kprobe(p, regs); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -184,6 +270,55 @@ no_kprobe: } /* + * For function-return probes, init_kprobes() establishes a probepoint + * here. When a retprobed function returns, this probe is hit and + * trampoline_probe_handler() runs, calling the kretprobe's handler. + */ + void kretprobe_trampoline_holder(void) + { + asm volatile ( ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" + "nop\n"); + } + +/* + * Called when we hit the probe point at kretprobe_trampoline + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node; + unsigned long *sara = ((unsigned long *) ®s->esp) - 1; + + tsk = arch_get_kprobe_task(sara); + head = kretprobe_inst_table_head(tsk); + + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara && ri->rp) { + if (ri->rp->handler) + ri->rp->handler(ri, regs); + } + } + return 0; +} + +void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kretprobe_instance *ri; + /* RA already popped */ + unsigned long *sara = ((unsigned long *)®s->esp) - 1; + + while ((ri = get_rp_inst(sara))) { + regs->eip = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + } + regs->eflags &= ~TF_MASK; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int 3" * instruction. To avoid the SMP problems that can occur when we @@ -263,13 +398,22 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } - resume_execution(current_kprobe, regs); + if (current_kprobe->post_handler != trampoline_post_handler) + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_eflags; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 1347ab4939e..383a11600d2 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -914,7 +914,10 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id); + else + mp_ioapics[idx].mpc_apicid = id; mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx); /* @@ -1055,11 +1058,20 @@ void __init mp_config_acpi_legacy_irqs (void) } } +#define MAX_GSI_NUM 4096 + int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrups, which + * represent all possible interrupts, and IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; #ifdef CONFIG_ACPI_BUS /* Don't set up the ACPI SCI because it's already set up */ @@ -1094,11 +1106,26 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi; + return gsi_to_irq[gsi]; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + if (edge_level) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + if (gsi < MAX_GSI_NUM) { + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } else { + printk(KERN_ERR "GSI %u is too high\n", gsi); + return gsi; + } + } + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 2c0ee9c2d02..da6c46d667c 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -28,8 +28,7 @@ #include <linux/sysctl.h> #include <asm/smp.h> -#include <asm/mtrr.h> -#include <asm/mpspec.h> +#include <asm/div64.h> #include <asm/nmi.h> #include "mach_traps.h" @@ -324,6 +323,16 @@ static void clear_msr_range(unsigned int base, unsigned int n) wrmsr(base+i, 0, 0); } +static inline void write_watchdog_counter(const char *descr) +{ + u64 count = (u64)cpu_khz * 1000; + + do_div(count, nmi_hz); + if(descr) + Dprintk("setting %s to -0x%08Lx\n", descr, count); + wrmsrl(nmi_perfctr_msr, 0 - count); +} + static void setup_k7_watchdog(void) { unsigned int evntsel; @@ -339,8 +348,7 @@ static void setup_k7_watchdog(void) | K7_NMI_EVENT; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); - Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("K7_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= K7_EVNTSEL_ENABLE; wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); @@ -361,8 +369,7 @@ static void setup_p6_watchdog(void) | P6_NMI_EVENT; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); - Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0); + write_watchdog_counter("P6_PERFCTR0"); apic_write(APIC_LVTPC, APIC_DM_NMI); evntsel |= P6_EVNTSEL0_ENABLE; wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); @@ -402,8 +409,7 @@ static int setup_p4_watchdog(void) wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); - Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000)); - wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter("P4_IQ_COUNTER0"); apic_write(APIC_LVTPC, APIC_DM_NMI); wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); return 1; @@ -518,7 +524,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) * other P6 variant */ apic_write(APIC_LVTPC, APIC_DM_NMI); } - wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1); + write_watchdog_counter(NULL); } } diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 4de2e03c7b4..1e51427cc9e 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -11,6 +11,7 @@ #include <linux/mm.h> #include <linux/string.h> #include <linux/pci.h> +#include <linux/module.h> #include <asm/io.h> struct dma_coherent_mem { @@ -54,6 +55,7 @@ void *dma_alloc_coherent(struct device *dev, size_t size, } return ret; } +EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) @@ -68,6 +70,7 @@ void dma_free_coherent(struct device *dev, size_t size, } else free_pages((unsigned long)vaddr, order); } +EXPORT_SYMBOL(dma_free_coherent); int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, dma_addr_t device_addr, size_t size, int flags) diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 96e3ea6b17c..aea2ce1145d 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -37,6 +37,7 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -73,6 +74,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk) * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +EXPORT_SYMBOL(pm_idle); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); void disable_hlt(void) @@ -105,6 +107,9 @@ void default_idle(void) cpu_relax(); } } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(default_idle); +#endif /* * On SMP it's slightly faster (but much more power-consuming!) @@ -262,7 +267,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (regs->xcs & 3) + if (user_mode(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s)\n", regs->eflags, print_tainted(), system_utsname.release); @@ -325,6 +330,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) /* Ok, create the new process.. */ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); } +EXPORT_SYMBOL(kernel_thread); /* * Free current thread data structures etc.. @@ -334,6 +340,13 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); @@ -357,6 +370,13 @@ void flush_thread(void) { struct task_struct *tsk = current; + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* @@ -508,6 +528,7 @@ void dump_thread(struct pt_regs * regs, struct user * dump) dump->u_fpvalid = dump_fpu (regs, &dump->i387); } +EXPORT_SYMBOL(dump_thread); /* * Capture the user space registers if the task is not running (in user space) @@ -627,13 +648,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas * Now maybe reload the debug registers */ if (unlikely(next->debugreg[7])) { - loaddebug(next, 0); - loaddebug(next, 1); - loaddebug(next, 2); - loaddebug(next, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ - loaddebug(next, 6); - loaddebug(next, 7); + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); } if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) @@ -731,6 +752,7 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } +EXPORT_SYMBOL(get_wchan); /* * sys_alloc_thread_area: get a yet unused TLS descriptor index. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index e34f651fa13..0da59b42843 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -668,7 +668,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) info.si_code = TRAP_BRKPT; /* User-mode eip? */ - info.si_addr = user_mode(regs) ? (void __user *) regs->eip : NULL; + info.si_addr = user_mode_vm(regs) ? (void __user *) regs->eip : NULL; /* Send us the fakey SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 6dc27eb70ee..db912209a8d 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -2,6 +2,7 @@ * linux/arch/i386/kernel/reboot.c */ +#include <linux/config.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/delay.h> @@ -19,6 +20,7 @@ * Power off function, if any */ void (*pm_power_off)(void); +EXPORT_SYMBOL(pm_power_off); static int reboot_mode; static int reboot_thru_bios; @@ -295,6 +297,9 @@ void machine_real_restart(unsigned char *code, int length) : : "i" ((void *) (0x1000 - sizeof (real_mode_switch) - 100))); } +#ifdef CONFIG_APM_MODULE +EXPORT_SYMBOL(machine_real_restart); +#endif void machine_restart(char * __unused) { diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2bfbddebdbf..30406fd0b64 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -23,8 +23,10 @@ * This file handles the architecture-dependent parts of initialization */ +#include <linux/config.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/mmzone.h> #include <linux/tty.h> #include <linux/ioport.h> #include <linux/acpi.h> @@ -73,6 +75,7 @@ EXPORT_SYMBOL(efi_enabled); struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; /* common cpu data for all cpus */ struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; +EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; @@ -90,12 +93,18 @@ extern acpi_interrupt_flags acpi_sci_flags; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; +#ifdef CONFIG_MCA +EXPORT_SYMBOL(machine_id); +#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0x10000000; +#ifdef CONFIG_PCI +EXPORT_SYMBOL(pci_mem_start); +#endif /* Boot loader ID as an integer, for the benefit of proc_dointvec */ int bootloader_type; @@ -107,14 +116,26 @@ static unsigned int highmem_pages = -1; * Setup options */ struct drive_info_struct { char dummy[32]; } drive_info; +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_HD) || \ + defined(CONFIG_BLK_DEV_IDE_MODULE) || defined(CONFIG_BLK_DEV_HD_MODULE) +EXPORT_SYMBOL(drive_info); +#endif struct screen_info screen_info; +#ifdef CONFIG_VT +EXPORT_SYMBOL(screen_info); +#endif struct apm_info apm_info; +EXPORT_SYMBOL(apm_info); struct sys_desc_table_struct { unsigned short length; unsigned char table[0]; }; struct edid_info edid_info; struct ist_info ist_info; +#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ + defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE) +EXPORT_SYMBOL(ist_info); +#endif struct e820map e820; extern void early_cpu_init(void); @@ -1022,7 +1043,7 @@ static void __init reserve_ebda_region(void) reserve_bootmem(addr, PAGE_SIZE); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init setup_bootmem_allocator(void); static unsigned long __init setup_memory(void) { @@ -1072,9 +1093,9 @@ void __init zone_sizes_init(void) free_area_init(zones_size); } #else -extern unsigned long setup_memory(void); +extern unsigned long __init setup_memory(void); extern void zone_sizes_init(void); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* !CONFIG_NEED_MULTIPLE_NODES */ void __init setup_bootmem_allocator(void) { @@ -1475,6 +1496,7 @@ void __init setup_arch(char **cmdline_p) #endif paging_init(); remapped_pgdat_init(); + sparse_init(); zone_sizes_init(); /* diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index ea46d028af0..b9b8f4e20fa 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -346,8 +346,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) extern void __user __kernel_sigreturn; extern void __user __kernel_rt_sigreturn; -static void setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs) +static int setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs * regs) { void __user *restorer; struct sigframe __user *frame; @@ -429,13 +429,14 @@ static void setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { void __user *restorer; @@ -522,20 +523,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->eip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs * regs) { + int ret; + /* Are we from a system call? */ if (regs->orig_eax >= 0) { /* If so, check system call restarting.. */ @@ -569,17 +573,19 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, /* Set up the stack frame */ if (ka->sa.sa_flags & SA_SIGINFO) - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); else - setup_frame(sig, ka, oldset, regs); + ret = setup_frame(sig, ka, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } + + return ret; } /* @@ -599,7 +605,7 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * kernel mode. Just return without doing anything * if so. */ - if ((regs->xcs & 3) != 3) + if (!user_mode(regs)) return 1; if (current->flags & PF_FREEZE) { @@ -618,12 +624,11 @@ int fastcall do_signal(struct pt_regs *regs, sigset_t *oldset) * inside the kernel. */ if (unlikely(current->thread.debugreg[7])) { - loaddebug(¤t->thread, 7); + set_debugreg(current->thread.debugreg[7], 7); } /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, oldset, regs); - return 1; + return handle_signal(signr, &info, &ka, oldset, regs); } no_signal: diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 6223c33ac91..68be7d0c723 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -19,6 +19,7 @@ #include <linux/mc146818rtc.h> #include <linux/cache.h> #include <linux/interrupt.h> +#include <linux/module.h> #include <asm/mtrr.h> #include <asm/tlbflush.h> @@ -452,6 +453,7 @@ void flush_tlb_page(struct vm_area_struct * vma, unsigned long va) preempt_enable(); } +EXPORT_SYMBOL(flush_tlb_page); static void do_flush_tlb_all(void* info) { @@ -547,6 +549,7 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, return 0; } +EXPORT_SYMBOL(smp_call_function); static void stop_this_cpu (void * dummy) { diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index bc1bb6919e6..c20d96d5c15 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -60,6 +60,9 @@ static int __initdata smp_b_stepping; /* Number of siblings per CPU package */ int smp_num_siblings = 1; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(smp_num_siblings); +#endif int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */ EXPORT_SYMBOL(phys_proc_id); int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */ @@ -67,13 +70,16 @@ EXPORT_SYMBOL(cpu_core_id); /* bitmap of online cpus */ cpumask_t cpu_online_map; +EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; +EXPORT_SYMBOL(cpu_callout_map); static cpumask_t smp_commenced_mask; /* Per CPU bogomips and other parameters */ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_data); u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; @@ -199,7 +205,7 @@ static void __init synchronize_tsc_bp (void) unsigned long long t0; unsigned long long sum, avg; long long delta; - unsigned long one_usec; + unsigned int one_usec; int buggy = 0; printk(KERN_INFO "checking TSC synchronization across %u CPUs: ", num_booting_cpus()); @@ -885,8 +891,14 @@ static void smp_tune_scheduling (void) static int boot_cpu_logical_apicid; /* Where the IO area was mapped on multiquad, always 0 otherwise */ void *xquad_portio; +#ifdef CONFIG_X86_NUMAQ +EXPORT_SYMBOL(xquad_portio); +#endif cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; +#ifdef CONFIG_X86_HT +EXPORT_SYMBOL(cpu_sibling_map); +#endif cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; EXPORT_SYMBOL(cpu_core_map); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index a0dcb7c87c3..e68d9fdb075 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -77,11 +77,13 @@ u64 jiffies_64 = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); -unsigned long cpu_khz; /* Detected as we calibrate the TSC */ +unsigned int cpu_khz; /* Detected as we calibrate the TSC */ +EXPORT_SYMBOL(cpu_khz); extern unsigned long wall_jiffies; DEFINE_SPINLOCK(rtc_lock); +EXPORT_SYMBOL(rtc_lock); DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); @@ -324,6 +326,8 @@ unsigned long get_cmos_time(void) return retval; } +EXPORT_SYMBOL(get_cmos_time); + static void sync_cmos_clock(unsigned long dummy); static struct timer_list sync_cmos_timer = diff --git a/arch/i386/kernel/timers/common.c b/arch/i386/kernel/timers/common.c index 8e201219f52..37353bd3180 100644 --- a/arch/i386/kernel/timers/common.c +++ b/arch/i386/kernel/timers/common.c @@ -139,6 +139,15 @@ bad_calibration: } #endif + +unsigned long read_timer_tsc(void) +{ + unsigned long retval; + rdtscl(retval); + return retval; +} + + /* calculate cpu_khz */ void init_cpu_khz(void) { @@ -154,7 +163,8 @@ void init_cpu_khz(void) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } } } diff --git a/arch/i386/kernel/timers/timer.c b/arch/i386/kernel/timers/timer.c index a3d6a288088..7e39ed8e33f 100644 --- a/arch/i386/kernel/timers/timer.c +++ b/arch/i386/kernel/timers/timer.c @@ -64,3 +64,12 @@ struct timer_opts* __init select_timer(void) panic("select_timer: Cannot find a suitable timer\n"); return NULL; } + +int read_current_timer(unsigned long *timer_val) +{ + if (cur_timer->read_timer) { + *timer_val = cur_timer->read_timer(); + return 0; + } + return -1; +} diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index f778f471a09..d766e0963ac 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -158,7 +158,7 @@ static int __init init_hpet(char* override) { unsigned long eax=0, edx=1000; ASM_DIV64_REG(cpu_khz, edx, tsc_quotient, eax, edx); - printk("Detected %lu.%03lu MHz processor.\n", + printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); @@ -186,6 +186,7 @@ static struct timer_opts timer_hpet = { .get_offset = get_offset_hpet, .monotonic_clock = monotonic_clock_hpet, .delay = delay_hpet, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_hpet_init = { diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index d77f22030fe..4ef20e66349 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -246,6 +246,7 @@ static struct timer_opts timer_pmtmr = { .get_offset = get_offset_pmtmr, .monotonic_clock = monotonic_clock_pmtmr, .delay = delay_pmtmr, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_pmtmr_init = { diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 180444d8782..54c36b18202 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -256,7 +256,7 @@ static unsigned long loops_per_jiffy_ref = 0; #ifndef CONFIG_SMP static unsigned long fast_gettimeoffset_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int cpu_khz_ref = 0; #endif static int @@ -323,7 +323,7 @@ static inline void cpufreq_delayed_get(void) { return; } int recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP - unsigned long cpu_khz_old = cpu_khz; + unsigned int cpu_khz_old = cpu_khz; if (cpu_has_tsc) { init_cpu_khz(); @@ -534,7 +534,8 @@ static int __init init_tsc(char* override) :"=a" (cpu_khz), "=d" (edx) :"r" (tsc_quotient), "0" (eax), "1" (edx)); - printk("Detected %lu.%03lu MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); + printk("Detected %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); } set_cyc2ns_scale(cpu_khz/1000); return 0; @@ -572,6 +573,7 @@ static struct timer_opts timer_tsc = { .get_offset = get_offset_tsc, .monotonic_clock = monotonic_clock_tsc, .delay = delay_tsc, + .read_timer = read_timer_tsc, }; struct init_timer_opts __initdata timer_tsc_init = { diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 83c579e82a8..e4d4e2162c7 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -104,6 +104,7 @@ int register_die_notifier(struct notifier_block *nb) spin_unlock_irqrestore(&die_notifier_lock, flags); return err; } +EXPORT_SYMBOL(register_die_notifier); static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -209,7 +210,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); ss = __KERNEL_DS; - if (regs->xcs & 3) { + if (user_mode(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -265,7 +266,7 @@ static void handle_BUG(struct pt_regs *regs) char c; unsigned long eip; - if (regs->xcs & 3) + if (user_mode(regs)) goto no_bug; /* Not in kernel */ eip = regs->eip; @@ -353,7 +354,7 @@ void die(const char * str, struct pt_regs * regs, long err) static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) { - if (!(regs->eflags & VM_MASK) && !(3 & regs->xcs)) + if (!user_mode_vm(regs)) die(str, regs, err); } @@ -366,7 +367,7 @@ static void do_trap(int trapnr, int signr, char *str, int vm86, goto trap_signal; } - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto kernel_trap; trap_signal: { @@ -488,7 +489,7 @@ fastcall void do_general_protection(struct pt_regs * regs, long error_code) if (regs->eflags & VM_MASK) goto gp_in_vm86; - if (!(regs->xcs & 3)) + if (!user_mode(regs)) goto gp_in_kernel; current->thread.error_code = error_code; @@ -636,11 +637,13 @@ void set_nmi_callback(nmi_callback_t callback) { nmi_callback = callback; } +EXPORT_SYMBOL_GPL(set_nmi_callback); void unset_nmi_callback(void) { nmi_callback = dummy_nmi_callback; } +EXPORT_SYMBOL_GPL(unset_nmi_callback); #ifdef CONFIG_KPROBES fastcall void do_int3(struct pt_regs *regs, long error_code) @@ -682,7 +685,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) unsigned int condition; struct task_struct *tsk = current; - __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) @@ -713,7 +716,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * check for kernel mode by just checking the CPL * of CS. */ - if ((regs->xcs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; } @@ -724,9 +727,7 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) * the signal is delivered. */ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); + set_debugreg(0, 7); return; debug_vm86: diff --git a/arch/i386/lib/dec_and_lock.c b/arch/i386/lib/dec_and_lock.c index ab43394dc77..8b81b2524fa 100644 --- a/arch/i386/lib/dec_and_lock.c +++ b/arch/i386/lib/dec_and_lock.c @@ -8,6 +8,7 @@ */ #include <linux/spinlock.h> +#include <linux/module.h> #include <asm/atomic.h> int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) @@ -38,3 +39,4 @@ slow_path: spin_unlock(lock); return 0; } +EXPORT_SYMBOL(_atomic_dec_and_lock); diff --git a/arch/i386/lib/delay.c b/arch/i386/lib/delay.c index eb0cdfe9280..c49a6acbee5 100644 --- a/arch/i386/lib/delay.c +++ b/arch/i386/lib/delay.c @@ -13,6 +13,7 @@ #include <linux/config.h> #include <linux/sched.h> #include <linux/delay.h> +#include <linux/module.h> #include <asm/processor.h> #include <asm/delay.h> #include <asm/timer.h> @@ -47,3 +48,8 @@ void __ndelay(unsigned long nsecs) { __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ } + +EXPORT_SYMBOL(__delay); +EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(__udelay); +EXPORT_SYMBOL(__ndelay); diff --git a/arch/i386/lib/mmx.c b/arch/i386/lib/mmx.c index 01f8b1a2cc8..2afda94dffd 100644 --- a/arch/i386/lib/mmx.c +++ b/arch/i386/lib/mmx.c @@ -3,6 +3,7 @@ #include <linux/string.h> #include <linux/sched.h> #include <linux/hardirq.h> +#include <linux/module.h> #include <asm/i387.h> @@ -397,3 +398,7 @@ void mmx_copy_page(void *to, void *from) else fast_copy_page(to, from); } + +EXPORT_SYMBOL(_mmx_memcpy); +EXPORT_SYMBOL(mmx_clear_page); +EXPORT_SYMBOL(mmx_copy_page); diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index 51aa2bbb026..4cf981d70f4 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -84,6 +84,7 @@ __strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } +EXPORT_SYMBOL(__strncpy_from_user); /** * strncpy_from_user: - Copy a NUL terminated string from userspace. @@ -111,7 +112,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) __do_strncpy_from_user(dst, src, count, res); return res; } - +EXPORT_SYMBOL(strncpy_from_user); /* * Zero Userspace @@ -157,6 +158,7 @@ clear_user(void __user *to, unsigned long n) __do_clear_user(to, n); return n; } +EXPORT_SYMBOL(clear_user); /** * __clear_user: - Zero a block of memory in user space, with less checking. @@ -175,6 +177,7 @@ __clear_user(void __user *to, unsigned long n) __do_clear_user(to, n); return n; } +EXPORT_SYMBOL(__clear_user); /** * strlen_user: - Get the size of a string in user space. @@ -218,6 +221,7 @@ long strnlen_user(const char __user *s, long n) :"cc"); return res & mask; } +EXPORT_SYMBOL(strnlen_user); #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long @@ -570,6 +574,7 @@ survive: n = __copy_user_intel(to, from, n); return n; } +EXPORT_SYMBOL(__copy_to_user_ll); unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n) @@ -581,6 +586,7 @@ __copy_from_user_ll(void *to, const void __user *from, unsigned long n) n = __copy_user_zeroing_intel(to, from, n); return n; } +EXPORT_SYMBOL(__copy_from_user_ll); /** * copy_to_user: - Copy a block of data into user space. diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index a6e0ddd65bd..8c8527593da 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -1288,7 +1288,7 @@ smp_local_timer_interrupt(struct pt_regs * regs) per_cpu(prof_counter, cpu); } - update_process_times(user_mode(regs)); + update_process_times(user_mode_vm(regs)); } if( ((1<<cpu) & voyager_extended_vic_processors) == 0) diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile index fc327250684..80908b5aa60 100644 --- a/arch/i386/mm/Makefile +++ b/arch/i386/mm/Makefile @@ -4,7 +4,7 @@ obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o -obj-$(CONFIG_DISCONTIGMEM) += discontig.o +obj-$(CONFIG_NUMA) += discontig.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 1726b4096b1..f429c871e84 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -29,12 +29,14 @@ #include <linux/highmem.h> #include <linux/initrd.h> #include <linux/nodemask.h> +#include <linux/module.h> #include <asm/e820.h> #include <asm/setup.h> #include <asm/mmzone.h> #include <bios_ebda.h> struct pglist_data *node_data[MAX_NUMNODES]; +EXPORT_SYMBOL(node_data); bootmem_data_t node0_bdata; /* @@ -42,12 +44,16 @@ bootmem_data_t node0_bdata; * populated the following initialisation. * * 1) node_online_map - the map of all nodes configured (online) in the system - * 2) physnode_map - the mapping between a pfn and owning node - * 3) node_start_pfn - the starting page frame number for a node + * 2) node_start_pfn - the starting page frame number for a node * 3) node_end_pfn - the ending page fram number for a node */ +unsigned long node_start_pfn[MAX_NUMNODES]; +unsigned long node_end_pfn[MAX_NUMNODES]; + +#ifdef CONFIG_DISCONTIGMEM /* + * 4) physnode_map - the mapping between a pfn and owning node * physnode_map keeps track of the physical memory layout of a generic * numa node on a 256Mb break (each element of the array will * represent 256Mb of memory and will be marked by the node id. so, @@ -59,6 +65,7 @@ bootmem_data_t node0_bdata; * physnode_map[8- ] = -1; */ s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1}; +EXPORT_SYMBOL(physnode_map); void memory_present(int nid, unsigned long start, unsigned long end) { @@ -85,9 +92,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, return (nr_pages + 1) * sizeof(struct page); } - -unsigned long node_start_pfn[MAX_NUMNODES]; -unsigned long node_end_pfn[MAX_NUMNODES]; +#endif extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); @@ -108,6 +113,9 @@ unsigned long node_remap_offset[MAX_NUMNODES]; void *node_remap_start_vaddr[MAX_NUMNODES]; void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); +void *node_remap_end_vaddr[MAX_NUMNODES]; +void *node_remap_alloc_vaddr[MAX_NUMNODES]; + /* * FLAT - support for basic PC memory model with discontig enabled, essentially * a single node with all available processors in it with a flat @@ -146,6 +154,21 @@ static void __init find_max_pfn_node(int nid) BUG(); } +/* Find the owning node for a pfn. */ +int early_pfn_to_nid(unsigned long pfn) +{ + int nid; + + for_each_node(nid) { + if (node_end_pfn[nid] == 0) + break; + if (node_start_pfn[nid] <= pfn && node_end_pfn[nid] >= pfn) + return nid; + } + + return 0; +} + /* * Allocate memory for the pg_data_t for this node via a crude pre-bootmem * method. For node zero take this from the bottom of memory, for @@ -163,6 +186,21 @@ static void __init allocate_pgdat(int nid) } } +void *alloc_remap(int nid, unsigned long size) +{ + void *allocation = node_remap_alloc_vaddr[nid]; + + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) + return 0; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); + + return allocation; +} + void __init remap_numa_kva(void) { void *vaddr; @@ -170,8 +208,6 @@ void __init remap_numa_kva(void) int node; for_each_online_node(node) { - if (node == 0) - continue; for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) { vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT); set_pmd_pfn((ulong) vaddr, @@ -185,13 +221,9 @@ static unsigned long calculate_numa_remap_pages(void) { int nid; unsigned long size, reserve_pages = 0; + unsigned long pfn; for_each_online_node(nid) { - if (nid == 0) - continue; - if (!node_remap_size[nid]) - continue; - /* * The acpi/srat node info can show hot-add memroy zones * where memory could be added but not currently present. @@ -208,11 +240,24 @@ static unsigned long calculate_numa_remap_pages(void) size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES; /* now the roundup is correct, convert to PAGE_SIZE pages */ size = size * PTRS_PER_PTE; + + /* + * Validate the region we are allocating only contains valid + * pages. + */ + for (pfn = node_end_pfn[nid] - size; + pfn < node_end_pfn[nid]; pfn++) + if (!page_is_ram(pfn)) + break; + + if (pfn != node_end_pfn[nid]) + size = 0; + printk("Reserving %ld pages of KVA for lmem_map of node %d\n", size, nid); node_remap_size[nid] = size; - reserve_pages += size; node_remap_offset[nid] = reserve_pages; + reserve_pages += size; printk("Shrinking node %d from %ld pages to %ld pages\n", nid, node_end_pfn[nid], node_end_pfn[nid] - size); node_end_pfn[nid] -= size; @@ -265,12 +310,18 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for_each_online_node(nid) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - (highstart_pfn + reserve_pages) - node_remap_offset[nid]); + highstart_pfn + node_remap_offset[nid]); + /* Init the node remap allocator */ + node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] + + (node_remap_size[nid] * PAGE_SIZE); + node_remap_alloc_vaddr[nid] = node_remap_start_vaddr[nid] + + ALIGN(sizeof(pg_data_t), PAGE_SIZE); + allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - - node_remap_offset[nid] + node_remap_size[nid])); + (ulong) pfn_to_kaddr(highstart_pfn + + node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); @@ -333,23 +384,9 @@ void __init zone_sizes_init(void) } zholes_size = get_zholes_size(nid); - /* - * We let the lmem_map for node 0 be allocated from the - * normal bootmem allocator, but other nodes come from the - * remapped KVA area - mbligh - */ - if (!nid) - free_area_init_node(nid, NODE_DATA(nid), - zones_size, start, zholes_size); - else { - unsigned long lmem_map; - lmem_map = (unsigned long)node_remap_start_vaddr[nid]; - lmem_map += sizeof(pg_data_t) + PAGE_SIZE - 1; - lmem_map &= PAGE_MASK; - NODE_DATA(nid)->node_mem_map = (struct page *)lmem_map; - free_area_init_node(nid, NODE_DATA(nid), zones_size, - start, zholes_size); - } + + free_area_init_node(nid, NODE_DATA(nid), zones_size, start, + zholes_size); } return; } @@ -358,24 +395,26 @@ void __init set_highmem_pages_init(int bad_ppro) { #ifdef CONFIG_HIGHMEM struct zone *zone; + struct page *page; for_each_zone(zone) { - unsigned long node_pfn, node_high_size, zone_start_pfn; - struct page * zone_mem_map; - + unsigned long node_pfn, zone_start_pfn, zone_end_pfn; + if (!is_highmem(zone)) continue; - printk("Initializing %s for node %d\n", zone->name, - zone->zone_pgdat->node_id); - - node_high_size = zone->spanned_pages; - zone_mem_map = zone->zone_mem_map; zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone_start_pfn + zone->spanned_pages; + + printk("Initializing %s for node %d (%08lx:%08lx)\n", + zone->name, zone->zone_pgdat->node_id, + zone_start_pfn, zone_end_pfn); - for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { - one_highpage_init((struct page *)(zone_mem_map + node_pfn), - zone_start_pfn + node_pfn, bad_ppro); + for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { + if (!pfn_valid(node_pfn)) + continue; + page = pfn_to_page(node_pfn); + one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index fc4c4cad4e9..4b7aaf99d7e 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c @@ -1,4 +1,5 @@ #include <linux/highmem.h> +#include <linux/module.h> void *kmap(struct page *page) { @@ -87,3 +88,8 @@ struct page *kmap_atomic_to_page(void *ptr) return pte_page(*pte); } +EXPORT_SYMBOL(kmap); +EXPORT_SYMBOL(kunmap); +EXPORT_SYMBOL(kmap_atomic); +EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kmap_atomic_to_page); diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 8766c771bb4..3672e2ef51a 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -191,7 +191,7 @@ static inline int page_kills_ppro(unsigned long pagenr) extern int is_available_memory(efi_memory_desc_t *); -static inline int page_is_ram(unsigned long pagenr) +int page_is_ram(unsigned long pagenr) { int i; unsigned long addr, end; @@ -276,7 +276,9 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) SetPageReserved(page); } -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA +extern void set_highmem_pages_init(int); +#else static void __init set_highmem_pages_init(int bad_ppro) { int pfn; @@ -284,9 +286,7 @@ static void __init set_highmem_pages_init(int bad_ppro) one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } -#else -extern void set_highmem_pages_init(int); -#endif /* !CONFIG_DISCONTIGMEM */ +#endif /* CONFIG_FLATMEM */ #else #define kmap_init() do { } while (0) @@ -295,12 +295,13 @@ extern void set_highmem_pages_init(int); #endif /* CONFIG_HIGHMEM */ unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; +EXPORT_SYMBOL(__PAGE_KERNEL); unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; -#ifndef CONFIG_DISCONTIGMEM -#define remap_numa_kva() do {} while (0) -#else +#ifdef CONFIG_NUMA extern void __init remap_numa_kva(void); +#else +#define remap_numa_kva() do {} while (0) #endif static void __init pagetable_init (void) @@ -525,7 +526,7 @@ static void __init set_max_mapnr_init(void) #else num_physpages = max_low_pfn; #endif -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM max_mapnr = num_physpages; #endif } @@ -539,7 +540,7 @@ void __init mem_init(void) int tmp; int bad_ppro; -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM if (!mem_map) BUG(); #endif diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index ab542792b27..d393eefc705 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -11,6 +11,7 @@ #include <linux/vmalloc.h> #include <linux/init.h> #include <linux/slab.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/fixmap.h> #include <asm/cacheflush.h> @@ -165,7 +166,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } return (void __iomem *) (offset + (char __iomem *)addr); } - +EXPORT_SYMBOL(__ioremap); /** * ioremap_nocache - map bus memory into CPU space @@ -222,6 +223,7 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) return p; } +EXPORT_SYMBOL(ioremap_nocache); void iounmap(volatile void __iomem *addr) { @@ -255,6 +257,7 @@ out_unlock: write_unlock(&vmlist_lock); kfree(p); } +EXPORT_SYMBOL(iounmap); void __init *bt_ioremap(unsigned long phys_addr, unsigned long size) { diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index dd81479ff88..270c59f099a 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -30,13 +30,14 @@ void show_mem(void) struct page *page; pg_data_t *pgdat; unsigned long i; + struct page_state ps; printk("Mem-info:\n"); show_free_areas(); printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) highmem++; @@ -53,6 +54,13 @@ void show_mem(void) printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); + + get_page_state(&ps); + printk("%lu pages dirty\n", ps.nr_dirty); + printk("%lu pages writeback\n", ps.nr_writeback); + printk("%lu pages mapped\n", ps.nr_mapped); + printk("%lu pages slab\n", ps.nr_slab); + printk("%lu pages pagetables\n", ps.nr_page_table_pages); } /* diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c index 52d72e074f7..65dfd2edb67 100644 --- a/arch/i386/oprofile/backtrace.c +++ b/arch/i386/oprofile/backtrace.c @@ -91,7 +91,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) head = (struct frame_head *)regs->ebp; #endif - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { while (depth-- && valid_kernel_stack(head, regs)) head = dump_backtrace(head); return; diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index da21b1d07c1..83458f81e66 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -227,6 +227,24 @@ static int pirq_via_set(struct pci_dev *router, struct pci_dev *dev, int pirq, i } /* + * The VIA pirq rules are nibble-based, like ALI, + * but without the ugly irq number munging. + * However, for 82C586, nibble map is different . + */ +static int pirq_via586_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + return read_config_nybble(router, 0x55, pirqmap[pirq-1]); +} + +static int pirq_via586_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) +{ + static unsigned int pirqmap[4] = { 3, 2, 5, 1 }; + write_config_nybble(router, 0x55, pirqmap[pirq-1], irq); + return 1; +} + +/* * ITE 8330G pirq rules are nibble-based * FIXME: pirqmap may be { 1, 0, 3, 2 }, * 2+3 are both mapped to irq 9 on my system @@ -512,6 +530,10 @@ static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, switch(device) { case PCI_DEVICE_ID_VIA_82C586_0: + r->name = "VIA"; + r->get = pirq_via586_get; + r->set = pirq_via586_set; + return 1; case PCI_DEVICE_ID_VIA_82C596: case PCI_DEVICE_ID_VIA_82C686: case PCI_DEVICE_ID_VIA_8231: diff --git a/arch/i386/pci/pcbios.c b/arch/i386/pci/pcbios.c index 141421b673b..b9d65f0bc2d 100644 --- a/arch/i386/pci/pcbios.c +++ b/arch/i386/pci/pcbios.c @@ -4,6 +4,7 @@ #include <linux/pci.h> #include <linux/init.h> +#include <linux/module.h> #include "pci.h" #include "pci-functions.h" @@ -456,7 +457,7 @@ struct irq_routing_table * __devinit pcibios_get_irq_routing_table(void) free_page(page); return rt; } - +EXPORT_SYMBOL(pcibios_get_irq_routing_table); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) { @@ -473,6 +474,7 @@ int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq) "S" (&pci_indirect)); return !(ret & 0xff00); } +EXPORT_SYMBOL(pcibios_set_irq_routing); static int __init pci_pcbios_init(void) { diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index cf337c673d9..6f521cf19a1 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -94,13 +94,13 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg[7]){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); + /* no 4 and 5 */ + set_debugreg(current->thread.debugreg[6], 6); + set_debugreg(current->thread.debugreg[7], 7); } } diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index ce4dfa8b834..01b78e7f992 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -161,6 +161,8 @@ config IA64_PAGE_SIZE_64KB endchoice +source kernel/Kconfig.hz + config IA64_BRL_EMU bool depends on ITANIUM @@ -197,7 +199,7 @@ config HOLES_IN_ZONE bool default y if VIRTUAL_MEM_MAP -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous memory support" depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA @@ -300,6 +302,8 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +source "mm/Kconfig" + config HAVE_DEC_LOCK bool depends on (SMP || PREEMPT) diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index de9d507ba0f..fda67ac993d 100644 --- a/arch/ia64/Kconfig.debug +++ b/arch/ia64/Kconfig.debug @@ -2,6 +2,17 @@ menu "Kernel hacking" source "lib/Kconfig.debug" +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". + + choice prompt "Physical memory granularity" default IA64_GRANULE_64MB diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index a01bb02d074..487d2e36b0a 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -78,7 +78,7 @@ CONFIG_IA64_L1_CACHE_SHIFT=7 CONFIG_NUMA=y CONFIG_VIRTUAL_MEM_MAP=y CONFIG_HOLES_IN_ZONE=y -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y # CONFIG_IA64_CYCLONE is not set CONFIG_IOSAPIC=y CONFIG_IA64_SGI_SN_SIM=y diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 7be8096e056..8444add7638 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -84,7 +84,7 @@ CONFIG_IA64_L1_CACHE_SHIFT=7 CONFIG_NUMA=y CONFIG_VIRTUAL_MEM_MAP=y CONFIG_HOLES_IN_ZONE=y -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_IA64_CYCLONE=y CONFIG_IOSAPIC=y CONFIG_FORCE_MAX_ZONEORDER=18 diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h index b2de948bdae..e3e9290e3ff 100644 --- a/arch/ia64/ia32/ia32priv.h +++ b/arch/ia64/ia32/ia32priv.h @@ -241,7 +241,7 @@ typedef struct compat_siginfo { /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ char _pad[sizeof(unsigned int) - sizeof(int)]; compat_sigval_t _sigval; /* same as below */ diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 4c73d8ba2e3..b2e2f6509eb 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o diff --git a/arch/ia64/kernel/jprobes.S b/arch/ia64/kernel/jprobes.S new file mode 100644 index 00000000000..b7fa3ccd2b0 --- /dev/null +++ b/arch/ia64/kernel/jprobes.S @@ -0,0 +1,61 @@ +/* + * Jprobe specific operations + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-May Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> initial implementation + * + * Jprobes (a.k.a. "jump probes" which is built on-top of kprobes) allow a + * probe to be inserted into the beginning of a function call. The fundamental + * difference between a jprobe and a kprobe is the jprobe handler is executed + * in the same context as the target function, while the kprobe handlers + * are executed in interrupt context. + * + * For jprobes we initially gain control by placing a break point in the + * first instruction of the targeted function. When we catch that specific + * break, we: + * * set the return address to our jprobe_inst_return() function + * * jump to the jprobe handler function + * + * Since we fixed up the return address, the jprobe handler will return to our + * jprobe_inst_return() function, giving us control again. At this point we + * are back in the parents frame marker, so we do yet another call to our + * jprobe_break() function to fix up the frame marker as it would normally + * exist in the target function. + * + * Our jprobe_return function then transfers control back to kprobes.c by + * executing a break instruction using one of our reserved numbers. When we + * catch that break in kprobes.c, we continue like we do for a normal kprobe + * by single stepping the emulated instruction, and then returning execution + * to the correct location. + */ +#include <asm/asmmacro.h> + + /* + * void jprobe_break(void) + */ +ENTRY(jprobe_break) + break.m 0x80300 +END(jprobe_break) + + /* + * void jprobe_inst_return(void) + */ +GLOBAL_ENTRY(jprobe_inst_return) + br.call.sptk.many b0=jprobe_break +END(jprobe_inst_return) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c new file mode 100644 index 00000000000..5978823d5c6 --- /dev/null +++ b/arch/ia64/kernel/kprobes.c @@ -0,0 +1,601 @@ +/* + * Kernel Probes (KProbes) + * arch/ia64/kernel/kprobes.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch <rusty.lynch@intel.com> and Anil S Keshavamurthy + * <anil.s.keshavamurthy@intel.com> adapted from i386 + */ + +#include <linux/config.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/spinlock.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/preempt.h> +#include <linux/moduleloader.h> + +#include <asm/pgtable.h> +#include <asm/kdebug.h> + +extern void jprobe_inst_return(void); + +/* kprobe_status settings */ +#define KPROBE_HIT_ACTIVE 0x00000001 +#define KPROBE_HIT_SS 0x00000002 + +static struct kprobe *current_kprobe, *kprobe_prev; +static unsigned long kprobe_status, kprobe_status_prev; +static struct pt_regs jprobe_saved_regs; + +enum instruction_type {A, I, M, F, B, L, X, u}; +static enum instruction_type bundle_encoding[32][3] = { + { M, I, I }, /* 00 */ + { M, I, I }, /* 01 */ + { M, I, I }, /* 02 */ + { M, I, I }, /* 03 */ + { M, L, X }, /* 04 */ + { M, L, X }, /* 05 */ + { u, u, u }, /* 06 */ + { u, u, u }, /* 07 */ + { M, M, I }, /* 08 */ + { M, M, I }, /* 09 */ + { M, M, I }, /* 0A */ + { M, M, I }, /* 0B */ + { M, F, I }, /* 0C */ + { M, F, I }, /* 0D */ + { M, M, F }, /* 0E */ + { M, M, F }, /* 0F */ + { M, I, B }, /* 10 */ + { M, I, B }, /* 11 */ + { M, B, B }, /* 12 */ + { M, B, B }, /* 13 */ + { u, u, u }, /* 14 */ + { u, u, u }, /* 15 */ + { B, B, B }, /* 16 */ + { B, B, B }, /* 17 */ + { M, M, B }, /* 18 */ + { M, M, B }, /* 19 */ + { u, u, u }, /* 1A */ + { u, u, u }, /* 1B */ + { M, F, B }, /* 1C */ + { M, F, B }, /* 1D */ + { u, u, u }, /* 1E */ + { u, u, u }, /* 1F */ +}; + +/* + * In this function we check to see if the instruction + * is IP relative instruction and update the kprobe + * inst flag accordingly + */ +static void update_kprobe_inst_flag(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + p->ainsn.inst_flag = 0; + p->ainsn.target_br_reg = 0; + + if (bundle_encoding[template][slot] == B) { + switch (major_opcode) { + case INDIRECT_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + case IP_RELATIVE_PREDICT_OPCODE: + case IP_RELATIVE_BRANCH_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + break; + case IP_RELATIVE_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } else if (bundle_encoding[template][slot] == X) { + switch (major_opcode) { + case LONG_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } + return; +} + +/* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. + * Returns 0 if supported + * Returns -EINVAL if unsupported + */ +static int unsupported_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + + if (bundle_encoding[template][slot] == I) { + switch (major_opcode) { + case 0x0: //I_UNIT_MISC_OPCODE: + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + } + } + } + return 0; +} + + +/* + * In this function we check to see if the instruction + * (qp) cmpx.crel.ctype p1,p2=r2,r3 + * on which we are inserting kprobe is cmp instruction + * with ctype as unc. + */ +static uint is_cmp_ctype_unc_inst(uint template, uint slot, uint major_opcode, +unsigned long kprobe_inst) +{ + cmp_inst_t cmp_inst; + uint ctype_unc = 0; + + if (!((bundle_encoding[template][slot] == I) || + (bundle_encoding[template][slot] == M))) + goto out; + + if (!((major_opcode == 0xC) || (major_opcode == 0xD) || + (major_opcode == 0xE))) + goto out; + + cmp_inst.l = kprobe_inst; + if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { + /* Integere compare - Register Register (A6 type)*/ + if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) + &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { + /* Integere compare - Immediate Register (A8 type)*/ + if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } +out: + return ctype_unc; +} + +/* + * In this function we override the bundle with + * the break instruction at the given slot. + */ +static void prepare_break_inst(uint template, uint slot, uint major_opcode, + unsigned long kprobe_inst, struct kprobe *p) +{ + unsigned long break_inst = BREAK_INST; + bundle_t *bundle = &p->ainsn.insn.bundle; + + /* + * Copy the original kprobe_inst qualifying predicate(qp) + * to the break instruction iff !is_cmp_ctype_unc_inst + * because for cmp instruction with ctype equal to unc, + * which is a special instruction always needs to be + * executed regradless of qp + */ + if (!is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) + break_inst |= (0x3f & kprobe_inst); + + switch (slot) { + case 0: + bundle->quad0.slot0 = break_inst; + break; + case 1: + bundle->quad0.slot1_p0 = break_inst; + bundle->quad1.slot1_p1 = break_inst >> (64-46); + break; + case 2: + bundle->quad1.slot2 = break_inst; + break; + } + + /* + * Update the instruction flag, so that we can + * emulate the instruction properly after we + * single step on original instruction + */ + update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); +} + +static inline void get_kprobe_inst(bundle_t *bundle, uint slot, + unsigned long *kprobe_inst, uint *major_opcode) +{ + unsigned long kprobe_inst_p0, kprobe_inst_p1; + unsigned int template; + + template = bundle->quad0.template; + + switch (slot) { + case 0: + *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); + *kprobe_inst = bundle->quad0.slot0; + break; + case 1: + *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); + kprobe_inst_p0 = bundle->quad0.slot1_p0; + kprobe_inst_p1 = bundle->quad1.slot1_p1; + *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); + break; + case 2: + *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); + *kprobe_inst = bundle->quad1.slot2; + break; + } +} + +static int valid_kprobe_addr(int template, int slot, unsigned long addr) +{ + if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { + printk(KERN_WARNING "Attempting to insert unaligned kprobe at 0x%lx\n", + addr); + return -EINVAL; + } + return 0; +} + +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; +} + +static inline void set_current_kprobe(struct kprobe *p) +{ + current_kprobe = p; +} + +int arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long) p->addr; + unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); + unsigned long kprobe_inst=0; + unsigned int slot = addr & 0xf, template, major_opcode = 0; + bundle_t *bundle = &p->ainsn.insn.bundle; + + memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t)); + memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t)); + + template = bundle->quad0.template; + + if(valid_kprobe_addr(template, slot, addr)) + return -EINVAL; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get kprobe_inst and major_opcode from the bundle */ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + if (unsupported_inst(template, slot, major_opcode, kprobe_inst, p)) + return -EINVAL; + + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p); + + return 0; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr; + unsigned long arm_addr = addr & ~0xFULL; + + /* p->opcode contains the original unaltered bundle */ + memcpy((char *) arm_addr, (char *) &p->opcode.bundle, sizeof(bundle_t)); + flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} + +/* + * We are resuming execution after a single step fault, so the pt_regs + * structure reflects the register state after we executed the instruction + * located in the kprobe (p->ainsn.insn.bundle). We still need to adjust + * the ip to point back to the original stack address. To set the IP address + * to original stack address, handle the case where we need to fixup the + * relative IP address and/or fixup branch register. + */ +static void resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = ((unsigned long) (&p->opcode.bundle)) & ~0xFULL; + unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; + unsigned long template; + int slot = ((unsigned long)p->addr & 0xf); + + template = p->opcode.bundle.quad0.template; + + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; + + if (p->ainsn.inst_flag) { + + if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { + /* Fix relative IP address */ + regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr; + } + + if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { + /* + * Fix target branch register, software convention is + * to use either b0 or b6 or b7, so just checking + * only those registers + */ + switch (p->ainsn.target_br_reg) { + case 0: + if ((regs->b0 == bundle_addr) || + (regs->b0 == bundle_addr + 0x10)) { + regs->b0 = (regs->b0 - bundle_addr) + + resume_addr; + } + break; + case 6: + if ((regs->b6 == bundle_addr) || + (regs->b6 == bundle_addr + 0x10)) { + regs->b6 = (regs->b6 - bundle_addr) + + resume_addr; + } + break; + case 7: + if ((regs->b7 == bundle_addr) || + (regs->b7 == bundle_addr + 0x10)) { + regs->b7 = (regs->b7 - bundle_addr) + + resume_addr; + } + break; + } /* end switch */ + } + goto turn_ss_off; + } + + if (slot == 2) { + if (regs->cr_iip == bundle_addr + 0x10) { + regs->cr_iip = resume_addr + 0x10; + } + } else { + if (regs->cr_iip == bundle_addr) { + regs->cr_iip = resume_addr; + } + } + +turn_ss_off: + /* Turn off Single Step bit */ + ia64_psr(regs)->ss = 0; +} + +static void prepare_ss(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) &p->opcode.bundle; + unsigned long slot = (unsigned long)p->addr & 0xf; + + /* Update instruction pointer (IIP) and slot number (IPSR.ri) */ + regs->cr_iip = bundle_addr & ~0xFULL; + + if (slot > 2) + slot = 0; + + ia64_psr(regs)->ri = slot; + + /* turn on single stepping */ + ia64_psr(regs)->ss = 1; +} + +static int pre_kprobes_handler(struct die_args *args) +{ + struct kprobe *p; + int ret = 0; + struct pt_regs *regs = args->regs; + kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); + + preempt_disable(); + + /* Handle recursion cases */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if (kprobe_status == KPROBE_HIT_SS) { + unlock_kprobes(); + goto no_kprobe; + } + /* We have reentered the pre_kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p); + p->nmissed++; + prepare_ss(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; + } else if (args->err == __IA64_BREAK_JPROBE) { + /* + * jprobe instrumented function just completed + */ + p = current_kprobe; + if (p->break_handler && p->break_handler(p, regs)) { + goto ss_probe; + } + } else { + /* Not our break */ + goto no_kprobe; + } + } + + lock_kprobes(); + p = get_kprobe(addr); + if (!p) { + unlock_kprobes(); + goto no_kprobe; + } + + kprobe_status = KPROBE_HIT_ACTIVE; + set_current_kprobe(p); + + if (p->pre_handler && p->pre_handler(p, regs)) + /* + * Our pre-handler is specifically requesting that we just + * do a return. This is handling the case where the + * pre-handler is really our special jprobe pre-handler. + */ + return 1; + +ss_probe: + prepare_ss(p, regs); + kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +static int post_kprobes_handler(struct pt_regs *regs) +{ + if (!kprobe_running()) + return 0; + + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; + current_kprobe->post_handler(current_kprobe, regs, 0); + } + + resume_execution(current_kprobe, regs); + + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } + + unlock_kprobes(); + +out: + preempt_enable_no_resched(); + return 1; +} + +static int kprobes_fault_handler(struct pt_regs *regs, int trapnr) +{ + if (!kprobe_running()) + return 0; + + if (current_kprobe->fault_handler && + current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + resume_execution(current_kprobe, regs); + unlock_kprobes(); + preempt_enable_no_resched(); + } + + return 0; +} + +int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct die_args *args = (struct die_args *)data; + switch(val) { + case DIE_BREAK: + if (pre_kprobes_handler(args)) + return NOTIFY_STOP; + break; + case DIE_SS: + if (post_kprobes_handler(args->regs)) + return NOTIFY_STOP; + break; + case DIE_PAGE_FAULT: + if (kprobes_fault_handler(args->regs, args->trapnr)) + return NOTIFY_STOP; + default: + break; + } + return NOTIFY_DONE; +} + +int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + unsigned long addr = ((struct fnptr *)(jp->entry))->ip; + + /* save architectural state */ + jprobe_saved_regs = *regs; + + /* after rfi, execute the jprobe instrumented function */ + regs->cr_iip = addr & ~0xFULL; + ia64_psr(regs)->ri = addr & 0xf; + regs->r1 = ((struct fnptr *)(jp->entry))->gp; + + /* + * fix the return address to our jprobe_inst_return() function + * in the jprobes.S file + */ + regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip; + + return 1; +} + +int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + *regs = jprobe_saved_regs; + return 1; +} diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 1861173bd4f..e7e520d90f0 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -21,12 +21,26 @@ #include <asm/intrinsics.h> #include <asm/processor.h> #include <asm/uaccess.h> +#include <asm/kdebug.h> extern spinlock_t timerlist_lock; fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); +struct notifier_block *ia64die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&ia64die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + void __init trap_init (void) { @@ -137,6 +151,10 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) switch (break_num) { case 0: /* unknown error (used by GCC for __builtin_abort()) */ + if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } die_if_kernel("bugcheck!", regs, break_num); sig = SIGILL; code = ILL_ILLOPC; break; @@ -189,6 +207,15 @@ ia64_bad_break (unsigned long break_num, struct pt_regs *regs) sig = SIGILL; code = __ILL_BNDMOD; break; + case 0x80200: + case 0x80300: + if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) { + return; + } + sig = SIGTRAP; code = TRAP_BRKPT; + break; + default: if (break_num < 0x40000 || break_num > 0x100000) die_if_kernel("Bad break", regs, break_num); @@ -548,7 +575,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, #endif break; case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; + case 36: + if (notify_die(DIE_SS, "ss", ®s, vector, + vector, SIGTRAP) == NOTIFY_STOP) + return; + siginfo.si_code = TRAP_TRACE; ifa = 0; break; } siginfo.si_signo = SIGTRAP; siginfo.si_errno = 0; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index c0071092939..f3fd528ead3 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -560,14 +560,15 @@ void show_mem(void) int shared = 0, cached = 0, reserved = 0; printk("Node ID: %d\n", pgdat->node_id); for(i = 0; i < pgdat->node_spanned_pages; i++) { + struct page *page = pgdat_page_nr(pgdat, i); if (!ia64_pfn_valid(pgdat->node_start_pfn+i)) continue; - if (PageReserved(pgdat->node_mem_map+i)) + if (PageReserved(page)) reserved++; - else if (PageSwapCache(pgdat->node_mem_map+i)) + else if (PageSwapCache(page)) cached++; - else if (page_count(pgdat->node_mem_map+i)) - shared += page_count(pgdat->node_mem_map+i)-1; + else if (page_count(page)) + shared += page_count(page)-1; } total_present += present; total_reserved += reserved; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 4174ec999dd..ff62551eb3a 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -14,6 +14,7 @@ #include <asm/processor.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/kdebug.h> extern void die (char *, struct pt_regs *, long); @@ -102,6 +103,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re goto bad_area_no_up; #endif + /* + * This is to handle the kprobes on user space access instructions + */ + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, code, TRAP_BRKPT, + SIGSEGV) == NOTIFY_STOP) + return; + down_read(&mm->mmap_sem); vma = find_vma_prev(mm, address, &prev_vma); diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index 64c133344af..42ca8a39798 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -172,11 +172,13 @@ config NOHIGHMEM bool default y -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Internal RAM Support" depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP default y +source "mm/Kconfig" + config IRAM_START hex "Internal memory start address (hex)" default "00f00000" diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c index bc423d838fb..d9a40b1fe8b 100644 --- a/arch/m32r/mm/init.c +++ b/arch/m32r/mm/init.c @@ -49,7 +49,7 @@ void show_mem(void) printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) highmem++; @@ -152,7 +152,7 @@ int __init reservedpages_count(void) reservedpages = 0; for_each_online_node(nid) for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) - if (PageReserved(NODE_DATA(nid)->node_mem_map + i)) + if (PageReserved(nid_page_nr(nid, i))) reservedpages++; return reservedpages; diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index d0713c7d9f0..691a2469ff3 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -357,6 +357,8 @@ config 060_WRITETHROUGH is hardwired on. The 53c710 SCSI driver is known to suffer from this problem. +source "mm/Kconfig" + endmenu menu "General setup" diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index e729bd28062..dbfcdc8e608 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -532,6 +532,8 @@ config ROMKERNEL endchoice +source "mm/Kconfig" + endmenu config ISA_DMA_API diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ab9944693f1..94f5a8eb2c2 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -492,7 +492,7 @@ config SGI_SN0_N_MODE which allows for more memory. Your system is most probably running in M-Mode, so you should say N here. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool default y if SGI_IP27 help diff --git a/arch/mips/configs/ip27_defconfig b/arch/mips/configs/ip27_defconfig index 13472292d0e..b5bab3a42fc 100644 --- a/arch/mips/configs/ip27_defconfig +++ b/arch/mips/configs/ip27_defconfig @@ -82,7 +82,7 @@ CONFIG_STOP_MACHINE=y # CONFIG_SGI_IP22 is not set CONFIG_SGI_IP27=y # CONFIG_SGI_SN0_N_MODE is not set -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_NUMA=y # CONFIG_MAPPED_KERNEL is not set # CONFIG_REPLICATE_KTEXT is not set diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index 0a44a98d7ad..a160d04f7db 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -549,9 +549,8 @@ void __init mem_init(void) */ numslots = node_getlastslot(node); for (slot = 1; slot <= numslots; slot++) { - p = NODE_DATA(node)->node_mem_map + - (slot_getbasepfn(node, slot) - - slot_getbasepfn(node, 0)); + p = nid_page_nr(node, slot_getbasepfn(node, slot) - + slot_getbasepfn(node, 0)); /* * Free valid memory in current slot. diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index e7e7c56fc21..ce327c799b4 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -148,7 +148,7 @@ config HOTPLUG_CPU default y if SMP select HOTPLUG -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool "Discontiguous memory support (EXPERIMENTAL)" depends on EXPERIMENTAL help @@ -157,6 +157,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config PREEMPT bool # bool "Preemptible Kernel" diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index cac37589e35..2886ad70db4 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -506,7 +506,7 @@ void show_mem(void) for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { struct page *p; - p = node_mem_map(i) + j - node_start_pfn(i); + p = nid_page_nr(i, j) - node_start_pfn(i); total++; if (PageReserved(p)) diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig index 10162b187bc..848f43970a4 100644 --- a/arch/ppc/Kconfig +++ b/arch/ppc/Kconfig @@ -905,6 +905,8 @@ config PREEMPT config HIGHMEM bool "High memory support" +source "mm/Kconfig" + source "fs/Kconfig.binfmt" config PROC_DEVICETREE diff --git a/arch/ppc/boot/simple/misc.c b/arch/ppc/boot/simple/misc.c index ab0f9902cb6..e02de5b467a 100644 --- a/arch/ppc/boot/simple/misc.c +++ b/arch/ppc/boot/simple/misc.c @@ -222,7 +222,7 @@ decompress_kernel(unsigned long load_addr, int num_words, unsigned long cksum) puts("\n"); puts("Uncompressing Linux..."); - gunzip(0x0, 0x400000, zimage_start, &zimage_size); + gunzip(NULL, 0x400000, zimage_start, &zimage_size); puts("done.\n"); /* get the bi_rec address */ diff --git a/arch/ppc/boot/simple/mpc10x_memory.c b/arch/ppc/boot/simple/mpc10x_memory.c index 977daedc14c..20d92a34ceb 100644 --- a/arch/ppc/boot/simple/mpc10x_memory.c +++ b/arch/ppc/boot/simple/mpc10x_memory.c @@ -33,7 +33,7 @@ #define MPC10X_PCI_OP(rw, size, type, op, mask) \ static void \ -mpc10x_##rw##_config_##size(unsigned int *cfg_addr, \ +mpc10x_##rw##_config_##size(unsigned int __iomem *cfg_addr, \ unsigned int *cfg_data, int devfn, int offset, \ type val) \ { \ diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 0f1fa289744..cb27068bfcd 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -77,6 +77,10 @@ config PPC_PSERIES bool " IBM pSeries & new iSeries" default y +config PPC_BPA + bool " Broadband Processor Architecture" + depends on PPC_MULTIPLATFORM + config PPC_PMAC depends on PPC_MULTIPLATFORM bool " Apple G5 based machines" @@ -106,6 +110,21 @@ config PPC_OF bool default y +config XICS + depends on PPC_PSERIES + bool + default y + +config MPIC + depends on PPC_PSERIES || PPC_PMAC || PPC_MAPLE + bool + default y + +config BPA_IIC + depends on PPC_BPA + bool + default y + # VMX is pSeries only for now until somebody writes the iSeries # exception vectors for it config ALTIVEC @@ -198,13 +217,49 @@ config HMT This option enables hardware multithreading on RS64 cpus. pSeries systems p620 and p660 have such a cpu type. -config DISCONTIGMEM - bool "Discontiguous Memory Support" +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y depends on SMP && PPC_PSERIES +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + depends on NEED_MULTIPLE_NODES + +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. +# +# This is a relatively temporary hack that should +# be able to go away when sparsemem is fully in +# place +config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + config NUMA bool "NUMA support" - depends on DISCONTIGMEM + default y if DISCONTIGMEM || SPARSEMEM config SCHED_SMT bool "SMT (Hyperthreading) scheduler support" @@ -256,7 +311,7 @@ config MSCHUNKS config PPC_RTAS bool - depends on PPC_PSERIES + depends on PPC_PSERIES || PPC_BPA default y config RTAS_PROC diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile index 33c752ceca4..731b8475833 100644 --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -90,12 +90,14 @@ boot := arch/ppc64/boot boottarget-$(CONFIG_PPC_PSERIES) := zImage zImage.initrd boottarget-$(CONFIG_PPC_MAPLE) := zImage zImage.initrd boottarget-$(CONFIG_PPC_ISERIES) := vmlinux.sminitrd vmlinux.initrd vmlinux.sm +boottarget-$(CONFIG_PPC_BPA) := zImage zImage.initrd $(boottarget-y): vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ bootimage-$(CONFIG_PPC_PSERIES) := $(boot)/zImage bootimage-$(CONFIG_PPC_PMAC) := vmlinux bootimage-$(CONFIG_PPC_MAPLE) := $(boot)/zImage +bootimage-$(CONFIG_PPC_BPA) := zImage bootimage-$(CONFIG_PPC_ISERIES) := vmlinux BOOTIMAGE := $(bootimage-y) install: vmlinux diff --git a/arch/ppc64/boot/install.sh b/arch/ppc64/boot/install.sh index 955c5681db6..cb2d6626b55 100644 --- a/arch/ppc64/boot/install.sh +++ b/arch/ppc64/boot/install.sh @@ -22,8 +22,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install diff --git a/arch/ppc64/configs/pSeries_defconfig b/arch/ppc64/configs/pSeries_defconfig index 3eb5ef25d3a..d0db8b5966c 100644 --- a/arch/ppc64/configs/pSeries_defconfig +++ b/arch/ppc64/configs/pSeries_defconfig @@ -88,7 +88,7 @@ CONFIG_IBMVIO=y CONFIG_IOMMU_VMERGE=y CONFIG_SMP=y CONFIG_NR_CPUS=128 -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y CONFIG_NUMA=y CONFIG_SCHED_SMT=y # CONFIG_PREEMPT is not set diff --git a/arch/ppc64/defconfig b/arch/ppc64/defconfig index 2f31bf3046f..b8e2066dde7 100644 --- a/arch/ppc64/defconfig +++ b/arch/ppc64/defconfig @@ -89,7 +89,7 @@ CONFIG_BOOTX_TEXT=y CONFIG_IOMMU_VMERGE=y CONFIG_SMP=y CONFIG_NR_CPUS=32 -CONFIG_DISCONTIGMEM=y +CONFIG_ARCH_DISCONTIGMEM_ENABLE=y # CONFIG_NUMA is not set # CONFIG_SCHED_SMT is not set # CONFIG_PREEMPT is not set diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index b5e167cf1a0..dffbfb7ac8d 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -27,17 +27,21 @@ obj-$(CONFIG_PPC_ISERIES) += HvCall.o HvLpConfig.o LparData.o \ mf.o HvLpEvent.o iSeries_proc.o iSeries_htab.o \ iSeries_iommu.o -obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o prom.o mpic.o +obj-$(CONFIG_PPC_MULTIPLATFORM) += nvram.o i8259.o prom_init.o prom.o obj-$(CONFIG_PPC_PSERIES) += pSeries_pci.o pSeries_lpar.o pSeries_hvCall.o \ pSeries_nvram.o rtasd.o ras.o pSeries_reconfig.o \ - xics.o rtas.o pSeries_setup.o pSeries_iommu.o + pSeries_setup.o pSeries_iommu.o + +obj-$(CONFIG_PPC_BPA) += bpa_setup.o bpa_iommu.o bpa_nvram.o \ + bpa_iic.o spider-pic.o obj-$(CONFIG_EEH) += eeh.o obj-$(CONFIG_PROC_FS) += proc_ppc64.o obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o +obj-$(CONFIG_PPC_RTAS) += rtas.o rtas_pci.o obj-$(CONFIG_RTAS_PROC) += rtas-proc.o obj-$(CONFIG_SCANLOG) += scanlog.o obj-$(CONFIG_VIOPATH) += viopath.o @@ -46,6 +50,8 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_IBMVIO) += vio.o +obj-$(CONFIG_XICS) += xics.o +obj-$(CONFIG_MPIC) += mpic.o obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \ pmac_time.o pmac_nvram.o pmac_low_i2c.o @@ -58,6 +64,7 @@ ifdef CONFIG_SMP obj-$(CONFIG_PPC_PMAC) += pmac_smp.o smp-tbsync.o obj-$(CONFIG_PPC_ISERIES) += iSeries_smp.o obj-$(CONFIG_PPC_PSERIES) += pSeries_smp.o +obj-$(CONFIG_PPC_BPA) += pSeries_smp.o obj-$(CONFIG_PPC_MAPLE) += smp-tbsync.o endif diff --git a/arch/ppc64/kernel/bpa_iic.c b/arch/ppc64/kernel/bpa_iic.c new file mode 100644 index 00000000000..c8f3dc3fad7 --- /dev/null +++ b/arch/ppc64/kernel/bpa_iic.c @@ -0,0 +1,270 @@ +/* + * BPA Internal Interrupt Controller + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/config.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/percpu.h> +#include <linux/types.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/ptrace.h> + +#include "bpa_iic.h" + +struct iic_pending_bits { + u32 data; + u8 flags; + u8 class; + u8 source; + u8 prio; +}; + +enum iic_pending_flags { + IIC_VALID = 0x80, + IIC_IPI = 0x40, +}; + +struct iic_regs { + struct iic_pending_bits pending; + struct iic_pending_bits pending_destr; + u64 generate; + u64 prio; +}; + +struct iic { + struct iic_regs __iomem *regs; +}; + +static DEFINE_PER_CPU(struct iic, iic); + +void iic_local_enable(void) +{ + out_be64(&__get_cpu_var(iic).regs->prio, 0xff); +} + +void iic_local_disable(void) +{ + out_be64(&__get_cpu_var(iic).regs->prio, 0x0); +} + +static unsigned int iic_startup(unsigned int irq) +{ + return 0; +} + +static void iic_enable(unsigned int irq) +{ + iic_local_enable(); +} + +static void iic_disable(unsigned int irq) +{ +} + +static void iic_end(unsigned int irq) +{ + iic_local_enable(); +} + +static struct hw_interrupt_type iic_pic = { + .typename = " BPA-IIC ", + .startup = iic_startup, + .enable = iic_enable, + .disable = iic_disable, + .end = iic_end, +}; + +static int iic_external_get_irq(struct iic_pending_bits pending) +{ + int irq; + unsigned char node, unit; + + node = pending.source >> 4; + unit = pending.source & 0xf; + irq = -1; + + /* + * This mapping is specific to the Broadband + * Engine. We might need to get the numbers + * from the device tree to support future CPUs. + */ + switch (unit) { + case 0x00: + case 0x0b: + /* + * One of these units can be connected + * to an external interrupt controller. + */ + if (pending.prio > 0x3f || + pending.class != 2) + break; + irq = IIC_EXT_OFFSET + + spider_get_irq(pending.prio + node * IIC_NODE_STRIDE) + + node * IIC_NODE_STRIDE; + break; + case 0x01 ... 0x04: + case 0x07 ... 0x0a: + /* + * These units are connected to the SPEs + */ + if (pending.class > 2) + break; + irq = IIC_SPE_OFFSET + + pending.class * IIC_CLASS_STRIDE + + node * IIC_NODE_STRIDE + + unit; + break; + } + if (irq == -1) + printk(KERN_WARNING "Unexpected interrupt class %02x, " + "source %02x, prio %02x, cpu %02x\n", pending.class, + pending.source, pending.prio, smp_processor_id()); + return irq; +} + +/* Get an IRQ number from the pending state register of the IIC */ +int iic_get_irq(struct pt_regs *regs) +{ + struct iic *iic; + int irq; + struct iic_pending_bits pending; + + iic = &__get_cpu_var(iic); + *(unsigned long *) &pending = + in_be64((unsigned long __iomem *) &iic->regs->pending_destr); + + irq = -1; + if (pending.flags & IIC_VALID) { + if (pending.flags & IIC_IPI) { + irq = IIC_IPI_OFFSET + (pending.prio >> 4); +/* + if (irq > 0x80) + printk(KERN_WARNING "Unexpected IPI prio %02x" + "on CPU %02x\n", pending.prio, + smp_processor_id()); +*/ + } else { + irq = iic_external_get_irq(pending); + } + } + return irq; +} + +static struct iic_regs __iomem *find_iic(int cpu) +{ + struct device_node *np; + int nodeid = cpu / 2; + unsigned long regs; + struct iic_regs __iomem *iic_regs; + + for (np = of_find_node_by_type(NULL, "cpu"); + np; + np = of_find_node_by_type(np, "cpu")) { + if (nodeid == *(int *)get_property(np, "node-id", NULL)) + break; + } + + if (!np) { + printk(KERN_WARNING "IIC: CPU %d not found\n", cpu); + iic_regs = NULL; + } else { + regs = *(long *)get_property(np, "iic", NULL); + + /* hack until we have decided on the devtree info */ + regs += 0x400; + if (cpu & 1) + regs += 0x20; + + printk(KERN_DEBUG "IIC for CPU %d at %lx\n", cpu, regs); + iic_regs = __ioremap(regs, sizeof(struct iic_regs), + _PAGE_NO_CACHE); + } + return iic_regs; +} + +#ifdef CONFIG_SMP +void iic_setup_cpu(void) +{ + out_be64(&__get_cpu_var(iic).regs->prio, 0xff); +} + +void iic_cause_IPI(int cpu, int mesg) +{ + out_be64(&per_cpu(iic, cpu).regs->generate, mesg); +} + +static irqreturn_t iic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +{ + + smp_message_recv(irq - IIC_IPI_OFFSET, regs); + return IRQ_HANDLED; +} + +static void iic_request_ipi(int irq, const char *name) +{ + /* IPIs are marked SA_INTERRUPT as they must run with irqs + * disabled */ + get_irq_desc(irq)->handler = &iic_pic; + get_irq_desc(irq)->status |= IRQ_PER_CPU; + request_irq(irq, iic_ipi_action, SA_INTERRUPT, name, NULL); +} + +void iic_request_IPIs(void) +{ + iic_request_ipi(IIC_IPI_OFFSET + PPC_MSG_CALL_FUNCTION, "IPI-call"); + iic_request_ipi(IIC_IPI_OFFSET + PPC_MSG_RESCHEDULE, "IPI-resched"); +#ifdef CONFIG_DEBUGGER + iic_request_ipi(IIC_IPI_OFFSET + PPC_MSG_DEBUGGER_BREAK, "IPI-debug"); +#endif /* CONFIG_DEBUGGER */ +} +#endif /* CONFIG_SMP */ + +static void iic_setup_spe_handlers(void) +{ + int be, isrc; + + /* Assume two threads per BE are present */ + for (be=0; be < num_present_cpus() / 2; be++) { + for (isrc = 0; isrc < IIC_CLASS_STRIDE * 3; isrc++) { + int irq = IIC_NODE_STRIDE * be + IIC_SPE_OFFSET + isrc; + get_irq_desc(irq)->handler = &iic_pic; + } + } +} + +void iic_init_IRQ(void) +{ + int cpu, irq_offset; + struct iic *iic; + + irq_offset = 0; + for_each_cpu(cpu) { + iic = &per_cpu(iic, cpu); + iic->regs = find_iic(cpu); + if (iic->regs) + out_be64(&iic->regs->prio, 0xff); + } + iic_setup_spe_handlers(); +} diff --git a/arch/ppc64/kernel/bpa_iic.h b/arch/ppc64/kernel/bpa_iic.h new file mode 100644 index 00000000000..6833c302216 --- /dev/null +++ b/arch/ppc64/kernel/bpa_iic.h @@ -0,0 +1,62 @@ +#ifndef ASM_BPA_IIC_H +#define ASM_BPA_IIC_H +#ifdef __KERNEL__ +/* + * Mapping of IIC pending bits into per-node + * interrupt numbers. + * + * IRQ FF CC SS PP FF CC SS PP Description + * + * 00-3f 80 02 +0 00 - 80 02 +0 3f South Bridge + * 00-3f 80 02 +b 00 - 80 02 +b 3f South Bridge + * 41-4a 80 00 +1 ** - 80 00 +a ** SPU Class 0 + * 51-5a 80 01 +1 ** - 80 01 +a ** SPU Class 1 + * 61-6a 80 02 +1 ** - 80 02 +a ** SPU Class 2 + * 70-7f C0 ** ** 00 - C0 ** ** 0f IPI + * + * F flags + * C class + * S source + * P Priority + * + node number + * * don't care + * + * A node consists of a Broadband Engine and an optional + * south bridge device providing a maximum of 64 IRQs. + * The south bridge may be connected to either IOIF0 + * or IOIF1. + * Each SPE is represented as three IRQ lines, one per + * interrupt class. + * 16 IRQ numbers are reserved for inter processor + * interruptions, although these are only used in the + * range of the first node. + * + * This scheme needs 128 IRQ numbers per BIF node ID, + * which means that with the total of 512 lines + * available, we can have a maximum of four nodes. + */ + +enum { + IIC_EXT_OFFSET = 0x00, /* Start of south bridge IRQs */ + IIC_NUM_EXT = 0x40, /* Number of south bridge IRQs */ + IIC_SPE_OFFSET = 0x40, /* Start of SPE interrupts */ + IIC_CLASS_STRIDE = 0x10, /* SPE IRQs per class */ + IIC_IPI_OFFSET = 0x70, /* Start of IPI IRQs */ + IIC_NUM_IPIS = 0x10, /* IRQs reserved for IPI */ + IIC_NODE_STRIDE = 0x80, /* Total IRQs per node */ +}; + +extern void iic_init_IRQ(void); +extern int iic_get_irq(struct pt_regs *regs); +extern void iic_cause_IPI(int cpu, int mesg); +extern void iic_request_IPIs(void); +extern void iic_setup_cpu(void); +extern void iic_local_enable(void); +extern void iic_local_disable(void); + + +extern void spider_init_IRQ(void); +extern int spider_get_irq(unsigned long int_pending); + +#endif +#endif /* ASM_BPA_IIC_H */ diff --git a/arch/ppc64/kernel/bpa_iommu.c b/arch/ppc64/kernel/bpa_iommu.c new file mode 100644 index 00000000000..f33a7bccb0d --- /dev/null +++ b/arch/ppc64/kernel/bpa_iommu.c @@ -0,0 +1,377 @@ +/* + * IOMMU implementation for Broadband Processor Architecture + * We just establish a linear mapping at boot by setting all the + * IOPT cache entries in the CPU. + * The mapping functions should be identical to pci_direct_iommu, + * except for the handling of the high order bit that is required + * by the Spider bridge. These should be split into a separate + * file at the point where we get a different bridge chip. + * + * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH, + * Arnd Bergmann <arndb@de.ibm.com> + * + * Based on linear mapping + * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/mm.h> +#include <linux/dma-mapping.h> + +#include <asm/sections.h> +#include <asm/iommu.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/abs_addr.h> +#include <asm/system.h> + +#include "pci.h" +#include "bpa_iommu.h" + +static inline unsigned long +get_iopt_entry(unsigned long real_address, unsigned long ioid, + unsigned long prot) +{ + return (prot & IOPT_PROT_MASK) + | (IOPT_COHERENT) + | (IOPT_ORDER_VC) + | (real_address & IOPT_RPN_MASK) + | (ioid & IOPT_IOID_MASK); +} + +typedef struct { + unsigned long val; +} ioste; + +static inline ioste +mk_ioste(unsigned long val) +{ + ioste ioste = { .val = val, }; + return ioste; +} + +static inline ioste +get_iost_entry(unsigned long iopt_base, unsigned long io_address, unsigned page_size) +{ + unsigned long ps; + unsigned long iostep; + unsigned long nnpt; + unsigned long shift; + + switch (page_size) { + case 0x1000000: + ps = IOST_PS_16M; + nnpt = 0; /* one page per segment */ + shift = 5; /* segment has 16 iopt entries */ + break; + + case 0x100000: + ps = IOST_PS_1M; + nnpt = 0; /* one page per segment */ + shift = 1; /* segment has 256 iopt entries */ + break; + + case 0x10000: + ps = IOST_PS_64K; + nnpt = 0x07; /* 8 pages per io page table */ + shift = 0; /* all entries are used */ + break; + + case 0x1000: + ps = IOST_PS_4K; + nnpt = 0x7f; /* 128 pages per io page table */ + shift = 0; /* all entries are used */ + break; + + default: /* not a known compile time constant */ + BUILD_BUG_ON(1); + break; + } + + iostep = iopt_base + + /* need 8 bytes per iopte */ + (((io_address / page_size * 8) + /* align io page tables on 4k page boundaries */ + << shift) + /* nnpt+1 pages go into each iopt */ + & ~(nnpt << 12)); + + nnpt++; /* this seems to work, but the documentation is not clear + about wether we put nnpt or nnpt-1 into the ioste bits. + In theory, this can't work for 4k pages. */ + return mk_ioste(IOST_VALID_MASK + | (iostep & IOST_PT_BASE_MASK) + | ((nnpt << 5) & IOST_NNPT_MASK) + | (ps & IOST_PS_MASK)); +} + +/* compute the address of an io pte */ +static inline unsigned long +get_ioptep(ioste iost_entry, unsigned long io_address) +{ + unsigned long iopt_base; + unsigned long page_size; + unsigned long page_number; + unsigned long iopt_offset; + + iopt_base = iost_entry.val & IOST_PT_BASE_MASK; + page_size = iost_entry.val & IOST_PS_MASK; + + /* decode page size to compute page number */ + page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size); + /* page number is an offset into the io page table */ + iopt_offset = (page_number << 3) & 0x7fff8ul; + return iopt_base + iopt_offset; +} + +/* compute the tag field of the iopt cache entry */ +static inline unsigned long +get_ioc_tag(ioste iost_entry, unsigned long io_address) +{ + unsigned long iopte = get_ioptep(iost_entry, io_address); + + return IOPT_VALID_MASK + | ((iopte & 0x00000000000000ff8ul) >> 3) + | ((iopte & 0x0000003fffffc0000ul) >> 9); +} + +/* compute the hashed 6 bit index for the 4-way associative pte cache */ +static inline unsigned long +get_ioc_hash(ioste iost_entry, unsigned long io_address) +{ + unsigned long iopte = get_ioptep(iost_entry, io_address); + + return ((iopte & 0x000000000000001f8ul) >> 3) + ^ ((iopte & 0x00000000000020000ul) >> 17) + ^ ((iopte & 0x00000000000010000ul) >> 15) + ^ ((iopte & 0x00000000000008000ul) >> 13) + ^ ((iopte & 0x00000000000004000ul) >> 11) + ^ ((iopte & 0x00000000000002000ul) >> 9) + ^ ((iopte & 0x00000000000001000ul) >> 7); +} + +/* same as above, but pretend that we have a simpler 1-way associative + pte cache with an 8 bit index */ +static inline unsigned long +get_ioc_hash_1way(ioste iost_entry, unsigned long io_address) +{ + unsigned long iopte = get_ioptep(iost_entry, io_address); + + return ((iopte & 0x000000000000001f8ul) >> 3) + ^ ((iopte & 0x00000000000020000ul) >> 17) + ^ ((iopte & 0x00000000000010000ul) >> 15) + ^ ((iopte & 0x00000000000008000ul) >> 13) + ^ ((iopte & 0x00000000000004000ul) >> 11) + ^ ((iopte & 0x00000000000002000ul) >> 9) + ^ ((iopte & 0x00000000000001000ul) >> 7) + ^ ((iopte & 0x0000000000000c000ul) >> 8); +} + +static inline ioste +get_iost_cache(void __iomem *base, unsigned long index) +{ + unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR); + return mk_ioste(in_be64(&p[index])); +} + +static inline void +set_iost_cache(void __iomem *base, unsigned long index, ioste ste) +{ + unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR); + pr_debug("ioste %02lx was %016lx, store %016lx", index, + get_iost_cache(base, index).val, ste.val); + out_be64(&p[index], ste.val); + pr_debug(" now %016lx\n", get_iost_cache(base, index).val); +} + +static inline unsigned long +get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag) +{ + unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR); + unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG); + + *tag = tags[index]; + rmb(); + return *p; +} + +static inline void +set_iopt_cache(void __iomem *base, unsigned long index, + unsigned long tag, unsigned long val) +{ + unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR; + unsigned long __iomem *p = base + IOC_PT_CACHE_REG; + pr_debug("iopt %02lx was v%016lx/t%016lx, store v%016lx/t%016lx\n", + index, get_iopt_cache(base, index, &oldtag), oldtag, val, tag); + + out_be64(p, val); + out_be64(&tags[index], tag); +} + +static inline void +set_iost_origin(void __iomem *base) +{ + unsigned long __iomem *p = base + IOC_ST_ORIGIN; + unsigned long origin = IOSTO_ENABLE | IOSTO_SW; + + pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin); + out_be64(p, origin); +} + +static inline void +set_iocmd_config(void __iomem *base) +{ + unsigned long __iomem *p = base + 0xc00; + unsigned long conf; + + conf = in_be64(p); + pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE); + out_be64(p, conf | IOCMD_CONF_TE); +} + +/* FIXME: get these from the device tree */ +#define ioc_base 0x20000511000ull +#define ioc_mmio_base 0x20000510000ull +#define ioid 0x48a +#define iopt_phys_offset (- 0x20000000) /* We have a 512MB offset from the SB */ +#define io_page_size 0x1000000 + +static unsigned long map_iopt_entry(unsigned long address) +{ + switch (address >> 20) { + case 0x600: + address = 0x24020000000ull; /* spider i/o */ + break; + default: + address += iopt_phys_offset; + break; + } + + return get_iopt_entry(address, ioid, IOPT_PROT_RW); +} + +static void iommu_bus_setup_null(struct pci_bus *b) { } +static void iommu_dev_setup_null(struct pci_dev *d) { } + +/* initialize the iommu to support a simple linear mapping + * for each DMA window used by any device. For now, we + * happen to know that there is only one DMA window in use, + * starting at iopt_phys_offset. */ +static void bpa_map_iommu(void) +{ + unsigned long address; + void __iomem *base; + ioste ioste; + unsigned long index; + + base = __ioremap(ioc_base, 0x1000, _PAGE_NO_CACHE); + pr_debug("%lx mapped to %p\n", ioc_base, base); + set_iocmd_config(base); + iounmap(base); + + base = __ioremap(ioc_mmio_base, 0x1000, _PAGE_NO_CACHE); + pr_debug("%lx mapped to %p\n", ioc_mmio_base, base); + + set_iost_origin(base); + + for (address = 0; address < 0x100000000ul; address += io_page_size) { + ioste = get_iost_entry(0x10000000000ul, address, io_page_size); + if ((address & 0xfffffff) == 0) /* segment start */ + set_iost_cache(base, address >> 28, ioste); + index = get_ioc_hash_1way(ioste, address); + pr_debug("addr %08lx, index %02lx, ioste %016lx\n", + address, index, ioste.val); + set_iopt_cache(base, + get_ioc_hash_1way(ioste, address), + get_ioc_tag(ioste, address), + map_iopt_entry(address)); + } + iounmap(base); +} + + +static void *bpa_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, unsigned int __nocast flag) +{ + void *ret; + + ret = (void *)__get_free_pages(flag, get_order(size)); + if (ret != NULL) { + memset(ret, 0, size); + *dma_handle = virt_to_abs(ret) | BPA_DMA_VALID; + } + return ret; +} + +static void bpa_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + free_pages((unsigned long)vaddr, get_order(size)); +} + +static dma_addr_t bpa_map_single(struct device *hwdev, void *ptr, + size_t size, enum dma_data_direction direction) +{ + return virt_to_abs(ptr) | BPA_DMA_VALID; +} + +static void bpa_unmap_single(struct device *hwdev, dma_addr_t dma_addr, + size_t size, enum dma_data_direction direction) +{ +} + +static int bpa_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ + int i; + + for (i = 0; i < nents; i++, sg++) { + sg->dma_address = (page_to_phys(sg->page) + sg->offset) + | BPA_DMA_VALID; + sg->dma_length = sg->length; + } + + return nents; +} + +static void bpa_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, enum dma_data_direction direction) +{ +} + +static int bpa_dma_supported(struct device *dev, u64 mask) +{ + return mask < 0x100000000ull; +} + +void bpa_init_iommu(void) +{ + bpa_map_iommu(); + + /* Direct I/O, IOMMU off */ + ppc_md.iommu_dev_setup = iommu_dev_setup_null; + ppc_md.iommu_bus_setup = iommu_bus_setup_null; + + pci_dma_ops.alloc_coherent = bpa_alloc_coherent; + pci_dma_ops.free_coherent = bpa_free_coherent; + pci_dma_ops.map_single = bpa_map_single; + pci_dma_ops.unmap_single = bpa_unmap_single; + pci_dma_ops.map_sg = bpa_map_sg; + pci_dma_ops.unmap_sg = bpa_unmap_sg; + pci_dma_ops.dma_supported = bpa_dma_supported; +} diff --git a/arch/ppc64/kernel/bpa_iommu.h b/arch/ppc64/kernel/bpa_iommu.h new file mode 100644 index 00000000000..e547d77dfa0 --- /dev/null +++ b/arch/ppc64/kernel/bpa_iommu.h @@ -0,0 +1,65 @@ +#ifndef BPA_IOMMU_H +#define BPA_IOMMU_H + +/* some constants */ +enum { + /* segment table entries */ + IOST_VALID_MASK = 0x8000000000000000ul, + IOST_TAG_MASK = 0x3000000000000000ul, + IOST_PT_BASE_MASK = 0x000003fffffff000ul, + IOST_NNPT_MASK = 0x0000000000000fe0ul, + IOST_PS_MASK = 0x000000000000000ful, + + IOST_PS_4K = 0x1, + IOST_PS_64K = 0x3, + IOST_PS_1M = 0x5, + IOST_PS_16M = 0x7, + + /* iopt tag register */ + IOPT_VALID_MASK = 0x0000000200000000ul, + IOPT_TAG_MASK = 0x00000001fffffffful, + + /* iopt cache register */ + IOPT_PROT_MASK = 0xc000000000000000ul, + IOPT_PROT_NONE = 0x0000000000000000ul, + IOPT_PROT_READ = 0x4000000000000000ul, + IOPT_PROT_WRITE = 0x8000000000000000ul, + IOPT_PROT_RW = 0xc000000000000000ul, + IOPT_COHERENT = 0x2000000000000000ul, + + IOPT_ORDER_MASK = 0x1800000000000000ul, + /* order access to same IOID/VC on same address */ + IOPT_ORDER_ADDR = 0x0800000000000000ul, + /* similar, but only after a write access */ + IOPT_ORDER_WRITES = 0x1000000000000000ul, + /* Order all accesses to same IOID/VC */ + IOPT_ORDER_VC = 0x1800000000000000ul, + + IOPT_RPN_MASK = 0x000003fffffff000ul, + IOPT_HINT_MASK = 0x0000000000000800ul, + IOPT_IOID_MASK = 0x00000000000007fful, + + IOSTO_ENABLE = 0x8000000000000000ul, + IOSTO_ORIGIN = 0x000003fffffff000ul, + IOSTO_HW = 0x0000000000000800ul, + IOSTO_SW = 0x0000000000000400ul, + + IOCMD_CONF_TE = 0x0000800000000000ul, + + /* memory mapped registers */ + IOC_PT_CACHE_DIR = 0x000, + IOC_ST_CACHE_DIR = 0x800, + IOC_PT_CACHE_REG = 0x910, + IOC_ST_ORIGIN = 0x918, + IOC_CONF = 0x930, + + /* The high bit needs to be set on every DMA address, + only 2GB are addressable */ + BPA_DMA_VALID = 0x80000000, + BPA_DMA_MASK = 0x7fffffff, +}; + + +void bpa_init_iommu(void); + +#endif diff --git a/arch/ppc64/kernel/bpa_nvram.c b/arch/ppc64/kernel/bpa_nvram.c new file mode 100644 index 00000000000..06a119cfceb --- /dev/null +++ b/arch/ppc64/kernel/bpa_nvram.c @@ -0,0 +1,118 @@ +/* + * NVRAM for CPBW + * + * (C) Copyright IBM Corp. 2005 + * + * Authors : Utz Bacher <utz.bacher@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm/machdep.h> +#include <asm/nvram.h> +#include <asm/prom.h> + +static void __iomem *bpa_nvram_start; +static long bpa_nvram_len; +static spinlock_t bpa_nvram_lock = SPIN_LOCK_UNLOCKED; + +static ssize_t bpa_nvram_read(char *buf, size_t count, loff_t *index) +{ + unsigned long flags; + + if (*index >= bpa_nvram_len) + return 0; + if (*index + count > bpa_nvram_len) + count = bpa_nvram_len - *index; + + spin_lock_irqsave(&bpa_nvram_lock, flags); + + memcpy_fromio(buf, bpa_nvram_start + *index, count); + + spin_unlock_irqrestore(&bpa_nvram_lock, flags); + + *index += count; + return count; +} + +static ssize_t bpa_nvram_write(char *buf, size_t count, loff_t *index) +{ + unsigned long flags; + + if (*index >= bpa_nvram_len) + return 0; + if (*index + count > bpa_nvram_len) + count = bpa_nvram_len - *index; + + spin_lock_irqsave(&bpa_nvram_lock, flags); + + memcpy_toio(bpa_nvram_start + *index, buf, count); + + spin_unlock_irqrestore(&bpa_nvram_lock, flags); + + *index += count; + return count; +} + +static ssize_t bpa_nvram_get_size(void) +{ + return bpa_nvram_len; +} + +int __init bpa_nvram_init(void) +{ + struct device_node *nvram_node; + unsigned long *buffer; + int proplen; + unsigned long nvram_addr; + int ret; + + ret = -ENODEV; + nvram_node = of_find_node_by_type(NULL, "nvram"); + if (!nvram_node) + goto out; + + ret = -EIO; + buffer = (unsigned long *)get_property(nvram_node, "reg", &proplen); + if (proplen != 2*sizeof(unsigned long)) + goto out; + + ret = -ENODEV; + nvram_addr = buffer[0]; + bpa_nvram_len = buffer[1]; + if ( (!bpa_nvram_len) || (!nvram_addr) ) + goto out; + + bpa_nvram_start = ioremap(nvram_addr, bpa_nvram_len); + if (!bpa_nvram_start) + goto out; + + printk(KERN_INFO "BPA NVRAM, %luk mapped to %p\n", + bpa_nvram_len >> 10, bpa_nvram_start); + + ppc_md.nvram_read = bpa_nvram_read; + ppc_md.nvram_write = bpa_nvram_write; + ppc_md.nvram_size = bpa_nvram_get_size; + +out: + of_node_put(nvram_node); + return ret; +} diff --git a/arch/ppc64/kernel/bpa_setup.c b/arch/ppc64/kernel/bpa_setup.c new file mode 100644 index 00000000000..57b3db66f45 --- /dev/null +++ b/arch/ppc64/kernel/bpa_setup.c @@ -0,0 +1,140 @@ +/* + * linux/arch/ppc/kernel/bpa_setup.c + * + * Copyright (C) 1995 Linus Torvalds + * Adapted from 'alpha' version by Gary Thomas + * Modified by Cort Dougan (cort@cs.nmt.edu) + * Modified by PPC64 Team, IBM Corp + * Modified by BPA Team, IBM Deutschland Entwicklung GmbH + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#undef DEBUG + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/reboot.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/console.h> + +#include <asm/mmu.h> +#include <asm/processor.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/rtas.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/dma.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/nvram.h> +#include <asm/cputable.h> + +#include "pci.h" +#include "bpa_iic.h" +#include "bpa_iommu.h" + +#ifdef DEBUG +#define DBG(fmt...) udbg_printf(fmt) +#else +#define DBG(fmt...) +#endif + +void bpa_get_cpuinfo(struct seq_file *m) +{ + struct device_node *root; + const char *model = ""; + + root = of_find_node_by_path("/"); + if (root) + model = get_property(root, "model", NULL); + seq_printf(m, "machine\t\t: BPA %s\n", model); + of_node_put(root); +} + +static void bpa_progress(char *s, unsigned short hex) +{ + printk("*** %04x : %s\n", hex, s ? s : ""); +} + +static void __init bpa_setup_arch(void) +{ + ppc_md.init_IRQ = iic_init_IRQ; + ppc_md.get_irq = iic_get_irq; + +#ifdef CONFIG_SMP + smp_init_pSeries(); +#endif + + /* init to some ~sane value until calibrate_delay() runs */ + loops_per_jiffy = 50000000; + + if (ROOT_DEV == 0) { + printk("No ramdisk, default root is /dev/hda2\n"); + ROOT_DEV = Root_HDA2; + } + + /* Find and initialize PCI host bridges */ + init_pci_config_tokens(); + find_and_init_phbs(); + spider_init_IRQ(); +#ifdef CONFIG_DUMMY_CONSOLE + conswitchp = &dummy_con; +#endif + + bpa_nvram_init(); +} + +/* + * Early initialization. Relocation is on but do not reference unbolted pages + */ +static void __init bpa_init_early(void) +{ + DBG(" -> bpa_init_early()\n"); + + hpte_init_native(); + + bpa_init_iommu(); + + ppc64_interrupt_controller = IC_BPA_IIC; + + DBG(" <- bpa_init_early()\n"); +} + + +static int __init bpa_probe(int platform) +{ + if (platform != PLATFORM_BPA) + return 0; + + return 1; +} + +struct machdep_calls __initdata bpa_md = { + .probe = bpa_probe, + .setup_arch = bpa_setup_arch, + .init_early = bpa_init_early, + .get_cpuinfo = bpa_get_cpuinfo, + .restart = rtas_restart, + .power_off = rtas_power_off, + .halt = rtas_halt, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = bpa_progress, +}; diff --git a/arch/ppc64/kernel/cpu_setup_power4.S b/arch/ppc64/kernel/cpu_setup_power4.S index 3bd95182085..42fc08cf87a 100644 --- a/arch/ppc64/kernel/cpu_setup_power4.S +++ b/arch/ppc64/kernel/cpu_setup_power4.S @@ -73,7 +73,21 @@ _GLOBAL(__970_cpu_preinit) _GLOBAL(__setup_cpu_power4) blr - + +_GLOBAL(__setup_cpu_be) + /* Set large page sizes LP=0: 16MB, LP=1: 64KB */ + addi r3, 0, 0 + ori r3, r3, HID6_LB + sldi r3, r3, 32 + nor r3, r3, r3 + mfspr r4, SPRN_HID6 + and r4, r4, r3 + addi r3, 0, 0x02000 + sldi r3, r3, 32 + or r4, r4, r3 + mtspr SPRN_HID6, r4 + blr + _GLOBAL(__setup_cpu_ppc970) mfspr r0,SPRN_HID0 li r11,5 /* clear DOZE and SLEEP */ diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 8644a864805..1d162c7c59d 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -34,6 +34,7 @@ EXPORT_SYMBOL(cur_cpu_spec); extern void __setup_cpu_power3(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_power4(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); +extern void __setup_cpu_be(unsigned long offset, struct cpu_spec* spec); /* We only set the altivec features if the kernel was compiled with altivec @@ -162,6 +163,16 @@ struct cpu_spec cpu_specs[] = { __setup_cpu_power4, COMMON_PPC64_FW }, + { /* BE DD1.x */ + 0xffff0000, 0x00700000, "Broadband Engine", + CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | + CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | + CPU_FTR_SMT, + COMMON_USER_PPC64 | PPC_FEATURE_HAS_ALTIVEC_COMP, + 128, 128, + __setup_cpu_be, + COMMON_PPC64_FW + }, { /* default match */ 0x00000000, 0x00000000, "POWER4 (compatible)", CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index b31962436fe..86966ce76b5 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -671,9 +671,6 @@ static void __init iSeries_bolt_kernel(unsigned long saddr, unsigned long eaddr) } } -extern unsigned long ppc_proc_freq; -extern unsigned long ppc_tb_freq; - /* * Document me. */ @@ -772,8 +769,6 @@ static void iSeries_halt(void) mf_power_off(); } -extern void setup_default_decr(void); - /* * void __init iSeries_calibrate_decr() * diff --git a/arch/ppc64/kernel/irq.c b/arch/ppc64/kernel/irq.c index d860467b8f0..3defc8c33ad 100644 --- a/arch/ppc64/kernel/irq.c +++ b/arch/ppc64/kernel/irq.c @@ -395,6 +395,9 @@ int virt_irq_create_mapping(unsigned int real_irq) if (ppc64_interrupt_controller == IC_OPEN_PIC) return real_irq; /* no mapping for openpic (for now) */ + if (ppc64_interrupt_controller == IC_BPA_IIC) + return real_irq; /* no mapping for iic either */ + /* don't map interrupts < MIN_VIRT_IRQ */ if (real_irq < MIN_VIRT_IRQ) { virt_irq_to_real_map[real_irq] = real_irq; diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c index e950a2058a1..782ce3efa2c 100644 --- a/arch/ppc64/kernel/kprobes.c +++ b/arch/ppc64/kernel/kprobes.c @@ -32,15 +32,14 @@ #include <linux/ptrace.h> #include <linux/spinlock.h> #include <linux/preempt.h> +#include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/sstep.h> -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_saved_msr; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_saved_msr_prev; static struct pt_regs jprobe_saved_regs; int arch_prepare_kprobe(struct kprobe *p) @@ -61,16 +60,25 @@ int arch_prepare_kprobe(struct kprobe *p) void arch_copy_kprobe(struct kprobe *p) { memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +void arch_disarm_kprobe(struct kprobe *p) { *p->addr = p->opcode; - regs->nip = (unsigned long)p->addr; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_remove_kprobe(struct kprobe *p) +{ } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -83,6 +91,20 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->nip = (unsigned long)&p->ainsn.insn; } +static inline void save_previous_kprobe(void) +{ + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_saved_msr_prev = kprobe_saved_msr; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_saved_msr = kprobe_saved_msr_prev; +} + static inline int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -101,8 +123,19 @@ static inline int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + current_kprobe = p; + kprobe_saved_msr = regs->msr; + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -184,13 +217,21 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); regs->msr |= kprobe_saved_msr; + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); /* diff --git a/arch/ppc64/kernel/maple_setup.c b/arch/ppc64/kernel/maple_setup.c index 8cf95a27178..da8900b51f4 100644 --- a/arch/ppc64/kernel/maple_setup.c +++ b/arch/ppc64/kernel/maple_setup.c @@ -78,17 +78,77 @@ extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); extern void generic_find_legacy_serial_ports(u64 *physport, unsigned int *default_speed); - static void maple_restart(char *cmd) { + unsigned int maple_nvram_base; + unsigned int maple_nvram_offset; + unsigned int maple_nvram_command; + struct device_node *rtcs; + + /* find NVRAM device */ + rtcs = find_compatible_devices("nvram", "AMD8111"); + if (rtcs && rtcs->addrs) { + maple_nvram_base = rtcs->addrs[0].address; + } else { + printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); + printk(KERN_EMERG "Maple: Manual Restart Required\n"); + return; + } + + /* find service processor device */ + rtcs = find_devices("service-processor"); + if (!rtcs) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + printk(KERN_EMERG "Maple: Manual Restart Required\n"); + return; + } + maple_nvram_offset = *(unsigned int*) get_property(rtcs, + "restart-addr", NULL); + maple_nvram_command = *(unsigned int*) get_property(rtcs, + "restart-value", NULL); + + /* send command */ + outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset); + for (;;) ; } static void maple_power_off(void) { + unsigned int maple_nvram_base; + unsigned int maple_nvram_offset; + unsigned int maple_nvram_command; + struct device_node *rtcs; + + /* find NVRAM device */ + rtcs = find_compatible_devices("nvram", "AMD8111"); + if (rtcs && rtcs->addrs) { + maple_nvram_base = rtcs->addrs[0].address; + } else { + printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); + printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); + return; + } + + /* find service processor device */ + rtcs = find_devices("service-processor"); + if (!rtcs) { + printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); + printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); + return; + } + maple_nvram_offset = *(unsigned int*) get_property(rtcs, + "power-off-addr", NULL); + maple_nvram_command = *(unsigned int*) get_property(rtcs, + "power-off-value", NULL); + + /* send command */ + outb_p(maple_nvram_command, maple_nvram_base + maple_nvram_offset); + for (;;) ; } static void maple_halt(void) { + maple_power_off(); } #ifdef CONFIG_SMP @@ -235,6 +295,6 @@ struct machdep_calls __initdata maple_md = { .get_boot_time = maple_get_boot_time, .set_rtc_time = maple_set_rtc_time, .get_rtc_time = maple_get_rtc_time, - .calibrate_decr = maple_calibrate_decr, + .calibrate_decr = generic_calibrate_decr, .progress = maple_progress, }; diff --git a/arch/ppc64/kernel/maple_time.c b/arch/ppc64/kernel/maple_time.c index 07ce7895b43..d65210abcd0 100644 --- a/arch/ppc64/kernel/maple_time.c +++ b/arch/ppc64/kernel/maple_time.c @@ -42,11 +42,8 @@ #define DBG(x...) #endif -extern void setup_default_decr(void); extern void GregorianDay(struct rtc_time * tm); -extern unsigned long ppc_tb_freq; -extern unsigned long ppc_proc_freq; static int maple_rtc_addr; static int maple_clock_read(int addr) @@ -176,51 +173,3 @@ void __init maple_get_boot_time(struct rtc_time *tm) maple_get_rtc_time(tm); } -/* XXX FIXME: Some sane defaults: 125 MHz timebase, 1GHz processor */ -#define DEFAULT_TB_FREQ 125000000UL -#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) - -void __init maple_calibrate_decr(void) -{ - struct device_node *cpu; - struct div_result divres; - unsigned int *fp = NULL; - - /* - * The cpu node should have a timebase-frequency property - * to tell us the rate at which the decrementer counts. - */ - cpu = of_find_node_by_type(NULL, "cpu"); - - ppc_tb_freq = DEFAULT_TB_FREQ; - if (cpu != 0) - fp = (unsigned int *)get_property(cpu, "timebase-frequency", NULL); - if (fp != NULL) - ppc_tb_freq = *fp; - else - printk(KERN_ERR "WARNING: Estimating decrementer frequency (not found)\n"); - fp = NULL; - ppc_proc_freq = DEFAULT_PROC_FREQ; - if (cpu != 0) - fp = (unsigned int *)get_property(cpu, "clock-frequency", NULL); - if (fp != NULL) - ppc_proc_freq = *fp; - else - printk(KERN_ERR "WARNING: Estimating processor frequency (not found)\n"); - - of_node_put(cpu); - - printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n", - ppc_tb_freq/1000000, ppc_tb_freq%1000000); - printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n", - ppc_proc_freq/1000000, ppc_proc_freq%1000000); - - tb_ticks_per_jiffy = ppc_tb_freq / HZ; - tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; - tb_ticks_per_usec = ppc_tb_freq / 1000000; - tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); - div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres); - tb_to_xs = divres.result_low; - - setup_default_decr(); -} diff --git a/arch/ppc64/kernel/mpic.h b/arch/ppc64/kernel/mpic.h index 571b3c99e06..63e177143ea 100644 --- a/arch/ppc64/kernel/mpic.h +++ b/arch/ppc64/kernel/mpic.h @@ -265,3 +265,6 @@ extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask); extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); /* This one gets to the primary mpic */ extern int mpic_get_irq(struct pt_regs *regs); + +/* global mpic for pSeries */ +extern struct mpic *pSeries_mpic; diff --git a/arch/ppc64/kernel/pSeries_pci.c b/arch/ppc64/kernel/pSeries_pci.c index 0b1cca28140..1f5f141fb7a 100644 --- a/arch/ppc64/kernel/pSeries_pci.c +++ b/arch/ppc64/kernel/pSeries_pci.c @@ -1,13 +1,11 @@ /* - * pSeries_pci.c + * arch/ppc64/kernel/pSeries_pci.c * * Copyright (C) 2001 Dave Engebretsen, IBM Corporation * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM * * pSeries specific routines for PCI. * - * Based on code from pci.c and chrp_pci.c - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -23,430 +21,18 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/init.h> +#include <linux/ioport.h> #include <linux/kernel.h> -#include <linux/threads.h> #include <linux/pci.h> #include <linux/string.h> -#include <linux/init.h> -#include <linux/bootmem.h> -#include <asm/io.h> -#include <asm/pgtable.h> -#include <asm/irq.h> -#include <asm/prom.h> -#include <asm/machdep.h> #include <asm/pci-bridge.h> -#include <asm/iommu.h> -#include <asm/rtas.h> +#include <asm/prom.h> -#include "mpic.h" #include "pci.h" -/* RTAS tokens */ -static int read_pci_config; -static int write_pci_config; -static int ibm_read_pci_config; -static int ibm_write_pci_config; - -static int s7a_workaround; - -extern struct mpic *pSeries_mpic; - -static int config_access_valid(struct device_node *dn, int where) -{ - if (where < 256) - return 1; - if (where < 4096 && dn->pci_ext_config_space) - return 1; - - return 0; -} - -static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) -{ - int returnval = -1; - unsigned long buid, addr; - int ret; - - if (!dn) - return PCIBIOS_DEVICE_NOT_FOUND; - if (!config_access_valid(dn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (dn->busno << 16) | - (dn->devfn << 8) | (where & 0xff); - buid = dn->phb->buid; - if (buid) { - ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, - addr, buid >> 32, buid & 0xffffffff, size); - } else { - ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); - } - *val = returnval; - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (returnval == EEH_IO_ERROR_VALUE(size) - && eeh_dn_check_failure (dn, NULL)) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int rtas_pci_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 *val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) - return rtas_read_config(dn, where, size, val); - return PCIBIOS_DEVICE_NOT_FOUND; -} - -static int rtas_write_config(struct device_node *dn, int where, int size, u32 val) -{ - unsigned long buid, addr; - int ret; - - if (!dn) - return PCIBIOS_DEVICE_NOT_FOUND; - if (!config_access_valid(dn, where)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = ((where & 0xf00) << 20) | (dn->busno << 16) | - (dn->devfn << 8) | (where & 0xff); - buid = dn->phb->buid; - if (buid) { - ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val); - } else { - ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); - } - - if (ret) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int rtas_pci_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, int size, u32 val) -{ - struct device_node *busdn, *dn; - - if (bus->self) - busdn = pci_device_to_OF_node(bus->self); - else - busdn = bus->sysdata; /* must be a phb */ - - /* Search only direct children of the bus */ - for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) - return rtas_write_config(dn, where, size, val); - return PCIBIOS_DEVICE_NOT_FOUND; -} - -struct pci_ops rtas_pci_ops = { - rtas_pci_read_config, - rtas_pci_write_config -}; - -int is_python(struct device_node *dev) -{ - char *model = (char *)get_property(dev, "model", NULL); - - if (model && strstr(model, "Python")) - return 1; - - return 0; -} - -static int get_phb_reg_prop(struct device_node *dev, - unsigned int addr_size_words, - struct reg_property64 *reg) -{ - unsigned int *ui_ptr = NULL, len; - - /* Found a PHB, now figure out where his registers are mapped. */ - ui_ptr = (unsigned int *)get_property(dev, "reg", &len); - if (ui_ptr == NULL) - return 1; - - if (addr_size_words == 1) { - reg->address = ((struct reg_property32 *)ui_ptr)->address; - reg->size = ((struct reg_property32 *)ui_ptr)->size; - } else { - *reg = *((struct reg_property64 *)ui_ptr); - } - - return 0; -} - -static void python_countermeasures(struct device_node *dev, - unsigned int addr_size_words) -{ - struct reg_property64 reg_struct; - void __iomem *chip_regs; - volatile u32 val; - - if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) - return; - - /* Python's register file is 1 MB in size. */ - chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); - - /* - * Firmware doesn't always clear this bit which is critical - * for good performance - Anton - */ - -#define PRG_CL_RESET_VALID 0x00010000 - - val = in_be32(chip_regs + 0xf6030); - if (val & PRG_CL_RESET_VALID) { - printk(KERN_INFO "Python workaround: "); - val &= ~PRG_CL_RESET_VALID; - out_be32(chip_regs + 0xf6030, val); - /* - * We must read it back for changes to - * take effect - */ - val = in_be32(chip_regs + 0xf6030); - printk("reg0: %x\n", val); - } - - iounmap(chip_regs); -} - -void __init init_pci_config_tokens (void) -{ - read_pci_config = rtas_token("read-pci-config"); - write_pci_config = rtas_token("write-pci-config"); - ibm_read_pci_config = rtas_token("ibm,read-pci-config"); - ibm_write_pci_config = rtas_token("ibm,write-pci-config"); -} - -unsigned long __devinit get_phb_buid (struct device_node *phb) -{ - int addr_cells; - unsigned int *buid_vals; - unsigned int len; - unsigned long buid; - - if (ibm_read_pci_config == -1) return 0; - - /* PHB's will always be children of the root node, - * or so it is promised by the current firmware. */ - if (phb->parent == NULL) - return 0; - if (phb->parent->parent) - return 0; - - buid_vals = (unsigned int *) get_property(phb, "reg", &len); - if (buid_vals == NULL) - return 0; - - addr_cells = prom_n_addr_cells(phb); - if (addr_cells == 1) { - buid = (unsigned long) buid_vals[0]; - } else { - buid = (((unsigned long)buid_vals[0]) << 32UL) | - (((unsigned long)buid_vals[1]) & 0xffffffff); - } - return buid; -} - -static int phb_set_bus_ranges(struct device_node *dev, - struct pci_controller *phb) -{ - int *bus_range; - unsigned int len; - - bus_range = (int *) get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - return 1; - } - - phb->first_busno = bus_range[0]; - phb->last_busno = bus_range[1]; - - return 0; -} - -static int __devinit setup_phb(struct device_node *dev, - struct pci_controller *phb, - unsigned int addr_size_words) -{ - pci_setup_pci_controller(phb); - - if (is_python(dev)) - python_countermeasures(dev, addr_size_words); - - if (phb_set_bus_ranges(dev, phb)) - return 1; - - phb->arch_data = dev; - phb->ops = &rtas_pci_ops; - phb->buid = get_phb_buid(dev); - - return 0; -} - -static void __devinit add_linux_pci_domain(struct device_node *dev, - struct pci_controller *phb, - struct property *of_prop) -{ - memset(of_prop, 0, sizeof(struct property)); - of_prop->name = "linux,pci-domain"; - of_prop->length = sizeof(phb->global_number); - of_prop->value = (unsigned char *)&of_prop[1]; - memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); - prom_add_property(dev, of_prop); -} - -static struct pci_controller * __init alloc_phb(struct device_node *dev, - unsigned int addr_size_words) -{ - struct pci_controller *phb; - struct property *of_prop; - - phb = alloc_bootmem(sizeof(struct pci_controller)); - if (phb == NULL) - return NULL; - - of_prop = alloc_bootmem(sizeof(struct property) + - sizeof(phb->global_number)); - if (!of_prop) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - add_linux_pci_domain(dev, phb, of_prop); - - return phb; -} - -static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words) -{ - struct pci_controller *phb; - - phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), - GFP_KERNEL); - if (phb == NULL) - return NULL; - - if (setup_phb(dev, phb, addr_size_words)) - return NULL; - - phb->is_dynamic = 1; - - /* TODO: linux,pci-domain? */ - - return phb; -} - -unsigned long __init find_and_init_phbs(void) -{ - struct device_node *node; - struct pci_controller *phb; - unsigned int root_size_cells = 0; - unsigned int index; - unsigned int *opprop = NULL; - struct device_node *root = of_find_node_by_path("/"); - - if (ppc64_interrupt_controller == IC_OPEN_PIC) { - opprop = (unsigned int *)get_property(root, - "platform-open-pic", NULL); - } - - root_size_cells = prom_n_size_cells(root); - - index = 0; - - for (node = of_get_next_child(root, NULL); - node != NULL; - node = of_get_next_child(root, node)) { - if (node->type == NULL || strcmp(node->type, "pci") != 0) - continue; - - phb = alloc_phb(node, root_size_cells); - if (!phb) - continue; - - pci_process_bridge_OF_ranges(phb, node); - pci_setup_phb_io(phb, index == 0); - - if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { - int addr = root_size_cells * (index + 2) - 1; - mpic_assign_isu(pSeries_mpic, index, opprop[addr]); - } - - index++; - } - - of_node_put(root); - pci_devs_phb_init(); - - /* - * pci_probe_only and pci_assign_all_buses can be set via properties - * in chosen. - */ - if (of_chosen) { - int *prop; - - prop = (int *)get_property(of_chosen, "linux,pci-probe-only", - NULL); - if (prop) - pci_probe_only = *prop; - - prop = (int *)get_property(of_chosen, - "linux,pci-assign-all-buses", NULL); - if (prop) - pci_assign_all_buses = *prop; - } - - return 0; -} - -struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) -{ - struct device_node *root = of_find_node_by_path("/"); - unsigned int root_size_cells = 0; - struct pci_controller *phb; - struct pci_bus *bus; - int primary; - - root_size_cells = prom_n_size_cells(root); - - primary = list_empty(&hose_list); - phb = alloc_phb_dynamic(dn, root_size_cells); - if (!phb) - return NULL; - - pci_process_bridge_OF_ranges(phb, dn); - - pci_setup_phb_io_dynamic(phb, primary); - of_node_put(root); - - pci_devs_phb_init_dynamic(phb); - phb->last_busno = 0xff; - bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data); - phb->bus = bus; - phb->last_busno = bus->subordinate; - - return phb; -} -EXPORT_SYMBOL(init_phb_dynamic); +static int __initdata s7a_workaround = -1; #if 0 void pcibios_name_device(struct pci_dev *dev) @@ -474,11 +60,12 @@ void pcibios_name_device(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pcibios_name_device); #endif -static void check_s7a(void) +static void __init check_s7a(void) { struct device_node *root; char *model; + s7a_workaround = 0; root = of_find_node_by_path("/"); if (root) { model = get_property(root, "model", NULL); @@ -488,55 +75,23 @@ static void check_s7a(void) } } -/* RPA-specific bits for removing PHBs */ -int pcibios_remove_root_bus(struct pci_controller *phb) +void __devinit pSeries_irq_bus_setup(struct pci_bus *bus) { - struct pci_bus *b = phb->bus; - struct resource *res; - int rc, i; - - res = b->resource[0]; - if (!res->flags) { - printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, - b->name); - return 1; - } - - rc = unmap_bus_range(b); - if (rc) { - printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } + struct pci_dev *dev; - if (release_resource(res)) { - printk(KERN_ERR "%s: failed to release IO on bus %s\n", - __FUNCTION__, b->name); - return 1; - } - - for (i = 1; i < 3; ++i) { - res = b->resource[i]; - if (!res->flags && i == 0) { - printk(KERN_ERR "%s: no MEM resource for PHB %s\n", - __FUNCTION__, b->name); - return 1; - } - if (res->flags && release_resource(res)) { - printk(KERN_ERR - "%s: failed to release IO %d on bus %s\n", - __FUNCTION__, i, b->name); - return 1; + if (s7a_workaround < 0) + check_s7a(); + list_for_each_entry(dev, &bus->devices, bus_list) { + pci_read_irq_line(dev); + if (s7a_workaround) { + if (dev->irq > 16) { + dev->irq -= 3; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, + dev->irq); + } } } - - list_del(&phb->list_node); - if (phb->is_dynamic) - kfree(phb); - - return 0; } -EXPORT_SYMBOL(pcibios_remove_root_bus); static void __init pSeries_request_regions(void) { @@ -553,20 +108,6 @@ static void __init pSeries_request_regions(void) void __init pSeries_final_fixup(void) { - struct pci_dev *dev = NULL; - - check_s7a(); - - for_each_pci_dev(dev) { - pci_read_irq_line(dev); - if (s7a_workaround) { - if (dev->irq > 16) { - dev->irq -= 3; - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); - } - } - } - phbs_remap_io(); pSeries_request_regions(); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 6c0d1d58a55..f2b41243342 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -71,11 +71,6 @@ #define DBG(fmt...) #endif -extern void pSeries_final_fixup(void); - -extern void pSeries_get_boot_time(struct rtc_time *rtc_time); -extern void pSeries_get_rtc_time(struct rtc_time *rtc_time); -extern int pSeries_set_rtc_time(struct rtc_time *rtc_time); extern void find_udbg_vterm(void); extern void system_reset_fwnmi(void); /* from head.S */ extern void machine_check_fwnmi(void); /* from head.S */ @@ -84,9 +79,6 @@ extern void generic_find_legacy_serial_ports(u64 *physport, int fwnmi_active; /* TRUE if an FWNMI handler is present */ -extern unsigned long ppc_proc_freq; -extern unsigned long ppc_tb_freq; - extern void pSeries_system_reset_exception(struct pt_regs *regs); extern int pSeries_machine_check_exception(struct pt_regs *regs); @@ -381,171 +373,6 @@ static void __init pSeries_init_early(void) } -static void pSeries_progress(char *s, unsigned short hex) -{ - struct device_node *root; - int width, *p; - char *os; - static int display_character, set_indicator; - static int max_width; - static DEFINE_SPINLOCK(progress_lock); - static int pending_newline = 0; /* did last write end with unprinted newline? */ - - if (!rtas.base) - return; - - if (max_width == 0) { - if ((root = find_path_device("/rtas")) && - (p = (unsigned int *)get_property(root, - "ibm,display-line-length", - NULL))) - max_width = *p; - else - max_width = 0x10; - display_character = rtas_token("display-character"); - set_indicator = rtas_token("set-indicator"); - } - - if (display_character == RTAS_UNKNOWN_SERVICE) { - /* use hex display if available */ - if (set_indicator != RTAS_UNKNOWN_SERVICE) - rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex); - return; - } - - spin_lock(&progress_lock); - - /* - * Last write ended with newline, but we didn't print it since - * it would just clear the bottom line of output. Print it now - * instead. - * - * If no newline is pending, print a CR to start output at the - * beginning of the line. - */ - if (pending_newline) { - rtas_call(display_character, 1, 1, NULL, '\r'); - rtas_call(display_character, 1, 1, NULL, '\n'); - pending_newline = 0; - } else { - rtas_call(display_character, 1, 1, NULL, '\r'); - } - - width = max_width; - os = s; - while (*os) { - if (*os == '\n' || *os == '\r') { - /* Blank to end of line. */ - while (width-- > 0) - rtas_call(display_character, 1, 1, NULL, ' '); - - /* If newline is the last character, save it - * until next call to avoid bumping up the - * display output. - */ - if (*os == '\n' && !os[1]) { - pending_newline = 1; - spin_unlock(&progress_lock); - return; - } - - /* RTAS wants CR-LF, not just LF */ - - if (*os == '\n') { - rtas_call(display_character, 1, 1, NULL, '\r'); - rtas_call(display_character, 1, 1, NULL, '\n'); - } else { - /* CR might be used to re-draw a line, so we'll - * leave it alone and not add LF. - */ - rtas_call(display_character, 1, 1, NULL, *os); - } - - width = max_width; - } else { - width--; - rtas_call(display_character, 1, 1, NULL, *os); - } - - os++; - - /* if we overwrite the screen length */ - if (width <= 0) - while ((*os != 0) && (*os != '\n') && (*os != '\r')) - os++; - } - - /* Blank to end of line. */ - while (width-- > 0) - rtas_call(display_character, 1, 1, NULL, ' '); - - spin_unlock(&progress_lock); -} - -extern void setup_default_decr(void); - -/* Some sane defaults: 125 MHz timebase, 1GHz processor */ -#define DEFAULT_TB_FREQ 125000000UL -#define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) - -static void __init pSeries_calibrate_decr(void) -{ - struct device_node *cpu; - struct div_result divres; - unsigned int *fp; - int node_found; - - /* - * The cpu node should have a timebase-frequency property - * to tell us the rate at which the decrementer counts. - */ - cpu = of_find_node_by_type(NULL, "cpu"); - - ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */ - node_found = 0; - if (cpu != 0) { - fp = (unsigned int *)get_property(cpu, "timebase-frequency", - NULL); - if (fp != 0) { - node_found = 1; - ppc_tb_freq = *fp; - } - } - if (!node_found) - printk(KERN_ERR "WARNING: Estimating decrementer frequency " - "(not found)\n"); - - ppc_proc_freq = DEFAULT_PROC_FREQ; - node_found = 0; - if (cpu != 0) { - fp = (unsigned int *)get_property(cpu, "clock-frequency", - NULL); - if (fp != 0) { - node_found = 1; - ppc_proc_freq = *fp; - } - } - if (!node_found) - printk(KERN_ERR "WARNING: Estimating processor frequency " - "(not found)\n"); - - of_node_put(cpu); - - printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n", - ppc_tb_freq/1000000, ppc_tb_freq%1000000); - printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n", - ppc_proc_freq/1000000, ppc_proc_freq%1000000); - - tb_ticks_per_jiffy = ppc_tb_freq / HZ; - tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; - tb_ticks_per_usec = ppc_tb_freq / 1000000; - tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); - div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres); - tb_to_xs = divres.result_low; - - setup_default_decr(); -} - static int pSeries_check_legacy_ioport(unsigned int baseport) { struct device_node *np; @@ -596,16 +423,17 @@ struct machdep_calls __initdata pSeries_md = { .get_cpuinfo = pSeries_get_cpuinfo, .log_error = pSeries_log_error, .pcibios_fixup = pSeries_final_fixup, + .irq_bus_setup = pSeries_irq_bus_setup, .restart = rtas_restart, .power_off = rtas_power_off, .halt = rtas_halt, .panic = rtas_os_term, .cpu_die = pSeries_mach_cpu_die, - .get_boot_time = pSeries_get_boot_time, - .get_rtc_time = pSeries_get_rtc_time, - .set_rtc_time = pSeries_set_rtc_time, - .calibrate_decr = pSeries_calibrate_decr, - .progress = pSeries_progress, + .get_boot_time = rtas_get_boot_time, + .get_rtc_time = rtas_get_rtc_time, + .set_rtc_time = rtas_set_rtc_time, + .calibrate_decr = generic_calibrate_decr, + .progress = rtas_progress, .check_legacy_ioport = pSeries_check_legacy_ioport, .system_reset_exception = pSeries_system_reset_exception, .machine_check_exception = pSeries_machine_check_exception, diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 4203bd020c8..30154140f7e 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -1,5 +1,5 @@ /* - * SMP support for pSeries machines. + * SMP support for pSeries and BPA machines. * * Dave Engebretsen, Peter Bergner, and * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com @@ -47,6 +47,7 @@ #include <asm/pSeries_reconfig.h> #include "mpic.h" +#include "bpa_iic.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -286,6 +287,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) return 1; } +#ifdef CONFIG_XICS static inline void smp_xics_do_message(int cpu, int msg) { set_bit(msg, &xics_ipi_message[cpu].value); @@ -327,6 +329,37 @@ static void __devinit smp_xics_setup_cpu(int cpu) cpu_clear(cpu, of_spin_map); } +#endif /* CONFIG_XICS */ +#ifdef CONFIG_BPA_IIC +static void smp_iic_message_pass(int target, int msg) +{ + unsigned int i; + + if (target < NR_CPUS) { + iic_cause_IPI(target, msg); + } else { + for_each_online_cpu(i) { + if (target == MSG_ALL_BUT_SELF + && i == smp_processor_id()) + continue; + iic_cause_IPI(i, msg); + } + } +} + +static int __init smp_iic_probe(void) +{ + iic_request_IPIs(); + + return cpus_weight(cpu_possible_map); +} + +static void __devinit smp_iic_setup_cpu(int cpu) +{ + if (cpu != boot_cpuid) + iic_setup_cpu(); +} +#endif /* CONFIG_BPA_IIC */ static DEFINE_SPINLOCK(timebase_lock); static unsigned long timebase = 0; @@ -381,14 +414,15 @@ static int smp_pSeries_cpu_bootable(unsigned int nr) return 1; } - +#ifdef CONFIG_MPIC static struct smp_ops_t pSeries_mpic_smp_ops = { .message_pass = smp_mpic_message_pass, .probe = smp_mpic_probe, .kick_cpu = smp_pSeries_kick_cpu, .setup_cpu = smp_mpic_setup_cpu, }; - +#endif +#ifdef CONFIG_XICS static struct smp_ops_t pSeries_xics_smp_ops = { .message_pass = smp_xics_message_pass, .probe = smp_xics_probe, @@ -396,6 +430,16 @@ static struct smp_ops_t pSeries_xics_smp_ops = { .setup_cpu = smp_xics_setup_cpu, .cpu_bootable = smp_pSeries_cpu_bootable, }; +#endif +#ifdef CONFIG_BPA_IIC +static struct smp_ops_t bpa_iic_smp_ops = { + .message_pass = smp_iic_message_pass, + .probe = smp_iic_probe, + .kick_cpu = smp_pSeries_kick_cpu, + .setup_cpu = smp_iic_setup_cpu, + .cpu_bootable = smp_pSeries_cpu_bootable, +}; +#endif /* This is called very early */ void __init smp_init_pSeries(void) @@ -404,10 +448,25 @@ void __init smp_init_pSeries(void) DBG(" -> smp_init_pSeries()\n"); - if (ppc64_interrupt_controller == IC_OPEN_PIC) + switch (ppc64_interrupt_controller) { +#ifdef CONFIG_MPIC + case IC_OPEN_PIC: smp_ops = &pSeries_mpic_smp_ops; - else + break; +#endif +#ifdef CONFIG_XICS + case IC_PPC_XIC: smp_ops = &pSeries_xics_smp_ops; + break; +#endif +#ifdef CONFIG_BPA_IIC + case IC_BPA_IIC: + smp_ops = &bpa_iic_smp_ops; + break; +#endif + default: + panic("Invalid interrupt controller"); + } #ifdef CONFIG_HOTPLUG_CPU smp_ops->cpu_disable = pSeries_cpu_disable; diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 2bf0513f3ec..580676f87d2 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -902,6 +902,9 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) ppc_md.iommu_dev_setup(dev); + if (ppc_md.irq_bus_setup) + ppc_md.irq_bus_setup(bus); + if (!pci_probe_only) return; diff --git a/arch/ppc64/kernel/pci.h b/arch/ppc64/kernel/pci.h index 0fd7d849aa7..26be78b13af 100644 --- a/arch/ppc64/kernel/pci.h +++ b/arch/ppc64/kernel/pci.h @@ -40,10 +40,14 @@ struct device_node *fetch_dev_dn(struct pci_dev *dev); void pci_addr_cache_insert_device(struct pci_dev *dev); void pci_addr_cache_remove_device(struct pci_dev *dev); -/* From pSeries_pci.h */ +/* From rtas_pci.h */ void init_pci_config_tokens (void); unsigned long get_phb_buid (struct device_node *); +/* From pSeries_pci.h */ +extern void pSeries_final_fixup(void); +extern void pSeries_irq_bus_setup(struct pci_bus *bus); + extern unsigned long pci_probe_only; extern unsigned long pci_assign_all_buses; extern int pci_read_irq_line(struct pci_dev *pci_dev); diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c index f24827581dd..3059edb09cc 100644 --- a/arch/ppc64/kernel/pmac_time.c +++ b/arch/ppc64/kernel/pmac_time.c @@ -40,11 +40,6 @@ #define DBG(x...) #endif -extern void setup_default_decr(void); - -extern unsigned long ppc_tb_freq; -extern unsigned long ppc_proc_freq; - /* Apparently the RTC stores seconds since 1 Jan 1904 */ #define RTC_OFFSET 2082844800 @@ -161,8 +156,7 @@ void __init pmac_get_boot_time(struct rtc_time *tm) /* * Query the OF and get the decr frequency. - * This was taken from the pmac time_init() when merging the prep/pmac - * time functions. + * FIXME: merge this with generic_calibrate_decr */ void __init pmac_calibrate_decr(void) { diff --git a/arch/ppc64/kernel/proc_ppc64.c b/arch/ppc64/kernel/proc_ppc64.c index 0914b0669b0..a87c66a9652 100644 --- a/arch/ppc64/kernel/proc_ppc64.c +++ b/arch/ppc64/kernel/proc_ppc64.c @@ -53,7 +53,7 @@ static int __init proc_ppc64_create(void) if (!root) return 1; - if (!(systemcfg->platform & PLATFORM_PSERIES)) + if (!(systemcfg->platform & (PLATFORM_PSERIES | PLATFORM_BPA))) return 0; if (!proc_mkdir("rtas", root)) diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index b7683abfbe6..e248a7950ae 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -1915,9 +1915,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, unsigned long prom_send_capabilities(); /* - * On pSeries, copy the CPU hold code + * On pSeries and BPA, copy the CPU hold code */ - if (RELOC(of_platform) & PLATFORM_PSERIES) + if (RELOC(of_platform) & (PLATFORM_PSERIES | PLATFORM_BPA)) copy_and_flush(0, KERNELBASE - offset, 0x100, 0); /* diff --git a/arch/ppc64/kernel/rtas-proc.c b/arch/ppc64/kernel/rtas-proc.c index 28b1f1521f2..1f3ff860fdf 100644 --- a/arch/ppc64/kernel/rtas-proc.c +++ b/arch/ppc64/kernel/rtas-proc.c @@ -371,11 +371,11 @@ static ssize_t ppc_rtas_progress_write(struct file *file, /* Lets see if the user passed hexdigits */ hex = simple_strtoul(progress_led, NULL, 10); - ppc_md.progress ((char *)progress_led, hex); + rtas_progress ((char *)progress_led, hex); return count; /* clear the line */ - /* ppc_md.progress(" ", 0xffff);*/ + /* rtas_progress(" ", 0xffff);*/ } /* ****************************************************************** */ static int ppc_rtas_progress_show(struct seq_file *m, void *v) diff --git a/arch/ppc64/kernel/rtas.c b/arch/ppc64/kernel/rtas.c index 5575603def2..5e8eb33b8e5 100644 --- a/arch/ppc64/kernel/rtas.c +++ b/arch/ppc64/kernel/rtas.c @@ -91,6 +91,123 @@ call_rtas_display_status_delay(unsigned char c) } } +void +rtas_progress(char *s, unsigned short hex) +{ + struct device_node *root; + int width, *p; + char *os; + static int display_character, set_indicator; + static int display_width, display_lines, *row_width, form_feed; + static DEFINE_SPINLOCK(progress_lock); + static int current_line; + static int pending_newline = 0; /* did last write end with unprinted newline? */ + + if (!rtas.base) + return; + + if (display_width == 0) { + display_width = 0x10; + if ((root = find_path_device("/rtas"))) { + if ((p = (unsigned int *)get_property(root, + "ibm,display-line-length", NULL))) + display_width = *p; + if ((p = (unsigned int *)get_property(root, + "ibm,form-feed", NULL))) + form_feed = *p; + if ((p = (unsigned int *)get_property(root, + "ibm,display-number-of-lines", NULL))) + display_lines = *p; + row_width = (unsigned int *)get_property(root, + "ibm,display-truncation-length", NULL); + } + display_character = rtas_token("display-character"); + set_indicator = rtas_token("set-indicator"); + } + + if (display_character == RTAS_UNKNOWN_SERVICE) { + /* use hex display if available */ + if (set_indicator != RTAS_UNKNOWN_SERVICE) + rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex); + return; + } + + spin_lock(&progress_lock); + + /* + * Last write ended with newline, but we didn't print it since + * it would just clear the bottom line of output. Print it now + * instead. + * + * If no newline is pending and form feed is supported, clear the + * display with a form feed; otherwise, print a CR to start output + * at the beginning of the line. + */ + if (pending_newline) { + rtas_call(display_character, 1, 1, NULL, '\r'); + rtas_call(display_character, 1, 1, NULL, '\n'); + pending_newline = 0; + } else { + current_line = 0; + if (form_feed) + rtas_call(display_character, 1, 1, NULL, + (char)form_feed); + else + rtas_call(display_character, 1, 1, NULL, '\r'); + } + + if (row_width) + width = row_width[current_line]; + else + width = display_width; + os = s; + while (*os) { + if (*os == '\n' || *os == '\r') { + /* If newline is the last character, save it + * until next call to avoid bumping up the + * display output. + */ + if (*os == '\n' && !os[1]) { + pending_newline = 1; + current_line++; + if (current_line > display_lines-1) + current_line = display_lines-1; + spin_unlock(&progress_lock); + return; + } + + /* RTAS wants CR-LF, not just LF */ + + if (*os == '\n') { + rtas_call(display_character, 1, 1, NULL, '\r'); + rtas_call(display_character, 1, 1, NULL, '\n'); + } else { + /* CR might be used to re-draw a line, so we'll + * leave it alone and not add LF. + */ + rtas_call(display_character, 1, 1, NULL, *os); + } + + if (row_width) + width = row_width[current_line]; + else + width = display_width; + } else { + width--; + rtas_call(display_character, 1, 1, NULL, *os); + } + + os++; + + /* if we overwrite the screen length */ + if (width <= 0) + while ((*os != 0) && (*os != '\n') && (*os != '\r')) + os++; + } + + spin_unlock(&progress_lock); +} + int rtas_token(const char *service) { @@ -425,8 +542,8 @@ rtas_flash_firmware(void) printk(KERN_ALERT "FLASH: flash image is %ld bytes\n", image_size); printk(KERN_ALERT "FLASH: performing flash and reboot\n"); - ppc_md.progress("Flashing \n", 0x0); - ppc_md.progress("Please Wait... ", 0x0); + rtas_progress("Flashing \n", 0x0); + rtas_progress("Please Wait... ", 0x0); printk(KERN_ALERT "FLASH: this will take several minutes. Do not power off!\n"); status = rtas_call(update_token, 1, 1, NULL, rtas_block_list); switch (status) { /* should only get "bad" status */ diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c new file mode 100644 index 00000000000..1048817befb --- /dev/null +++ b/arch/ppc64/kernel/rtas_pci.c @@ -0,0 +1,495 @@ +/* + * arch/ppc64/kernel/rtas_pci.c + * + * Copyright (C) 2001 Dave Engebretsen, IBM Corporation + * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM + * + * RTAS specific routines for PCI. + * + * Based on code from pci.c, chrp_pci.c and pSeries_pci.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/threads.h> +#include <linux/pci.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/iommu.h> +#include <asm/rtas.h> + +#include "mpic.h" +#include "pci.h" + +/* RTAS tokens */ +static int read_pci_config; +static int write_pci_config; +static int ibm_read_pci_config; +static int ibm_write_pci_config; + +static int config_access_valid(struct device_node *dn, int where) +{ + if (where < 256) + return 1; + if (where < 4096 && dn->pci_ext_config_space) + return 1; + + return 0; +} + +static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) +{ + int returnval = -1; + unsigned long buid, addr; + int ret; + + if (!dn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(dn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = ((where & 0xf00) << 20) | (dn->busno << 16) | + (dn->devfn << 8) | (where & 0xff); + buid = dn->phb->buid; + if (buid) { + ret = rtas_call(ibm_read_pci_config, 4, 2, &returnval, + addr, buid >> 32, buid & 0xffffffff, size); + } else { + ret = rtas_call(read_pci_config, 2, 2, &returnval, addr, size); + } + *val = returnval; + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (returnval == EEH_IO_ERROR_VALUE(size) + && eeh_dn_check_failure (dn, NULL)) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_read_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 *val) +{ + struct device_node *busdn, *dn; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + + /* Search only direct children of the bus */ + for (dn = busdn->child; dn; dn = dn->sibling) + if (dn->devfn == devfn) + return rtas_read_config(dn, where, size, val); + return PCIBIOS_DEVICE_NOT_FOUND; +} + +static int rtas_write_config(struct device_node *dn, int where, int size, u32 val) +{ + unsigned long buid, addr; + int ret; + + if (!dn) + return PCIBIOS_DEVICE_NOT_FOUND; + if (!config_access_valid(dn, where)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + addr = ((where & 0xf00) << 20) | (dn->busno << 16) | + (dn->devfn << 8) | (where & 0xff); + buid = dn->phb->buid; + if (buid) { + ret = rtas_call(ibm_write_pci_config, 5, 1, NULL, addr, buid >> 32, buid & 0xffffffff, size, (ulong) val); + } else { + ret = rtas_call(write_pci_config, 3, 1, NULL, addr, size, (ulong)val); + } + + if (ret) + return PCIBIOS_DEVICE_NOT_FOUND; + + return PCIBIOS_SUCCESSFUL; +} + +static int rtas_pci_write_config(struct pci_bus *bus, + unsigned int devfn, + int where, int size, u32 val) +{ + struct device_node *busdn, *dn; + + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = bus->sysdata; /* must be a phb */ + + /* Search only direct children of the bus */ + for (dn = busdn->child; dn; dn = dn->sibling) + if (dn->devfn == devfn) + return rtas_write_config(dn, where, size, val); + return PCIBIOS_DEVICE_NOT_FOUND; +} + +struct pci_ops rtas_pci_ops = { + rtas_pci_read_config, + rtas_pci_write_config +}; + +int is_python(struct device_node *dev) +{ + char *model = (char *)get_property(dev, "model", NULL); + + if (model && strstr(model, "Python")) + return 1; + + return 0; +} + +static int get_phb_reg_prop(struct device_node *dev, + unsigned int addr_size_words, + struct reg_property64 *reg) +{ + unsigned int *ui_ptr = NULL, len; + + /* Found a PHB, now figure out where his registers are mapped. */ + ui_ptr = (unsigned int *)get_property(dev, "reg", &len); + if (ui_ptr == NULL) + return 1; + + if (addr_size_words == 1) { + reg->address = ((struct reg_property32 *)ui_ptr)->address; + reg->size = ((struct reg_property32 *)ui_ptr)->size; + } else { + *reg = *((struct reg_property64 *)ui_ptr); + } + + return 0; +} + +static void python_countermeasures(struct device_node *dev, + unsigned int addr_size_words) +{ + struct reg_property64 reg_struct; + void __iomem *chip_regs; + volatile u32 val; + + if (get_phb_reg_prop(dev, addr_size_words, ®_struct)) + return; + + /* Python's register file is 1 MB in size. */ + chip_regs = ioremap(reg_struct.address & ~(0xfffffUL), 0x100000); + + /* + * Firmware doesn't always clear this bit which is critical + * for good performance - Anton + */ + +#define PRG_CL_RESET_VALID 0x00010000 + + val = in_be32(chip_regs + 0xf6030); + if (val & PRG_CL_RESET_VALID) { + printk(KERN_INFO "Python workaround: "); + val &= ~PRG_CL_RESET_VALID; + out_be32(chip_regs + 0xf6030, val); + /* + * We must read it back for changes to + * take effect + */ + val = in_be32(chip_regs + 0xf6030); + printk("reg0: %x\n", val); + } + + iounmap(chip_regs); +} + +void __init init_pci_config_tokens (void) +{ + read_pci_config = rtas_token("read-pci-config"); + write_pci_config = rtas_token("write-pci-config"); + ibm_read_pci_config = rtas_token("ibm,read-pci-config"); + ibm_write_pci_config = rtas_token("ibm,write-pci-config"); +} + +unsigned long __devinit get_phb_buid (struct device_node *phb) +{ + int addr_cells; + unsigned int *buid_vals; + unsigned int len; + unsigned long buid; + + if (ibm_read_pci_config == -1) return 0; + + /* PHB's will always be children of the root node, + * or so it is promised by the current firmware. */ + if (phb->parent == NULL) + return 0; + if (phb->parent->parent) + return 0; + + buid_vals = (unsigned int *) get_property(phb, "reg", &len); + if (buid_vals == NULL) + return 0; + + addr_cells = prom_n_addr_cells(phb); + if (addr_cells == 1) { + buid = (unsigned long) buid_vals[0]; + } else { + buid = (((unsigned long)buid_vals[0]) << 32UL) | + (((unsigned long)buid_vals[1]) & 0xffffffff); + } + return buid; +} + +static int phb_set_bus_ranges(struct device_node *dev, + struct pci_controller *phb) +{ + int *bus_range; + unsigned int len; + + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + return 1; + } + + phb->first_busno = bus_range[0]; + phb->last_busno = bus_range[1]; + + return 0; +} + +static int __devinit setup_phb(struct device_node *dev, + struct pci_controller *phb, + unsigned int addr_size_words) +{ + pci_setup_pci_controller(phb); + + if (is_python(dev)) + python_countermeasures(dev, addr_size_words); + + if (phb_set_bus_ranges(dev, phb)) + return 1; + + phb->arch_data = dev; + phb->ops = &rtas_pci_ops; + phb->buid = get_phb_buid(dev); + + return 0; +} + +static void __devinit add_linux_pci_domain(struct device_node *dev, + struct pci_controller *phb, + struct property *of_prop) +{ + memset(of_prop, 0, sizeof(struct property)); + of_prop->name = "linux,pci-domain"; + of_prop->length = sizeof(phb->global_number); + of_prop->value = (unsigned char *)&of_prop[1]; + memcpy(of_prop->value, &phb->global_number, sizeof(phb->global_number)); + prom_add_property(dev, of_prop); +} + +static struct pci_controller * __init alloc_phb(struct device_node *dev, + unsigned int addr_size_words) +{ + struct pci_controller *phb; + struct property *of_prop; + + phb = alloc_bootmem(sizeof(struct pci_controller)); + if (phb == NULL) + return NULL; + + of_prop = alloc_bootmem(sizeof(struct property) + + sizeof(phb->global_number)); + if (!of_prop) + return NULL; + + if (setup_phb(dev, phb, addr_size_words)) + return NULL; + + add_linux_pci_domain(dev, phb, of_prop); + + return phb; +} + +static struct pci_controller * __devinit alloc_phb_dynamic(struct device_node *dev, unsigned int addr_size_words) +{ + struct pci_controller *phb; + + phb = (struct pci_controller *)kmalloc(sizeof(struct pci_controller), + GFP_KERNEL); + if (phb == NULL) + return NULL; + + if (setup_phb(dev, phb, addr_size_words)) + return NULL; + + phb->is_dynamic = 1; + + /* TODO: linux,pci-domain? */ + + return phb; +} + +unsigned long __init find_and_init_phbs(void) +{ + struct device_node *node; + struct pci_controller *phb; + unsigned int root_size_cells = 0; + unsigned int index; + unsigned int *opprop = NULL; + struct device_node *root = of_find_node_by_path("/"); + + if (ppc64_interrupt_controller == IC_OPEN_PIC) { + opprop = (unsigned int *)get_property(root, + "platform-open-pic", NULL); + } + + root_size_cells = prom_n_size_cells(root); + + index = 0; + + for (node = of_get_next_child(root, NULL); + node != NULL; + node = of_get_next_child(root, node)) { + if (node->type == NULL || strcmp(node->type, "pci") != 0) + continue; + + phb = alloc_phb(node, root_size_cells); + if (!phb) + continue; + + pci_process_bridge_OF_ranges(phb, node); + pci_setup_phb_io(phb, index == 0); +#ifdef CONFIG_PPC_PSERIES + if (ppc64_interrupt_controller == IC_OPEN_PIC && pSeries_mpic) { + int addr = root_size_cells * (index + 2) - 1; + mpic_assign_isu(pSeries_mpic, index, opprop[addr]); + } +#endif + index++; + } + + of_node_put(root); + pci_devs_phb_init(); + + /* + * pci_probe_only and pci_assign_all_buses can be set via properties + * in chosen. + */ + if (of_chosen) { + int *prop; + + prop = (int *)get_property(of_chosen, "linux,pci-probe-only", + NULL); + if (prop) + pci_probe_only = *prop; + + prop = (int *)get_property(of_chosen, + "linux,pci-assign-all-buses", NULL); + if (prop) + pci_assign_all_buses = *prop; + } + + return 0; +} + +struct pci_controller * __devinit init_phb_dynamic(struct device_node *dn) +{ + struct device_node *root = of_find_node_by_path("/"); + unsigned int root_size_cells = 0; + struct pci_controller *phb; + struct pci_bus *bus; + int primary; + + root_size_cells = prom_n_size_cells(root); + + primary = list_empty(&hose_list); + phb = alloc_phb_dynamic(dn, root_size_cells); + if (!phb) + return NULL; + + pci_process_bridge_OF_ranges(phb, dn); + + pci_setup_phb_io_dynamic(phb, primary); + of_node_put(root); + + pci_devs_phb_init_dynamic(phb); + phb->last_busno = 0xff; + bus = pci_scan_bus(phb->first_busno, phb->ops, phb->arch_data); + phb->bus = bus; + phb->last_busno = bus->subordinate; + + return phb; +} +EXPORT_SYMBOL(init_phb_dynamic); + +/* RPA-specific bits for removing PHBs */ +int pcibios_remove_root_bus(struct pci_controller *phb) +{ + struct pci_bus *b = phb->bus; + struct resource *res; + int rc, i; + + res = b->resource[0]; + if (!res->flags) { + printk(KERN_ERR "%s: no IO resource for PHB %s\n", __FUNCTION__, + b->name); + return 1; + } + + rc = unmap_bus_range(b); + if (rc) { + printk(KERN_ERR "%s: failed to unmap IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + if (release_resource(res)) { + printk(KERN_ERR "%s: failed to release IO on bus %s\n", + __FUNCTION__, b->name); + return 1; + } + + for (i = 1; i < 3; ++i) { + res = b->resource[i]; + if (!res->flags && i == 0) { + printk(KERN_ERR "%s: no MEM resource for PHB %s\n", + __FUNCTION__, b->name); + return 1; + } + if (res->flags && release_resource(res)) { + printk(KERN_ERR + "%s: failed to release IO %d on bus %s\n", + __FUNCTION__, i, b->name); + return 1; + } + } + + list_del(&phb->list_node); + if (phb->is_dynamic) + kfree(phb); + + return 0; +} +EXPORT_SYMBOL(pcibios_remove_root_bus); diff --git a/arch/ppc64/kernel/rtc.c b/arch/ppc64/kernel/rtc.c index de02aedbe08..d729fefa0df 100644 --- a/arch/ppc64/kernel/rtc.c +++ b/arch/ppc64/kernel/rtc.c @@ -301,7 +301,7 @@ void iSeries_get_boot_time(struct rtc_time *tm) #ifdef CONFIG_PPC_RTAS #define MAX_RTC_WAIT 5000 /* 5 sec */ #define RTAS_CLOCK_BUSY (-2) -void pSeries_get_boot_time(struct rtc_time *rtc_tm) +void rtas_get_boot_time(struct rtc_time *rtc_tm) { int ret[8]; int error, wait_time; @@ -336,7 +336,7 @@ void pSeries_get_boot_time(struct rtc_time *rtc_tm) * and if a delay is needed to read the clock. In this case we just * silently return without updating rtc_tm. */ -void pSeries_get_rtc_time(struct rtc_time *rtc_tm) +void rtas_get_rtc_time(struct rtc_time *rtc_tm) { int ret[8]; int error, wait_time; @@ -371,7 +371,7 @@ void pSeries_get_rtc_time(struct rtc_time *rtc_tm) rtc_tm->tm_year = ret[0] - 1900; } -int pSeries_set_rtc_time(struct rtc_time *tm) +int rtas_set_rtc_time(struct rtc_time *tm) { int error, wait_time; unsigned long max_wait_tb; diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index 8e439a81764..0a47a5ef428 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -344,6 +344,7 @@ static void __init setup_cpu_maps(void) extern struct machdep_calls pSeries_md; extern struct machdep_calls pmac_md; extern struct machdep_calls maple_md; +extern struct machdep_calls bpa_md; /* Ultimately, stuff them in an elf section like initcalls... */ static struct machdep_calls __initdata *machines[] = { @@ -356,6 +357,9 @@ static struct machdep_calls __initdata *machines[] = { #ifdef CONFIG_PPC_MAPLE &maple_md, #endif /* CONFIG_PPC_MAPLE */ +#ifdef CONFIG_PPC_BPA + &bpa_md, +#endif NULL }; @@ -679,6 +683,12 @@ void machine_restart(char *cmd) if (ppc_md.nvram_sync) ppc_md.nvram_sync(); ppc_md.restart(cmd); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; } EXPORT_SYMBOL(machine_restart); @@ -688,6 +698,12 @@ void machine_power_off(void) if (ppc_md.nvram_sync) ppc_md.nvram_sync(); ppc_md.power_off(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; } EXPORT_SYMBOL(machine_power_off); @@ -697,13 +713,16 @@ void machine_halt(void) if (ppc_md.nvram_sync) ppc_md.nvram_sync(); ppc_md.halt(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif + printk(KERN_EMERG "System Halted, OK to turn off power\n"); + local_irq_disable(); + while (1) ; } EXPORT_SYMBOL(machine_halt); -unsigned long ppc_proc_freq; -unsigned long ppc_tb_freq; - static int ppc64_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -1055,6 +1074,7 @@ void __init setup_arch(char **cmdline_p) /* set up the bootmem stuff with available memory */ do_init_bootmem(); + sparse_init(); /* initialize the syscall map in systemcfg */ setup_syscall_map(); @@ -1079,11 +1099,11 @@ void __init setup_arch(char **cmdline_p) static void ppc64_do_msg(unsigned int src, const char *msg) { if (ppc_md.progress) { - char buf[32]; + char buf[128]; - sprintf(buf, "%08x \n", src); + sprintf(buf, "%08X\n", src); ppc_md.progress(buf, 0); - sprintf(buf, "%-16s", msg); + snprintf(buf, 128, "%s", msg); ppc_md.progress(buf, 0); } } @@ -1117,7 +1137,7 @@ void ppc64_dump_msg(unsigned int src, const char *msg) } /* This should only be called on processor 0 during calibrate decr */ -void setup_default_decr(void) +void __init setup_default_decr(void) { struct paca_struct *lpaca = get_paca(); diff --git a/arch/ppc64/kernel/smp.c b/arch/ppc64/kernel/smp.c index 9ef5d36d6b2..2fcddfcb594 100644 --- a/arch/ppc64/kernel/smp.c +++ b/arch/ppc64/kernel/smp.c @@ -71,7 +71,7 @@ void smp_call_function_interrupt(void); int smt_enabled_at_boot = 1; -#ifdef CONFIG_PPC_MULTIPLATFORM +#ifdef CONFIG_MPIC void smp_mpic_message_pass(int target, int msg) { /* make sure we're sending something that translates to an IPI */ @@ -128,7 +128,7 @@ void __devinit smp_generic_kick_cpu(int nr) smp_mb(); } -#endif /* CONFIG_PPC_MULTIPLATFORM */ +#endif /* CONFIG_MPIC */ static void __init smp_space_timers(unsigned int max_cpus) { diff --git a/arch/ppc64/kernel/spider-pic.c b/arch/ppc64/kernel/spider-pic.c new file mode 100644 index 00000000000..d5c9a02fb11 --- /dev/null +++ b/arch/ppc64/kernel/spider-pic.c @@ -0,0 +1,191 @@ +/* + * External Interrupt Controller on Spider South Bridge + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Arnd Bergmann <arndb@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> + +#include <asm/pgtable.h> +#include <asm/prom.h> +#include <asm/io.h> + +#include "bpa_iic.h" + +/* register layout taken from Spider spec, table 7.4-4 */ +enum { + TIR_DEN = 0x004, /* Detection Enable Register */ + TIR_MSK = 0x084, /* Mask Level Register */ + TIR_EDC = 0x0c0, /* Edge Detection Clear Register */ + TIR_PNDA = 0x100, /* Pending Register A */ + TIR_PNDB = 0x104, /* Pending Register B */ + TIR_CS = 0x144, /* Current Status Register */ + TIR_LCSA = 0x150, /* Level Current Status Register A */ + TIR_LCSB = 0x154, /* Level Current Status Register B */ + TIR_LCSC = 0x158, /* Level Current Status Register C */ + TIR_LCSD = 0x15c, /* Level Current Status Register D */ + TIR_CFGA = 0x200, /* Setting Register A0 */ + TIR_CFGB = 0x204, /* Setting Register B0 */ + /* 0x208 ... 0x3ff Setting Register An/Bn */ + TIR_PPNDA = 0x400, /* Packet Pending Register A */ + TIR_PPNDB = 0x404, /* Packet Pending Register B */ + TIR_PIERA = 0x408, /* Packet Output Error Register A */ + TIR_PIERB = 0x40c, /* Packet Output Error Register B */ + TIR_PIEN = 0x444, /* Packet Output Enable Register */ + TIR_PIPND = 0x454, /* Packet Output Pending Register */ + TIRDID = 0x484, /* Spider Device ID Register */ + REISTIM = 0x500, /* Reissue Command Timeout Time Setting */ + REISTIMEN = 0x504, /* Reissue Command Timeout Setting */ + REISWAITEN = 0x508, /* Reissue Wait Control*/ +}; + +static void __iomem *spider_pics[4]; + +static void __iomem *spider_get_pic(int irq) +{ + int node = irq / IIC_NODE_STRIDE; + irq %= IIC_NODE_STRIDE; + + if (irq >= IIC_EXT_OFFSET && + irq < IIC_EXT_OFFSET + IIC_NUM_EXT && + spider_pics) + return spider_pics[node]; + return NULL; +} + +static int spider_get_nr(unsigned int irq) +{ + return (irq % IIC_NODE_STRIDE) - IIC_EXT_OFFSET; +} + +static void __iomem *spider_get_irq_config(int irq) +{ + void __iomem *pic; + pic = spider_get_pic(irq); + return pic + TIR_CFGA + 8 * spider_get_nr(irq); +} + +static void spider_enable_irq(unsigned int irq) +{ + void __iomem *cfg = spider_get_irq_config(irq); + irq = spider_get_nr(irq); + + out_be32(cfg, in_be32(cfg) | 0x3107000eu); + out_be32(cfg + 4, in_be32(cfg + 4) | 0x00020000u | irq); +} + +static void spider_disable_irq(unsigned int irq) +{ + void __iomem *cfg = spider_get_irq_config(irq); + irq = spider_get_nr(irq); + + out_be32(cfg, in_be32(cfg) & ~0x30000000u); +} + +static unsigned int spider_startup_irq(unsigned int irq) +{ + spider_enable_irq(irq); + return 0; +} + +static void spider_shutdown_irq(unsigned int irq) +{ + spider_disable_irq(irq); +} + +static void spider_end_irq(unsigned int irq) +{ + spider_enable_irq(irq); +} + +static void spider_ack_irq(unsigned int irq) +{ + spider_disable_irq(irq); + iic_local_enable(); +} + +static struct hw_interrupt_type spider_pic = { + .typename = " SPIDER ", + .startup = spider_startup_irq, + .shutdown = spider_shutdown_irq, + .enable = spider_enable_irq, + .disable = spider_disable_irq, + .ack = spider_ack_irq, + .end = spider_end_irq, +}; + + +int spider_get_irq(unsigned long int_pending) +{ + void __iomem *regs = spider_get_pic(int_pending); + unsigned long cs; + int irq; + + cs = in_be32(regs + TIR_CS); + + irq = cs >> 24; + if (irq != 63) + return irq; + + return -1; +} + +void spider_init_IRQ(void) +{ + int node; + struct device_node *dn; + unsigned int *property; + long spiderpic; + int n; + +/* FIXME: detect multiple PICs as soon as the device tree has them */ + for (node = 0; node < 1; node++) { + dn = of_find_node_by_path("/"); + n = prom_n_addr_cells(dn); + property = (unsigned int *) get_property(dn, + "platform-spider-pic", NULL); + + if (!property) + continue; + for (spiderpic = 0; n > 0; --n) + spiderpic = (spiderpic << 32) + *property++; + printk(KERN_DEBUG "SPIDER addr: %lx\n", spiderpic); + spider_pics[node] = __ioremap(spiderpic, 0x800, _PAGE_NO_CACHE); + for (n = 0; n < IIC_NUM_EXT; n++) { + int irq = n + IIC_EXT_OFFSET + node * IIC_NODE_STRIDE; + get_irq_desc(irq)->handler = &spider_pic; + + /* do not mask any interrupts because of level */ + out_be32(spider_pics[node] + TIR_MSK, 0x0); + + /* disable edge detection clear */ + /* out_be32(spider_pics[node] + TIR_EDC, 0x0); */ + + /* enable interrupt packets to be output */ + out_be32(spider_pics[node] + TIR_PIEN, + in_be32(spider_pics[node] + TIR_PIEN) | 0x1); + + /* Enable the interrupt detection enable bit. Do this last! */ + out_be32(spider_pics[node] + TIR_DEN, + in_be32(spider_pics[node] +TIR_DEN) | 0x1); + + } + } +} diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 33364a7d2cd..2348a75e050 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -107,6 +107,9 @@ void ppc_adjtimex(void); static unsigned adjusting_time = 0; +unsigned long ppc_proc_freq; +unsigned long ppc_tb_freq; + static __inline__ void timer_check_rtc(void) { /* @@ -472,6 +475,66 @@ int do_settimeofday(struct timespec *tv) EXPORT_SYMBOL(do_settimeofday); +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_MAPLE) || defined(CONFIG_PPC_BPA) +void __init generic_calibrate_decr(void) +{ + struct device_node *cpu; + struct div_result divres; + unsigned int *fp; + int node_found; + + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + cpu = of_find_node_by_type(NULL, "cpu"); + + ppc_tb_freq = DEFAULT_TB_FREQ; /* hardcoded default */ + node_found = 0; + if (cpu != 0) { + fp = (unsigned int *)get_property(cpu, "timebase-frequency", + NULL); + if (fp != 0) { + node_found = 1; + ppc_tb_freq = *fp; + } + } + if (!node_found) + printk(KERN_ERR "WARNING: Estimating decrementer frequency " + "(not found)\n"); + + ppc_proc_freq = DEFAULT_PROC_FREQ; + node_found = 0; + if (cpu != 0) { + fp = (unsigned int *)get_property(cpu, "clock-frequency", + NULL); + if (fp != 0) { + node_found = 1; + ppc_proc_freq = *fp; + } + } + if (!node_found) + printk(KERN_ERR "WARNING: Estimating processor frequency " + "(not found)\n"); + + of_node_put(cpu); + + printk(KERN_INFO "time_init: decrementer frequency = %lu.%.6lu MHz\n", + ppc_tb_freq/1000000, ppc_tb_freq%1000000); + printk(KERN_INFO "time_init: processor frequency = %lu.%.6lu MHz\n", + ppc_proc_freq/1000000, ppc_proc_freq%1000000); + + tb_ticks_per_jiffy = ppc_tb_freq / HZ; + tb_ticks_per_sec = tb_ticks_per_jiffy * HZ; + tb_ticks_per_usec = ppc_tb_freq / 1000000; + tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); + div128_by_32(1024*1024, 0, tb_ticks_per_sec, &divres); + tb_to_xs = divres.result_low; + + setup_default_decr(); +} +#endif + void __init time_init(void) { /* This function is only called on the boot processor */ diff --git a/arch/ppc64/kernel/traps.c b/arch/ppc64/kernel/traps.c index 7e52cb2605e..a8d5e83ee89 100644 --- a/arch/ppc64/kernel/traps.c +++ b/arch/ppc64/kernel/traps.c @@ -126,6 +126,10 @@ int die(const char *str, struct pt_regs *regs, long err) printk("POWERMAC "); nl = 1; break; + case PLATFORM_BPA: + printk("BPA "); + nl = 1; + break; } if (nl) printk("\n"); diff --git a/arch/ppc64/mm/Makefile b/arch/ppc64/mm/Makefile index ac522d57b2a..3695d00d347 100644 --- a/arch/ppc64/mm/Makefile +++ b/arch/ppc64/mm/Makefile @@ -6,6 +6,6 @@ EXTRA_CFLAGS += -mno-minimal-toc obj-y := fault.o init.o imalloc.o hash_utils.o hash_low.o tlb.o \ slb_low.o slb.o stab.o mmap.o -obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PPC_MULTIPLATFORM) += hash_native.o diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 6fa1e6490b5..b50b3a446db 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -98,7 +98,7 @@ void show_mem(void) printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); total++; if (PageReserved(page)) reserved++; @@ -531,7 +531,7 @@ EXPORT_SYMBOL(page_is_ram); * Initialize the bootmem system and give it all the memory we * have available. */ -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NEED_MULTIPLE_NODES void __init do_init_bootmem(void) { unsigned long i; @@ -553,12 +553,20 @@ void __init do_init_bootmem(void) max_pfn = max_low_pfn; - /* add all physical memory to the bootmem map. Also find the first */ + /* Add all physical memory to the bootmem map, mark each area + * present. + */ for (i=0; i < lmb.memory.cnt; i++) { unsigned long physbase, size; + unsigned long start_pfn, end_pfn; physbase = lmb.memory.region[i].physbase; size = lmb.memory.region[i].size; + + start_pfn = physbase >> PAGE_SHIFT; + end_pfn = start_pfn + (size >> PAGE_SHIFT); + memory_present(0, start_pfn, end_pfn); + free_bootmem(physbase, size); } @@ -597,7 +605,7 @@ void __init paging_init(void) free_area_init_node(0, NODE_DATA(0), zones_size, __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); } -#endif /* CONFIG_DISCONTIGMEM */ +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ static struct kcore_list kcore_vmem; @@ -628,7 +636,7 @@ module_init(setup_kcore); void __init mem_init(void) { -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES int nid; #endif pg_data_t *pgdat; @@ -639,7 +647,7 @@ void __init mem_init(void) num_physpages = max_low_pfn; /* RAM is assumed contiguous */ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NEED_MULTIPLE_NODES for_each_online_node(nid) { if (NODE_DATA(nid)->node_spanned_pages != 0) { printk("freeing bootmem node %x\n", nid); @@ -654,7 +662,7 @@ void __init mem_init(void) for_each_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; i++) { - page = pgdat->node_mem_map + i; + page = pgdat_page_nr(pgdat, i); if (PageReserved(page)) reservedpages++; } diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index ea862ec643d..cafd91aef28 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -440,6 +440,8 @@ new_range: for (i = start ; i < (start+size); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = numa_domain; + memory_present(numa_domain, start >> PAGE_SHIFT, + (start + size) >> PAGE_SHIFT); if (--ranges) goto new_range; @@ -481,6 +483,7 @@ static void __init setup_nonnuma(void) for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; + memory_present(0, 0, init_node_data[0].node_end_pfn); } static void __init dump_numa_topology(void) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ab79af84699..32696c1d928 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -226,6 +226,8 @@ config WARN_STACK_SIZE This allows you to specify the maximum frame size a function may have without the compiler complaining about it. +source "mm/Kconfig" + comment "I/O subsystem configuration" config MACHCHK_WARNING diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index 278a8139cb1..d4026f62cb0 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h index bf33dcfec7d..3898f66d0b2 100644 --- a/arch/s390/kernel/compat_linux.h +++ b/arch/s390/kernel/compat_linux.h @@ -45,7 +45,7 @@ typedef struct compat_siginfo { /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 3468d512722..a7c8bfc1160 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -486,7 +486,7 @@ config CPU_SUBTYPE_ST40 depends on CPU_SUBTYPE_ST40STB1 || CPU_SUBTYPE_ST40GX1 default y -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool depends on SH_HP690 default y @@ -496,6 +496,8 @@ config DISCONTIGMEM or have huge holes in the physical address space for other reasons. See <file:Documentation/vm/numa> for more. +source "mm/Kconfig" + config ZERO_PAGE_OFFSET hex "Zero page offset" default "0x00001000" if !(SH_MPC1211 || SH_SH03) diff --git a/arch/sh64/Kconfig b/arch/sh64/Kconfig index 76eb81fba45..708e59736a4 100644 --- a/arch/sh64/Kconfig +++ b/arch/sh64/Kconfig @@ -217,6 +217,8 @@ config PREEMPT bool "Preemptible Kernel (EXPERIMENTAL)" depends on EXPERIMENTAL +source "mm/Kconfig" + endmenu menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)" diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 237f922520f..262e13d086f 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -264,7 +264,11 @@ config SUNOS_EMUL want to run SunOS binaries on an Ultra you must also say Y to "Kernel support for 32-bit a.out binaries" above. -source "drivers/parport/Kconfig" +source "mm/Kconfig" + +endmenu + +source "drivers/Kconfig" config PRINTER tristate "Parallel printer support" @@ -291,6 +295,8 @@ config PRINTER If you have more than 8 printers, you need to increase the LP_NO macro in lp.c and the PARPORT_MAX macro in parport.h. +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" @@ -372,18 +378,8 @@ config UNIX98_PTY_COUNT endmenu -source "drivers/input/Kconfig" - source "fs/Kconfig" -source "sound/Kconfig" - -source "drivers/usb/Kconfig" - -source "drivers/infiniband/Kconfig" - -source "drivers/char/watchdog/Kconfig" - source "arch/sparc/Kconfig.debug" source "security/Kconfig" diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index a72fd15d5ea..e2b050eb3b9 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -484,6 +484,8 @@ config CMDLINE NOTE: This option WILL override the PROM bootargs setting! +source "mm/Kconfig" + endmenu source "drivers/base/Kconfig" diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c index 7066d7ba667..bdac631cf01 100644 --- a/arch/sparc64/kernel/kprobes.c +++ b/arch/sparc64/kernel/kprobes.c @@ -6,7 +6,6 @@ #include <linux/config.h> #include <linux/kernel.h> #include <linux/kprobes.h> - #include <asm/kdebug.h> #include <asm/signal.h> @@ -47,25 +46,59 @@ void arch_copy_kprobe(struct kprobe *p) { p->ainsn.insn[0] = *p->addr; p->ainsn.insn[1] = BREAKPOINT_INSTRUCTION_2; + p->opcode = *p->addr; } -void arch_remove_kprobe(struct kprobe *p) +void arch_arm_kprobe(struct kprobe *p) { + *p->addr = BREAKPOINT_INSTRUCTION; + flushi(p->addr); } -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 +void arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flushi(p->addr); +} + +void arch_remove_kprobe(struct kprobe *p) +{ +} static struct kprobe *current_kprobe; static unsigned long current_kprobe_orig_tnpc; static unsigned long current_kprobe_orig_tstate_pil; static unsigned int kprobe_status; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_orig_tnpc_prev; +static unsigned long kprobe_orig_tstate_pil_prev; +static unsigned int kprobe_status_prev; -static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +static inline void save_previous_kprobe(void) +{ + kprobe_status_prev = kprobe_status; + kprobe_orig_tnpc_prev = current_kprobe_orig_tnpc; + kprobe_orig_tstate_pil_prev = current_kprobe_orig_tstate_pil; + kprobe_prev = current_kprobe; +} + +static inline void restore_previous_kprobe(void) +{ + kprobe_status = kprobe_status_prev; + current_kprobe_orig_tnpc = kprobe_orig_tnpc_prev; + current_kprobe_orig_tstate_pil = kprobe_orig_tstate_pil_prev; + current_kprobe = kprobe_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) { current_kprobe_orig_tnpc = regs->tnpc; current_kprobe_orig_tstate_pil = (regs->tstate & TSTATE_PIL); + current_kprobe = p; +} + +static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) +{ regs->tstate |= TSTATE_PIL; /*single step inline, if it a breakpoint instruction*/ @@ -78,17 +111,6 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) } } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) -{ - *p->addr = p->opcode; - flushi(p->addr); - - regs->tpc = (unsigned long) p->addr; - regs->tnpc = current_kprobe_orig_tnpc; - regs->tstate = ((regs->tstate & ~TSTATE_PIL) | - current_kprobe_orig_tstate_pil); -} - static int kprobe_handler(struct pt_regs *regs) { struct kprobe *p; @@ -109,8 +131,18 @@ static int kprobe_handler(struct pt_regs *regs) unlock_kprobes(); goto no_kprobe; } - disarm_kprobe(p, regs); - ret = 1; + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + kprobe_status = KPROBE_REENTER; + prepare_singlestep(p, regs); + return 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) @@ -138,8 +170,8 @@ static int kprobe_handler(struct pt_regs *regs) goto no_kprobe; } + set_current_kprobe(p, regs); kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; if (p->pre_handler && p->pre_handler(p, regs)) return 1; @@ -245,12 +277,20 @@ static inline int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } resume_execution(current_kprobe, regs); + /*Restore back the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } unlock_kprobes(); +out: preempt_enable_no_resched(); return 1; @@ -392,3 +432,4 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) } return 0; } + diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c index 9a375e975cf..f28428f4170 100644 --- a/arch/sparc64/kernel/signal32.c +++ b/arch/sparc64/kernel/signal32.c @@ -102,7 +102,7 @@ typedef struct compat_siginfo{ /* POSIX.1b timers */ struct { - timer_t _tid; /* timer id */ + compat_timer_t _tid; /* timer id */ int _overrun; /* overrun count */ compat_sigval_t _sigval; /* same as below */ int _sys_private; /* not to be passed to user */ diff --git a/arch/um/Kconfig b/arch/um/Kconfig index b8e952c88fd..9469e77303e 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -74,6 +74,7 @@ config MODE_SKAS option will shrink the UML binary slightly. source "arch/um/Kconfig_arch" +source "mm/Kconfig" config LD_SCRIPT_STATIC bool diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 804c6bbdf67..157584ae479 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -8,6 +8,7 @@ #include "linux/kernel.h" #include "linux/sched.h" #include "linux/interrupt.h" +#include "linux/string.h" #include "linux/mm.h" #include "linux/slab.h" #include "linux/utsname.h" @@ -322,12 +323,7 @@ void do_uml_exitcalls(void) char *uml_strdup(char *string) { - char *new; - - new = kmalloc(strlen(string) + 1, GFP_KERNEL); - if(new == NULL) return(NULL); - strcpy(new, string); - return(new); + return kstrdup(string, GFP_KERNEL); } int copy_to_user_proc(void __user *to, void *from, int size) diff --git a/arch/v850/Kconfig b/arch/v850/Kconfig index 90cd4baa75e..27febd6ffa8 100644 --- a/arch/v850/Kconfig +++ b/arch/v850/Kconfig @@ -218,6 +218,8 @@ menu "Processor type and features" a lot of RAM, and you need to able to allocate very large contiguous chunks. If unsure, say N. +source "mm/Kconfig" + endmenu diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 289f448ac89..db259757dc8 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -265,7 +265,7 @@ config NUMA_EMU into virtual nodes when booted with "numa=fake=N", where N is the number of nodes. This is only useful for debugging. -config DISCONTIGMEM +config ARCH_DISCONTIGMEM_ENABLE bool depends on NUMA default y @@ -274,6 +274,27 @@ config NUMA bool default n +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on NUMA + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on NUMA + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on !NUMA + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + config HAVE_DEC_LOCK bool depends on SMP @@ -381,6 +402,8 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source kernel/Kconfig.hz + endmenu # diff --git a/arch/x86_64/boot/install.sh b/arch/x86_64/boot/install.sh index 90f2452b3b9..f17b40dfc0f 100644 --- a/arch/x86_64/boot/install.sh +++ b/arch/x86_64/boot/install.sh @@ -21,8 +21,8 @@ # User may have a custom install script -if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi -if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi +if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi +if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi # Default install - same as make zlilo diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index fbd09b5126c..66e2821533d 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c @@ -428,8 +428,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) return (void __user *)((rsp - frame_size) & -8UL); } -void ia32_setup_frame(int sig, struct k_sigaction *ka, - compat_sigset_t *set, struct pt_regs * regs) +int ia32_setup_frame(int sig, struct k_sigaction *ka, + compat_sigset_t *set, struct pt_regs * regs) { struct sigframe __user *frame; int err = 0; @@ -514,14 +514,15 @@ void ia32_setup_frame(int sig, struct k_sigaction *ka, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } -void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - compat_sigset_t *set, struct pt_regs * regs) +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + compat_sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; int err = 0; @@ -613,9 +614,9 @@ void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } - diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 504e6347499..c9a6b812e92 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -40,11 +40,7 @@ int fix_aperture __initdata = 1; static u32 __init allocate_aperture(void) { -#ifdef CONFIG_DISCONTIGMEM pg_data_t *nd0 = NODE_DATA(0); -#else - pg_data_t *nd0 = &contig_page_data; -#endif u32 aper_size; void *p; diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index e3a19e8ebbf..9631c747c5e 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -2,20 +2,24 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/string.h> +#include <linux/tty.h> #include <asm/io.h> #include <asm/processor.h> /* Simple VGA output */ #ifdef __i386__ +#include <asm/setup.h> #define VGABASE (__ISA_IO_base + 0xb8000) #else +#include <asm/bootsetup.h> #define VGABASE ((void __iomem *)0xffffffff800b8000UL) #endif -#define MAX_YPOS 25 -#define MAX_XPOS 80 +#define MAX_YPOS max_ypos +#define MAX_XPOS max_xpos +static int max_ypos = 25, max_xpos = 80; static int current_ypos = 1, current_xpos = 0; static void early_vga_write(struct console *con, const char *str, unsigned n) @@ -196,7 +200,10 @@ int __init setup_early_printk(char *opt) } else if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf); early_console = &early_serial_console; - } else if (!strncmp(buf, "vga", 3)) { + } else if (!strncmp(buf, "vga", 3) + && SCREEN_INFO.orig_video_isVGA == 1) { + max_xpos = SCREEN_INFO.orig_video_cols; + max_ypos = SCREEN_INFO.orig_video_lines; early_console = &early_vga_console; } early_console_initialized = 1; diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 0f8c78dcd38..cf6ab147a2a 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c @@ -94,7 +94,7 @@ void __init x86_64_start_kernel(char * real_mode_data) s = strstr(saved_command_line, "earlyprintk="); if (s != NULL) setup_early_printk(s); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA s = strstr(saved_command_line, "numa="); if (s != NULL) numa_setup(s+5); diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 7873d9ba881..19eafa0aa95 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -157,14 +157,13 @@ static unsigned int startup_8259A_irq(unsigned int irq) } static struct hw_interrupt_type i8259A_irq_type = { - "XT-PIC", - startup_8259A_irq, - shutdown_8259A_irq, - enable_8259A_irq, - disable_8259A_irq, - mask_and_ack_8259A, - end_8259A_irq, - NULL + .typename = "XT-PIC", + .startup = startup_8259A_irq, + .shutdown = shutdown_8259A_irq, + .enable = enable_8259A_irq, + .disable = disable_8259A_irq, + .ack = mask_and_ack_8259A, + .end = end_8259A_irq, }; /* diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index f77f8a0ff18..4e680f87a75 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -27,6 +27,8 @@ * <prasanna@in.ibm.com> adapted for x86_64 * 2005-Mar Roland McGrath <roland@redhat.com> * Fixed to handle %rip-relative addressing mode correctly. + * 2005-May Rusty Lynch <rusty.lynch@intel.com> + * Added function return probes functionality */ #include <linux/config.h> @@ -37,18 +39,16 @@ #include <linux/slab.h> #include <linux/preempt.h> #include <linux/moduleloader.h> - +#include <asm/cacheflush.h> #include <asm/pgtable.h> #include <asm/kdebug.h> static DECLARE_MUTEX(kprobe_mutex); -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - static struct kprobe *current_kprobe; static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags; +static struct kprobe *kprobe_prev; +static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; static struct pt_regs jprobe_saved_regs; static long *jprobe_saved_rsp; static kprobe_opcode_t *get_insn_slot(void); @@ -214,6 +214,21 @@ void arch_copy_kprobe(struct kprobe *p) BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ *ripdisp = disp; } + p->opcode = *p->addr; +} + +void arch_arm_kprobe(struct kprobe *p) +{ + *p->addr = BREAKPOINT_INSTRUCTION; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); +} + +void arch_disarm_kprobe(struct kprobe *p) +{ + *p->addr = p->opcode; + flush_icache_range((unsigned long) p->addr, + (unsigned long) p->addr + sizeof(kprobe_opcode_t)); } void arch_remove_kprobe(struct kprobe *p) @@ -223,10 +238,29 @@ void arch_remove_kprobe(struct kprobe *p) down(&kprobe_mutex); } -static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +static inline void save_previous_kprobe(void) { - *p->addr = p->opcode; - regs->rip = (unsigned long)p->addr; + kprobe_prev = current_kprobe; + kprobe_status_prev = kprobe_status; + kprobe_old_rflags_prev = kprobe_old_rflags; + kprobe_saved_rflags_prev = kprobe_saved_rflags; +} + +static inline void restore_previous_kprobe(void) +{ + current_kprobe = kprobe_prev; + kprobe_status = kprobe_status_prev; + kprobe_old_rflags = kprobe_old_rflags_prev; + kprobe_saved_rflags = kprobe_saved_rflags_prev; +} + +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + current_kprobe = p; + kprobe_saved_rflags = kprobe_old_rflags + = (regs->eflags & (TF_MASK | IF_MASK)); + if (is_IF_modifier(p->ainsn.insn)) + kprobe_saved_rflags &= ~IF_MASK; } static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -240,6 +274,50 @@ static void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->rip = (unsigned long)p->ainsn.insn; } +struct task_struct *arch_get_kprobe_task(void *ptr) +{ + return ((struct thread_info *) (((unsigned long) ptr) & + (~(THREAD_SIZE -1))))->task; +} + +void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) +{ + unsigned long *sara = (unsigned long *)regs->rsp; + struct kretprobe_instance *ri; + static void *orig_ret_addr; + + /* + * Save the return address when the return probe hits + * the first time, and use it to populate the (krprobe + * instance)->ret_addr for subsequent return probes at + * the same addrress since stack address would have + * the kretprobe_trampoline by then. + */ + if (((void*) *sara) != kretprobe_trampoline) + orig_ret_addr = (void*) *sara; + + if ((ri = get_free_rp_inst(rp)) != NULL) { + ri->rp = rp; + ri->stack_addr = sara; + ri->ret_addr = orig_ret_addr; + add_rp_inst(ri); + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; + } else { + rp->nmissed++; + } +} + +void arch_kprobe_flush_task(struct task_struct *tk) +{ + struct kretprobe_instance *ri; + while ((ri = get_rp_inst_tsk(tk)) != NULL) { + *((unsigned long *)(ri->stack_addr)) = + (unsigned long) ri->ret_addr; + recycle_rp_inst(ri); + } +} + /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled thorough out this function. @@ -264,9 +342,30 @@ int kprobe_handler(struct pt_regs *regs) regs->eflags |= kprobe_saved_rflags; unlock_kprobes(); goto no_kprobe; + } else if (kprobe_status == KPROBE_HIT_SSDONE) { + /* TODO: Provide re-entrancy from + * post_kprobes_handler() and avoid exception + * stack corruption while single-stepping on + * the instruction of the new probe. + */ + arch_disarm_kprobe(p); + regs->rip = (unsigned long)p->addr; + ret = 1; + } else { + /* We have reentered the kprobe_handler(), since + * another probe was hit while within the + * handler. We here save the original kprobe + * variables and just single step on instruction + * of the new probe without calling any user + * handlers. + */ + save_previous_kprobe(); + set_current_kprobe(p, regs); + p->nmissed++; + prepare_singlestep(p, regs); + kprobe_status = KPROBE_REENTER; + return 1; } - disarm_kprobe(p, regs); - ret = 1; } else { p = current_kprobe; if (p->break_handler && p->break_handler(p, regs)) { @@ -296,11 +395,7 @@ int kprobe_handler(struct pt_regs *regs) } kprobe_status = KPROBE_HIT_ACTIVE; - current_kprobe = p; - kprobe_saved_rflags = kprobe_old_rflags - = (regs->eflags & (TF_MASK | IF_MASK)); - if (is_IF_modifier(p->ainsn.insn)) - kprobe_saved_rflags &= ~IF_MASK; + set_current_kprobe(p, regs); if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -317,6 +412,55 @@ no_kprobe: } /* + * For function-return probes, init_kprobes() establishes a probepoint + * here. When a retprobed function returns, this probe is hit and + * trampoline_probe_handler() runs, calling the kretprobe's handler. + */ + void kretprobe_trampoline_holder(void) + { + asm volatile ( ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" + "nop\n"); + } + +/* + * Called when we hit the probe point at kretprobe_trampoline + */ +int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct task_struct *tsk; + struct kretprobe_instance *ri; + struct hlist_head *head; + struct hlist_node *node; + unsigned long *sara = (unsigned long *)regs->rsp - 1; + + tsk = arch_get_kprobe_task(sara); + head = kretprobe_inst_table_head(tsk); + + hlist_for_each_entry(ri, node, head, hlist) { + if (ri->stack_addr == sara && ri->rp) { + if (ri->rp->handler) + ri->rp->handler(ri, regs); + } + } + return 0; +} + +void trampoline_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + struct kretprobe_instance *ri; + /* RA already popped */ + unsigned long *sara = ((unsigned long *)regs->rsp) - 1; + + while ((ri = get_rp_inst(sara))) { + regs->rip = (unsigned long)ri->ret_addr; + recycle_rp_inst(ri); + } + regs->eflags &= ~TF_MASK; +} + +/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int 3" * instruction. To avoid the SMP problems that can occur when we @@ -401,13 +545,23 @@ int post_kprobe_handler(struct pt_regs *regs) if (!kprobe_running()) return 0; - if (current_kprobe->post_handler) + if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { + kprobe_status = KPROBE_HIT_SSDONE; current_kprobe->post_handler(current_kprobe, regs, 0); + } - resume_execution(current_kprobe, regs); + if (current_kprobe->post_handler != trampoline_post_handler) + resume_execution(current_kprobe, regs); regs->eflags |= kprobe_saved_rflags; - unlock_kprobes(); + /* Restore the original saved kprobes variables and continue. */ + if (kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(); + goto out; + } else { + unlock_kprobes(); + } +out: preempt_enable_no_resched(); /* diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 61a63be6b29..9c5aa2a790c 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -23,6 +23,7 @@ #include <linux/kernel_stat.h> #include <linux/mc146818rtc.h> #include <linux/acpi.h> +#include <linux/module.h> #include <asm/smp.h> #include <asm/mtrr.h> @@ -45,7 +46,8 @@ int acpi_found_madt; int apic_version [MAX_APICS]; unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; -cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL }; +unsigned char pci_bus_to_node [256]; +EXPORT_SYMBOL(pci_bus_to_node); static int mp_current_pci_id = 0; /* I/O APIC entries */ @@ -904,11 +906,20 @@ void __init mp_config_acpi_legacy_irqs (void) return; } +#define MAX_GSI_NUM 4096 + int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) { int ioapic = -1; int ioapic_pin = 0; int idx, bit = 0; + static int pci_irq = 16; + /* + * Mapping between Global System Interrupts, which + * represent all possible interrupts, to the IRQs + * assigned to actual devices. + */ + static int gsi_to_irq[MAX_GSI_NUM]; if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) return gsi; @@ -943,11 +954,21 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); - return gsi; + return gsi_to_irq[gsi]; } mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit); + if (edge_level) { + /* + * For PCI devices assign IRQs in order, avoiding gaps + * due to unused I/O APIC pins. + */ + int irq = gsi; + gsi = pci_irq++; + gsi_to_irq[irq] = gsi; + } + io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, edge_level == ACPI_EDGE_SENSITIVE ? 0 : 1, active_high_low == ACPI_ACTIVE_HIGH ? 0 : 1); diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index dce8bab4306..e59d1f9d616 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -34,6 +34,7 @@ #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/random.h> +#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -293,6 +294,14 @@ void exit_thread(void) { struct task_struct *me = current; struct thread_struct *t = &me->thread; + + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(me); + if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); @@ -312,6 +321,13 @@ void flush_thread(void) struct task_struct *tsk = current; struct thread_info *t = current_thread_info(); + /* + * Remove function-return probe instances associated with this task + * and put them back on the free list. Do not insert an exit probe for + * this function, it will be disabled by kprobe_flush_task if you do. + */ + kprobe_flush_task(tsk); + if (t->flags & _TIF_ABI_PENDING) t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 99f038ede23..000015dd5a8 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -40,6 +40,8 @@ #include <linux/acpi.h> #include <linux/kallsyms.h> #include <linux/edd.h> +#include <linux/mmzone.h> + #include <asm/mtrr.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -348,7 +350,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) if (!memcmp(from, "mem=", 4)) parse_memopt(from+4, &from); -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA if (!memcmp(from, "numa=", 5)) numa_setup(from+5); #endif @@ -377,17 +379,20 @@ static __init void parse_cmdline_early (char ** cmdline_p) *cmdline_p = command_line; } -#ifndef CONFIG_DISCONTIGMEM -static void __init contig_initmem_init(void) +#ifndef CONFIG_NUMA +static void __init +contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) { - unsigned long bootmap_size, bootmap; - bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; - bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); - if (bootmap == -1L) - panic("Cannot find bootmem map of size %ld\n",bootmap_size); - bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); - e820_bootmem_free(&contig_page_data, 0, end_pfn << PAGE_SHIFT); - reserve_bootmem(bootmap, bootmap_size); + unsigned long bootmap_size, bootmap; + + memory_present(0, start_pfn, end_pfn); + bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; + bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); + if (bootmap == -1L) + panic("Cannot find bootmem map of size %ld\n",bootmap_size); + bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); + e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT); + reserve_bootmem(bootmap, bootmap_size); } #endif @@ -554,10 +559,10 @@ void __init setup_arch(char **cmdline_p) acpi_numa_init(); #endif -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA numa_initmem_init(0, end_pfn); #else - contig_initmem_init(); + contig_initmem_init(0, end_pfn); #endif /* Reserve direct mapping */ @@ -618,6 +623,8 @@ void __init setup_arch(char **cmdline_p) } } #endif + + sparse_init(); paging_init(); check_ioapic(); diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 429c0269dc4..98b7ba95d58 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -34,9 +34,9 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -void ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs); -void ia32_setup_frame(int sig, struct k_sigaction *ka, +int ia32_setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs); asmlinkage long @@ -238,7 +238,7 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) return (void __user *)round_down(rsp - size, 16); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs * regs) { struct rt_sigframe __user *frame; @@ -327,20 +327,23 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->rip, frame->pretcode); #endif - return; + return 1; give_sigsegv: force_sigsegv(sig, current); + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) { + int ret; + #ifdef DEBUG_SIG printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig, @@ -384,20 +387,22 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, #ifdef CONFIG_IA32_EMULATION if (test_thread_flag(TIF_IA32)) { if (ka->sa.sa_flags & SA_SIGINFO) - ia32_setup_rt_frame(sig, ka, info, oldset, regs); + ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); else - ia32_setup_frame(sig, ka, oldset, regs); + ret = ia32_setup_frame(sig, ka, oldset, regs); } else #endif - setup_rt_frame(sig, ka, info, oldset, regs); + ret = setup_rt_frame(sig, ka, info, oldset, regs); - if (!(ka->sa.sa_flags & SA_NODEFER)) { + if (ret && !(ka->sa.sa_flags & SA_NODEFER)) { spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); sigaddset(¤t->blocked,sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); } + + return ret; } /* @@ -417,7 +422,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * kernel mode. Just return without doing anything * if so. */ - if ((regs->cs & 3) != 3) + if (!user_mode(regs)) return 1; if (try_to_freeze(0)) @@ -434,11 +439,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * inside the kernel. */ if (current->thread.debugreg7) - asm volatile("movq %0,%%db7" : : "r" (current->thread.debugreg7)); + set_debugreg(current->thread.debugreg7, 7); /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, oldset, regs); - return 1; + return handle_signal(signr, &info, &ka, oldset, regs); } no_signal: diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index fb8c809b4cd..66bf6ddeb0c 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -64,6 +64,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ +static int hpet_use_timer; unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -105,7 +106,9 @@ static inline unsigned int do_gettimeoffset_tsc(void) static inline unsigned int do_gettimeoffset_hpet(void) { - return ((hpet_readl(HPET_COUNTER) - vxtime.last) * vxtime.quot) >> 32; + /* cap counter read to one tick to avoid inconsistencies */ + unsigned long counter = hpet_readl(HPET_COUNTER) - vxtime.last; + return (min(counter,hpet_tick) * vxtime.quot) >> 32; } unsigned int (*do_gettimeoffset)(void) = do_gettimeoffset_tsc; @@ -301,7 +304,7 @@ unsigned long long monotonic_clock(void) last_offset = vxtime.last; base = monotonic_base; - this_offset = hpet_readl(HPET_T0_CMP) - hpet_tick; + this_offset = hpet_readl(HPET_COUNTER); } while (read_seqretry(&xtime_lock, seq)); offset = (this_offset - last_offset); @@ -377,7 +380,14 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) write_seqlock(&xtime_lock); - if (vxtime.hpet_address) { + if (vxtime.hpet_address) + offset = hpet_readl(HPET_COUNTER); + + if (hpet_use_timer) { + /* if we're using the hpet timer functionality, + * we can more accurately know the counter value + * when the timer interrupt occured. + */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; delay = hpet_readl(HPET_COUNTER) - offset; } else { @@ -803,17 +813,18 @@ static int hpet_timer_stop_set_go(unsigned long tick) * Set up timer 0, as periodic with first interrupt to happen at hpet_tick, * and period also hpet_tick. */ - - hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | + if (hpet_use_timer) { + hpet_writel(HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT, HPET_T0_CFG); - hpet_writel(hpet_tick, HPET_T0_CMP); - hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ - + hpet_writel(hpet_tick, HPET_T0_CMP); + hpet_writel(hpet_tick, HPET_T0_CMP); /* AK: why twice? */ + cfg |= HPET_CFG_LEGACY; + } /* * Go! */ - cfg |= HPET_CFG_ENABLE | HPET_CFG_LEGACY; + cfg |= HPET_CFG_ENABLE; hpet_writel(cfg, HPET_CFG); return 0; @@ -834,8 +845,7 @@ static int hpet_init(void) id = hpet_readl(HPET_ID); - if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER) || - !(id & HPET_ID_LEGSUP)) + if (!(id & HPET_ID_VENDOR) || !(id & HPET_ID_NUMBER)) return -1; hpet_period = hpet_readl(HPET_PERIOD); @@ -845,6 +855,8 @@ static int hpet_init(void) hpet_tick = (1000000000L * (USEC_PER_SEC / HZ) + hpet_period / 2) / hpet_period; + hpet_use_timer = (id & HPET_ID_LEGSUP); + return hpet_timer_stop_set_go(hpet_tick); } @@ -901,9 +913,11 @@ void __init time_init(void) set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); - if (!hpet_init()) { + if (!hpet_init()) vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; + + if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER @@ -968,7 +982,7 @@ void __init time_init_gtod(void) if (unsynchronized_tsc()) notsc = 1; if (vxtime.hpet_address && notsc) { - timetype = "HPET"; + timetype = hpet_use_timer ? "HPET" : "PIT/HPET"; vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; vxtime.mode = VXTIME_HPET; do_gettimeoffset = do_gettimeoffset_hpet; @@ -983,7 +997,7 @@ void __init time_init_gtod(void) printk(KERN_INFO "Disabling vsyscall due to use of PM timer\n"); #endif } else { - timetype = vxtime.hpet_address ? "HPET/TSC" : "PIT/TSC"; + timetype = hpet_use_timer ? "HPET/TSC" : "PIT/TSC"; vxtime.mode = VXTIME_TSC; } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 3dfec8fdabc..121646fc43f 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -274,7 +274,7 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = (regs->cs & 3) == 0; + int in_kernel = !user_mode(regs); unsigned long rsp; const int cpu = safe_smp_processor_id(); struct task_struct *cur = cpu_pda[cpu].pcurrent; @@ -318,7 +318,7 @@ void handle_BUG(struct pt_regs *regs) struct bug_frame f; char tmp; - if (regs->cs & 3) + if (user_mode(regs)) return; if (__copy_from_user(&f, (struct bug_frame *) regs->rip, sizeof(struct bug_frame))) @@ -437,7 +437,7 @@ static void do_trap(int trapnr, int signr, char *str, } #endif - if ((regs->cs & 3) != 0) { + if (user_mode(regs)) { struct task_struct *tsk = current; if (exception_trace && unhandled_signal(tsk, signr)) @@ -522,7 +522,7 @@ asmlinkage void do_general_protection(struct pt_regs * regs, long error_code) } #endif - if ((regs->cs & 3)!=0) { + if (user_mode(regs)) { struct task_struct *tsk = current; if (exception_trace && unhandled_signal(tsk, SIGSEGV)) @@ -638,7 +638,7 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) if (eregs == (struct pt_regs *)eregs->rsp) ; /* Exception from user space */ - else if (eregs->cs & 3) + else if (user_mode(eregs)) regs = ((struct pt_regs *)current->thread.rsp0) - 1; /* Exception from kernel and interrupts are enabled. Move to kernel process stack. */ @@ -669,7 +669,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) } #endif - asm("movq %%db6,%0" : "=r" (condition)); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, SIGTRAP) == NOTIFY_STOP) @@ -697,7 +697,7 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) * allowing programs to debug themselves without the ptrace() * interface. */ - if ((regs->cs & 3) == 0) + if (!user_mode(regs)) goto clear_TF_reenable; /* * Was the TF flag set by a debugger? If so, clear it now, @@ -715,13 +715,13 @@ asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code) info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_BRKPT; - if ((regs->cs & 3) == 0) + if (!user_mode(regs)) goto clear_dr7; info.si_addr = (void __user *)regs->rip; force_sig_info(SIGTRAP, &info, tsk); clear_dr7: - asm volatile("movq %0,%%db7"::"r"(0UL)); + set_debugreg(0UL, 7); return; clear_TF_reenable: @@ -756,7 +756,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) unsigned short cwd, swd; conditional_sti(regs); - if ((regs->cs & 3) == 0 && + if (!user_mode(regs) && kernel_math_error(regs, "kernel x87 math error")) return; @@ -822,7 +822,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) unsigned short mxcsr; conditional_sti(regs); - if ((regs->cs & 3) == 0 && + if (!user_mode(regs) && kernel_math_error(regs, "kernel simd math error")) return; diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index aed61a668a1..33a873a3c22 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/delay.h> #include <asm/delay.h> +#include <asm/msr.h> #ifdef CONFIG_SMP #include <asm/smp.h> @@ -19,6 +20,12 @@ int x86_udelay_tsc = 0; /* Delay via TSC */ +int read_current_timer(unsigned long *timer_value) +{ + rdtscll(*timer_value); + return 0; +} + void __delay(unsigned long loops) { unsigned bclock, now; diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index 66c354ad80c..1d232a87f11 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile @@ -4,7 +4,7 @@ obj-y := init.o fault.o ioremap.o extable.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_DISCONTIGMEM) += numa.o +obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_K8_NUMA) += k8topology.o obj-$(CONFIG_ACPI_NUMA) += srat.o diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 57d3ab15a5c..2f187986f94 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -74,7 +74,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, instr = (unsigned char *)convert_rip_to_linear(current, regs); max_instr = instr + 15; - if ((regs->cs & 3) != 0 && instr >= (unsigned char *)TASK_SIZE) + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) return 0; while (scan_more && instr < max_instr) { @@ -106,7 +106,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, /* Could check the LDT for lm, but for now it's good enough to assume that long mode only uses well known segments or kernel. */ - scan_more = ((regs->cs & 3) == 0) || (regs->cs == __USER_CS); + scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); break; case 0x60: diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index dbe53b4c7e6..72e4b364ed7 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -318,7 +318,7 @@ void zap_low_mappings(void) flush_tlb_all(); } -#ifndef CONFIG_DISCONTIGMEM +#ifndef CONFIG_NUMA void __init paging_init(void) { { @@ -427,13 +427,16 @@ void __init mem_init(void) reservedpages = 0; /* this will put all low memory onto the freelists */ -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA totalram_pages += numa_free_all_bootmem(); tmp = 0; /* should count reserved pages here for all nodes */ #else + +#ifdef CONFIG_FLATMEM max_mapnr = end_pfn; if (!mem_map) BUG(); +#endif totalram_pages += free_all_bootmem(); @@ -515,7 +518,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) void __init reserve_bootmem_generic(unsigned long phys, unsigned len) { /* Should check here against the e820 map to avoid double free */ -#ifdef CONFIG_DISCONTIGMEM +#ifdef CONFIG_NUMA int nid = phys_to_nid(phys); reserve_bootmem_node(NODE_DATA(nid), phys, len); #else diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 58aac23760e..6972df480d2 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -178,7 +178,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l if (phys_addr >= ISA_START_ADDRESS && last_addr < ISA_END_ADDRESS) return (__force void __iomem *)phys_to_virt(phys_addr); -#ifndef CONFIG_DISCONTIGMEM +#ifdef CONFIG_FLATMEM /* * Don't allow anybody to remap normal RAM that we're using.. */ diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index fd9f25d7a6c..84cde796ecb 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -66,6 +66,13 @@ int __init compute_hash_shift(struct node *nodes, int numnodes) return -1; } +#ifdef CONFIG_SPARSEMEM +int early_pfn_to_nid(unsigned long pfn) +{ + return phys_to_nid(pfn << PAGE_SHIFT); +} +#endif + /* Initialize bootmem allocator for a node */ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) { @@ -80,6 +87,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en start_pfn = start >> PAGE_SHIFT; end_pfn = end >> PAGE_SHIFT; + memory_present(nodeid, start_pfn, end_pfn); nodedata_phys = find_e820_area(start, end, pgdat_size); if (nodedata_phys == -1L) panic("Cannot find memory pgdat in node %d\n", nodeid); diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index 62349c78db5..7e7d0c2a002 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c @@ -53,25 +53,11 @@ fill_mp_bus_to_cpumask(void) for (j = SECONDARY_LDT_BUS_NUMBER(ldtbus); j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus); j++) - pci_bus_to_cpumask[j] = - node_to_cpumask(NODE_ID(nid)); + pci_bus_to_node[j] = NODE_ID(nid); } } } - /* quick sanity check */ - printed = 0; - for (i = 0; i < 256; i++) { - if (cpus_empty(pci_bus_to_cpumask[i])) { - pci_bus_to_cpumask[i] = CPU_MASK_ALL; - if (printed) - continue; - printk(KERN_ERR - "k8-bus.c: some busses have empty cpu mask\n"); - printed = 1; - } - } - return 0; } |