From dae2cdf3e25bf1c63f8012ae19c133e3b3b187ca Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Mon, 28 Jul 2008 21:14:50 -0400 Subject: parisc: add arch/parisc/kernel/.gitignore --- arch/parisc/kernel/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/parisc/kernel/.gitignore (limited to 'arch') diff --git a/arch/parisc/kernel/.gitignore b/arch/parisc/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/parisc/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds -- cgit v1.2.3 From 6c86cb8237bf08443806089130dc108051569a93 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Mon, 28 Jul 2008 22:52:18 -0400 Subject: parisc: move pdc_result to real2.S --- arch/parisc/kernel/asm-offsets.c | 3 +++ arch/parisc/kernel/firmware.c | 4 ++-- arch/parisc/kernel/real2.S | 12 ++++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 3efc0b73e4f..699cf8ef211 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -290,5 +290,8 @@ int main(void) DEFINE(EXCDATA_IP, offsetof(struct exception_data, fault_ip)); DEFINE(EXCDATA_SPACE, offsetof(struct exception_data, fault_space)); DEFINE(EXCDATA_ADDR, offsetof(struct exception_data, fault_addr)); + BLANK(); + DEFINE(ASM_PDC_RESULT_SIZE, NUM_PDC_RESULT * sizeof(unsigned long)); + BLANK(); return 0; } diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 7177a6cd1b7..99a9e505edf 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -71,8 +71,8 @@ #include /* for boot_cpu_data */ static DEFINE_SPINLOCK(pdc_lock); -static unsigned long pdc_result[32] __attribute__ ((aligned (8))); -static unsigned long pdc_result2[32] __attribute__ ((aligned (8))); +extern unsigned long pdc_result[NUM_PDC_RESULT]; +extern unsigned long pdc_result2[NUM_PDC_RESULT]; #ifdef CONFIG_64BIT #define WIDE_FIRMWARE 0x1 diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S index 7a92695d95a..5f3d3a1f903 100644 --- a/arch/parisc/kernel/real2.S +++ b/arch/parisc/kernel/real2.S @@ -8,12 +8,24 @@ * */ +#include #include #include +#include #include + .section .bss + + .export pdc_result + .export pdc_result2 + .align 8 +pdc_result: + .block ASM_PDC_RESULT_SIZE +pdc_result2: + .block ASM_PDC_RESULT_SIZE + .export real_stack .export real32_stack .export real64_stack -- cgit v1.2.3 From deae26bf6a10e47983606f5df080b91e97650ead Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Mon, 28 Jul 2008 23:02:13 -0400 Subject: parisc: move include/asm-parisc to arch/parisc/include/asm --- arch/parisc/include/asm/Kbuild | 3 + arch/parisc/include/asm/a.out.h | 20 + arch/parisc/include/asm/agp.h | 24 + arch/parisc/include/asm/asmregs.h | 183 +++++ arch/parisc/include/asm/assembly.h | 519 ++++++++++++++ arch/parisc/include/asm/atomic.h | 348 ++++++++++ arch/parisc/include/asm/auxvec.h | 4 + arch/parisc/include/asm/bitops.h | 239 +++++++ arch/parisc/include/asm/bug.h | 92 +++ arch/parisc/include/asm/bugs.h | 19 + arch/parisc/include/asm/byteorder.h | 82 +++ arch/parisc/include/asm/cache.h | 60 ++ arch/parisc/include/asm/cacheflush.h | 121 ++++ arch/parisc/include/asm/checksum.h | 210 ++++++ arch/parisc/include/asm/compat.h | 165 +++++ arch/parisc/include/asm/compat_rt_sigframe.h | 50 ++ arch/parisc/include/asm/compat_signal.h | 2 + arch/parisc/include/asm/compat_ucontext.h | 17 + arch/parisc/include/asm/cputime.h | 6 + arch/parisc/include/asm/current.h | 15 + arch/parisc/include/asm/delay.h | 43 ++ arch/parisc/include/asm/device.h | 7 + arch/parisc/include/asm/div64.h | 1 + arch/parisc/include/asm/dma-mapping.h | 253 +++++++ arch/parisc/include/asm/dma.h | 186 +++++ arch/parisc/include/asm/eisa_bus.h | 23 + arch/parisc/include/asm/eisa_eeprom.h | 153 +++++ arch/parisc/include/asm/elf.h | 342 +++++++++ arch/parisc/include/asm/emergency-restart.h | 6 + arch/parisc/include/asm/errno.h | 124 ++++ arch/parisc/include/asm/fb.h | 19 + arch/parisc/include/asm/fcntl.h | 39 ++ arch/parisc/include/asm/fixmap.h | 30 + arch/parisc/include/asm/floppy.h | 271 ++++++++ arch/parisc/include/asm/futex.h | 77 +++ arch/parisc/include/asm/grfioctl.h | 113 +++ arch/parisc/include/asm/hardirq.h | 29 + arch/parisc/include/asm/hardware.h | 127 ++++ arch/parisc/include/asm/hw_irq.h | 8 + arch/parisc/include/asm/ide.h | 61 ++ arch/parisc/include/asm/io.h | 293 ++++++++ arch/parisc/include/asm/ioctl.h | 44 ++ arch/parisc/include/asm/ioctls.h | 90 +++ arch/parisc/include/asm/ipcbuf.h | 27 + arch/parisc/include/asm/irq.h | 57 ++ arch/parisc/include/asm/irq_regs.h | 1 + arch/parisc/include/asm/kdebug.h | 1 + arch/parisc/include/asm/kmap_types.h | 30 + arch/parisc/include/asm/led.h | 42 ++ arch/parisc/include/asm/linkage.h | 31 + arch/parisc/include/asm/local.h | 1 + arch/parisc/include/asm/machdep.h | 16 + arch/parisc/include/asm/mc146818rtc.h | 9 + arch/parisc/include/asm/mckinley.h | 9 + arch/parisc/include/asm/mman.h | 61 ++ arch/parisc/include/asm/mmu.h | 7 + arch/parisc/include/asm/mmu_context.h | 75 ++ arch/parisc/include/asm/mmzone.h | 73 ++ arch/parisc/include/asm/module.h | 32 + arch/parisc/include/asm/msgbuf.h | 37 + arch/parisc/include/asm/mutex.h | 9 + arch/parisc/include/asm/page.h | 173 +++++ arch/parisc/include/asm/param.h | 22 + arch/parisc/include/asm/parisc-device.h | 64 ++ arch/parisc/include/asm/parport.h | 18 + arch/parisc/include/asm/pci.h | 294 ++++++++ arch/parisc/include/asm/pdc.h | 760 ++++++++++++++++++++ arch/parisc/include/asm/pdc_chassis.h | 381 ++++++++++ arch/parisc/include/asm/pdcpat.h | 308 +++++++++ arch/parisc/include/asm/percpu.h | 7 + arch/parisc/include/asm/perf.h | 74 ++ arch/parisc/include/asm/pgalloc.h | 149 ++++ arch/parisc/include/asm/pgtable.h | 508 ++++++++++++++ arch/parisc/include/asm/poll.h | 1 + arch/parisc/include/asm/posix_types.h | 129 ++++ arch/parisc/include/asm/prefetch.h | 39 ++ arch/parisc/include/asm/processor.h | 357 ++++++++++ arch/parisc/include/asm/psw.h | 62 ++ arch/parisc/include/asm/ptrace.h | 58 ++ arch/parisc/include/asm/real.h | 5 + arch/parisc/include/asm/resource.h | 7 + arch/parisc/include/asm/ropes.h | 322 +++++++++ arch/parisc/include/asm/rt_sigframe.h | 23 + arch/parisc/include/asm/rtc.h | 131 ++++ arch/parisc/include/asm/runway.h | 12 + arch/parisc/include/asm/scatterlist.h | 27 + arch/parisc/include/asm/sections.h | 12 + arch/parisc/include/asm/segment.h | 6 + arch/parisc/include/asm/sembuf.h | 29 + arch/parisc/include/asm/serial.h | 10 + arch/parisc/include/asm/setup.h | 6 + arch/parisc/include/asm/shmbuf.h | 58 ++ arch/parisc/include/asm/shmparam.h | 8 + arch/parisc/include/asm/sigcontext.h | 20 + arch/parisc/include/asm/siginfo.h | 14 + arch/parisc/include/asm/signal.h | 153 +++++ arch/parisc/include/asm/smp.h | 68 ++ arch/parisc/include/asm/socket.h | 62 ++ arch/parisc/include/asm/sockios.h | 13 + arch/parisc/include/asm/spinlock.h | 194 ++++++ arch/parisc/include/asm/spinlock_types.h | 21 + arch/parisc/include/asm/stat.h | 100 +++ arch/parisc/include/asm/statfs.h | 58 ++ arch/parisc/include/asm/string.h | 10 + arch/parisc/include/asm/superio.h | 85 +++ arch/parisc/include/asm/system.h | 182 +++++ arch/parisc/include/asm/termbits.h | 200 ++++++ arch/parisc/include/asm/termios.h | 90 +++ arch/parisc/include/asm/thread_info.h | 74 ++ arch/parisc/include/asm/timex.h | 20 + arch/parisc/include/asm/tlb.h | 27 + arch/parisc/include/asm/tlbflush.h | 80 +++ arch/parisc/include/asm/topology.h | 6 + arch/parisc/include/asm/traps.h | 16 + arch/parisc/include/asm/types.h | 36 + arch/parisc/include/asm/uaccess.h | 244 +++++++ arch/parisc/include/asm/ucontext.h | 12 + arch/parisc/include/asm/unaligned.h | 16 + arch/parisc/include/asm/unistd.h | 991 +++++++++++++++++++++++++++ arch/parisc/include/asm/unwind.h | 77 +++ arch/parisc/include/asm/user.h | 5 + arch/parisc/include/asm/vga.h | 6 + arch/parisc/include/asm/xor.h | 1 + 123 files changed, 11977 insertions(+) create mode 100644 arch/parisc/include/asm/Kbuild create mode 100644 arch/parisc/include/asm/a.out.h create mode 100644 arch/parisc/include/asm/agp.h create mode 100644 arch/parisc/include/asm/asmregs.h create mode 100644 arch/parisc/include/asm/assembly.h create mode 100644 arch/parisc/include/asm/atomic.h create mode 100644 arch/parisc/include/asm/auxvec.h create mode 100644 arch/parisc/include/asm/bitops.h create mode 100644 arch/parisc/include/asm/bug.h create mode 100644 arch/parisc/include/asm/bugs.h create mode 100644 arch/parisc/include/asm/byteorder.h create mode 100644 arch/parisc/include/asm/cache.h create mode 100644 arch/parisc/include/asm/cacheflush.h create mode 100644 arch/parisc/include/asm/checksum.h create mode 100644 arch/parisc/include/asm/compat.h create mode 100644 arch/parisc/include/asm/compat_rt_sigframe.h create mode 100644 arch/parisc/include/asm/compat_signal.h create mode 100644 arch/parisc/include/asm/compat_ucontext.h create mode 100644 arch/parisc/include/asm/cputime.h create mode 100644 arch/parisc/include/asm/current.h create mode 100644 arch/parisc/include/asm/delay.h create mode 100644 arch/parisc/include/asm/device.h create mode 100644 arch/parisc/include/asm/div64.h create mode 100644 arch/parisc/include/asm/dma-mapping.h create mode 100644 arch/parisc/include/asm/dma.h create mode 100644 arch/parisc/include/asm/eisa_bus.h create mode 100644 arch/parisc/include/asm/eisa_eeprom.h create mode 100644 arch/parisc/include/asm/elf.h create mode 100644 arch/parisc/include/asm/emergency-restart.h create mode 100644 arch/parisc/include/asm/errno.h create mode 100644 arch/parisc/include/asm/fb.h create mode 100644 arch/parisc/include/asm/fcntl.h create mode 100644 arch/parisc/include/asm/fixmap.h create mode 100644 arch/parisc/include/asm/floppy.h create mode 100644 arch/parisc/include/asm/futex.h create mode 100644 arch/parisc/include/asm/grfioctl.h create mode 100644 arch/parisc/include/asm/hardirq.h create mode 100644 arch/parisc/include/asm/hardware.h create mode 100644 arch/parisc/include/asm/hw_irq.h create mode 100644 arch/parisc/include/asm/ide.h create mode 100644 arch/parisc/include/asm/io.h create mode 100644 arch/parisc/include/asm/ioctl.h create mode 100644 arch/parisc/include/asm/ioctls.h create mode 100644 arch/parisc/include/asm/ipcbuf.h create mode 100644 arch/parisc/include/asm/irq.h create mode 100644 arch/parisc/include/asm/irq_regs.h create mode 100644 arch/parisc/include/asm/kdebug.h create mode 100644 arch/parisc/include/asm/kmap_types.h create mode 100644 arch/parisc/include/asm/led.h create mode 100644 arch/parisc/include/asm/linkage.h create mode 100644 arch/parisc/include/asm/local.h create mode 100644 arch/parisc/include/asm/machdep.h create mode 100644 arch/parisc/include/asm/mc146818rtc.h create mode 100644 arch/parisc/include/asm/mckinley.h create mode 100644 arch/parisc/include/asm/mman.h create mode 100644 arch/parisc/include/asm/mmu.h create mode 100644 arch/parisc/include/asm/mmu_context.h create mode 100644 arch/parisc/include/asm/mmzone.h create mode 100644 arch/parisc/include/asm/module.h create mode 100644 arch/parisc/include/asm/msgbuf.h create mode 100644 arch/parisc/include/asm/mutex.h create mode 100644 arch/parisc/include/asm/page.h create mode 100644 arch/parisc/include/asm/param.h create mode 100644 arch/parisc/include/asm/parisc-device.h create mode 100644 arch/parisc/include/asm/parport.h create mode 100644 arch/parisc/include/asm/pci.h create mode 100644 arch/parisc/include/asm/pdc.h create mode 100644 arch/parisc/include/asm/pdc_chassis.h create mode 100644 arch/parisc/include/asm/pdcpat.h create mode 100644 arch/parisc/include/asm/percpu.h create mode 100644 arch/parisc/include/asm/perf.h create mode 100644 arch/parisc/include/asm/pgalloc.h create mode 100644 arch/parisc/include/asm/pgtable.h create mode 100644 arch/parisc/include/asm/poll.h create mode 100644 arch/parisc/include/asm/posix_types.h create mode 100644 arch/parisc/include/asm/prefetch.h create mode 100644 arch/parisc/include/asm/processor.h create mode 100644 arch/parisc/include/asm/psw.h create mode 100644 arch/parisc/include/asm/ptrace.h create mode 100644 arch/parisc/include/asm/real.h create mode 100644 arch/parisc/include/asm/resource.h create mode 100644 arch/parisc/include/asm/ropes.h create mode 100644 arch/parisc/include/asm/rt_sigframe.h create mode 100644 arch/parisc/include/asm/rtc.h create mode 100644 arch/parisc/include/asm/runway.h create mode 100644 arch/parisc/include/asm/scatterlist.h create mode 100644 arch/parisc/include/asm/sections.h create mode 100644 arch/parisc/include/asm/segment.h create mode 100644 arch/parisc/include/asm/sembuf.h create mode 100644 arch/parisc/include/asm/serial.h create mode 100644 arch/parisc/include/asm/setup.h create mode 100644 arch/parisc/include/asm/shmbuf.h create mode 100644 arch/parisc/include/asm/shmparam.h create mode 100644 arch/parisc/include/asm/sigcontext.h create mode 100644 arch/parisc/include/asm/siginfo.h create mode 100644 arch/parisc/include/asm/signal.h create mode 100644 arch/parisc/include/asm/smp.h create mode 100644 arch/parisc/include/asm/socket.h create mode 100644 arch/parisc/include/asm/sockios.h create mode 100644 arch/parisc/include/asm/spinlock.h create mode 100644 arch/parisc/include/asm/spinlock_types.h create mode 100644 arch/parisc/include/asm/stat.h create mode 100644 arch/parisc/include/asm/statfs.h create mode 100644 arch/parisc/include/asm/string.h create mode 100644 arch/parisc/include/asm/superio.h create mode 100644 arch/parisc/include/asm/system.h create mode 100644 arch/parisc/include/asm/termbits.h create mode 100644 arch/parisc/include/asm/termios.h create mode 100644 arch/parisc/include/asm/thread_info.h create mode 100644 arch/parisc/include/asm/timex.h create mode 100644 arch/parisc/include/asm/tlb.h create mode 100644 arch/parisc/include/asm/tlbflush.h create mode 100644 arch/parisc/include/asm/topology.h create mode 100644 arch/parisc/include/asm/traps.h create mode 100644 arch/parisc/include/asm/types.h create mode 100644 arch/parisc/include/asm/uaccess.h create mode 100644 arch/parisc/include/asm/ucontext.h create mode 100644 arch/parisc/include/asm/unaligned.h create mode 100644 arch/parisc/include/asm/unistd.h create mode 100644 arch/parisc/include/asm/unwind.h create mode 100644 arch/parisc/include/asm/user.h create mode 100644 arch/parisc/include/asm/vga.h create mode 100644 arch/parisc/include/asm/xor.h (limited to 'arch') diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild new file mode 100644 index 00000000000..f88b252e419 --- /dev/null +++ b/arch/parisc/include/asm/Kbuild @@ -0,0 +1,3 @@ +include include/asm-generic/Kbuild.asm + +unifdef-y += pdc.h diff --git a/arch/parisc/include/asm/a.out.h b/arch/parisc/include/asm/a.out.h new file mode 100644 index 00000000000..eb04e34c5bb --- /dev/null +++ b/arch/parisc/include/asm/a.out.h @@ -0,0 +1,20 @@ +#ifndef __PARISC_A_OUT_H__ +#define __PARISC_A_OUT_H__ + +struct exec +{ + unsigned int a_info; /* Use macros N_MAGIC, etc for access */ + unsigned a_text; /* length of text, in bytes */ + unsigned a_data; /* length of data, in bytes */ + unsigned a_bss; /* length of uninitialized data area for file, in bytes */ + unsigned a_syms; /* length of symbol table data in file, in bytes */ + unsigned a_entry; /* start address */ + unsigned a_trsize; /* length of relocation info for text, in bytes */ + unsigned a_drsize; /* length of relocation info for data, in bytes */ +}; + +#define N_TRSIZE(a) ((a).a_trsize) +#define N_DRSIZE(a) ((a).a_drsize) +#define N_SYMSIZE(a) ((a).a_syms) + +#endif /* __A_OUT_GNU_H__ */ diff --git a/arch/parisc/include/asm/agp.h b/arch/parisc/include/asm/agp.h new file mode 100644 index 00000000000..9651660da63 --- /dev/null +++ b/arch/parisc/include/asm/agp.h @@ -0,0 +1,24 @@ +#ifndef _ASM_PARISC_AGP_H +#define _ASM_PARISC_AGP_H + +/* + * PARISC specific AGP definitions. + * Copyright (c) 2006 Kyle McMartin + * + */ + +#define map_page_into_agp(page) /* nothing */ +#define unmap_page_from_agp(page) /* nothing */ +#define flush_agp_cache() mb() + +/* Convert a physical address to an address suitable for the GART. */ +#define phys_to_gart(x) (x) +#define gart_to_phys(x) (x) + +/* GATT allocation. Returns/accepts GATT kernel virtual address. */ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif /* _ASM_PARISC_AGP_H */ diff --git a/arch/parisc/include/asm/asmregs.h b/arch/parisc/include/asm/asmregs.h new file mode 100644 index 00000000000..d93c646e188 --- /dev/null +++ b/arch/parisc/include/asm/asmregs.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 1999 Hewlett-Packard (Frank Rowand) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _PARISC_ASMREGS_H +#define _PARISC_ASMREGS_H + +;! General Registers + +rp: .reg %r2 +arg3: .reg %r23 +arg2: .reg %r24 +arg1: .reg %r25 +arg0: .reg %r26 +dp: .reg %r27 +ret0: .reg %r28 +ret1: .reg %r29 +sl: .reg %r29 +sp: .reg %r30 + +#if 0 +/* PA20_REVISIT */ +arg7: .reg r19 +arg6: .reg r20 +arg5: .reg r21 +arg4: .reg r22 +gp: .reg r27 +ap: .reg r29 +#endif + + +r0: .reg %r0 +r1: .reg %r1 +r2: .reg %r2 +r3: .reg %r3 +r4: .reg %r4 +r5: .reg %r5 +r6: .reg %r6 +r7: .reg %r7 +r8: .reg %r8 +r9: .reg %r9 +r10: .reg %r10 +r11: .reg %r11 +r12: .reg %r12 +r13: .reg %r13 +r14: .reg %r14 +r15: .reg %r15 +r16: .reg %r16 +r17: .reg %r17 +r18: .reg %r18 +r19: .reg %r19 +r20: .reg %r20 +r21: .reg %r21 +r22: .reg %r22 +r23: .reg %r23 +r24: .reg %r24 +r25: .reg %r25 +r26: .reg %r26 +r27: .reg %r27 +r28: .reg %r28 +r29: .reg %r29 +r30: .reg %r30 +r31: .reg %r31 + + +;! Space Registers + +sr0: .reg %sr0 +sr1: .reg %sr1 +sr2: .reg %sr2 +sr3: .reg %sr3 +sr4: .reg %sr4 +sr5: .reg %sr5 +sr6: .reg %sr6 +sr7: .reg %sr7 + + +;! Floating Point Registers + +fr0: .reg %fr0 +fr1: .reg %fr1 +fr2: .reg %fr2 +fr3: .reg %fr3 +fr4: .reg %fr4 +fr5: .reg %fr5 +fr6: .reg %fr6 +fr7: .reg %fr7 +fr8: .reg %fr8 +fr9: .reg %fr9 +fr10: .reg %fr10 +fr11: .reg %fr11 +fr12: .reg %fr12 +fr13: .reg %fr13 +fr14: .reg %fr14 +fr15: .reg %fr15 +fr16: .reg %fr16 +fr17: .reg %fr17 +fr18: .reg %fr18 +fr19: .reg %fr19 +fr20: .reg %fr20 +fr21: .reg %fr21 +fr22: .reg %fr22 +fr23: .reg %fr23 +fr24: .reg %fr24 +fr25: .reg %fr25 +fr26: .reg %fr26 +fr27: .reg %fr27 +fr28: .reg %fr28 +fr29: .reg %fr29 +fr30: .reg %fr30 +fr31: .reg %fr31 + + +;! Control Registers + +rctr: .reg %cr0 +pidr1: .reg %cr8 +pidr2: .reg %cr9 +ccr: .reg %cr10 +sar: .reg %cr11 +pidr3: .reg %cr12 +pidr4: .reg %cr13 +iva: .reg %cr14 +eiem: .reg %cr15 +itmr: .reg %cr16 +pcsq: .reg %cr17 +pcoq: .reg %cr18 +iir: .reg %cr19 +isr: .reg %cr20 +ior: .reg %cr21 +ipsw: .reg %cr22 +eirr: .reg %cr23 +tr0: .reg %cr24 +tr1: .reg %cr25 +tr2: .reg %cr26 +tr3: .reg %cr27 +tr4: .reg %cr28 +tr5: .reg %cr29 +tr6: .reg %cr30 +tr7: .reg %cr31 + + +cr0: .reg %cr0 +cr8: .reg %cr8 +cr9: .reg %cr9 +cr10: .reg %cr10 +cr11: .reg %cr11 +cr12: .reg %cr12 +cr13: .reg %cr13 +cr14: .reg %cr14 +cr15: .reg %cr15 +cr16: .reg %cr16 +cr17: .reg %cr17 +cr18: .reg %cr18 +cr19: .reg %cr19 +cr20: .reg %cr20 +cr21: .reg %cr21 +cr22: .reg %cr22 +cr23: .reg %cr23 +cr24: .reg %cr24 +cr25: .reg %cr25 +cr26: .reg %cr26 +cr27: .reg %cr27 +cr28: .reg %cr28 +cr29: .reg %cr29 +cr30: .reg %cr30 +cr31: .reg %cr31 + +#endif diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h new file mode 100644 index 00000000000..ffb208840ec --- /dev/null +++ b/arch/parisc/include/asm/assembly.h @@ -0,0 +1,519 @@ +/* + * Copyright (C) 1999 Hewlett-Packard (Frank Rowand) + * Copyright (C) 1999 Philipp Rumpf + * Copyright (C) 1999 SuSE GmbH + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef _PARISC_ASSEMBLY_H +#define _PARISC_ASSEMBLY_H + +#define CALLEE_FLOAT_FRAME_SIZE 80 + +#ifdef CONFIG_64BIT +#define LDREG ldd +#define STREG std +#define LDREGX ldd,s +#define LDREGM ldd,mb +#define STREGM std,ma +#define SHRREG shrd +#define SHLREG shld +#define ANDCM andcm,* +#define COND(x) * ## x +#define RP_OFFSET 16 +#define FRAME_SIZE 128 +#define CALLEE_REG_FRAME_SIZE 144 +#define ASM_ULONG_INSN .dword +#else /* CONFIG_64BIT */ +#define LDREG ldw +#define STREG stw +#define LDREGX ldwx,s +#define LDREGM ldwm +#define STREGM stwm +#define SHRREG shr +#define SHLREG shlw +#define ANDCM andcm +#define COND(x) x +#define RP_OFFSET 20 +#define FRAME_SIZE 64 +#define CALLEE_REG_FRAME_SIZE 128 +#define ASM_ULONG_INSN .word +#endif + +#define CALLEE_SAVE_FRAME_SIZE (CALLEE_REG_FRAME_SIZE + CALLEE_FLOAT_FRAME_SIZE) + +#ifdef CONFIG_PA20 +#define LDCW ldcw,co +#define BL b,l +# ifdef CONFIG_64BIT +# define LEVEL 2.0w +# else +# define LEVEL 2.0 +# endif +#else +#define LDCW ldcw +#define BL bl +#define LEVEL 1.1 +#endif + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_64BIT +/* the 64-bit pa gnu assembler unfortunately defaults to .level 1.1 or 2.0 so + * work around that for now... */ + .level 2.0w +#endif + +#include +#include + +#include + + sp = 30 + gp = 27 + ipsw = 22 + + /* + * We provide two versions of each macro to convert from physical + * to virtual and vice versa. The "_r1" versions take one argument + * register, but trashes r1 to do the conversion. The other + * version takes two arguments: a src and destination register. + * However, the source and destination registers can not be + * the same register. + */ + + .macro tophys grvirt, grphys + ldil L%(__PAGE_OFFSET), \grphys + sub \grvirt, \grphys, \grphys + .endm + + .macro tovirt grphys, grvirt + ldil L%(__PAGE_OFFSET), \grvirt + add \grphys, \grvirt, \grvirt + .endm + + .macro tophys_r1 gr + ldil L%(__PAGE_OFFSET), %r1 + sub \gr, %r1, \gr + .endm + + .macro tovirt_r1 gr + ldil L%(__PAGE_OFFSET), %r1 + add \gr, %r1, \gr + .endm + + .macro delay value + ldil L%\value, 1 + ldo R%\value(1), 1 + addib,UV,n -1,1,. + addib,NUV,n -1,1,.+8 + nop + .endm + + .macro debug value + .endm + + + /* Shift Left - note the r and t can NOT be the same! */ + .macro shl r, sa, t + dep,z \r, 31-\sa, 32-\sa, \t + .endm + + /* The PA 2.0 shift left */ + .macro shlw r, sa, t + depw,z \r, 31-\sa, 32-\sa, \t + .endm + + /* And the PA 2.0W shift left */ + .macro shld r, sa, t + depd,z \r, 63-\sa, 64-\sa, \t + .endm + + /* Shift Right - note the r and t can NOT be the same! */ + .macro shr r, sa, t + extru \r, 31-\sa, 32-\sa, \t + .endm + + /* pa20w version of shift right */ + .macro shrd r, sa, t + extrd,u \r, 63-\sa, 64-\sa, \t + .endm + + /* load 32-bit 'value' into 'reg' compensating for the ldil + * sign-extension when running in wide mode. + * WARNING!! neither 'value' nor 'reg' can be expressions + * containing '.'!!!! */ + .macro load32 value, reg + ldil L%\value, \reg + ldo R%\value(\reg), \reg + .endm + + .macro loadgp +#ifdef CONFIG_64BIT + ldil L%__gp, %r27 + ldo R%__gp(%r27), %r27 +#else + ldil L%$global$, %r27 + ldo R%$global$(%r27), %r27 +#endif + .endm + +#define SAVE_SP(r, where) mfsp r, %r1 ! STREG %r1, where +#define REST_SP(r, where) LDREG where, %r1 ! mtsp %r1, r +#define SAVE_CR(r, where) mfctl r, %r1 ! STREG %r1, where +#define REST_CR(r, where) LDREG where, %r1 ! mtctl %r1, r + + .macro save_general regs + STREG %r1, PT_GR1 (\regs) + STREG %r2, PT_GR2 (\regs) + STREG %r3, PT_GR3 (\regs) + STREG %r4, PT_GR4 (\regs) + STREG %r5, PT_GR5 (\regs) + STREG %r6, PT_GR6 (\regs) + STREG %r7, PT_GR7 (\regs) + STREG %r8, PT_GR8 (\regs) + STREG %r9, PT_GR9 (\regs) + STREG %r10, PT_GR10(\regs) + STREG %r11, PT_GR11(\regs) + STREG %r12, PT_GR12(\regs) + STREG %r13, PT_GR13(\regs) + STREG %r14, PT_GR14(\regs) + STREG %r15, PT_GR15(\regs) + STREG %r16, PT_GR16(\regs) + STREG %r17, PT_GR17(\regs) + STREG %r18, PT_GR18(\regs) + STREG %r19, PT_GR19(\regs) + STREG %r20, PT_GR20(\regs) + STREG %r21, PT_GR21(\regs) + STREG %r22, PT_GR22(\regs) + STREG %r23, PT_GR23(\regs) + STREG %r24, PT_GR24(\regs) + STREG %r25, PT_GR25(\regs) + /* r26 is saved in get_stack and used to preserve a value across virt_map */ + STREG %r27, PT_GR27(\regs) + STREG %r28, PT_GR28(\regs) + /* r29 is saved in get_stack and used to point to saved registers */ + /* r30 stack pointer saved in get_stack */ + STREG %r31, PT_GR31(\regs) + .endm + + .macro rest_general regs + /* r1 used as a temp in rest_stack and is restored there */ + LDREG PT_GR2 (\regs), %r2 + LDREG PT_GR3 (\regs), %r3 + LDREG PT_GR4 (\regs), %r4 + LDREG PT_GR5 (\regs), %r5 + LDREG PT_GR6 (\regs), %r6 + LDREG PT_GR7 (\regs), %r7 + LDREG PT_GR8 (\regs), %r8 + LDREG PT_GR9 (\regs), %r9 + LDREG PT_GR10(\regs), %r10 + LDREG PT_GR11(\regs), %r11 + LDREG PT_GR12(\regs), %r12 + LDREG PT_GR13(\regs), %r13 + LDREG PT_GR14(\regs), %r14 + LDREG PT_GR15(\regs), %r15 + LDREG PT_GR16(\regs), %r16 + LDREG PT_GR17(\regs), %r17 + LDREG PT_GR18(\regs), %r18 + LDREG PT_GR19(\regs), %r19 + LDREG PT_GR20(\regs), %r20 + LDREG PT_GR21(\regs), %r21 + LDREG PT_GR22(\regs), %r22 + LDREG PT_GR23(\regs), %r23 + LDREG PT_GR24(\regs), %r24 + LDREG PT_GR25(\regs), %r25 + LDREG PT_GR26(\regs), %r26 + LDREG PT_GR27(\regs), %r27 + LDREG PT_GR28(\regs), %r28 + /* r29 points to register save area, and is restored in rest_stack */ + /* r30 stack pointer restored in rest_stack */ + LDREG PT_GR31(\regs), %r31 + .endm + + .macro save_fp regs + fstd,ma %fr0, 8(\regs) + fstd,ma %fr1, 8(\regs) + fstd,ma %fr2, 8(\regs) + fstd,ma %fr3, 8(\regs) + fstd,ma %fr4, 8(\regs) + fstd,ma %fr5, 8(\regs) + fstd,ma %fr6, 8(\regs) + fstd,ma %fr7, 8(\regs) + fstd,ma %fr8, 8(\regs) + fstd,ma %fr9, 8(\regs) + fstd,ma %fr10, 8(\regs) + fstd,ma %fr11, 8(\regs) + fstd,ma %fr12, 8(\regs) + fstd,ma %fr13, 8(\regs) + fstd,ma %fr14, 8(\regs) + fstd,ma %fr15, 8(\regs) + fstd,ma %fr16, 8(\regs) + fstd,ma %fr17, 8(\regs) + fstd,ma %fr18, 8(\regs) + fstd,ma %fr19, 8(\regs) + fstd,ma %fr20, 8(\regs) + fstd,ma %fr21, 8(\regs) + fstd,ma %fr22, 8(\regs) + fstd,ma %fr23, 8(\regs) + fstd,ma %fr24, 8(\regs) + fstd,ma %fr25, 8(\regs) + fstd,ma %fr26, 8(\regs) + fstd,ma %fr27, 8(\regs) + fstd,ma %fr28, 8(\regs) + fstd,ma %fr29, 8(\regs) + fstd,ma %fr30, 8(\regs) + fstd %fr31, 0(\regs) + .endm + + .macro rest_fp regs + fldd 0(\regs), %fr31 + fldd,mb -8(\regs), %fr30 + fldd,mb -8(\regs), %fr29 + fldd,mb -8(\regs), %fr28 + fldd,mb -8(\regs), %fr27 + fldd,mb -8(\regs), %fr26 + fldd,mb -8(\regs), %fr25 + fldd,mb -8(\regs), %fr24 + fldd,mb -8(\regs), %fr23 + fldd,mb -8(\regs), %fr22 + fldd,mb -8(\regs), %fr21 + fldd,mb -8(\regs), %fr20 + fldd,mb -8(\regs), %fr19 + fldd,mb -8(\regs), %fr18 + fldd,mb -8(\regs), %fr17 + fldd,mb -8(\regs), %fr16 + fldd,mb -8(\regs), %fr15 + fldd,mb -8(\regs), %fr14 + fldd,mb -8(\regs), %fr13 + fldd,mb -8(\regs), %fr12 + fldd,mb -8(\regs), %fr11 + fldd,mb -8(\regs), %fr10 + fldd,mb -8(\regs), %fr9 + fldd,mb -8(\regs), %fr8 + fldd,mb -8(\regs), %fr7 + fldd,mb -8(\regs), %fr6 + fldd,mb -8(\regs), %fr5 + fldd,mb -8(\regs), %fr4 + fldd,mb -8(\regs), %fr3 + fldd,mb -8(\regs), %fr2 + fldd,mb -8(\regs), %fr1 + fldd,mb -8(\regs), %fr0 + .endm + + .macro callee_save_float + fstd,ma %fr12, 8(%r30) + fstd,ma %fr13, 8(%r30) + fstd,ma %fr14, 8(%r30) + fstd,ma %fr15, 8(%r30) + fstd,ma %fr16, 8(%r30) + fstd,ma %fr17, 8(%r30) + fstd,ma %fr18, 8(%r30) + fstd,ma %fr19, 8(%r30) + fstd,ma %fr20, 8(%r30) + fstd,ma %fr21, 8(%r30) + .endm + + .macro callee_rest_float + fldd,mb -8(%r30), %fr21 + fldd,mb -8(%r30), %fr20 + fldd,mb -8(%r30), %fr19 + fldd,mb -8(%r30), %fr18 + fldd,mb -8(%r30), %fr17 + fldd,mb -8(%r30), %fr16 + fldd,mb -8(%r30), %fr15 + fldd,mb -8(%r30), %fr14 + fldd,mb -8(%r30), %fr13 + fldd,mb -8(%r30), %fr12 + .endm + +#ifdef CONFIG_64BIT + .macro callee_save + std,ma %r3, CALLEE_REG_FRAME_SIZE(%r30) + mfctl %cr27, %r3 + std %r4, -136(%r30) + std %r5, -128(%r30) + std %r6, -120(%r30) + std %r7, -112(%r30) + std %r8, -104(%r30) + std %r9, -96(%r30) + std %r10, -88(%r30) + std %r11, -80(%r30) + std %r12, -72(%r30) + std %r13, -64(%r30) + std %r14, -56(%r30) + std %r15, -48(%r30) + std %r16, -40(%r30) + std %r17, -32(%r30) + std %r18, -24(%r30) + std %r3, -16(%r30) + .endm + + .macro callee_rest + ldd -16(%r30), %r3 + ldd -24(%r30), %r18 + ldd -32(%r30), %r17 + ldd -40(%r30), %r16 + ldd -48(%r30), %r15 + ldd -56(%r30), %r14 + ldd -64(%r30), %r13 + ldd -72(%r30), %r12 + ldd -80(%r30), %r11 + ldd -88(%r30), %r10 + ldd -96(%r30), %r9 + ldd -104(%r30), %r8 + ldd -112(%r30), %r7 + ldd -120(%r30), %r6 + ldd -128(%r30), %r5 + ldd -136(%r30), %r4 + mtctl %r3, %cr27 + ldd,mb -CALLEE_REG_FRAME_SIZE(%r30), %r3 + .endm + +#else /* ! CONFIG_64BIT */ + + .macro callee_save + stw,ma %r3, CALLEE_REG_FRAME_SIZE(%r30) + mfctl %cr27, %r3 + stw %r4, -124(%r30) + stw %r5, -120(%r30) + stw %r6, -116(%r30) + stw %r7, -112(%r30) + stw %r8, -108(%r30) + stw %r9, -104(%r30) + stw %r10, -100(%r30) + stw %r11, -96(%r30) + stw %r12, -92(%r30) + stw %r13, -88(%r30) + stw %r14, -84(%r30) + stw %r15, -80(%r30) + stw %r16, -76(%r30) + stw %r17, -72(%r30) + stw %r18, -68(%r30) + stw %r3, -64(%r30) + .endm + + .macro callee_rest + ldw -64(%r30), %r3 + ldw -68(%r30), %r18 + ldw -72(%r30), %r17 + ldw -76(%r30), %r16 + ldw -80(%r30), %r15 + ldw -84(%r30), %r14 + ldw -88(%r30), %r13 + ldw -92(%r30), %r12 + ldw -96(%r30), %r11 + ldw -100(%r30), %r10 + ldw -104(%r30), %r9 + ldw -108(%r30), %r8 + ldw -112(%r30), %r7 + ldw -116(%r30), %r6 + ldw -120(%r30), %r5 + ldw -124(%r30), %r4 + mtctl %r3, %cr27 + ldw,mb -CALLEE_REG_FRAME_SIZE(%r30), %r3 + .endm +#endif /* ! CONFIG_64BIT */ + + .macro save_specials regs + + SAVE_SP (%sr0, PT_SR0 (\regs)) + SAVE_SP (%sr1, PT_SR1 (\regs)) + SAVE_SP (%sr2, PT_SR2 (\regs)) + SAVE_SP (%sr3, PT_SR3 (\regs)) + SAVE_SP (%sr4, PT_SR4 (\regs)) + SAVE_SP (%sr5, PT_SR5 (\regs)) + SAVE_SP (%sr6, PT_SR6 (\regs)) + SAVE_SP (%sr7, PT_SR7 (\regs)) + + SAVE_CR (%cr17, PT_IASQ0(\regs)) + mtctl %r0, %cr17 + SAVE_CR (%cr17, PT_IASQ1(\regs)) + + SAVE_CR (%cr18, PT_IAOQ0(\regs)) + mtctl %r0, %cr18 + SAVE_CR (%cr18, PT_IAOQ1(\regs)) + +#ifdef CONFIG_64BIT + /* cr11 (sar) is a funny one. 5 bits on PA1.1 and 6 bit on PA2.0 + * For PA2.0 mtsar or mtctl always write 6 bits, but mfctl only + * reads 5 bits. Use mfctl,w to read all six bits. Otherwise + * we lose the 6th bit on a save/restore over interrupt. + */ + mfctl,w %cr11, %r1 + STREG %r1, PT_SAR (\regs) +#else + SAVE_CR (%cr11, PT_SAR (\regs)) +#endif + SAVE_CR (%cr19, PT_IIR (\regs)) + + /* + * Code immediately following this macro (in intr_save) relies + * on r8 containing ipsw. + */ + mfctl %cr22, %r8 + STREG %r8, PT_PSW(\regs) + .endm + + .macro rest_specials regs + + REST_SP (%sr0, PT_SR0 (\regs)) + REST_SP (%sr1, PT_SR1 (\regs)) + REST_SP (%sr2, PT_SR2 (\regs)) + REST_SP (%sr3, PT_SR3 (\regs)) + REST_SP (%sr4, PT_SR4 (\regs)) + REST_SP (%sr5, PT_SR5 (\regs)) + REST_SP (%sr6, PT_SR6 (\regs)) + REST_SP (%sr7, PT_SR7 (\regs)) + + REST_CR (%cr17, PT_IASQ0(\regs)) + REST_CR (%cr17, PT_IASQ1(\regs)) + + REST_CR (%cr18, PT_IAOQ0(\regs)) + REST_CR (%cr18, PT_IAOQ1(\regs)) + + REST_CR (%cr11, PT_SAR (\regs)) + + REST_CR (%cr22, PT_PSW (\regs)) + .endm + + + /* First step to create a "relied upon translation" + * See PA 2.0 Arch. page F-4 and F-5. + * + * The ssm was originally necessary due to a "PCxT bug". + * But someone decided it needed to be added to the architecture + * and this "feature" went into rev3 of PA-RISC 1.1 Arch Manual. + * It's been carried forward into PA 2.0 Arch as well. :^( + * + * "ssm 0,%r0" is a NOP with side effects (prefetch barrier). + * rsm/ssm prevents the ifetch unit from speculatively fetching + * instructions past this line in the code stream. + * PA 2.0 processor will single step all insn in the same QUAD (4 insn). + */ + .macro pcxt_ssm_bug + rsm PSW_SM_I,%r0 + nop /* 1 */ + nop /* 2 */ + nop /* 3 */ + nop /* 4 */ + nop /* 5 */ + nop /* 6 */ + nop /* 7 */ + .endm + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h new file mode 100644 index 00000000000..57fcc4a5ebb --- /dev/null +++ b/arch/parisc/include/asm/atomic.h @@ -0,0 +1,348 @@ +/* Copyright (C) 2000 Philipp Rumpf + * Copyright (C) 2006 Kyle McMartin + */ + +#ifndef _ASM_PARISC_ATOMIC_H_ +#define _ASM_PARISC_ATOMIC_H_ + +#include +#include + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + * + * And probably incredibly slow on parisc. OTOH, we don't + * have to write any serious assembly. prumpf + */ + +#ifdef CONFIG_SMP +#include +#include /* we use L1_CACHE_BYTES */ + +/* Use an array of spinlocks for our atomic_ts. + * Hash function to index into a different SPINLOCK. + * Since "a" is usually an address, use one spinlock per cacheline. + */ +# define ATOMIC_HASH_SIZE 4 +# define ATOMIC_HASH(a) (&(__atomic_hash[ (((unsigned long) a)/L1_CACHE_BYTES) & (ATOMIC_HASH_SIZE-1) ])) + +extern raw_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned; + +/* Can't use raw_spin_lock_irq because of #include problems, so + * this is the substitute */ +#define _atomic_spin_lock_irqsave(l,f) do { \ + raw_spinlock_t *s = ATOMIC_HASH(l); \ + local_irq_save(f); \ + __raw_spin_lock(s); \ +} while(0) + +#define _atomic_spin_unlock_irqrestore(l,f) do { \ + raw_spinlock_t *s = ATOMIC_HASH(l); \ + __raw_spin_unlock(s); \ + local_irq_restore(f); \ +} while(0) + + +#else +# define _atomic_spin_lock_irqsave(l,f) do { local_irq_save(f); } while (0) +# define _atomic_spin_unlock_irqrestore(l,f) do { local_irq_restore(f); } while (0) +#endif + +/* This should get optimized out since it's never called. +** Or get a link error if xchg is used "wrong". +*/ +extern void __xchg_called_with_bad_pointer(void); + + +/* __xchg32/64 defined in arch/parisc/lib/bitops.c */ +extern unsigned long __xchg8(char, char *); +extern unsigned long __xchg32(int, int *); +#ifdef CONFIG_64BIT +extern unsigned long __xchg64(unsigned long, unsigned long *); +#endif + +/* optimizer better get rid of switch since size is a constant */ +static __inline__ unsigned long +__xchg(unsigned long x, __volatile__ void * ptr, int size) +{ + switch(size) { +#ifdef CONFIG_64BIT + case 8: return __xchg64(x,(unsigned long *) ptr); +#endif + case 4: return __xchg32((int) x, (int *) ptr); + case 1: return __xchg8((char) x, (char *) ptr); + } + __xchg_called_with_bad_pointer(); + return x; +} + + +/* +** REVISIT - Abandoned use of LDCW in xchg() for now: +** o need to test sizeof(*ptr) to avoid clearing adjacent bytes +** o and while we are at it, could CONFIG_64BIT code use LDCD too? +** +** if (__builtin_constant_p(x) && (x == NULL)) +** if (((unsigned long)p & 0xf) == 0) +** return __ldcw(p); +*/ +#define xchg(ptr,x) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) + + +#define __HAVE_ARCH_CMPXCHG 1 + +/* bug catcher for when unsupported size is used - won't link */ +extern void __cmpxchg_called_with_bad_pointer(void); + +/* __cmpxchg_u32/u64 defined in arch/parisc/lib/bitops.c */ +extern unsigned long __cmpxchg_u32(volatile unsigned int *m, unsigned int old, unsigned int new_); +extern unsigned long __cmpxchg_u64(volatile unsigned long *ptr, unsigned long old, unsigned long new_); + +/* don't worry...optimizer will get rid of most of this */ +static __inline__ unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size) +{ + switch(size) { +#ifdef CONFIG_64BIT + case 8: return __cmpxchg_u64((unsigned long *)ptr, old, new_); +#endif + case 4: return __cmpxchg_u32((unsigned int *)ptr, (unsigned int) old, (unsigned int) new_); + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + +#include + +static inline unsigned long __cmpxchg_local(volatile void *ptr, + unsigned long old, + unsigned long new_, int size) +{ + switch (size) { +#ifdef CONFIG_64BIT + case 8: return __cmpxchg_u64((unsigned long *)ptr, old, new_); +#endif + case 4: return __cmpxchg_u32(ptr, old, new_); + default: + return __cmpxchg_local_generic(ptr, old, new_, size); + } +} + +/* + * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make + * them available. + */ +#define cmpxchg_local(ptr, o, n) \ + ((__typeof__(*(ptr)))__cmpxchg_local((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr)))) +#ifdef CONFIG_64BIT +#define cmpxchg64_local(ptr, o, n) \ + ({ \ + BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ + cmpxchg_local((ptr), (o), (n)); \ + }) +#else +#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n)) +#endif + +/* Note that we need not lock read accesses - aligned word writes/reads + * are atomic, so a reader never sees unconsistent values. + * + * Cache-line alignment would conflict with, for example, linux/module.h + */ + +typedef struct { volatile int counter; } atomic_t; + +/* It's possible to reduce all atomic operations to either + * __atomic_add_return, atomic_set and atomic_read (the latter + * is there only for consistency). + */ + +static __inline__ int __atomic_add_return(int i, atomic_t *v) +{ + int ret; + unsigned long flags; + _atomic_spin_lock_irqsave(v, flags); + + ret = (v->counter += i); + + _atomic_spin_unlock_irqrestore(v, flags); + return ret; +} + +static __inline__ void atomic_set(atomic_t *v, int i) +{ + unsigned long flags; + _atomic_spin_lock_irqsave(v, flags); + + v->counter = i; + + _atomic_spin_unlock_irqrestore(v, flags); +} + +static __inline__ int atomic_read(const atomic_t *v) +{ + return v->counter; +} + +/* exported interface */ +#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) +#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic_add_unless - add unless the number is a given value + * @v: pointer of type atomic_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +#define atomic_add(i,v) ((void)(__atomic_add_return( ((int)i),(v)))) +#define atomic_sub(i,v) ((void)(__atomic_add_return(-((int)i),(v)))) +#define atomic_inc(v) ((void)(__atomic_add_return( 1,(v)))) +#define atomic_dec(v) ((void)(__atomic_add_return( -1,(v)))) + +#define atomic_add_return(i,v) (__atomic_add_return( ((int)i),(v))) +#define atomic_sub_return(i,v) (__atomic_add_return(-((int)i),(v))) +#define atomic_inc_return(v) (__atomic_add_return( 1,(v))) +#define atomic_dec_return(v) (__atomic_add_return( -1,(v))) + +#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) + +/* + * atomic_inc_and_test - increment and test + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1 + * and returns true if the result is zero, or false for all + * other cases. + */ +#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) + +#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) + +#define atomic_sub_and_test(i,v) (atomic_sub_return((i),(v)) == 0) + +#define ATOMIC_INIT(i) ((atomic_t) { (i) }) + +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() + +#ifdef CONFIG_64BIT + +typedef struct { volatile s64 counter; } atomic64_t; + +#define ATOMIC64_INIT(i) ((atomic64_t) { (i) }) + +static __inline__ int +__atomic64_add_return(s64 i, atomic64_t *v) +{ + int ret; + unsigned long flags; + _atomic_spin_lock_irqsave(v, flags); + + ret = (v->counter += i); + + _atomic_spin_unlock_irqrestore(v, flags); + return ret; +} + +static __inline__ void +atomic64_set(atomic64_t *v, s64 i) +{ + unsigned long flags; + _atomic_spin_lock_irqsave(v, flags); + + v->counter = i; + + _atomic_spin_unlock_irqrestore(v, flags); +} + +static __inline__ s64 +atomic64_read(const atomic64_t *v) +{ + return v->counter; +} + +#define atomic64_add(i,v) ((void)(__atomic64_add_return( ((s64)i),(v)))) +#define atomic64_sub(i,v) ((void)(__atomic64_add_return(-((s64)i),(v)))) +#define atomic64_inc(v) ((void)(__atomic64_add_return( 1,(v)))) +#define atomic64_dec(v) ((void)(__atomic64_add_return( -1,(v)))) + +#define atomic64_add_return(i,v) (__atomic64_add_return( ((s64)i),(v))) +#define atomic64_sub_return(i,v) (__atomic64_add_return(-((s64)i),(v))) +#define atomic64_inc_return(v) (__atomic64_add_return( 1,(v))) +#define atomic64_dec_return(v) (__atomic64_add_return( -1,(v))) + +#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) + +#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) +#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) +#define atomic64_sub_and_test(i,v) (atomic64_sub_return((i),(v)) == 0) + +/* exported interface */ +#define atomic64_cmpxchg(v, o, n) \ + ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) +#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) + +/** + * atomic64_add_unless - add unless the number is a given value + * @v: pointer of type atomic64_t + * @a: the amount to add to v... + * @u: ...unless v is equal to u. + * + * Atomically adds @a to @v, so long as it was not @u. + * Returns non-zero if @v was not @u, and zero otherwise. + */ +static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) +{ + long c, old; + c = atomic64_read(v); + for (;;) { + if (unlikely(c == (u))) + break; + old = atomic64_cmpxchg((v), c, c + (a)); + if (likely(old == c)) + break; + c = old; + } + return c != (u); +} + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) + +#endif /* CONFIG_64BIT */ + +#include + +#endif /* _ASM_PARISC_ATOMIC_H_ */ diff --git a/arch/parisc/include/asm/auxvec.h b/arch/parisc/include/asm/auxvec.h new file mode 100644 index 00000000000..9c3ac4b89dc --- /dev/null +++ b/arch/parisc/include/asm/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMPARISC_AUXVEC_H +#define __ASMPARISC_AUXVEC_H + +#endif diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h new file mode 100644 index 00000000000..7a6ea10bd23 --- /dev/null +++ b/arch/parisc/include/asm/bitops.h @@ -0,0 +1,239 @@ +#ifndef _PARISC_BITOPS_H +#define _PARISC_BITOPS_H + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include +#include /* for BITS_PER_LONG/SHIFT_PER_LONG */ +#include +#include + +/* + * HP-PARISC specific bit operations + * for a detailed description of the functions please refer + * to include/asm-i386/bitops.h or kerneldoc + */ + +#define CHOP_SHIFTCOUNT(x) (((unsigned long) (x)) & (BITS_PER_LONG - 1)) + + +#define smp_mb__before_clear_bit() smp_mb() +#define smp_mb__after_clear_bit() smp_mb() + +/* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion + * on use of volatile and __*_bit() (set/clear/change): + * *_bit() want use of volatile. + * __*_bit() are "relaxed" and don't use spinlock or volatile. + */ + +static __inline__ void set_bit(int nr, volatile unsigned long * addr) +{ + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr); + unsigned long flags; + + addr += (nr >> SHIFT_PER_LONG); + _atomic_spin_lock_irqsave(addr, flags); + *addr |= mask; + _atomic_spin_unlock_irqrestore(addr, flags); +} + +static __inline__ void clear_bit(int nr, volatile unsigned long * addr) +{ + unsigned long mask = ~(1UL << CHOP_SHIFTCOUNT(nr)); + unsigned long flags; + + addr += (nr >> SHIFT_PER_LONG); + _atomic_spin_lock_irqsave(addr, flags); + *addr &= mask; + _atomic_spin_unlock_irqrestore(addr, flags); +} + +static __inline__ void change_bit(int nr, volatile unsigned long * addr) +{ + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr); + unsigned long flags; + + addr += (nr >> SHIFT_PER_LONG); + _atomic_spin_lock_irqsave(addr, flags); + *addr ^= mask; + _atomic_spin_unlock_irqrestore(addr, flags); +} + +static __inline__ int test_and_set_bit(int nr, volatile unsigned long * addr) +{ + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr); + unsigned long old; + unsigned long flags; + int set; + + addr += (nr >> SHIFT_PER_LONG); + _atomic_spin_lock_irqsave(addr, flags); + old = *addr; + set = (old & mask) ? 1 : 0; + if (!set) + *addr = old | mask; + _atomic_spin_unlock_irqrestore(addr, flags); + + return set; +} + +static __inline__ int test_and_clear_bit(int nr, volatile unsigned long * addr) +{ + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr); + unsigned long old; + unsigned long flags; + int set; + + addr += (nr >> SHIFT_PER_LONG); + _atomic_spin_lock_irqsave(addr, flags); + old = *addr; + set = (old & mask) ? 1 : 0; + if (set) + *addr = old & ~mask; + _atomic_spin_unlock_irqrestore(addr, flags); + + return set; +} + +static __inline__ int test_and_change_bit(int nr, volatile unsigned long * addr) +{ + unsigned long mask = 1UL << CHOP_SHIFTCOUNT(nr); + unsigned long oldbit; + unsigned long flags; + + addr += (nr >> SHIFT_PER_LONG); + _atomic_spin_lock_irqsave(addr, flags); + oldbit = *addr; + *addr = oldbit ^ mask; + _atomic_spin_unlock_irqrestore(addr, flags); + + return (oldbit & mask) ? 1 : 0; +} + +#include + +#ifdef __KERNEL__ + +/** + * __ffs - find first bit in word. returns 0 to "BITS_PER_LONG-1". + * @word: The word to search + * + * __ffs() return is undefined if no bit is set. + * + * 32-bit fast __ffs by LaMont Jones "lamont At hp com". + * 64-bit enhancement by Grant Grundler "grundler At parisc-linux org". + * (with help from willy/jejb to get the semantics right) + * + * This algorithm avoids branches by making use of nullification. + * One side effect of "extr" instructions is it sets PSW[N] bit. + * How PSW[N] (nullify next insn) gets set is determined by the + * "condition" field (eg "<>" or "TR" below) in the extr* insn. + * Only the 1st and one of either the 2cd or 3rd insn will get executed. + * Each set of 3 insn will get executed in 2 cycles on PA8x00 vs 16 or so + * cycles for each mispredicted branch. + */ + +static __inline__ unsigned long __ffs(unsigned long x) +{ + unsigned long ret; + + __asm__( +#ifdef CONFIG_64BIT + " ldi 63,%1\n" + " extrd,u,*<> %0,63,32,%%r0\n" + " extrd,u,*TR %0,31,32,%0\n" /* move top 32-bits down */ + " addi -32,%1,%1\n" +#else + " ldi 31,%1\n" +#endif + " extru,<> %0,31,16,%%r0\n" + " extru,TR %0,15,16,%0\n" /* xxxx0000 -> 0000xxxx */ + " addi -16,%1,%1\n" + " extru,<> %0,31,8,%%r0\n" + " extru,TR %0,23,8,%0\n" /* 0000xx00 -> 000000xx */ + " addi -8,%1,%1\n" + " extru,<> %0,31,4,%%r0\n" + " extru,TR %0,27,4,%0\n" /* 000000x0 -> 0000000x */ + " addi -4,%1,%1\n" + " extru,<> %0,31,2,%%r0\n" + " extru,TR %0,29,2,%0\n" /* 0000000y, 1100b -> 0011b */ + " addi -2,%1,%1\n" + " extru,= %0,31,1,%%r0\n" /* check last bit */ + " addi -1,%1,%1\n" + : "+r" (x), "=r" (ret) ); + return ret; +} + +#include + +/* + * ffs: find first bit set. returns 1 to BITS_PER_LONG or 0 (if none set) + * This is defined the same way as the libc and compiler builtin + * ffs routines, therefore differs in spirit from the above ffz (man ffs). + */ +static __inline__ int ffs(int x) +{ + return x ? (__ffs((unsigned long)x) + 1) : 0; +} + +/* + * fls: find last (most significant) bit set. + * fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. + */ + +static __inline__ int fls(int x) +{ + int ret; + if (!x) + return 0; + + __asm__( + " ldi 1,%1\n" + " extru,<> %0,15,16,%%r0\n" + " zdep,TR %0,15,16,%0\n" /* xxxx0000 */ + " addi 16,%1,%1\n" + " extru,<> %0,7,8,%%r0\n" + " zdep,TR %0,23,24,%0\n" /* xx000000 */ + " addi 8,%1,%1\n" + " extru,<> %0,3,4,%%r0\n" + " zdep,TR %0,27,28,%0\n" /* x0000000 */ + " addi 4,%1,%1\n" + " extru,<> %0,1,2,%%r0\n" + " zdep,TR %0,29,30,%0\n" /* y0000000 (y&3 = 0) */ + " addi 2,%1,%1\n" + " extru,= %0,0,1,%%r0\n" + " addi 1,%1,%1\n" /* if y & 8, add 1 */ + : "+r" (x), "=r" (ret) ); + + return ret; +} + +#include +#include +#include +#include +#include + +#endif /* __KERNEL__ */ + +#include + +#ifdef __KERNEL__ + +#include + +/* '3' is bits per byte */ +#define LE_BYTE_ADDR ((sizeof(unsigned long) - 1) << 3) + +#define ext2_set_bit_atomic(l,nr,addr) \ + test_and_set_bit((nr) ^ LE_BYTE_ADDR, (unsigned long *)addr) +#define ext2_clear_bit_atomic(l,nr,addr) \ + test_and_clear_bit( (nr) ^ LE_BYTE_ADDR, (unsigned long *)addr) + +#endif /* __KERNEL__ */ + +#include + +#endif /* _PARISC_BITOPS_H */ diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h new file mode 100644 index 00000000000..8cfc553fc83 --- /dev/null +++ b/arch/parisc/include/asm/bug.h @@ -0,0 +1,92 @@ +#ifndef _PARISC_BUG_H +#define _PARISC_BUG_H + +/* + * Tell the user there is some problem. + * The offending file and line are encoded in the __bug_table section. + */ + +#ifdef CONFIG_BUG +#define HAVE_ARCH_BUG +#define HAVE_ARCH_WARN_ON + +/* the break instruction is used as BUG() marker. */ +#define PARISC_BUG_BREAK_ASM "break 0x1f, 0x1fff" +#define PARISC_BUG_BREAK_INSN 0x03ffe01f /* PARISC_BUG_BREAK_ASM */ + +#if defined(CONFIG_64BIT) +#define ASM_WORD_INSN ".dword\t" +#else +#define ASM_WORD_INSN ".word\t" +#endif + +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define BUG() \ + do { \ + asm volatile("\n" \ + "1:\t" PARISC_BUG_BREAK_ASM "\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.short %c1, %c2\n" \ + "\t.org 2b+%c3\n" \ + "\t.popsection" \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (0), "i" (sizeof(struct bug_entry)) ); \ + for(;;) ; \ + } while(0) + +#else +#define BUG() \ + do { \ + asm volatile(PARISC_BUG_BREAK_ASM : : ); \ + for(;;) ; \ + } while(0) +#endif + +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define __WARN() \ + do { \ + asm volatile("\n" \ + "1:\t" PARISC_BUG_BREAK_ASM "\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.short %c1, %c2\n" \ + "\t.org 2b+%c3\n" \ + "\t.popsection" \ + : : "i" (__FILE__), "i" (__LINE__), \ + "i" (BUGFLAG_WARNING), \ + "i" (sizeof(struct bug_entry)) ); \ + } while(0) +#else +#define __WARN() \ + do { \ + asm volatile("\n" \ + "1:\t" PARISC_BUG_BREAK_ASM "\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "2:\t" ASM_WORD_INSN "1b\n" \ + "\t.short %c0\n" \ + "\t.org 2b+%c1\n" \ + "\t.popsection" \ + : : "i" (BUGFLAG_WARNING), \ + "i" (sizeof(struct bug_entry)) ); \ + } while(0) +#endif + + +#define WARN_ON(x) ({ \ + int __ret_warn_on = !!(x); \ + if (__builtin_constant_p(__ret_warn_on)) { \ + if (__ret_warn_on) \ + __WARN(); \ + } else { \ + if (unlikely(__ret_warn_on)) \ + __WARN(); \ + } \ + unlikely(__ret_warn_on); \ +}) + +#endif + +#include +#endif + diff --git a/arch/parisc/include/asm/bugs.h b/arch/parisc/include/asm/bugs.h new file mode 100644 index 00000000000..9e6284342a5 --- /dev/null +++ b/arch/parisc/include/asm/bugs.h @@ -0,0 +1,19 @@ +/* + * include/asm-parisc/bugs.h + * + * Copyright (C) 1999 Mike Shaver + */ + +/* + * This is included by init/main.c to check for architecture-dependent bugs. + * + * Needs: + * void check_bugs(void); + */ + +#include + +static inline void check_bugs(void) +{ +// identify_cpu(&boot_cpu_data); +} diff --git a/arch/parisc/include/asm/byteorder.h b/arch/parisc/include/asm/byteorder.h new file mode 100644 index 00000000000..db148313de5 --- /dev/null +++ b/arch/parisc/include/asm/byteorder.h @@ -0,0 +1,82 @@ +#ifndef _PARISC_BYTEORDER_H +#define _PARISC_BYTEORDER_H + +#include +#include + +#ifdef __GNUC__ + +static __inline__ __attribute_const__ __u16 ___arch__swab16(__u16 x) +{ + __asm__("dep %0, 15, 8, %0\n\t" /* deposit 00ab -> 0bab */ + "shd %%r0, %0, 8, %0" /* shift 000000ab -> 00ba */ + : "=r" (x) + : "0" (x)); + return x; +} + +static __inline__ __attribute_const__ __u32 ___arch__swab24(__u32 x) +{ + __asm__("shd %0, %0, 8, %0\n\t" /* shift xabcxabc -> cxab */ + "dep %0, 15, 8, %0\n\t" /* deposit cxab -> cbab */ + "shd %%r0, %0, 8, %0" /* shift 0000cbab -> 0cba */ + : "=r" (x) + : "0" (x)); + return x; +} + +static __inline__ __attribute_const__ __u32 ___arch__swab32(__u32 x) +{ + unsigned int temp; + __asm__("shd %0, %0, 16, %1\n\t" /* shift abcdabcd -> cdab */ + "dep %1, 15, 8, %1\n\t" /* deposit cdab -> cbab */ + "shd %0, %1, 8, %0" /* shift abcdcbab -> dcba */ + : "=r" (x), "=&r" (temp) + : "0" (x)); + return x; +} + + +#if BITS_PER_LONG > 32 +/* +** From "PA-RISC 2.0 Architecture", HP Professional Books. +** See Appendix I page 8 , "Endian Byte Swapping". +** +** Pretty cool algorithm: (* == zero'd bits) +** PERMH 01234567 -> 67452301 into %0 +** HSHL 67452301 -> 7*5*3*1* into %1 +** HSHR 67452301 -> *6*4*2*0 into %0 +** OR %0 | %1 -> 76543210 into %0 (all done!) +*/ +static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x) { + __u64 temp; + __asm__("permh,3210 %0, %0\n\t" + "hshl %0, 8, %1\n\t" + "hshr,u %0, 8, %0\n\t" + "or %1, %0, %0" + : "=r" (x), "=&r" (temp) + : "0" (x)); + return x; +} +#define __arch__swab64(x) ___arch__swab64(x) +#define __BYTEORDER_HAS_U64__ +#elif !defined(__STRICT_ANSI__) +static __inline__ __attribute_const__ __u64 ___arch__swab64(__u64 x) +{ + __u32 t1 = ___arch__swab32((__u32) x); + __u32 t2 = ___arch__swab32((__u32) (x >> 32)); + return (((__u64) t1 << 32) | t2); +} +#define __arch__swab64(x) ___arch__swab64(x) +#define __BYTEORDER_HAS_U64__ +#endif + +#define __arch__swab16(x) ___arch__swab16(x) +#define __arch__swab24(x) ___arch__swab24(x) +#define __arch__swab32(x) ___arch__swab32(x) + +#endif /* __GNUC__ */ + +#include + +#endif /* _PARISC_BYTEORDER_H */ diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h new file mode 100644 index 00000000000..32c2cca7434 --- /dev/null +++ b/arch/parisc/include/asm/cache.h @@ -0,0 +1,60 @@ +/* + * include/asm-parisc/cache.h + */ + +#ifndef __ARCH_PARISC_CACHE_H +#define __ARCH_PARISC_CACHE_H + + +/* + * PA 2.0 processors have 64-byte cachelines; PA 1.1 processors have + * 32-byte cachelines. The default configuration is not for SMP anyway, + * so if you're building for SMP, you should select the appropriate + * processor type. There is a potential livelock danger when running + * a machine with this value set too small, but it's more probable you'll + * just ruin performance. + */ +#ifdef CONFIG_PA20 +#define L1_CACHE_BYTES 64 +#define L1_CACHE_SHIFT 6 +#else +#define L1_CACHE_BYTES 32 +#define L1_CACHE_SHIFT 5 +#endif + +#ifndef __ASSEMBLY__ + +#define L1_CACHE_ALIGN(x) (((x)+(L1_CACHE_BYTES-1))&~(L1_CACHE_BYTES-1)) + +#define SMP_CACHE_BYTES L1_CACHE_BYTES + +#define __read_mostly __attribute__((__section__(".data.read_mostly"))) + +void parisc_cache_init(void); /* initializes cache-flushing */ +void disable_sr_hashing_asm(int); /* low level support for above */ +void disable_sr_hashing(void); /* turns off space register hashing */ +void free_sid(unsigned long); +unsigned long alloc_sid(void); + +struct seq_file; +extern void show_cache_info(struct seq_file *m); + +extern int split_tlb; +extern int dcache_stride; +extern int icache_stride; +extern struct pdc_cache_info cache_info; +void parisc_setup_cache_timing(void); + +#define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" : : "r" (addr)); +#define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" : : "r" (addr)); +#define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" : : "r" (addr)); + +#endif /* ! __ASSEMBLY__ */ + +/* Classes of processor wrt: disabling space register hashing */ + +#define SRHASH_PCXST 0 /* pcxs, pcxt, pcxt_ */ +#define SRHASH_PCXL 1 /* pcxl */ +#define SRHASH_PA20 2 /* pcxu, pcxu_, pcxw, pcxw_ */ + +#endif diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h new file mode 100644 index 00000000000..b7ca6dc7fdd --- /dev/null +++ b/arch/parisc/include/asm/cacheflush.h @@ -0,0 +1,121 @@ +#ifndef _PARISC_CACHEFLUSH_H +#define _PARISC_CACHEFLUSH_H + +#include + +/* The usual comment is "Caches aren't brain-dead on the ". + * Unfortunately, that doesn't apply to PA-RISC. */ + +/* Internal implementation */ +void flush_data_cache_local(void *); /* flushes local data-cache only */ +void flush_instruction_cache_local(void *); /* flushes local code-cache only */ +#ifdef CONFIG_SMP +void flush_data_cache(void); /* flushes data-cache only (all processors) */ +void flush_instruction_cache(void); /* flushes i-cache only (all processors) */ +#else +#define flush_data_cache() flush_data_cache_local(NULL) +#define flush_instruction_cache() flush_instruction_cache_local(NULL) +#endif + +#define flush_cache_dup_mm(mm) flush_cache_mm(mm) + +void flush_user_icache_range_asm(unsigned long, unsigned long); +void flush_kernel_icache_range_asm(unsigned long, unsigned long); +void flush_user_dcache_range_asm(unsigned long, unsigned long); +void flush_kernel_dcache_range_asm(unsigned long, unsigned long); +void flush_kernel_dcache_page_asm(void *); +void flush_kernel_icache_page(void *); +void flush_user_dcache_page(unsigned long); +void flush_user_icache_page(unsigned long); +void flush_user_dcache_range(unsigned long, unsigned long); +void flush_user_icache_range(unsigned long, unsigned long); + +/* Cache flush operations */ + +void flush_cache_all_local(void); +void flush_cache_all(void); +void flush_cache_mm(struct mm_struct *mm); + +#define flush_kernel_dcache_range(start,size) \ + flush_kernel_dcache_range_asm((start), (start)+(size)); + +#define flush_cache_vmap(start, end) flush_cache_all() +#define flush_cache_vunmap(start, end) flush_cache_all() + +extern void flush_dcache_page(struct page *page); + +#define flush_dcache_mmap_lock(mapping) \ + spin_lock_irq(&(mapping)->tree_lock) +#define flush_dcache_mmap_unlock(mapping) \ + spin_unlock_irq(&(mapping)->tree_lock) + +#define flush_icache_page(vma,page) do { \ + flush_kernel_dcache_page(page); \ + flush_kernel_icache_page(page_address(page)); \ +} while (0) + +#define flush_icache_range(s,e) do { \ + flush_kernel_dcache_range_asm(s,e); \ + flush_kernel_icache_range_asm(s,e); \ +} while (0) + +#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +do { \ + flush_cache_page(vma, vaddr, page_to_pfn(page)); \ + memcpy(dst, src, len); \ + flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); \ +} while (0) + +#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ +do { \ + flush_cache_page(vma, vaddr, page_to_pfn(page)); \ + memcpy(dst, src, len); \ +} while (0) + +void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn); +void flush_cache_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end); + +#define ARCH_HAS_FLUSH_ANON_PAGE +static inline void +flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) +{ + if (PageAnon(page)) + flush_user_dcache_page(vmaddr); +} + +#define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE +void flush_kernel_dcache_page_addr(void *addr); +static inline void flush_kernel_dcache_page(struct page *page) +{ + flush_kernel_dcache_page_addr(page_address(page)); +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void); +#endif + +#ifdef CONFIG_PA8X00 +/* Only pa8800, pa8900 needs this */ +#define ARCH_HAS_KMAP + +void kunmap_parisc(void *addr); + +static inline void *kmap(struct page *page) +{ + might_sleep(); + return page_address(page); +} + +#define kunmap(page) kunmap_parisc(page_address(page)) + +#define kmap_atomic(page, idx) page_address(page) + +#define kunmap_atomic(addr, idx) kunmap_parisc(addr) + +#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) +#define kmap_atomic_to_page(ptr) virt_to_page(ptr) +#endif + +#endif /* _PARISC_CACHEFLUSH_H */ + diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h new file mode 100644 index 00000000000..e9639ccc3fc --- /dev/null +++ b/arch/parisc/include/asm/checksum.h @@ -0,0 +1,210 @@ +#ifndef _PARISC_CHECKSUM_H +#define _PARISC_CHECKSUM_H + +#include + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *, int, __wsum); + +/* + * The same as csum_partial, but copies from src while it checksums. + * + * Here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ +extern __wsum csum_partial_copy_nocheck(const void *, void *, int, __wsum); + +/* + * this is a new version of the above that records errors it finds in *errp, + * but continues and zeros the rest of the buffer. + */ +extern __wsum csum_partial_copy_from_user(const void __user *src, + void *dst, int len, __wsum sum, int *errp); + +/* + * Optimized for IP headers, which always checksum on 4 octet boundaries. + * + * Written by Randolph Chung , and then mucked with by + * LaMont Jones + */ +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int sum; + + __asm__ __volatile__ ( +" ldws,ma 4(%1), %0\n" +" addib,<= -4, %2, 2f\n" +"\n" +" ldws 4(%1), %%r20\n" +" ldws 8(%1), %%r21\n" +" add %0, %%r20, %0\n" +" ldws,ma 12(%1), %%r19\n" +" addc %0, %%r21, %0\n" +" addc %0, %%r19, %0\n" +"1: ldws,ma 4(%1), %%r19\n" +" addib,< 0, %2, 1b\n" +" addc %0, %%r19, %0\n" +"\n" +" extru %0, 31, 16, %%r20\n" +" extru %0, 15, 16, %%r21\n" +" addc %%r20, %%r21, %0\n" +" extru %0, 15, 16, %%r21\n" +" add %0, %%r21, %0\n" +" subi -1, %0, %0\n" +"2:\n" + : "=r" (sum), "=r" (iph), "=r" (ihl) + : "1" (iph), "2" (ihl) + : "r19", "r20", "r21", "memory"); + + return (__force __sum16)sum; +} + +/* + * Fold a partial checksum + */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + /* add the swapped two 16-bit halves of sum, + a possible carry from adding the two 16-bit halves, + will carry from the lower half into the upper half, + giving us the correct sum in the upper half. */ + sum += (sum << 16) + (sum >> 16); + return (__force __sum16)(~sum >> 16); +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + __asm__( + " add %1, %0, %0\n" + " addc %2, %0, %0\n" + " addc %3, %0, %0\n" + " addc %%r0, %0, %0\n" + : "=r" (sum) + : "r" (daddr), "r"(saddr), "r"(proto+len), "0"(sum)); + return sum; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum)); +} + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +static inline __sum16 ip_compute_csum(const void *buf, int len) +{ + return csum_fold (csum_partial(buf, len, 0)); +} + + +#define _HAVE_ARCH_IPV6_CSUM +static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, unsigned short proto, + __wsum sum) +{ + __asm__ __volatile__ ( + +#if BITS_PER_LONG > 32 + + /* + ** We can execute two loads and two adds per cycle on PA 8000. + ** But add insn's get serialized waiting for the carry bit. + ** Try to keep 4 registers with "live" values ahead of the ALU. + */ + +" ldd,ma 8(%1), %%r19\n" /* get 1st saddr word */ +" ldd,ma 8(%2), %%r20\n" /* get 1st daddr word */ +" add %8, %3, %3\n"/* add 16-bit proto + len */ +" add %%r19, %0, %0\n" +" ldd,ma 8(%1), %%r21\n" /* 2cd saddr */ +" ldd,ma 8(%2), %%r22\n" /* 2cd daddr */ +" add,dc %%r20, %0, %0\n" +" add,dc %%r21, %0, %0\n" +" add,dc %%r22, %0, %0\n" +" add,dc %3, %0, %0\n" /* fold in proto+len | carry bit */ +" extrd,u %0, 31, 32, %%r19\n" /* copy upper half down */ +" depdi 0, 31, 32, %0\n" /* clear upper half */ +" add %%r19, %0, %0\n" /* fold into 32-bits */ +" addc 0, %0, %0\n" /* add carry */ + +#else + + /* + ** For PA 1.x, the insn order doesn't matter as much. + ** Insn stream is serialized on the carry bit here too. + ** result from the previous operation (eg r0 + x) + */ + +" ldw,ma 4(%1), %%r19\n" /* get 1st saddr word */ +" ldw,ma 4(%2), %%r20\n" /* get 1st daddr word */ +" add %8, %3, %3\n" /* add 16-bit proto + len */ +" add %%r19, %0, %0\n" +" ldw,ma 4(%1), %%r21\n" /* 2cd saddr */ +" addc %%r20, %0, %0\n" +" ldw,ma 4(%2), %%r22\n" /* 2cd daddr */ +" addc %%r21, %0, %0\n" +" ldw,ma 4(%1), %%r19\n" /* 3rd saddr */ +" addc %%r22, %0, %0\n" +" ldw,ma 4(%2), %%r20\n" /* 3rd daddr */ +" addc %%r19, %0, %0\n" +" ldw,ma 4(%1), %%r21\n" /* 4th saddr */ +" addc %%r20, %0, %0\n" +" ldw,ma 4(%2), %%r22\n" /* 4th daddr */ +" addc %%r21, %0, %0\n" +" addc %%r22, %0, %0\n" +" addc %3, %0, %0\n" /* fold in proto+len, catch carry */ + +#endif + : "=r" (sum), "=r" (saddr), "=r" (daddr), "=r" (len) + : "0" (sum), "1" (saddr), "2" (daddr), "3" (len), "r" (proto) + : "r19", "r20", "r21", "r22"); + return csum_fold(sum); +} + +/* + * Copy and checksum to user + */ +#define HAVE_CSUM_COPY_USER +static __inline__ __wsum csum_and_copy_to_user(const void *src, + void __user *dst, + int len, __wsum sum, + int *err_ptr) +{ + /* code stolen from include/asm-mips64 */ + sum = csum_partial(src, len, sum); + + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + return (__force __wsum)-1; + } + + return sum; +} + +#endif + diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h new file mode 100644 index 00000000000..7f32611a7a5 --- /dev/null +++ b/arch/parisc/include/asm/compat.h @@ -0,0 +1,165 @@ +#ifndef _ASM_PARISC_COMPAT_H +#define _ASM_PARISC_COMPAT_H +/* + * Architecture specific compatibility types + */ +#include +#include +#include + +#define COMPAT_USER_HZ 100 + +typedef u32 compat_size_t; +typedef s32 compat_ssize_t; +typedef s32 compat_time_t; +typedef s32 compat_clock_t; +typedef s32 compat_pid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; +typedef u16 compat_mode_t; +typedef u32 compat_ino_t; +typedef u32 compat_dev_t; +typedef s32 compat_off_t; +typedef s64 compat_loff_t; +typedef u16 compat_nlink_t; +typedef u16 compat_ipc_pid_t; +typedef s32 compat_daddr_t; +typedef u32 compat_caddr_t; +typedef s32 compat_timer_t; + +typedef s32 compat_int_t; +typedef s32 compat_long_t; +typedef s64 compat_s64; +typedef u32 compat_uint_t; +typedef u32 compat_ulong_t; +typedef u64 compat_u64; + +struct compat_timespec { + compat_time_t tv_sec; + s32 tv_nsec; +}; + +struct compat_timeval { + compat_time_t tv_sec; + s32 tv_usec; +}; + +struct compat_stat { + compat_dev_t st_dev; /* dev_t is 32 bits on parisc */ + compat_ino_t st_ino; /* 32 bits */ + compat_mode_t st_mode; /* 16 bits */ + compat_nlink_t st_nlink; /* 16 bits */ + u16 st_reserved1; /* old st_uid */ + u16 st_reserved2; /* old st_gid */ + compat_dev_t st_rdev; + compat_off_t st_size; + compat_time_t st_atime; + u32 st_atime_nsec; + compat_time_t st_mtime; + u32 st_mtime_nsec; + compat_time_t st_ctime; + u32 st_ctime_nsec; + s32 st_blksize; + s32 st_blocks; + u32 __unused1; /* ACL stuff */ + compat_dev_t __unused2; /* network */ + compat_ino_t __unused3; /* network */ + u32 __unused4; /* cnodes */ + u16 __unused5; /* netsite */ + short st_fstype; + compat_dev_t st_realdev; + u16 st_basemode; + u16 st_spareshort; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; + u32 st_spare4[3]; +}; + +struct compat_flock { + short l_type; + short l_whence; + compat_off_t l_start; + compat_off_t l_len; + compat_pid_t l_pid; +}; + +struct compat_flock64 { + short l_type; + short l_whence; + compat_loff_t l_start; + compat_loff_t l_len; + compat_pid_t l_pid; +}; + +struct compat_statfs { + s32 f_type; + s32 f_bsize; + s32 f_blocks; + s32 f_bfree; + s32 f_bavail; + s32 f_files; + s32 f_ffree; + __kernel_fsid_t f_fsid; + s32 f_namelen; + s32 f_frsize; + s32 f_spare[5]; +}; + +struct compat_sigcontext { + compat_int_t sc_flags; + compat_int_t sc_gr[32]; /* PSW in sc_gr[0] */ + u64 sc_fr[32]; + compat_int_t sc_iasq[2]; + compat_int_t sc_iaoq[2]; + compat_int_t sc_sar; /* cr11 */ +}; + +#define COMPAT_RLIM_INFINITY 0xffffffff + +typedef u32 compat_old_sigset_t; /* at least 32 bits */ + +#define _COMPAT_NSIG 64 +#define _COMPAT_NSIG_BPW 32 + +typedef u32 compat_sigset_word; + +#define COMPAT_OFF_T_MAX 0x7fffffff +#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL + +/* + * A pointer passed in from user mode. This should not + * be used for syscall parameters, just declare them + * as pointers because the syscall entry code will have + * appropriately converted them already. + */ +typedef u32 compat_uptr_t; + +static inline void __user *compat_ptr(compat_uptr_t uptr) +{ + return (void __user *)(unsigned long)uptr; +} + +static inline compat_uptr_t ptr_to_compat(void __user *uptr) +{ + return (u32)(unsigned long)uptr; +} + +static __inline__ void __user *compat_alloc_user_space(long len) +{ + struct pt_regs *regs = ¤t->thread.regs; + return (void __user *)regs->gr[30]; +} + +static inline int __is_compat_task(struct task_struct *t) +{ + return test_ti_thread_flag(task_thread_info(t), TIF_32BIT); +} + +static inline int is_compat_task(void) +{ + return __is_compat_task(current); +} + +#endif /* _ASM_PARISC_COMPAT_H */ diff --git a/arch/parisc/include/asm/compat_rt_sigframe.h b/arch/parisc/include/asm/compat_rt_sigframe.h new file mode 100644 index 00000000000..81bec28bdc4 --- /dev/null +++ b/arch/parisc/include/asm/compat_rt_sigframe.h @@ -0,0 +1,50 @@ +#include +#include +#include + +#ifndef _ASM_PARISC_COMPAT_RT_SIGFRAME_H +#define _ASM_PARISC_COMPAT_RT_SIGFRAME_H + +/* In a deft move of uber-hackery, we decide to carry the top half of all + * 64-bit registers in a non-portable, non-ABI, hidden structure. + * Userspace can read the hidden structure if it *wants* but is never + * guaranteed to be in the same place. Infact the uc_sigmask from the + * ucontext_t structure may push the hidden register file downards + */ +struct compat_regfile { + /* Upper half of all the 64-bit registers that were truncated + on a copy to a 32-bit userspace */ + compat_int_t rf_gr[32]; + compat_int_t rf_iasq[2]; + compat_int_t rf_iaoq[2]; + compat_int_t rf_sar; +}; + +#define COMPAT_SIGRETURN_TRAMP 4 +#define COMPAT_SIGRESTARTBLOCK_TRAMP 5 +#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + COMPAT_SIGRESTARTBLOCK_TRAMP) + +struct compat_rt_sigframe { + /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c + Secondary to that it must protect the ERESTART_RESTARTBLOCK + trampoline we left on the stack (we were bad and didn't + change sp so we could run really fast.) */ + compat_uint_t tramp[COMPAT_TRAMP_SIZE]; + compat_siginfo_t info; + struct compat_ucontext uc; + /* Hidden location of truncated registers, *must* be last. */ + struct compat_regfile regs; +}; + +/* + * The 32-bit ABI wants at least 48 bytes for a function call frame: + * 16 bytes for arg0-arg3, and 32 bytes for magic (the only part of + * which Linux/parisc uses is sp-20 for the saved return pointer...) + * Then, the stack pointer must be rounded to a cache line (64 bytes). + */ +#define SIGFRAME32 64 +#define FUNCTIONCALLFRAME32 48 +#define PARISC_RT_SIGFRAME_SIZE32 \ + (((sizeof(struct compat_rt_sigframe) + FUNCTIONCALLFRAME32) + SIGFRAME32) & -SIGFRAME32) + +#endif diff --git a/arch/parisc/include/asm/compat_signal.h b/arch/parisc/include/asm/compat_signal.h new file mode 100644 index 00000000000..6ad02c360b2 --- /dev/null +++ b/arch/parisc/include/asm/compat_signal.h @@ -0,0 +1,2 @@ +/* Use generic */ +#include diff --git a/arch/parisc/include/asm/compat_ucontext.h b/arch/parisc/include/asm/compat_ucontext.h new file mode 100644 index 00000000000..2f7292afde3 --- /dev/null +++ b/arch/parisc/include/asm/compat_ucontext.h @@ -0,0 +1,17 @@ +#ifndef _ASM_PARISC_COMPAT_UCONTEXT_H +#define _ASM_PARISC_COMPAT_UCONTEXT_H + +#include + +/* 32-bit ucontext as seen from an 64-bit kernel */ +struct compat_ucontext { + compat_uint_t uc_flags; + compat_uptr_t uc_link; + compat_stack_t uc_stack; /* struct compat_sigaltstack (12 bytes)*/ + /* FIXME: Pad out to get uc_mcontext to start at an 8-byte aligned boundary */ + compat_uint_t pad[1]; + struct compat_sigcontext uc_mcontext; + compat_sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* !_ASM_PARISC_COMPAT_UCONTEXT_H */ diff --git a/arch/parisc/include/asm/cputime.h b/arch/parisc/include/asm/cputime.h new file mode 100644 index 00000000000..dcdf2fbd7e7 --- /dev/null +++ b/arch/parisc/include/asm/cputime.h @@ -0,0 +1,6 @@ +#ifndef __PARISC_CPUTIME_H +#define __PARISC_CPUTIME_H + +#include + +#endif /* __PARISC_CPUTIME_H */ diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h new file mode 100644 index 00000000000..0fb9338e3bf --- /dev/null +++ b/arch/parisc/include/asm/current.h @@ -0,0 +1,15 @@ +#ifndef _PARISC_CURRENT_H +#define _PARISC_CURRENT_H + +#include + +struct task_struct; + +static inline struct task_struct * get_current(void) +{ + return current_thread_info()->task; +} + +#define current get_current() + +#endif /* !(_PARISC_CURRENT_H) */ diff --git a/arch/parisc/include/asm/delay.h b/arch/parisc/include/asm/delay.h new file mode 100644 index 00000000000..7a75e984674 --- /dev/null +++ b/arch/parisc/include/asm/delay.h @@ -0,0 +1,43 @@ +#ifndef _PARISC_DELAY_H +#define _PARISC_DELAY_H + +#include /* for mfctl() */ +#include /* for boot_cpu_data */ + + +/* + * Copyright (C) 1993 Linus Torvalds + * + * Delay routines + */ + +static __inline__ void __delay(unsigned long loops) { + asm volatile( + " .balignl 64,0x34000034\n" + " addib,UV -1,%0,.\n" + " nop\n" + : "=r" (loops) : "0" (loops)); +} + +static __inline__ void __cr16_delay(unsigned long clocks) { + unsigned long start; + + /* + * Note: Due to unsigned math, cr16 rollovers shouldn't be + * a problem here. However, on 32 bit, we need to make sure + * we don't pass in too big a value. The current default + * value of MAX_UDELAY_MS should help prevent this. + */ + + start = mfctl(16); + while ((mfctl(16) - start) < clocks) + ; +} + +static __inline__ void __udelay(unsigned long usecs) { + __cr16_delay(usecs * ((unsigned long)boot_cpu_data.cpu_hz / 1000000UL)); +} + +#define udelay(n) __udelay(n) + +#endif /* defined(_PARISC_DELAY_H) */ diff --git a/arch/parisc/include/asm/device.h b/arch/parisc/include/asm/device.h new file mode 100644 index 00000000000..d8f9872b0e2 --- /dev/null +++ b/arch/parisc/include/asm/device.h @@ -0,0 +1,7 @@ +/* + * Arch specific extensions to struct device + * + * This file is released under the GPLv2 + */ +#include + diff --git a/arch/parisc/include/asm/div64.h b/arch/parisc/include/asm/div64.h new file mode 100644 index 00000000000..6cd978cefb2 --- /dev/null +++ b/arch/parisc/include/asm/div64.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h new file mode 100644 index 00000000000..53af696f23d --- /dev/null +++ b/arch/parisc/include/asm/dma-mapping.h @@ -0,0 +1,253 @@ +#ifndef _PARISC_DMA_MAPPING_H +#define _PARISC_DMA_MAPPING_H + +#include +#include +#include + +/* See Documentation/DMA-mapping.txt */ +struct hppa_dma_ops { + int (*dma_supported)(struct device *dev, u64 mask); + void *(*alloc_consistent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag); + void *(*alloc_noncoherent)(struct device *dev, size_t size, dma_addr_t *iova, gfp_t flag); + void (*free_consistent)(struct device *dev, size_t size, void *vaddr, dma_addr_t iova); + dma_addr_t (*map_single)(struct device *dev, void *addr, size_t size, enum dma_data_direction direction); + void (*unmap_single)(struct device *dev, dma_addr_t iova, size_t size, enum dma_data_direction direction); + int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction direction); + void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nhwents, enum dma_data_direction direction); + void (*dma_sync_single_for_cpu)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction); + void (*dma_sync_single_for_device)(struct device *dev, dma_addr_t iova, unsigned long offset, size_t size, enum dma_data_direction direction); + void (*dma_sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); + void (*dma_sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction direction); +}; + +/* +** We could live without the hppa_dma_ops indirection if we didn't want +** to support 4 different coherent dma models with one binary (they will +** someday be loadable modules): +** I/O MMU consistent method dma_sync behavior +** ============= ====================== ======================= +** a) PA-7x00LC uncachable host memory flush/purge +** b) U2/Uturn cachable host memory NOP +** c) Ike/Astro cachable host memory NOP +** d) EPIC/SAGA memory on EPIC/SAGA flush/reset DMA channel +** +** PA-7[13]00LC processors have a GSC bus interface and no I/O MMU. +** +** Systems (eg PCX-T workstations) that don't fall into the above +** categories will need to modify the needed drivers to perform +** flush/purge and allocate "regular" cacheable pages for everything. +*/ + +#ifdef CONFIG_PA11 +extern struct hppa_dma_ops pcxl_dma_ops; +extern struct hppa_dma_ops pcx_dma_ops; +#endif + +extern struct hppa_dma_ops *hppa_dma_ops; + +static inline void * +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flag) +{ + return hppa_dma_ops->alloc_consistent(dev, size, dma_handle, flag); +} + +static inline void * +dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flag) +{ + return hppa_dma_ops->alloc_noncoherent(dev, size, dma_handle, flag); +} + +static inline void +dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + hppa_dma_ops->free_consistent(dev, size, vaddr, dma_handle); +} + +static inline void +dma_free_noncoherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle) +{ + hppa_dma_ops->free_consistent(dev, size, vaddr, dma_handle); +} + +static inline dma_addr_t +dma_map_single(struct device *dev, void *ptr, size_t size, + enum dma_data_direction direction) +{ + return hppa_dma_ops->map_single(dev, ptr, size, direction); +} + +static inline void +dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, + enum dma_data_direction direction) +{ + hppa_dma_ops->unmap_single(dev, dma_addr, size, direction); +} + +static inline int +dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction direction) +{ + return hppa_dma_ops->map_sg(dev, sg, nents, direction); +} + +static inline void +dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, + enum dma_data_direction direction) +{ + hppa_dma_ops->unmap_sg(dev, sg, nhwentries, direction); +} + +static inline dma_addr_t +dma_map_page(struct device *dev, struct page *page, unsigned long offset, + size_t size, enum dma_data_direction direction) +{ + return dma_map_single(dev, (page_address(page) + (offset)), size, direction); +} + +static inline void +dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, + enum dma_data_direction direction) +{ + dma_unmap_single(dev, dma_address, size, direction); +} + + +static inline void +dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + if(hppa_dma_ops->dma_sync_single_for_cpu) + hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, 0, size, direction); +} + +static inline void +dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, + enum dma_data_direction direction) +{ + if(hppa_dma_ops->dma_sync_single_for_device) + hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, 0, size, direction); +} + +static inline void +dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + if(hppa_dma_ops->dma_sync_single_for_cpu) + hppa_dma_ops->dma_sync_single_for_cpu(dev, dma_handle, offset, size, direction); +} + +static inline void +dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + enum dma_data_direction direction) +{ + if(hppa_dma_ops->dma_sync_single_for_device) + hppa_dma_ops->dma_sync_single_for_device(dev, dma_handle, offset, size, direction); +} + +static inline void +dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + if(hppa_dma_ops->dma_sync_sg_for_cpu) + hppa_dma_ops->dma_sync_sg_for_cpu(dev, sg, nelems, direction); +} + +static inline void +dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, + enum dma_data_direction direction) +{ + if(hppa_dma_ops->dma_sync_sg_for_device) + hppa_dma_ops->dma_sync_sg_for_device(dev, sg, nelems, direction); +} + +static inline int +dma_supported(struct device *dev, u64 mask) +{ + return hppa_dma_ops->dma_supported(dev, mask); +} + +static inline int +dma_set_mask(struct device *dev, u64 mask) +{ + if(!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} + +static inline int +dma_get_cache_alignment(void) +{ + return dcache_stride; +} + +static inline int +dma_is_consistent(struct device *dev, dma_addr_t dma_addr) +{ + return (hppa_dma_ops->dma_sync_single_for_cpu == NULL); +} + +static inline void +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction) +{ + if(hppa_dma_ops->dma_sync_single_for_cpu) + flush_kernel_dcache_range((unsigned long)vaddr, size); +} + +static inline void * +parisc_walk_tree(struct device *dev) +{ + struct device *otherdev; + if(likely(dev->platform_data != NULL)) + return dev->platform_data; + /* OK, just traverse the bus to find it */ + for(otherdev = dev->parent; otherdev; + otherdev = otherdev->parent) { + if(otherdev->platform_data) { + dev->platform_data = otherdev->platform_data; + break; + } + } + BUG_ON(!dev->platform_data); + return dev->platform_data; +} + +#define GET_IOC(dev) (HBA_DATA(parisc_walk_tree(dev))->iommu); + + +#ifdef CONFIG_IOMMU_CCIO +struct parisc_device; +struct ioc; +void * ccio_get_iommu(const struct parisc_device *dev); +int ccio_request_resource(const struct parisc_device *dev, + struct resource *res); +int ccio_allocate_resource(const struct parisc_device *dev, + struct resource *res, unsigned long size, + unsigned long min, unsigned long max, unsigned long align); +#else /* !CONFIG_IOMMU_CCIO */ +#define ccio_get_iommu(dev) NULL +#define ccio_request_resource(dev, res) insert_resource(&iomem_resource, res) +#define ccio_allocate_resource(dev, res, size, min, max, align) \ + allocate_resource(&iomem_resource, res, size, min, max, \ + align, NULL, NULL) +#endif /* !CONFIG_IOMMU_CCIO */ + +#ifdef CONFIG_IOMMU_SBA +struct parisc_device; +void * sba_get_iommu(struct parisc_device *dev); +#endif + +/* At the moment, we panic on error for IOMMU resource exaustion */ +#define dma_mapping_error(dev, x) 0 + +#endif diff --git a/arch/parisc/include/asm/dma.h b/arch/parisc/include/asm/dma.h new file mode 100644 index 00000000000..31ad0f05af3 --- /dev/null +++ b/arch/parisc/include/asm/dma.h @@ -0,0 +1,186 @@ +/* $Id: dma.h,v 1.2 1999/04/27 00:46:18 deller Exp $ + * linux/include/asm/dma.h: Defines for using and allocating dma channels. + * Written by Hennus Bergman, 1992. + * High DMA channel support & info by Hannu Savolainen + * and John Boyd, Nov. 1992. + * (c) Copyright 2000, Grant Grundler + */ + +#ifndef _ASM_DMA_H +#define _ASM_DMA_H + +#include /* need byte IO */ +#include + +#define dma_outb outb +#define dma_inb inb + +/* +** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up +** (or rather not merge) DMAs into manageable chunks. +** On parisc, this is more of the software/tuning constraint +** rather than the HW. I/O MMU allocation algorithms can be +** faster with smaller sizes (to some degree). +*/ +#define DMA_CHUNK_SIZE (BITS_PER_LONG*PAGE_SIZE) + +/* The maximum address that we can perform a DMA transfer to on this platform +** New dynamic DMA interfaces should obsolete this.... +*/ +#define MAX_DMA_ADDRESS (~0UL) + +/* +** We don't have DMA channels... well V-class does but the +** Dynamic DMA Mapping interface will support them... right? :^) +** Note: this is not relevant right now for PA-RISC, but we cannot +** leave this as undefined because some things (e.g. sound) +** won't compile :-( +*/ +#define MAX_DMA_CHANNELS 8 +#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ +#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ +#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ + +#define DMA_AUTOINIT 0x10 + +/* 8237 DMA controllers */ +#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ +#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ + +/* DMA controller registers */ +#define DMA1_CMD_REG 0x08 /* command register (w) */ +#define DMA1_STAT_REG 0x08 /* status register (r) */ +#define DMA1_REQ_REG 0x09 /* request register (w) */ +#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ +#define DMA1_MODE_REG 0x0B /* mode register (w) */ +#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ +#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ +#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ +#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ +#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ +#define DMA1_EXT_MODE_REG (0x400 | DMA1_MODE_REG) + +#define DMA2_CMD_REG 0xD0 /* command register (w) */ +#define DMA2_STAT_REG 0xD0 /* status register (r) */ +#define DMA2_REQ_REG 0xD2 /* request register (w) */ +#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ +#define DMA2_MODE_REG 0xD6 /* mode register (w) */ +#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ +#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ +#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ +#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ +#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ +#define DMA2_EXT_MODE_REG (0x400 | DMA2_MODE_REG) + +static __inline__ unsigned long claim_dma_lock(void) +{ + return 0; +} + +static __inline__ void release_dma_lock(unsigned long flags) +{ +} + + +/* Get DMA residue count. After a DMA transfer, this + * should return zero. Reading this while a DMA transfer is + * still in progress will return unpredictable results. + * If called before the channel has been used, it may return 1. + * Otherwise, it returns the number of _bytes_ left to transfer. + * + * Assumes DMA flip-flop is clear. + */ +static __inline__ int get_dma_residue(unsigned int dmanr) +{ + unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE + : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; + + /* using short to get 16-bit wrap around */ + unsigned short count; + + count = 1 + dma_inb(io_port); + count += dma_inb(io_port) << 8; + + return (dmanr<=3)? count : (count<<1); +} + +/* enable/disable a specific DMA channel */ +static __inline__ void enable_dma(unsigned int dmanr) +{ +#ifdef CONFIG_SUPERIO + if (dmanr<=3) + dma_outb(dmanr, DMA1_MASK_REG); + else + dma_outb(dmanr & 3, DMA2_MASK_REG); +#endif +} + +static __inline__ void disable_dma(unsigned int dmanr) +{ +#ifdef CONFIG_SUPERIO + if (dmanr<=3) + dma_outb(dmanr | 4, DMA1_MASK_REG); + else + dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); +#endif +} + +/* reserve a DMA channel */ +#define request_dma(dmanr, device_id) (0) + +/* Clear the 'DMA Pointer Flip Flop'. + * Write 0 for LSB/MSB, 1 for MSB/LSB access. + * Use this once to initialize the FF to a known state. + * After that, keep track of it. :-) + * --- In order to do that, the DMA routines below should --- + * --- only be used while holding the DMA lock ! --- + */ +static __inline__ void clear_dma_ff(unsigned int dmanr) +{ +} + +/* set mode (above) for a specific DMA channel */ +static __inline__ void set_dma_mode(unsigned int dmanr, char mode) +{ +} + +/* Set only the page register bits of the transfer address. + * This is used for successive transfers when we know the contents of + * the lower 16 bits of the DMA current address register, but a 64k boundary + * may have been crossed. + */ +static __inline__ void set_dma_page(unsigned int dmanr, char pagenr) +{ +} + + +/* Set transfer address & page bits for specific DMA channel. + * Assumes dma flipflop is clear. + */ +static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int a) +{ +} + + +/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for + * a specific DMA channel. + * You must ensure the parameters are valid. + * NOTE: from a manual: "the number of transfers is one more + * than the initial word count"! This is taken into account. + * Assumes dma flip-flop is clear. + * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. + */ +static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) +{ +} + + +#define free_dma(dmanr) + +#ifdef CONFIG_PCI +extern int isa_dma_bridge_buggy; +#else +#define isa_dma_bridge_buggy (0) +#endif + +#endif /* _ASM_DMA_H */ diff --git a/arch/parisc/include/asm/eisa_bus.h b/arch/parisc/include/asm/eisa_bus.h new file mode 100644 index 00000000000..201085f83dd --- /dev/null +++ b/arch/parisc/include/asm/eisa_bus.h @@ -0,0 +1,23 @@ +/* + * eisa_bus.h interface between the eisa BA driver and the bus enumerator + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (c) 2002 Daniel Engstrom <5116@telia.com> + * + */ + +#ifndef ASM_EISA_H +#define ASM_EISA_H + +extern void eisa_make_irq_level(int num); +extern void eisa_make_irq_edge(int num); +extern int eisa_enumerator(unsigned long eeprom_addr, + struct resource *io_parent, + struct resource *mem_parent); +extern int eisa_eeprom_init(unsigned long addr); + +#endif diff --git a/arch/parisc/include/asm/eisa_eeprom.h b/arch/parisc/include/asm/eisa_eeprom.h new file mode 100644 index 00000000000..9c9da980402 --- /dev/null +++ b/arch/parisc/include/asm/eisa_eeprom.h @@ -0,0 +1,153 @@ +/* + * eisa_eeprom.h - provide support for EISA adapters in PA-RISC machines + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Copyright (c) 2001, 2002 Daniel Engstrom <5116@telia.com> + * + */ + +#ifndef ASM_EISA_EEPROM_H +#define ASM_EISA_EEPROM_H + +extern void __iomem *eisa_eeprom_addr; + +#define HPEE_MAX_LENGTH 0x2000 /* maximum eeprom length */ + +#define HPEE_SLOT_INFO(slot) (20+(48*slot)) + +struct eeprom_header +{ + + u_int32_t num_writes; /* number of writes */ + u_int8_t flags; /* flags, usage? */ + u_int8_t ver_maj; + u_int8_t ver_min; + u_int8_t num_slots; /* number of EISA slots in system */ + u_int16_t csum; /* checksum, I don't know how to calulate this */ + u_int8_t pad[10]; +} __attribute__ ((packed)); + + +struct eeprom_eisa_slot_info +{ + u_int32_t eisa_slot_id; + u_int32_t config_data_offset; + u_int32_t num_writes; + u_int16_t csum; + u_int16_t num_functions; + u_int16_t config_data_length; + + /* bits 0..3 are the duplicate slot id */ +#define HPEE_SLOT_INFO_EMBEDDED 0x10 +#define HPEE_SLOT_INFO_VIRTUAL 0x20 +#define HPEE_SLOT_INFO_NO_READID 0x40 +#define HPEE_SLOT_INFO_DUPLICATE 0x80 + u_int8_t slot_info; + +#define HPEE_SLOT_FEATURES_ENABLE 0x01 +#define HPEE_SLOT_FEATURES_IOCHK 0x02 +#define HPEE_SLOT_FEATURES_CFG_INCOMPLETE 0x80 + u_int8_t slot_features; + + u_int8_t ver_min; + u_int8_t ver_maj; + +#define HPEE_FUNCTION_INFO_HAVE_TYPE 0x01 +#define HPEE_FUNCTION_INFO_HAVE_MEMORY 0x02 +#define HPEE_FUNCTION_INFO_HAVE_IRQ 0x04 +#define HPEE_FUNCTION_INFO_HAVE_DMA 0x08 +#define HPEE_FUNCTION_INFO_HAVE_PORT 0x10 +#define HPEE_FUNCTION_INFO_HAVE_PORT_INIT 0x20 +/* I think there are two slighty different + * versions of the function_info field + * one int the fixed header and one optional + * in the parsed slot data area */ +#define HPEE_FUNCTION_INFO_HAVE_FUNCTION 0x01 +#define HPEE_FUNCTION_INFO_F_DISABLED 0x80 +#define HPEE_FUNCTION_INFO_CFG_FREE_FORM 0x40 + u_int8_t function_info; + +#define HPEE_FLAG_BOARD_IS_ISA 0x01 /* flag and minor version for isa board */ + u_int8_t flags; + u_int8_t pad[24]; +} __attribute__ ((packed)); + + +#define HPEE_MEMORY_MAX_ENT 9 +/* memory descriptor: byte 0 */ +#define HPEE_MEMORY_WRITABLE 0x01 +#define HPEE_MEMORY_CACHABLE 0x02 +#define HPEE_MEMORY_TYPE_MASK 0x18 +#define HPEE_MEMORY_TYPE_SYS 0x00 +#define HPEE_MEMORY_TYPE_EXP 0x08 +#define HPEE_MEMORY_TYPE_VIR 0x10 +#define HPEE_MEMORY_TYPE_OTH 0x18 +#define HPEE_MEMORY_SHARED 0x20 +#define HPEE_MEMORY_MORE 0x80 + +/* memory descriptor: byte 1 */ +#define HPEE_MEMORY_WIDTH_MASK 0x03 +#define HPEE_MEMORY_WIDTH_BYTE 0x00 +#define HPEE_MEMORY_WIDTH_WORD 0x01 +#define HPEE_MEMORY_WIDTH_DWORD 0x02 +#define HPEE_MEMORY_DECODE_MASK 0x0c +#define HPEE_MEMORY_DECODE_20BITS 0x00 +#define HPEE_MEMORY_DECODE_24BITS 0x04 +#define HPEE_MEMORY_DECODE_32BITS 0x08 +/* byte 2 and 3 are a 16bit LE value + * containging the memory size in kilobytes */ +/* byte 4,5,6 are a 24bit LE value + * containing the memory base address */ + + +#define HPEE_IRQ_MAX_ENT 7 +/* Interrupt entry: byte 0 */ +#define HPEE_IRQ_CHANNEL_MASK 0xf +#define HPEE_IRQ_TRIG_LEVEL 0x20 +#define HPEE_IRQ_MORE 0x80 +/* byte 1 seems to be unused */ + +#define HPEE_DMA_MAX_ENT 4 + +/* dma entry: byte 0 */ +#define HPEE_DMA_CHANNEL_MASK 7 +#define HPEE_DMA_SIZE_MASK 0xc +#define HPEE_DMA_SIZE_BYTE 0x0 +#define HPEE_DMA_SIZE_WORD 0x4 +#define HPEE_DMA_SIZE_DWORD 0x8 +#define HPEE_DMA_SHARED 0x40 +#define HPEE_DMA_MORE 0x80 + +/* dma entry: byte 1 */ +#define HPEE_DMA_TIMING_MASK 0x30 +#define HPEE_DMA_TIMING_ISA 0x0 +#define HPEE_DMA_TIMING_TYPEA 0x10 +#define HPEE_DMA_TIMING_TYPEB 0x20 +#define HPEE_DMA_TIMING_TYPEC 0x30 + +#define HPEE_PORT_MAX_ENT 20 +/* port entry byte 0 */ +#define HPEE_PORT_SIZE_MASK 0x1f +#define HPEE_PORT_SHARED 0x40 +#define HPEE_PORT_MORE 0x80 +/* byte 1 and 2 is a 16bit LE value + * conating the start port number */ + +#define HPEE_PORT_INIT_MAX_LEN 60 /* in bytes here */ +/* port init entry byte 0 */ +#define HPEE_PORT_INIT_WIDTH_MASK 0x3 +#define HPEE_PORT_INIT_WIDTH_BYTE 0x0 +#define HPEE_PORT_INIT_WIDTH_WORD 0x1 +#define HPEE_PORT_INIT_WIDTH_DWORD 0x2 +#define HPEE_PORT_INIT_MASK 0x4 +#define HPEE_PORT_INIT_MORE 0x80 + +#define HPEE_SELECTION_MAX_ENT 26 + +#define HPEE_TYPE_MAX_LEN 80 + +#endif diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h new file mode 100644 index 00000000000..d0a4a826281 --- /dev/null +++ b/arch/parisc/include/asm/elf.h @@ -0,0 +1,342 @@ +#ifndef __ASMPARISC_ELF_H +#define __ASMPARISC_ELF_H + +/* + * ELF register definitions.. + */ + +#include + +#define EM_PARISC 15 + +/* HPPA specific definitions. */ + +/* Legal values for e_flags field of Elf32_Ehdr. */ + +#define EF_PARISC_TRAPNIL 0x00010000 /* Trap nil pointer dereference. */ +#define EF_PARISC_EXT 0x00020000 /* Program uses arch. extensions. */ +#define EF_PARISC_LSB 0x00040000 /* Program expects little endian. */ +#define EF_PARISC_WIDE 0x00080000 /* Program expects wide mode. */ +#define EF_PARISC_NO_KABP 0x00100000 /* No kernel assisted branch + prediction. */ +#define EF_PARISC_LAZYSWAP 0x00400000 /* Allow lazy swapping. */ +#define EF_PARISC_ARCH 0x0000ffff /* Architecture version. */ + +/* Defined values for `e_flags & EF_PARISC_ARCH' are: */ + +#define EFA_PARISC_1_0 0x020b /* PA-RISC 1.0 big-endian. */ +#define EFA_PARISC_1_1 0x0210 /* PA-RISC 1.1 big-endian. */ +#define EFA_PARISC_2_0 0x0214 /* PA-RISC 2.0 big-endian. */ + +/* Additional section indices. */ + +#define SHN_PARISC_ANSI_COMMON 0xff00 /* Section for tenatively declared + symbols in ANSI C. */ +#define SHN_PARISC_HUGE_COMMON 0xff01 /* Common blocks in huge model. */ + +/* Legal values for sh_type field of Elf32_Shdr. */ + +#define SHT_PARISC_EXT 0x70000000 /* Contains product specific ext. */ +#define SHT_PARISC_UNWIND 0x70000001 /* Unwind information. */ +#define SHT_PARISC_DOC 0x70000002 /* Debug info for optimized code. */ + +/* Legal values for sh_flags field of Elf32_Shdr. */ + +#define SHF_PARISC_SHORT 0x20000000 /* Section with short addressing. */ +#define SHF_PARISC_HUGE 0x40000000 /* Section far from gp. */ +#define SHF_PARISC_SBP 0x80000000 /* Static branch prediction code. */ + +/* Legal values for ST_TYPE subfield of st_info (symbol type). */ + +#define STT_PARISC_MILLICODE 13 /* Millicode function entry point. */ + +#define STT_HP_OPAQUE (STT_LOOS + 0x1) +#define STT_HP_STUB (STT_LOOS + 0x2) + +/* HPPA relocs. */ + +#define R_PARISC_NONE 0 /* No reloc. */ +#define R_PARISC_DIR32 1 /* Direct 32-bit reference. */ +#define R_PARISC_DIR21L 2 /* Left 21 bits of eff. address. */ +#define R_PARISC_DIR17R 3 /* Right 17 bits of eff. address. */ +#define R_PARISC_DIR17F 4 /* 17 bits of eff. address. */ +#define R_PARISC_DIR14R 6 /* Right 14 bits of eff. address. */ +#define R_PARISC_PCREL32 9 /* 32-bit rel. address. */ +#define R_PARISC_PCREL21L 10 /* Left 21 bits of rel. address. */ +#define R_PARISC_PCREL17R 11 /* Right 17 bits of rel. address. */ +#define R_PARISC_PCREL17F 12 /* 17 bits of rel. address. */ +#define R_PARISC_PCREL14R 14 /* Right 14 bits of rel. address. */ +#define R_PARISC_DPREL21L 18 /* Left 21 bits of rel. address. */ +#define R_PARISC_DPREL14R 22 /* Right 14 bits of rel. address. */ +#define R_PARISC_GPREL21L 26 /* GP-relative, left 21 bits. */ +#define R_PARISC_GPREL14R 30 /* GP-relative, right 14 bits. */ +#define R_PARISC_LTOFF21L 34 /* LT-relative, left 21 bits. */ +#define R_PARISC_LTOFF14R 38 /* LT-relative, right 14 bits. */ +#define R_PARISC_SECREL32 41 /* 32 bits section rel. address. */ +#define R_PARISC_SEGBASE 48 /* No relocation, set segment base. */ +#define R_PARISC_SEGREL32 49 /* 32 bits segment rel. address. */ +#define R_PARISC_PLTOFF21L 50 /* PLT rel. address, left 21 bits. */ +#define R_PARISC_PLTOFF14R 54 /* PLT rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_FPTR32 57 /* 32 bits LT-rel. function pointer. */ +#define R_PARISC_LTOFF_FPTR21L 58 /* LT-rel. fct ptr, left 21 bits. */ +#define R_PARISC_LTOFF_FPTR14R 62 /* LT-rel. fct ptr, right 14 bits. */ +#define R_PARISC_FPTR64 64 /* 64 bits function address. */ +#define R_PARISC_PLABEL32 65 /* 32 bits function address. */ +#define R_PARISC_PCREL64 72 /* 64 bits PC-rel. address. */ +#define R_PARISC_PCREL22F 74 /* 22 bits PC-rel. address. */ +#define R_PARISC_PCREL14WR 75 /* PC-rel. address, right 14 bits. */ +#define R_PARISC_PCREL14DR 76 /* PC rel. address, right 14 bits. */ +#define R_PARISC_PCREL16F 77 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16WF 78 /* 16 bits PC-rel. address. */ +#define R_PARISC_PCREL16DF 79 /* 16 bits PC-rel. address. */ +#define R_PARISC_DIR64 80 /* 64 bits of eff. address. */ +#define R_PARISC_DIR14WR 83 /* 14 bits of eff. address. */ +#define R_PARISC_DIR14DR 84 /* 14 bits of eff. address. */ +#define R_PARISC_DIR16F 85 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16WF 86 /* 16 bits of eff. address. */ +#define R_PARISC_DIR16DF 87 /* 16 bits of eff. address. */ +#define R_PARISC_GPREL64 88 /* 64 bits of GP-rel. address. */ +#define R_PARISC_GPREL14WR 91 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL14DR 92 /* GP-rel. address, right 14 bits. */ +#define R_PARISC_GPREL16F 93 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16WF 94 /* 16 bits GP-rel. address. */ +#define R_PARISC_GPREL16DF 95 /* 16 bits GP-rel. address. */ +#define R_PARISC_LTOFF64 96 /* 64 bits LT-rel. address. */ +#define R_PARISC_LTOFF14WR 99 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF14DR 100 /* LT-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF16F 101 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16WF 102 /* 16 bits LT-rel. address. */ +#define R_PARISC_LTOFF16DF 103 /* 16 bits LT-rel. address. */ +#define R_PARISC_SECREL64 104 /* 64 bits section rel. address. */ +#define R_PARISC_SEGREL64 112 /* 64 bits segment rel. address. */ +#define R_PARISC_PLTOFF14WR 115 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF14DR 116 /* PLT-rel. address, right 14 bits. */ +#define R_PARISC_PLTOFF16F 117 /* 16 bits LT-rel. address. */ +#define R_PARISC_PLTOFF16WF 118 /* 16 bits PLT-rel. address. */ +#define R_PARISC_PLTOFF16DF 119 /* 16 bits PLT-rel. address. */ +#define R_PARISC_LTOFF_FPTR64 120 /* 64 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR14WR 123 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR14DR 124 /* LT-rel. fct. ptr., right 14 bits. */ +#define R_PARISC_LTOFF_FPTR16F 125 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16WF 126 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LTOFF_FPTR16DF 127 /* 16 bits LT-rel. function ptr. */ +#define R_PARISC_LORESERVE 128 +#define R_PARISC_COPY 128 /* Copy relocation. */ +#define R_PARISC_IPLT 129 /* Dynamic reloc, imported PLT */ +#define R_PARISC_EPLT 130 /* Dynamic reloc, exported PLT */ +#define R_PARISC_TPREL32 153 /* 32 bits TP-rel. address. */ +#define R_PARISC_TPREL21L 154 /* TP-rel. address, left 21 bits. */ +#define R_PARISC_TPREL14R 158 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_LTOFF_TP21L 162 /* LT-TP-rel. address, left 21 bits. */ +#define R_PARISC_LTOFF_TP14R 166 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14F 167 /* 14 bits LT-TP-rel. address. */ +#define R_PARISC_TPREL64 216 /* 64 bits TP-rel. address. */ +#define R_PARISC_TPREL14WR 219 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL14DR 220 /* TP-rel. address, right 14 bits. */ +#define R_PARISC_TPREL16F 221 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16WF 222 /* 16 bits TP-rel. address. */ +#define R_PARISC_TPREL16DF 223 /* 16 bits TP-rel. address. */ +#define R_PARISC_LTOFF_TP64 224 /* 64 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP14WR 227 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP14DR 228 /* LT-TP-rel. address, right 14 bits.*/ +#define R_PARISC_LTOFF_TP16F 229 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16WF 230 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_LTOFF_TP16DF 231 /* 16 bits LT-TP-rel. address. */ +#define R_PARISC_HIRESERVE 255 + +#define PA_PLABEL_FDESC 0x02 /* bit set if PLABEL points to + * a function descriptor, not + * an address */ + +/* The following are PA function descriptors + * + * addr: the absolute address of the function + * gp: either the data pointer (r27) for non-PIC code or the + * the PLT pointer (r19) for PIC code */ + +/* Format for the Elf32 Function descriptor */ +typedef struct elf32_fdesc { + __u32 addr; + __u32 gp; +} Elf32_Fdesc; + +/* Format for the Elf64 Function descriptor */ +typedef struct elf64_fdesc { + __u64 dummy[2]; /* FIXME: nothing uses these, why waste + * the space */ + __u64 addr; + __u64 gp; +} Elf64_Fdesc; + +/* Legal values for p_type field of Elf32_Phdr/Elf64_Phdr. */ + +#define PT_HP_TLS (PT_LOOS + 0x0) +#define PT_HP_CORE_NONE (PT_LOOS + 0x1) +#define PT_HP_CORE_VERSION (PT_LOOS + 0x2) +#define PT_HP_CORE_KERNEL (PT_LOOS + 0x3) +#define PT_HP_CORE_COMM (PT_LOOS + 0x4) +#define PT_HP_CORE_PROC (PT_LOOS + 0x5) +#define PT_HP_CORE_LOADABLE (PT_LOOS + 0x6) +#define PT_HP_CORE_STACK (PT_LOOS + 0x7) +#define PT_HP_CORE_SHM (PT_LOOS + 0x8) +#define PT_HP_CORE_MMF (PT_LOOS + 0x9) +#define PT_HP_PARALLEL (PT_LOOS + 0x10) +#define PT_HP_FASTBIND (PT_LOOS + 0x11) +#define PT_HP_OPT_ANNOT (PT_LOOS + 0x12) +#define PT_HP_HSL_ANNOT (PT_LOOS + 0x13) +#define PT_HP_STACK (PT_LOOS + 0x14) + +#define PT_PARISC_ARCHEXT 0x70000000 +#define PT_PARISC_UNWIND 0x70000001 + +/* Legal values for p_flags field of Elf32_Phdr/Elf64_Phdr. */ + +#define PF_PARISC_SBP 0x08000000 + +#define PF_HP_PAGE_SIZE 0x00100000 +#define PF_HP_FAR_SHARED 0x00200000 +#define PF_HP_NEAR_SHARED 0x00400000 +#define PF_HP_CODE 0x01000000 +#define PF_HP_MODIFY 0x02000000 +#define PF_HP_LAZYSWAP 0x04000000 +#define PF_HP_SBP 0x08000000 + +/* + * The following definitions are those for 32-bit ELF binaries on a 32-bit + * kernel and for 64-bit binaries on a 64-bit kernel. To run 32-bit binaries + * on a 64-bit kernel, arch/parisc/kernel/binfmt_elf32.c defines these + * macros appropriately and then #includes binfmt_elf.c, which then includes + * this file. + */ +#ifndef ELF_CLASS + +/* + * This is used to ensure we don't load something for the wrong architecture. + * + * Note that this header file is used by default in fs/binfmt_elf.c. So + * the following macros are for the default case. However, for the 64 + * bit kernel we also support 32 bit parisc binaries. To do that + * arch/parisc/kernel/binfmt_elf32.c defines its own set of these + * macros, and then it includes fs/binfmt_elf.c to provide an alternate + * elf binary handler for 32 bit binaries (on the 64 bit kernel). + */ +#ifdef CONFIG_64BIT +#define ELF_CLASS ELFCLASS64 +#else +#define ELF_CLASS ELFCLASS32 +#endif + +typedef unsigned long elf_greg_t; + +/* + * This yields a string that ld.so will use to load implementation + * specific libraries for optimization. This is more specific in + * intent than poking at uname or /proc/cpuinfo. + */ + +#define ELF_PLATFORM ("PARISC\0") + +#define SET_PERSONALITY(ex, ibcs2) \ + current->personality = PER_LINUX; \ + current->thread.map_base = DEFAULT_MAP_BASE; \ + current->thread.task_size = DEFAULT_TASK_SIZE \ + +/* + * Fill in general registers in a core dump. This saves pretty + * much the same registers as hp-ux, although in a different order. + * Registers marked # below are not currently saved in pt_regs, so + * we use their current values here. + * + * gr0..gr31 + * sr0..sr7 + * iaoq0..iaoq1 + * iasq0..iasq1 + * cr11 (sar) + * cr19 (iir) + * cr20 (isr) + * cr21 (ior) + * # cr22 (ipsw) + * # cr0 (recovery counter) + * # cr24..cr31 (temporary registers) + * # cr8,9,12,13 (protection IDs) + * # cr10 (scr/ccr) + * # cr15 (ext int enable mask) + * + */ + +#define ELF_CORE_COPY_REGS(dst, pt) \ + memset(dst, 0, sizeof(dst)); /* don't leak any "random" bits */ \ + memcpy(dst + 0, pt->gr, 32 * sizeof(elf_greg_t)); \ + memcpy(dst + 32, pt->sr, 8 * sizeof(elf_greg_t)); \ + memcpy(dst + 40, pt->iaoq, 2 * sizeof(elf_greg_t)); \ + memcpy(dst + 42, pt->iasq, 2 * sizeof(elf_greg_t)); \ + dst[44] = pt->sar; dst[45] = pt->iir; \ + dst[46] = pt->isr; dst[47] = pt->ior; \ + dst[48] = mfctl(22); dst[49] = mfctl(0); \ + dst[50] = mfctl(24); dst[51] = mfctl(25); \ + dst[52] = mfctl(26); dst[53] = mfctl(27); \ + dst[54] = mfctl(28); dst[55] = mfctl(29); \ + dst[56] = mfctl(30); dst[57] = mfctl(31); \ + dst[58] = mfctl( 8); dst[59] = mfctl( 9); \ + dst[60] = mfctl(12); dst[61] = mfctl(13); \ + dst[62] = mfctl(10); dst[63] = mfctl(15); + +#endif /* ! ELF_CLASS */ + +#define ELF_NGREG 80 /* We only need 64 at present, but leave space + for expansion. */ +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +#define ELF_NFPREG 32 +typedef double elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + +struct task_struct; + +extern int dump_task_fpu (struct task_struct *, elf_fpregset_t *); +#define ELF_CORE_COPY_FPREGS(tsk, elf_fpregs) dump_task_fpu(tsk, elf_fpregs) + +struct pt_regs; /* forward declaration... */ + + +#define elf_check_arch(x) ((x)->e_machine == EM_PARISC && (x)->e_ident[EI_CLASS] == ELF_CLASS) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_DATA ELFDATA2MSB +#define ELF_ARCH EM_PARISC +#define ELF_OSABI ELFOSABI_LINUX + +/* %r23 is set by ld.so to a pointer to a function which might be + registered using atexit. This provides a means for the dynamic + linker to call DT_FINI functions for shared libraries that have + been loaded before the code runs. + + So that we can use the same startup file with static executables, + we start programs with a value of 0 to indicate that there is no + such function. */ +#define ELF_PLAT_INIT(_r, load_addr) _r->gr[23] = 0 + +#define USE_ELF_CORE_DUMP +#define ELF_EXEC_PAGESIZE 4096 + +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. + + (2 * TASK_SIZE / 3) turns into something undefined when run through a + 32 bit preprocessor and in some cases results in the kernel trying to map + ld.so to the kernel virtual base. Use a sane value instead. /Jes + */ + +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x01000000) + +/* This yields a mask that user programs can use to figure out what + instruction set this CPU supports. This could be done in user space, + but it's not easy, and we've already done it here. */ + +#define ELF_HWCAP 0 + +#endif diff --git a/arch/parisc/include/asm/emergency-restart.h b/arch/parisc/include/asm/emergency-restart.h new file mode 100644 index 00000000000..108d8c48e42 --- /dev/null +++ b/arch/parisc/include/asm/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/parisc/include/asm/errno.h b/arch/parisc/include/asm/errno.h new file mode 100644 index 00000000000..e2f3ddc796b --- /dev/null +++ b/arch/parisc/include/asm/errno.h @@ -0,0 +1,124 @@ +#ifndef _PARISC_ERRNO_H +#define _PARISC_ERRNO_H + +#include + +#define ENOMSG 35 /* No message of desired type */ +#define EIDRM 36 /* Identifier removed */ +#define ECHRNG 37 /* Channel number out of range */ +#define EL2NSYNC 38 /* Level 2 not synchronized */ +#define EL3HLT 39 /* Level 3 halted */ +#define EL3RST 40 /* Level 3 reset */ +#define ELNRNG 41 /* Link number out of range */ +#define EUNATCH 42 /* Protocol driver not attached */ +#define ENOCSI 43 /* No CSI structure available */ +#define EL2HLT 44 /* Level 2 halted */ +#define EDEADLK 45 /* Resource deadlock would occur */ +#define EDEADLOCK EDEADLK +#define ENOLCK 46 /* No record locks available */ +#define EILSEQ 47 /* Illegal byte sequence */ + +#define ENONET 50 /* Machine is not on the network */ +#define ENODATA 51 /* No data available */ +#define ETIME 52 /* Timer expired */ +#define ENOSR 53 /* Out of streams resources */ +#define ENOSTR 54 /* Device not a stream */ +#define ENOPKG 55 /* Package not installed */ + +#define ENOLINK 57 /* Link has been severed */ +#define EADV 58 /* Advertise error */ +#define ESRMNT 59 /* Srmount error */ +#define ECOMM 60 /* Communication error on send */ +#define EPROTO 61 /* Protocol error */ + +#define EMULTIHOP 64 /* Multihop attempted */ + +#define EDOTDOT 66 /* RFS specific error */ +#define EBADMSG 67 /* Not a data message */ +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale NFS file handle */ +#define EREMOTE 71 /* Object is remote */ +#define EOVERFLOW 72 /* Value too large for defined data type */ + +/* these errnos are defined by Linux but not HPUX. */ + +#define EBADE 160 /* Invalid exchange */ +#define EBADR 161 /* Invalid request descriptor */ +#define EXFULL 162 /* Exchange full */ +#define ENOANO 163 /* No anode */ +#define EBADRQC 164 /* Invalid request code */ +#define EBADSLT 165 /* Invalid slot */ +#define EBFONT 166 /* Bad font file format */ +#define ENOTUNIQ 167 /* Name not unique on network */ +#define EBADFD 168 /* File descriptor in bad state */ +#define EREMCHG 169 /* Remote address changed */ +#define ELIBACC 170 /* Can not access a needed shared library */ +#define ELIBBAD 171 /* Accessing a corrupted shared library */ +#define ELIBSCN 172 /* .lib section in a.out corrupted */ +#define ELIBMAX 173 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 174 /* Cannot exec a shared library directly */ +#define ERESTART 175 /* Interrupted system call should be restarted */ +#define ESTRPIPE 176 /* Streams pipe error */ +#define EUCLEAN 177 /* Structure needs cleaning */ +#define ENOTNAM 178 /* Not a XENIX named type file */ +#define ENAVAIL 179 /* No XENIX semaphores available */ +#define EISNAM 180 /* Is a named type file */ +#define EREMOTEIO 181 /* Remote I/O error */ +#define ENOMEDIUM 182 /* No medium found */ +#define EMEDIUMTYPE 183 /* Wrong medium type */ +#define ENOKEY 184 /* Required key not available */ +#define EKEYEXPIRED 185 /* Key has expired */ +#define EKEYREVOKED 186 /* Key has been revoked */ +#define EKEYREJECTED 187 /* Key was rejected by service */ + +/* We now return you to your regularly scheduled HPUX. */ + +#define ENOSYM 215 /* symbol does not exist in executable */ +#define ENOTSOCK 216 /* Socket operation on non-socket */ +#define EDESTADDRREQ 217 /* Destination address required */ +#define EMSGSIZE 218 /* Message too long */ +#define EPROTOTYPE 219 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 220 /* Protocol not available */ +#define EPROTONOSUPPORT 221 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 222 /* Socket type not supported */ +#define EOPNOTSUPP 223 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 224 /* Protocol family not supported */ +#define EAFNOSUPPORT 225 /* Address family not supported by protocol */ +#define EADDRINUSE 226 /* Address already in use */ +#define EADDRNOTAVAIL 227 /* Cannot assign requested address */ +#define ENETDOWN 228 /* Network is down */ +#define ENETUNREACH 229 /* Network is unreachable */ +#define ENETRESET 230 /* Network dropped connection because of reset */ +#define ECONNABORTED 231 /* Software caused connection abort */ +#define ECONNRESET 232 /* Connection reset by peer */ +#define ENOBUFS 233 /* No buffer space available */ +#define EISCONN 234 /* Transport endpoint is already connected */ +#define ENOTCONN 235 /* Transport endpoint is not connected */ +#define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 237 /* Too many references: cannot splice */ +#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +#define ETIMEDOUT 238 /* Connection timed out */ +#define ECONNREFUSED 239 /* Connection refused */ +#define EREMOTERELEASE 240 /* Remote peer released connection */ +#define EHOSTDOWN 241 /* Host is down */ +#define EHOSTUNREACH 242 /* No route to host */ + +#define EALREADY 244 /* Operation already in progress */ +#define EINPROGRESS 245 /* Operation now in progress */ +#define EWOULDBLOCK 246 /* Operation would block (Linux returns EAGAIN) */ +#define ENOTEMPTY 247 /* Directory not empty */ +#define ENAMETOOLONG 248 /* File name too long */ +#define ELOOP 249 /* Too many symbolic links encountered */ +#define ENOSYS 251 /* Function not implemented */ + +#define ENOTSUP 252 /* Function not implemented (POSIX.4 / HPUX) */ +#define ECANCELLED 253 /* aio request was canceled before complete (POSIX.4 / HPUX) */ +#define ECANCELED ECANCELLED /* SuSv3 and Solaris wants one 'L' */ + +/* for robust mutexes */ +#define EOWNERDEAD 254 /* Owner died */ +#define ENOTRECOVERABLE 255 /* State not recoverable */ + + +#endif diff --git a/arch/parisc/include/asm/fb.h b/arch/parisc/include/asm/fb.h new file mode 100644 index 00000000000..4d503a023ab --- /dev/null +++ b/arch/parisc/include/asm/fb.h @@ -0,0 +1,19 @@ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ + +#include +#include +#include + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE; +} + +static inline int fb_is_primary_device(struct fb_info *info) +{ + return 0; +} + +#endif /* _ASM_FB_H_ */ diff --git a/arch/parisc/include/asm/fcntl.h b/arch/parisc/include/asm/fcntl.h new file mode 100644 index 00000000000..1e1c824764e --- /dev/null +++ b/arch/parisc/include/asm/fcntl.h @@ -0,0 +1,39 @@ +#ifndef _PARISC_FCNTL_H +#define _PARISC_FCNTL_H + +/* open/fcntl - O_SYNC is only implemented on blocks devices and on files + located on an ext2 file system */ +#define O_APPEND 000000010 +#define O_BLKSEEK 000000100 /* HPUX only */ +#define O_CREAT 000000400 /* not fcntl */ +#define O_EXCL 000002000 /* not fcntl */ +#define O_LARGEFILE 000004000 +#define O_SYNC 000100000 +#define O_NONBLOCK 000200004 /* HPUX has separate NDELAY & NONBLOCK */ +#define O_NOCTTY 000400000 /* not fcntl */ +#define O_DSYNC 001000000 /* HPUX only */ +#define O_RSYNC 002000000 /* HPUX only */ +#define O_NOATIME 004000000 +#define O_CLOEXEC 010000000 /* set close_on_exec */ + +#define O_DIRECTORY 000010000 /* must be a directory */ +#define O_NOFOLLOW 000000200 /* don't follow links */ +#define O_INVISIBLE 004000000 /* invisible I/O, for DMAPI/XDSM */ + +#define F_GETLK64 8 +#define F_SETLK64 9 +#define F_SETLKW64 10 + +#define F_GETOWN 11 /* for sockets. */ +#define F_SETOWN 12 /* for sockets. */ +#define F_SETSIG 13 /* for sockets. */ +#define F_GETSIG 14 /* for sockets. */ + +/* for posix fcntl() and lockf() */ +#define F_RDLCK 01 +#define F_WRLCK 02 +#define F_UNLCK 03 + +#include + +#endif diff --git a/arch/parisc/include/asm/fixmap.h b/arch/parisc/include/asm/fixmap.h new file mode 100644 index 00000000000..de3fe3a1822 --- /dev/null +++ b/arch/parisc/include/asm/fixmap.h @@ -0,0 +1,30 @@ +#ifndef _ASM_FIXMAP_H +#define _ASM_FIXMAP_H + +/* + * This file defines the locations of the fixed mappings on parisc. + * + * All of the values in this file are machine virtual addresses. + * + * All of the values in this file must be <4GB (because of assembly + * loading restrictions). If you place this region anywhere above + * __PAGE_OFFSET, you must adjust the memory map accordingly */ + +/* The alias region is used in kernel space to do copy/clear to or + * from areas congruently mapped with user space. It is 8MB large + * and must be 16MB aligned */ +#define TMPALIAS_MAP_START ((__PAGE_OFFSET) - 16*1024*1024) +/* This is the kernel area for all maps (vmalloc, dma etc.) most + * usually, it extends up to TMPALIAS_MAP_START. Virtual addresses + * 0..GATEWAY_PAGE_SIZE are reserved for the gateway page */ +#define KERNEL_MAP_START (GATEWAY_PAGE_SIZE) +#define KERNEL_MAP_END (TMPALIAS_MAP_START) + +#ifndef __ASSEMBLY__ +extern void *vmalloc_start; +#define PCXL_DMA_MAP_SIZE (8*1024*1024) +#define VMALLOC_START ((unsigned long)vmalloc_start) +#define VMALLOC_END (KERNEL_MAP_END) +#endif /*__ASSEMBLY__*/ + +#endif /*_ASM_FIXMAP_H*/ diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h new file mode 100644 index 00000000000..4ca69f558fa --- /dev/null +++ b/arch/parisc/include/asm/floppy.h @@ -0,0 +1,271 @@ +/* Architecture specific parts of the Floppy driver + * + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * Copyright (C) 2000 Matthew Wilcox (willy a debian . org) + * Copyright (C) 2000 Dave Kennedy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#ifndef __ASM_PARISC_FLOPPY_H +#define __ASM_PARISC_FLOPPY_H + +#include + + +/* + * The DMA channel used by the floppy controller cannot access data at + * addresses >= 16MB + * + * Went back to the 1MB limit, as some people had problems with the floppy + * driver otherwise. It doesn't matter much for performance anyway, as most + * floppy accesses go through the track buffer. + */ +#define _CROSS_64KB(a,s,vdma) \ +(!vdma && ((unsigned long)(a)/K_64 != ((unsigned long)(a) + (s) - 1) / K_64)) + +#define CROSS_64KB(a,s) _CROSS_64KB(a,s,use_virtual_dma & 1) + + +#define SW fd_routine[use_virtual_dma&1] +#define CSW fd_routine[can_use_virtual_dma & 1] + + +#define fd_inb(port) readb(port) +#define fd_outb(value, port) writeb(value, port) + +#define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy") +#define fd_free_dma() CSW._free_dma(FLOPPY_DMA) +#define fd_enable_irq() enable_irq(FLOPPY_IRQ) +#define fd_disable_irq() disable_irq(FLOPPY_IRQ) +#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL) +#define fd_get_dma_residue() SW._get_dma_residue(FLOPPY_DMA) +#define fd_dma_mem_alloc(size) SW._dma_mem_alloc(size) +#define fd_dma_setup(addr, size, mode, io) SW._dma_setup(addr, size, mode, io) + +#define FLOPPY_CAN_FALLBACK_ON_NODMA + +static int virtual_dma_count=0; +static int virtual_dma_residue=0; +static char *virtual_dma_addr=0; +static int virtual_dma_mode=0; +static int doing_pdma=0; + +static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) +{ + register unsigned char st; + +#undef TRACE_FLPY_INT + +#ifdef TRACE_FLPY_INT + static int calls=0; + static int bytes=0; + static int dma_wait=0; +#endif + if (!doing_pdma) { + floppy_interrupt(irq, dev_id, regs); + return; + } + +#ifdef TRACE_FLPY_INT + if(!calls) + bytes = virtual_dma_count; +#endif + + { + register int lcount; + register char *lptr = virtual_dma_addr; + + for (lcount = virtual_dma_count; lcount; lcount--) { + st = fd_inb(virtual_dma_port+4) & 0xa0 ; + if (st != 0xa0) + break; + if (virtual_dma_mode) { + fd_outb(*lptr, virtual_dma_port+5); + } else { + *lptr = fd_inb(virtual_dma_port+5); + } + lptr++; + } + virtual_dma_count = lcount; + virtual_dma_addr = lptr; + st = fd_inb(virtual_dma_port+4); + } + +#ifdef TRACE_FLPY_INT + calls++; +#endif + if (st == 0x20) + return; + if (!(st & 0x20)) { + virtual_dma_residue += virtual_dma_count; + virtual_dma_count = 0; +#ifdef TRACE_FLPY_INT + printk("count=%x, residue=%x calls=%d bytes=%d dma_wait=%d\n", + virtual_dma_count, virtual_dma_residue, calls, bytes, + dma_wait); + calls = 0; + dma_wait=0; +#endif + doing_pdma = 0; + floppy_interrupt(irq, dev_id, regs); + return; + } +#ifdef TRACE_FLPY_INT + if (!virtual_dma_count) + dma_wait++; +#endif +} + +static void fd_disable_dma(void) +{ + if(! (can_use_virtual_dma & 1)) + disable_dma(FLOPPY_DMA); + doing_pdma = 0; + virtual_dma_residue += virtual_dma_count; + virtual_dma_count=0; +} + +static int vdma_request_dma(unsigned int dmanr, const char * device_id) +{ + return 0; +} + +static void vdma_nop(unsigned int dummy) +{ +} + + +static int vdma_get_dma_residue(unsigned int dummy) +{ + return virtual_dma_count + virtual_dma_residue; +} + + +static int fd_request_irq(void) +{ + if(can_use_virtual_dma) + return request_irq(FLOPPY_IRQ, floppy_hardint, + IRQF_DISABLED, "floppy", NULL); + else + return request_irq(FLOPPY_IRQ, floppy_interrupt, + IRQF_DISABLED, "floppy", NULL); +} + +static unsigned long dma_mem_alloc(unsigned long size) +{ + return __get_dma_pages(GFP_KERNEL, get_order(size)); +} + + +static unsigned long vdma_mem_alloc(unsigned long size) +{ + return (unsigned long) vmalloc(size); + +} + +#define nodma_mem_alloc(size) vdma_mem_alloc(size) + +static void _fd_dma_mem_free(unsigned long addr, unsigned long size) +{ + if((unsigned int) addr >= (unsigned int) high_memory) + return vfree((void *)addr); + else + free_pages(addr, get_order(size)); +} + +#define fd_dma_mem_free(addr, size) _fd_dma_mem_free(addr, size) + +static void _fd_chose_dma_mode(char *addr, unsigned long size) +{ + if(can_use_virtual_dma == 2) { + if((unsigned int) addr >= (unsigned int) high_memory || + virt_to_bus(addr) >= 0x1000000 || + _CROSS_64KB(addr, size, 0)) + use_virtual_dma = 1; + else + use_virtual_dma = 0; + } else { + use_virtual_dma = can_use_virtual_dma & 1; + } +} + +#define fd_chose_dma_mode(addr, size) _fd_chose_dma_mode(addr, size) + + +static int vdma_dma_setup(char *addr, unsigned long size, int mode, int io) +{ + doing_pdma = 1; + virtual_dma_port = io; + virtual_dma_mode = (mode == DMA_MODE_WRITE); + virtual_dma_addr = addr; + virtual_dma_count = size; + virtual_dma_residue = 0; + return 0; +} + +static int hard_dma_setup(char *addr, unsigned long size, int mode, int io) +{ +#ifdef FLOPPY_SANITY_CHECK + if (CROSS_64KB(addr, size)) { + printk("DMA crossing 64-K boundary %p-%p\n", addr, addr+size); + return -1; + } +#endif + /* actual, physical DMA */ + doing_pdma = 0; + clear_dma_ff(FLOPPY_DMA); + set_dma_mode(FLOPPY_DMA,mode); + set_dma_addr(FLOPPY_DMA,virt_to_bus(addr)); + set_dma_count(FLOPPY_DMA,size); + enable_dma(FLOPPY_DMA); + return 0; +} + +static struct fd_routine_l { + int (*_request_dma)(unsigned int dmanr, const char * device_id); + void (*_free_dma)(unsigned int dmanr); + int (*_get_dma_residue)(unsigned int dummy); + unsigned long (*_dma_mem_alloc) (unsigned long size); + int (*_dma_setup)(char *addr, unsigned long size, int mode, int io); +} fd_routine[] = { + { + request_dma, + free_dma, + get_dma_residue, + dma_mem_alloc, + hard_dma_setup + }, + { + vdma_request_dma, + vdma_nop, + vdma_get_dma_residue, + vdma_mem_alloc, + vdma_dma_setup + } +}; + + +static int FDC1 = 0x3f0; /* Lies. Floppy controller is memory mapped, not io mapped */ +static int FDC2 = -1; + +#define FLOPPY0_TYPE 0 +#define FLOPPY1_TYPE 0 + +#define N_FDC 1 +#define N_DRIVE 8 + +#define EXTRA_FLOPPY_PARAMS + +#endif /* __ASM_PARISC_FLOPPY_H */ diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h new file mode 100644 index 00000000000..0c705c3a55e --- /dev/null +++ b/arch/parisc/include/asm/futex.h @@ -0,0 +1,77 @@ +#ifndef _ASM_PARISC_FUTEX_H +#define _ASM_PARISC_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +/* Non-atomic version */ +static inline int +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) +{ + int err = 0; + int uval; + + /* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is + * our gateway page, and causes no end of trouble... + */ + if (segment_eq(KERNEL_DS, get_fs()) && !uaddr) + return -EFAULT; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + err = get_user(uval, uaddr); + if (err) return -EFAULT; + if (uval == oldval) + err = put_user(newval, uaddr); + if (err) return -EFAULT; + return uval; +} + +#endif /*__KERNEL__*/ +#endif /*_ASM_PARISC_FUTEX_H*/ diff --git a/arch/parisc/include/asm/grfioctl.h b/arch/parisc/include/asm/grfioctl.h new file mode 100644 index 00000000000..671e06042b4 --- /dev/null +++ b/arch/parisc/include/asm/grfioctl.h @@ -0,0 +1,113 @@ +/* Architecture specific parts of HP's STI (framebuffer) driver. + * Structures are HP-UX compatible for XFree86 usage. + * + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * Copyright (C) 2001 Helge Deller (deller a parisc-linux org) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __ASM_PARISC_GRFIOCTL_H +#define __ASM_PARISC_GRFIOCTL_H + +/* upper 32 bits of graphics id (HP/UX identifier) */ + +#define GRFGATOR 8 +#define S9000_ID_S300 9 +#define GRFBOBCAT 9 +#define GRFCATSEYE 9 +#define S9000_ID_98720 10 +#define GRFRBOX 10 +#define S9000_ID_98550 11 +#define GRFFIREEYE 11 +#define S9000_ID_A1096A 12 +#define GRFHYPERION 12 +#define S9000_ID_FRI 13 +#define S9000_ID_98730 14 +#define GRFDAVINCI 14 +#define S9000_ID_98705 0x26C08070 /* Tigershark */ +#define S9000_ID_98736 0x26D148AB +#define S9000_ID_A1659A 0x26D1482A /* CRX 8 plane color (=ELK) */ +#define S9000_ID_ELK S9000_ID_A1659A +#define S9000_ID_A1439A 0x26D148EE /* CRX24 = CRX+ (24-plane color) */ +#define S9000_ID_A1924A 0x26D1488C /* GRX gray-scale */ +#define S9000_ID_ELM S9000_ID_A1924A +#define S9000_ID_98765 0x27480DEF +#define S9000_ID_ELK_768 0x27482101 +#define S9000_ID_STINGER 0x27A4A402 +#define S9000_ID_TIMBER 0x27F12392 /* Bushmaster (710) Graphics */ +#define S9000_ID_TOMCAT 0x27FCCB6D /* dual-headed ELK (Dual CRX) */ +#define S9000_ID_ARTIST 0x2B4DED6D /* Artist (Gecko/712 & 715) onboard Graphics */ +#define S9000_ID_HCRX 0x2BCB015A /* Hyperdrive/Hyperbowl (A4071A) Graphics */ +#define CRX24_OVERLAY_PLANES 0x920825AA /* Overlay planes on CRX24 */ + +#define CRT_ID_ELK_1024 S9000_ID_ELK_768 /* Elk 1024x768 CRX */ +#define CRT_ID_ELK_1280 S9000_ID_A1659A /* Elk 1280x1024 CRX */ +#define CRT_ID_ELK_1024DB 0x27849CA5 /* Elk 1024x768 double buffer */ +#define CRT_ID_ELK_GS S9000_ID_A1924A /* Elk 1280x1024 GreyScale */ +#define CRT_ID_CRX24 S9000_ID_A1439A /* Piranha */ +#define CRT_ID_VISUALIZE_EG 0x2D08C0A7 /* Graffiti, A4450A (built-in B132+/B160L) */ +#define CRT_ID_THUNDER 0x2F23E5FC /* Thunder 1 VISUALIZE 48*/ +#define CRT_ID_THUNDER2 0x2F8D570E /* Thunder 2 VISUALIZE 48 XP*/ +#define CRT_ID_HCRX S9000_ID_HCRX /* Hyperdrive HCRX */ +#define CRT_ID_CRX48Z S9000_ID_STINGER /* Stinger */ +#define CRT_ID_DUAL_CRX S9000_ID_TOMCAT /* Tomcat */ +#define CRT_ID_PVRX S9000_ID_98705 /* Tigershark */ +#define CRT_ID_TIMBER S9000_ID_TIMBER /* Timber (710 builtin) */ +#define CRT_ID_TVRX S9000_ID_98765 /* TVRX (gto/falcon) */ +#define CRT_ID_ARTIST S9000_ID_ARTIST /* Artist */ +#define CRT_ID_SUMMIT 0x2FC1066B /* Summit FX2, FX4, FX6 ... */ +#define CRT_ID_LEGO 0x35ACDA30 /* Lego FX5, FX10 ... */ +#define CRT_ID_PINNACLE 0x35ACDA16 /* Pinnacle FXe */ + +/* structure for ioctl(GCDESCRIBE) */ + +#define gaddr_t unsigned long /* FIXME: PA2.0 (64bit) portable ? */ + +struct grf_fbinfo { + unsigned int id; /* upper 32 bits of graphics id */ + unsigned int mapsize; /* mapped size of framebuffer */ + unsigned int dwidth, dlength;/* x and y sizes */ + unsigned int width, length; /* total x and total y size */ + unsigned int xlen; /* x pitch size */ + unsigned int bpp, bppu; /* bits per pixel and used bpp */ + unsigned int npl, nplbytes; /* # of planes and bytes per plane */ + char name[32]; /* name of the device (from ROM) */ + unsigned int attr; /* attributes */ + gaddr_t fbbase, regbase;/* framebuffer and register base addr */ + gaddr_t regions[6]; /* region bases */ +}; + +#define GCID _IOR('G', 0, int) +#define GCON _IO('G', 1) +#define GCOFF _IO('G', 2) +#define GCAON _IO('G', 3) +#define GCAOFF _IO('G', 4) +#define GCMAP _IOWR('G', 5, int) +#define GCUNMAP _IOWR('G', 6, int) +#define GCMAP_HPUX _IO('G', 5) +#define GCUNMAP_HPUX _IO('G', 6) +#define GCLOCK _IO('G', 7) +#define GCUNLOCK _IO('G', 8) +#define GCLOCK_MINIMUM _IO('G', 9) +#define GCUNLOCK_MINIMUM _IO('G', 10) +#define GCSTATIC_CMAP _IO('G', 11) +#define GCVARIABLE_CMAP _IO('G', 12) +#define GCTERM _IOWR('G',20,int) /* multi-headed Tomcat */ +#define GCDESCRIBE _IOR('G', 21, struct grf_fbinfo) +#define GCFASTLOCK _IO('G', 26) + +#endif /* __ASM_PARISC_GRFIOCTL_H */ + diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h new file mode 100644 index 00000000000..ce93133d511 --- /dev/null +++ b/arch/parisc/include/asm/hardirq.h @@ -0,0 +1,29 @@ +/* hardirq.h: PA-RISC hard IRQ support. + * + * Copyright (C) 2001 Matthew Wilcox + * + * The locking is really quite interesting. There's a cpu-local + * count of how many interrupts are being handled, and a global + * lock. An interrupt can only be serviced if the global lock + * is free. You can't be sure no more interrupts are being + * serviced until you've acquired the lock and then checked + * all the per-cpu interrupt counts are all zero. It's a specialised + * br_lock, and that's exactly how Sparc does it. We don't because + * it's more locking for us. This way is lock-free in the interrupt path. + */ + +#ifndef _PARISC_HARDIRQ_H +#define _PARISC_HARDIRQ_H + +#include +#include + +typedef struct { + unsigned long __softirq_pending; /* set_bit is used on this */ +} ____cacheline_aligned irq_cpustat_t; + +#include /* Standard mappings for irq_cpustat_t above */ + +void ack_bad_irq(unsigned int irq); + +#endif /* _PARISC_HARDIRQ_H */ diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h new file mode 100644 index 00000000000..4e9626836ba --- /dev/null +++ b/arch/parisc/include/asm/hardware.h @@ -0,0 +1,127 @@ +#ifndef _PARISC_HARDWARE_H +#define _PARISC_HARDWARE_H + +#include +#include + +#define HWTYPE_ANY_ID PA_HWTYPE_ANY_ID +#define HVERSION_ANY_ID PA_HVERSION_ANY_ID +#define HVERSION_REV_ANY_ID PA_HVERSION_REV_ANY_ID +#define SVERSION_ANY_ID PA_SVERSION_ANY_ID + +struct hp_hardware { + unsigned short hw_type:5; /* HPHW_xxx */ + unsigned short hversion; + unsigned long sversion:28; + unsigned short opt; + const char name[80]; /* The hardware description */ +}; + +struct parisc_device; + +enum cpu_type { + pcx = 0, /* pa7000 pa 1.0 */ + pcxs = 1, /* pa7000 pa 1.1a */ + pcxt = 2, /* pa7100 pa 1.1b */ + pcxt_ = 3, /* pa7200 (t') pa 1.1c */ + pcxl = 4, /* pa7100lc pa 1.1d */ + pcxl2 = 5, /* pa7300lc pa 1.1e */ + pcxu = 6, /* pa8000 pa 2.0 */ + pcxu_ = 7, /* pa8200 (u+) pa 2.0 */ + pcxw = 8, /* pa8500 pa 2.0 */ + pcxw_ = 9, /* pa8600 (w+) pa 2.0 */ + pcxw2 = 10, /* pa8700 pa 2.0 */ + mako = 11, /* pa8800 pa 2.0 */ + mako2 = 12 /* pa8900 pa 2.0 */ +}; + +extern const char * const cpu_name_version[][2]; /* mapping from enum cpu_type to strings */ + +struct parisc_driver; + +struct io_module { + volatile uint32_t nothing; /* reg 0 */ + volatile uint32_t io_eim; + volatile uint32_t io_dc_adata; + volatile uint32_t io_ii_cdata; + volatile uint32_t io_dma_link; /* reg 4 */ + volatile uint32_t io_dma_command; + volatile uint32_t io_dma_address; + volatile uint32_t io_dma_count; + volatile uint32_t io_flex; /* reg 8 */ + volatile uint32_t io_spa_address; + volatile uint32_t reserved1[2]; + volatile uint32_t io_command; /* reg 12 */ + volatile uint32_t io_status; + volatile uint32_t io_control; + volatile uint32_t io_data; + volatile uint32_t reserved2; /* reg 16 */ + volatile uint32_t chain_addr; + volatile uint32_t sub_mask_clr; + volatile uint32_t reserved3[13]; + volatile uint32_t undefined[480]; + volatile uint32_t unpriv[512]; +}; + +struct bc_module { + volatile uint32_t unused1[12]; + volatile uint32_t io_command; + volatile uint32_t io_status; + volatile uint32_t io_control; + volatile uint32_t unused2[1]; + volatile uint32_t io_err_resp; + volatile uint32_t io_err_info; + volatile uint32_t io_err_req; + volatile uint32_t unused3[11]; + volatile uint32_t io_io_low; + volatile uint32_t io_io_high; +}; + +#define HPHW_NPROC 0 +#define HPHW_MEMORY 1 +#define HPHW_B_DMA 2 +#define HPHW_OBSOLETE 3 +#define HPHW_A_DMA 4 +#define HPHW_A_DIRECT 5 +#define HPHW_OTHER 6 +#define HPHW_BCPORT 7 +#define HPHW_CIO 8 +#define HPHW_CONSOLE 9 +#define HPHW_FIO 10 +#define HPHW_BA 11 +#define HPHW_IOA 12 +#define HPHW_BRIDGE 13 +#define HPHW_FABRIC 14 +#define HPHW_MC 15 +#define HPHW_FAULTY 31 + + +/* hardware.c: */ +extern const char *parisc_hardware_description(struct parisc_device_id *id); +extern enum cpu_type parisc_get_cpu_type(unsigned long hversion); + +struct pci_dev; + +/* drivers.c: */ +extern struct parisc_device *alloc_pa_dev(unsigned long hpa, + struct hardware_path *path); +extern int register_parisc_device(struct parisc_device *dev); +extern int register_parisc_driver(struct parisc_driver *driver); +extern int count_parisc_driver(struct parisc_driver *driver); +extern int unregister_parisc_driver(struct parisc_driver *driver); +extern void walk_central_bus(void); +extern const struct parisc_device *find_pa_parent_type(const struct parisc_device *, int); +extern void print_parisc_devices(void); +extern char *print_pa_hwpath(struct parisc_device *dev, char *path); +extern char *print_pci_hwpath(struct pci_dev *dev, char *path); +extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path); +extern void init_parisc_bus(void); +extern struct device *hwpath_to_device(struct hardware_path *modpath); +extern void device_to_hwpath(struct device *dev, struct hardware_path *path); + + +/* inventory.c: */ +extern void do_memory_inventory(void); +extern void do_device_inventory(void); + +#endif /* _PARISC_HARDWARE_H */ diff --git a/arch/parisc/include/asm/hw_irq.h b/arch/parisc/include/asm/hw_irq.h new file mode 100644 index 00000000000..6707f7df392 --- /dev/null +++ b/arch/parisc/include/asm/hw_irq.h @@ -0,0 +1,8 @@ +#ifndef _ASM_HW_IRQ_H +#define _ASM_HW_IRQ_H + +/* + * linux/include/asm/hw_irq.h + */ + +#endif diff --git a/arch/parisc/include/asm/ide.h b/arch/parisc/include/asm/ide.h new file mode 100644 index 00000000000..c246ef75017 --- /dev/null +++ b/arch/parisc/include/asm/ide.h @@ -0,0 +1,61 @@ +/* + * linux/include/asm-parisc/ide.h + * + * Copyright (C) 1994-1996 Linus Torvalds & authors + */ + +/* + * This file contains the PARISC architecture specific IDE code. + */ + +#ifndef __ASM_PARISC_IDE_H +#define __ASM_PARISC_IDE_H + +#ifdef __KERNEL__ + +#define ide_request_irq(irq,hand,flg,dev,id) request_irq((irq),(hand),(flg),(dev),(id)) +#define ide_free_irq(irq,dev_id) free_irq((irq), (dev_id)) +#define ide_request_region(from,extent,name) request_region((from), (extent), (name)) +#define ide_release_region(from,extent) release_region((from), (extent)) +/* Generic I/O and MEMIO string operations. */ + +#define __ide_insw insw +#define __ide_insl insl +#define __ide_outsw outsw +#define __ide_outsl outsl + +static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count) +{ + while (count--) { + *(u16 *)addr = __raw_readw(port); + addr += 2; + } +} + +static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count) +{ + while (count--) { + *(u32 *)addr = __raw_readl(port); + addr += 4; + } +} + +static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count) +{ + while (count--) { + __raw_writew(*(u16 *)addr, port); + addr += 2; + } +} + +static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count) +{ + while (count--) { + __raw_writel(*(u32 *)addr, port); + addr += 4; + } +} + +#endif /* __KERNEL__ */ + +#endif /* __ASM_PARISC_IDE_H */ diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h new file mode 100644 index 00000000000..55ddb184210 --- /dev/null +++ b/arch/parisc/include/asm/io.h @@ -0,0 +1,293 @@ +#ifndef _ASM_IO_H +#define _ASM_IO_H + +#include +#include + +extern unsigned long parisc_vmerge_boundary; +extern unsigned long parisc_vmerge_max_size; + +#define BIO_VMERGE_BOUNDARY parisc_vmerge_boundary +#define BIO_VMERGE_MAX_SIZE parisc_vmerge_max_size + +#define virt_to_phys(a) ((unsigned long)__pa(a)) +#define phys_to_virt(a) __va(a) +#define virt_to_bus virt_to_phys +#define bus_to_virt phys_to_virt + +static inline unsigned long isa_bus_to_virt(unsigned long addr) { + BUG(); + return 0; +} + +static inline unsigned long isa_virt_to_bus(void *addr) { + BUG(); + return 0; +} + +/* + * Memory mapped I/O + * + * readX()/writeX() do byteswapping and take an ioremapped address + * __raw_readX()/__raw_writeX() don't byteswap and take an ioremapped address. + * gsc_*() don't byteswap and operate on physical addresses; + * eg dev->hpa or 0xfee00000. + */ + +static inline unsigned char gsc_readb(unsigned long addr) +{ + long flags; + unsigned char ret; + + __asm__ __volatile__( + " rsm 2,%0\n" + " ldbx 0(%2),%1\n" + " mtsm %0\n" + : "=&r" (flags), "=r" (ret) : "r" (addr) ); + + return ret; +} + +static inline unsigned short gsc_readw(unsigned long addr) +{ + long flags; + unsigned short ret; + + __asm__ __volatile__( + " rsm 2,%0\n" + " ldhx 0(%2),%1\n" + " mtsm %0\n" + : "=&r" (flags), "=r" (ret) : "r" (addr) ); + + return ret; +} + +static inline unsigned int gsc_readl(unsigned long addr) +{ + u32 ret; + + __asm__ __volatile__( + " ldwax 0(%1),%0\n" + : "=r" (ret) : "r" (addr) ); + + return ret; +} + +static inline unsigned long long gsc_readq(unsigned long addr) +{ + unsigned long long ret; + +#ifdef CONFIG_64BIT + __asm__ __volatile__( + " ldda 0(%1),%0\n" + : "=r" (ret) : "r" (addr) ); +#else + /* two reads may have side effects.. */ + ret = ((u64) gsc_readl(addr)) << 32; + ret |= gsc_readl(addr+4); +#endif + return ret; +} + +static inline void gsc_writeb(unsigned char val, unsigned long addr) +{ + long flags; + __asm__ __volatile__( + " rsm 2,%0\n" + " stbs %1,0(%2)\n" + " mtsm %0\n" + : "=&r" (flags) : "r" (val), "r" (addr) ); +} + +static inline void gsc_writew(unsigned short val, unsigned long addr) +{ + long flags; + __asm__ __volatile__( + " rsm 2,%0\n" + " sths %1,0(%2)\n" + " mtsm %0\n" + : "=&r" (flags) : "r" (val), "r" (addr) ); +} + +static inline void gsc_writel(unsigned int val, unsigned long addr) +{ + __asm__ __volatile__( + " stwas %0,0(%1)\n" + : : "r" (val), "r" (addr) ); +} + +static inline void gsc_writeq(unsigned long long val, unsigned long addr) +{ +#ifdef CONFIG_64BIT + __asm__ __volatile__( + " stda %0,0(%1)\n" + : : "r" (val), "r" (addr) ); +#else + /* two writes may have side effects.. */ + gsc_writel(val >> 32, addr); + gsc_writel(val, addr+4); +#endif +} + +/* + * The standard PCI ioremap interfaces + */ + +extern void __iomem * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); + +/* Most machines react poorly to I/O-space being cacheable... Instead let's + * define ioremap() in terms of ioremap_nocache(). + */ +static inline void __iomem * ioremap(unsigned long offset, unsigned long size) +{ + return __ioremap(offset, size, _PAGE_NO_CACHE); +} +#define ioremap_nocache(off, sz) ioremap((off), (sz)) + +extern void iounmap(const volatile void __iomem *addr); + +static inline unsigned char __raw_readb(const volatile void __iomem *addr) +{ + return (*(volatile unsigned char __force *) (addr)); +} +static inline unsigned short __raw_readw(const volatile void __iomem *addr) +{ + return *(volatile unsigned short __force *) addr; +} +static inline unsigned int __raw_readl(const volatile void __iomem *addr) +{ + return *(volatile unsigned int __force *) addr; +} +static inline unsigned long long __raw_readq(const volatile void __iomem *addr) +{ + return *(volatile unsigned long long __force *) addr; +} + +static inline void __raw_writeb(unsigned char b, volatile void __iomem *addr) +{ + *(volatile unsigned char __force *) addr = b; +} +static inline void __raw_writew(unsigned short b, volatile void __iomem *addr) +{ + *(volatile unsigned short __force *) addr = b; +} +static inline void __raw_writel(unsigned int b, volatile void __iomem *addr) +{ + *(volatile unsigned int __force *) addr = b; +} +static inline void __raw_writeq(unsigned long long b, volatile void __iomem *addr) +{ + *(volatile unsigned long long __force *) addr = b; +} + +/* readb can never be const, so use __fswab instead of le*_to_cpu */ +#define readb(addr) __raw_readb(addr) +#define readw(addr) __fswab16(__raw_readw(addr)) +#define readl(addr) __fswab32(__raw_readl(addr)) +#define readq(addr) __fswab64(__raw_readq(addr)) +#define writeb(b, addr) __raw_writeb(b, addr) +#define writew(b, addr) __raw_writew(cpu_to_le16(b), addr) +#define writel(b, addr) __raw_writel(cpu_to_le32(b), addr) +#define writeq(b, addr) __raw_writeq(cpu_to_le64(b), addr) + +#define readb_relaxed(addr) readb(addr) +#define readw_relaxed(addr) readw(addr) +#define readl_relaxed(addr) readl(addr) +#define readq_relaxed(addr) readq(addr) + +#define mmiowb() do { } while (0) + +void memset_io(volatile void __iomem *addr, unsigned char val, int count); +void memcpy_fromio(void *dst, const volatile void __iomem *src, int count); +void memcpy_toio(volatile void __iomem *dst, const void *src, int count); + +/* Port-space IO */ + +#define inb_p inb +#define inw_p inw +#define inl_p inl +#define outb_p outb +#define outw_p outw +#define outl_p outl + +extern unsigned char eisa_in8(unsigned short port); +extern unsigned short eisa_in16(unsigned short port); +extern unsigned int eisa_in32(unsigned short port); +extern void eisa_out8(unsigned char data, unsigned short port); +extern void eisa_out16(unsigned short data, unsigned short port); +extern void eisa_out32(unsigned int data, unsigned short port); + +#if defined(CONFIG_PCI) +extern unsigned char inb(int addr); +extern unsigned short inw(int addr); +extern unsigned int inl(int addr); + +extern void outb(unsigned char b, int addr); +extern void outw(unsigned short b, int addr); +extern void outl(unsigned int b, int addr); +#elif defined(CONFIG_EISA) +#define inb eisa_in8 +#define inw eisa_in16 +#define inl eisa_in32 +#define outb eisa_out8 +#define outw eisa_out16 +#define outl eisa_out32 +#else +static inline char inb(unsigned long addr) +{ + BUG(); + return -1; +} + +static inline short inw(unsigned long addr) +{ + BUG(); + return -1; +} + +static inline int inl(unsigned long addr) +{ + BUG(); + return -1; +} + +#define outb(x, y) BUG() +#define outw(x, y) BUG() +#define outl(x, y) BUG() +#endif + +/* + * String versions of in/out ops: + */ +extern void insb (unsigned long port, void *dst, unsigned long count); +extern void insw (unsigned long port, void *dst, unsigned long count); +extern void insl (unsigned long port, void *dst, unsigned long count); +extern void outsb (unsigned long port, const void *src, unsigned long count); +extern void outsw (unsigned long port, const void *src, unsigned long count); +extern void outsl (unsigned long port, const void *src, unsigned long count); + + +/* IO Port space is : BBiiii where BB is HBA number. */ +#define IO_SPACE_LIMIT 0x00ffffff + +/* PA machines have an MM I/O space from 0xf0000000-0xffffffff in 32 + * bit mode and from 0xfffffffff0000000-0xfffffffffffffff in 64 bit + * mode (essentially just sign extending. This macro takes in a 32 + * bit I/O address (still with the leading f) and outputs the correct + * value for either 32 or 64 bit mode */ +#define F_EXTEND(x) ((unsigned long)((x) | (0xffffffff00000000ULL))) + +#include + +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +#define xlate_dev_mem_ptr(p) __va(p) + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +#endif diff --git a/arch/parisc/include/asm/ioctl.h b/arch/parisc/include/asm/ioctl.h new file mode 100644 index 00000000000..ec8efa02bed --- /dev/null +++ b/arch/parisc/include/asm/ioctl.h @@ -0,0 +1,44 @@ +/* + * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * Copyright (C) 1999,2003 Matthew Wilcox < willy at debian . org > + * portions from "linux/ioctl.h for Linux" by H.H. Bergman. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef _ASM_PARISC_IOCTL_H +#define _ASM_PARISC_IOCTL_H + +/* ioctl command encoding: 32 bits total, command in lower 16 bits, + * size of the parameter structure in the lower 14 bits of the + * upper 16 bits. + * Encoding the size of the parameter structure in the ioctl request + * is useful for catching programs compiled with old versions + * and to avoid overwriting user space outside the user buffer area. + * The highest 2 bits are reserved for indicating the ``access mode''. + * NOTE: This limits the max parameter size to 16kB -1 ! + */ + +/* + * Direction bits. + */ +#define _IOC_NONE 0U +#define _IOC_WRITE 2U +#define _IOC_READ 1U + +#include + +#endif /* _ASM_PARISC_IOCTL_H */ diff --git a/arch/parisc/include/asm/ioctls.h b/arch/parisc/include/asm/ioctls.h new file mode 100644 index 00000000000..6747fad07a3 --- /dev/null +++ b/arch/parisc/include/asm/ioctls.h @@ -0,0 +1,90 @@ +#ifndef __ARCH_PARISC_IOCTLS_H__ +#define __ARCH_PARISC_IOCTLS_H__ + +#include + +/* 0x54 is just a magic number to make these relatively unique ('T') */ + +#define TCGETS _IOR('T', 16, struct termios) /* TCGETATTR */ +#define TCSETS _IOW('T', 17, struct termios) /* TCSETATTR */ +#define TCSETSW _IOW('T', 18, struct termios) /* TCSETATTRD */ +#define TCSETSF _IOW('T', 19, struct termios) /* TCSETATTRF */ +#define TCGETA _IOR('T', 1, struct termio) +#define TCSETA _IOW('T', 2, struct termio) +#define TCSETAW _IOW('T', 3, struct termio) +#define TCSETAF _IOW('T', 4, struct termio) +#define TCSBRK _IO('T', 5) +#define TCXONC _IO('T', 6) +#define TCFLSH _IO('T', 7) +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E +#define TIOCGPGRP _IOR('T', 30, int) +#define TIOCSPGRP _IOW('T', 29, int) +#define TIOCOUTQ 0x5411 +#define TIOCSTI 0x5412 +#define TIOCGWINSZ 0x5413 +#define TIOCSWINSZ 0x5414 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define FIONREAD 0x541B +#define TIOCINQ FIONREAD +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +#define FIONBIO 0x5421 +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID _IOR('T', 20, int) /* Return the session ID of FD */ +#define TCGETS2 _IOR('T',0x2A, struct termios2) +#define TCSETS2 _IOW('T',0x2B, struct termios2) +#define TCSETSW2 _IOW('T',0x2C, struct termios2) +#define TCSETSF2 _IOW('T',0x2D, struct termios2) +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ + +#define FIONCLEX 0x5450 /* these numbers need to be adjusted. */ +#define FIOCLEX 0x5451 +#define FIOASYNC 0x5452 +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ +#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ +#define TIOCGHAYESESP 0x545E /* Get Hayes ESP configuration */ +#define TIOCSHAYESESP 0x545F /* Set Hayes ESP configuration */ +#define FIOQSIZE 0x5460 /* Get exact space used by quota */ + +#define TIOCSTART 0x5461 +#define TIOCSTOP 0x5462 +#define TIOCSLTC 0x5462 + +/* Used for packet mode */ +#define TIOCPKT_DATA 0 +#define TIOCPKT_FLUSHREAD 1 +#define TIOCPKT_FLUSHWRITE 2 +#define TIOCPKT_STOP 4 +#define TIOCPKT_START 8 +#define TIOCPKT_NOSTOP 16 +#define TIOCPKT_DOSTOP 32 + +#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ + +#endif /* _ASM_PARISC_IOCTLS_H */ diff --git a/arch/parisc/include/asm/ipcbuf.h b/arch/parisc/include/asm/ipcbuf.h new file mode 100644 index 00000000000..bd956c42578 --- /dev/null +++ b/arch/parisc/include/asm/ipcbuf.h @@ -0,0 +1,27 @@ +#ifndef __PARISC_IPCBUF_H__ +#define __PARISC_IPCBUF_H__ + +/* + * The ipc64_perm structure for PA-RISC is almost identical to + * kern_ipc_perm as we have always had 32-bit UIDs and GIDs in the kernel. + * 'seq' has been changed from long to int so that it's the same size + * on 64-bit kernels as on 32-bit ones. + */ + +struct ipc64_perm +{ + key_t key; + uid_t uid; + gid_t gid; + uid_t cuid; + gid_t cgid; + unsigned short int __pad1; + mode_t mode; + unsigned short int __pad2; + unsigned short int seq; + unsigned int __pad3; + unsigned long long int __unused1; + unsigned long long int __unused2; +}; + +#endif /* __PARISC_IPCBUF_H__ */ diff --git a/arch/parisc/include/asm/irq.h b/arch/parisc/include/asm/irq.h new file mode 100644 index 00000000000..399c81981ed --- /dev/null +++ b/arch/parisc/include/asm/irq.h @@ -0,0 +1,57 @@ +/* + * include/asm-parisc/irq.h + * + * Copyright 2005 Matthew Wilcox + */ + +#ifndef _ASM_PARISC_IRQ_H +#define _ASM_PARISC_IRQ_H + +#include +#include + +#define NO_IRQ (-1) + +#ifdef CONFIG_GSC +#define GSC_IRQ_BASE 16 +#define GSC_IRQ_MAX 63 +#define CPU_IRQ_BASE 64 +#else +#define CPU_IRQ_BASE 16 +#endif + +#define TIMER_IRQ (CPU_IRQ_BASE + 0) +#define IPI_IRQ (CPU_IRQ_BASE + 1) +#define CPU_IRQ_MAX (CPU_IRQ_BASE + (BITS_PER_LONG - 1)) + +#define NR_IRQS (CPU_IRQ_MAX + 1) + +static __inline__ int irq_canonicalize(int irq) +{ + return (irq == 2) ? 9 : irq; +} + +struct irq_chip; + +/* + * Some useful "we don't have to do anything here" handlers. Should + * probably be provided by the generic code. + */ +void no_ack_irq(unsigned int irq); +void no_end_irq(unsigned int irq); +void cpu_ack_irq(unsigned int irq); +void cpu_end_irq(unsigned int irq); + +extern int txn_alloc_irq(unsigned int nbits); +extern int txn_claim_irq(int); +extern unsigned int txn_alloc_data(unsigned int); +extern unsigned long txn_alloc_addr(unsigned int); +extern unsigned long txn_affinity_addr(unsigned int irq, int cpu); + +extern int cpu_claim_irq(unsigned int irq, struct irq_chip *, void *); +extern int cpu_check_affinity(unsigned int irq, cpumask_t *dest); + +/* soft power switch support (power.c) */ +extern struct tasklet_struct power_tasklet; + +#endif /* _ASM_PARISC_IRQ_H */ diff --git a/arch/parisc/include/asm/irq_regs.h b/arch/parisc/include/asm/irq_regs.h new file mode 100644 index 00000000000..3dd9c0b7027 --- /dev/null +++ b/arch/parisc/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/kdebug.h b/arch/parisc/include/asm/kdebug.h new file mode 100644 index 00000000000..6ece1b03766 --- /dev/null +++ b/arch/parisc/include/asm/kdebug.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/kmap_types.h b/arch/parisc/include/asm/kmap_types.h new file mode 100644 index 00000000000..806aae3c533 --- /dev/null +++ b/arch/parisc/include/asm/kmap_types.h @@ -0,0 +1,30 @@ +#ifndef _ASM_KMAP_TYPES_H +#define _ASM_KMAP_TYPES_H + + +#ifdef CONFIG_DEBUG_HIGHMEM +# define D(n) __KM_FENCE_##n , +#else +# define D(n) +#endif + +enum km_type { +D(0) KM_BOUNCE_READ, +D(1) KM_SKB_SUNRPC_DATA, +D(2) KM_SKB_DATA_SOFTIRQ, +D(3) KM_USER0, +D(4) KM_USER1, +D(5) KM_BIO_SRC_IRQ, +D(6) KM_BIO_DST_IRQ, +D(7) KM_PTE0, +D(8) KM_PTE1, +D(9) KM_IRQ0, +D(10) KM_IRQ1, +D(11) KM_SOFTIRQ0, +D(12) KM_SOFTIRQ1, +D(13) KM_TYPE_NR +}; + +#undef D + +#endif diff --git a/arch/parisc/include/asm/led.h b/arch/parisc/include/asm/led.h new file mode 100644 index 00000000000..c3405ab9d60 --- /dev/null +++ b/arch/parisc/include/asm/led.h @@ -0,0 +1,42 @@ +#ifndef LED_H +#define LED_H + +#define LED7 0x80 /* top (or furthest right) LED */ +#define LED6 0x40 +#define LED5 0x20 +#define LED4 0x10 +#define LED3 0x08 +#define LED2 0x04 +#define LED1 0x02 +#define LED0 0x01 /* bottom (or furthest left) LED */ + +#define LED_LAN_TX LED0 /* for LAN transmit activity */ +#define LED_LAN_RCV LED1 /* for LAN receive activity */ +#define LED_DISK_IO LED2 /* for disk activity */ +#define LED_HEARTBEAT LED3 /* heartbeat */ + +/* values for pdc_chassis_lcd_info_ret_block.model: */ +#define DISPLAY_MODEL_LCD 0 /* KittyHawk LED or LCD */ +#define DISPLAY_MODEL_NONE 1 /* no LED or LCD */ +#define DISPLAY_MODEL_LASI 2 /* LASI style 8 bit LED */ +#define DISPLAY_MODEL_OLD_ASP 0x7F /* faked: ASP style 8 x 1 bit LED (only very old ASP versions) */ + +#define LED_CMD_REG_NONE 0 /* NULL == no addr for the cmd register */ + +/* register_led_driver() */ +int __init register_led_driver(int model, unsigned long cmd_reg, unsigned long data_reg); + +/* registers the LED regions for procfs */ +void __init register_led_regions(void); + +#ifdef CONFIG_CHASSIS_LCD_LED +/* writes a string to the LCD display (if possible on this h/w) */ +int lcd_print(const char *str); +#else +#define lcd_print(str) +#endif + +/* main LED initialization function (uses PDC) */ +int __init led_init(void); + +#endif /* LED_H */ diff --git a/arch/parisc/include/asm/linkage.h b/arch/parisc/include/asm/linkage.h new file mode 100644 index 00000000000..0b19a7242d0 --- /dev/null +++ b/arch/parisc/include/asm/linkage.h @@ -0,0 +1,31 @@ +#ifndef __ASM_PARISC_LINKAGE_H +#define __ASM_PARISC_LINKAGE_H + +#ifndef __ALIGN +#define __ALIGN .align 4 +#define __ALIGN_STR ".align 4" +#endif + +/* + * In parisc assembly a semicolon marks a comment while a + * exclamation mark is used to separate independent lines. + */ +#ifdef __ASSEMBLY__ + +#define ENTRY(name) \ + .export name !\ + ALIGN !\ +name: + +#ifdef CONFIG_64BIT +#define ENDPROC(name) \ + END(name) +#else +#define ENDPROC(name) \ + .type name, @function !\ + END(name) +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_PARISC_LINKAGE_H */ diff --git a/arch/parisc/include/asm/local.h b/arch/parisc/include/asm/local.h new file mode 100644 index 00000000000..c11c530f74d --- /dev/null +++ b/arch/parisc/include/asm/local.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/machdep.h b/arch/parisc/include/asm/machdep.h new file mode 100644 index 00000000000..a231c97d703 --- /dev/null +++ b/arch/parisc/include/asm/machdep.h @@ -0,0 +1,16 @@ +#ifndef _PARISC_MACHDEP_H +#define _PARISC_MACHDEP_H + +#include + +#define MACH_RESTART 1 +#define MACH_HALT 2 +#define MACH_POWER_ON 3 +#define MACH_POWER_OFF 4 + +extern struct notifier_block *mach_notifier; +extern void pa7300lc_init(void); + +extern void (*cpu_lpmc)(int, struct pt_regs *); + +#endif diff --git a/arch/parisc/include/asm/mc146818rtc.h b/arch/parisc/include/asm/mc146818rtc.h new file mode 100644 index 00000000000..adf41631449 --- /dev/null +++ b/arch/parisc/include/asm/mc146818rtc.h @@ -0,0 +1,9 @@ +/* + * Machine dependent access functions for RTC registers. + */ +#ifndef _ASM_MC146818RTC_H +#define _ASM_MC146818RTC_H + +/* empty include file to satisfy the include in genrtc.c */ + +#endif /* _ASM_MC146818RTC_H */ diff --git a/arch/parisc/include/asm/mckinley.h b/arch/parisc/include/asm/mckinley.h new file mode 100644 index 00000000000..d1ea6f12915 --- /dev/null +++ b/arch/parisc/include/asm/mckinley.h @@ -0,0 +1,9 @@ +#ifndef ASM_PARISC_MCKINLEY_H +#define ASM_PARISC_MCKINLEY_H +#ifdef __KERNEL__ + +/* declared in arch/parisc/kernel/setup.c */ +extern struct proc_dir_entry * proc_mckinley_root; + +#endif /*__KERNEL__*/ +#endif /*ASM_PARISC_MCKINLEY_H*/ diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h new file mode 100644 index 00000000000..defe752cc99 --- /dev/null +++ b/arch/parisc/include/asm/mman.h @@ -0,0 +1,61 @@ +#ifndef __PARISC_MMAN_H__ +#define __PARISC_MMAN_H__ + +#define PROT_READ 0x1 /* page can be read */ +#define PROT_WRITE 0x2 /* page can be written */ +#define PROT_EXEC 0x4 /* page can be executed */ +#define PROT_SEM 0x8 /* page may be used for atomic ops */ +#define PROT_NONE 0x0 /* page can not be accessed */ +#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ +#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ + +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_TYPE 0x03 /* Mask for type of mapping */ +#define MAP_FIXED 0x04 /* Interpret addr exactly */ +#define MAP_ANONYMOUS 0x10 /* don't use a file */ + +#define MAP_DENYWRITE 0x0800 /* ETXTBSY */ +#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */ +#define MAP_LOCKED 0x2000 /* pages are locked */ +#define MAP_NORESERVE 0x4000 /* don't check for reservations */ +#define MAP_GROWSDOWN 0x8000 /* stack-like segment */ +#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */ +#define MAP_NONBLOCK 0x20000 /* do not block on IO */ + +#define MS_SYNC 1 /* synchronous memory sync */ +#define MS_ASYNC 2 /* sync memory asynchronously */ +#define MS_INVALIDATE 4 /* invalidate the caches */ + +#define MCL_CURRENT 1 /* lock all current mappings */ +#define MCL_FUTURE 2 /* lock all future mappings */ + +#define MADV_NORMAL 0 /* no further special treatment */ +#define MADV_RANDOM 1 /* expect random page references */ +#define MADV_SEQUENTIAL 2 /* expect sequential page references */ +#define MADV_WILLNEED 3 /* will need these pages */ +#define MADV_DONTNEED 4 /* don't need these pages */ +#define MADV_SPACEAVAIL 5 /* insure that resources are reserved */ +#define MADV_VPS_PURGE 6 /* Purge pages from VM page cache */ +#define MADV_VPS_INHERIT 7 /* Inherit parents page size */ + +/* common/generic parameters */ +#define MADV_REMOVE 9 /* remove these pages & resources */ +#define MADV_DONTFORK 10 /* don't inherit across fork */ +#define MADV_DOFORK 11 /* do inherit across fork */ + +/* The range 12-64 is reserved for page size specification. */ +#define MADV_4K_PAGES 12 /* Use 4K pages */ +#define MADV_16K_PAGES 14 /* Use 16K pages */ +#define MADV_64K_PAGES 16 /* Use 64K pages */ +#define MADV_256K_PAGES 18 /* Use 256K pages */ +#define MADV_1M_PAGES 20 /* Use 1 Megabyte pages */ +#define MADV_4M_PAGES 22 /* Use 4 Megabyte pages */ +#define MADV_16M_PAGES 24 /* Use 16 Megabyte pages */ +#define MADV_64M_PAGES 26 /* Use 64 Megabyte pages */ + +/* compatibility flags */ +#define MAP_FILE 0 +#define MAP_VARIABLE 0 + +#endif /* __PARISC_MMAN_H__ */ diff --git a/arch/parisc/include/asm/mmu.h b/arch/parisc/include/asm/mmu.h new file mode 100644 index 00000000000..6a310cf8b73 --- /dev/null +++ b/arch/parisc/include/asm/mmu.h @@ -0,0 +1,7 @@ +#ifndef _PARISC_MMU_H_ +#define _PARISC_MMU_H_ + +/* On parisc, we store the space id here */ +typedef unsigned long mm_context_t; + +#endif /* _PARISC_MMU_H_ */ diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h new file mode 100644 index 00000000000..85856c74ad1 --- /dev/null +++ b/arch/parisc/include/asm/mmu_context.h @@ -0,0 +1,75 @@ +#ifndef __PARISC_MMU_CONTEXT_H +#define __PARISC_MMU_CONTEXT_H + +#include +#include +#include +#include +#include +#include + +static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +{ +} + +/* on PA-RISC, we actually have enough contexts to justify an allocator + * for them. prumpf */ + +extern unsigned long alloc_sid(void); +extern void free_sid(unsigned long); + +static inline int +init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + BUG_ON(atomic_read(&mm->mm_users) != 1); + + mm->context = alloc_sid(); + return 0; +} + +static inline void +destroy_context(struct mm_struct *mm) +{ + free_sid(mm->context); + mm->context = 0; +} + +static inline void load_context(mm_context_t context) +{ + mtsp(context, 3); +#if SPACEID_SHIFT == 0 + mtctl(context << 1,8); +#else + mtctl(context >> (SPACEID_SHIFT - 1),8); +#endif +} + +static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) +{ + + if (prev != next) { + mtctl(__pa(next->pgd), 25); + load_context(next->context); + } +} + +#define deactivate_mm(tsk,mm) do { } while (0) + +static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) +{ + /* + * Activate_mm is our one chance to allocate a space id + * for a new mm created in the exec path. There's also + * some lazy tlb stuff, which is currently dead code, but + * we only allocate a space id if one hasn't been allocated + * already, so we should be OK. + */ + + BUG_ON(next == &init_mm); /* Should never happen */ + + if (next->context == 0) + next->context = alloc_sid(); + + switch_mm(prev,next,current); +} +#endif diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h new file mode 100644 index 00000000000..9608d2cf214 --- /dev/null +++ b/arch/parisc/include/asm/mmzone.h @@ -0,0 +1,73 @@ +#ifndef _PARISC_MMZONE_H +#define _PARISC_MMZONE_H + +#ifdef CONFIG_DISCONTIGMEM + +#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */ +extern int npmem_ranges; + +struct node_map_data { + pg_data_t pg_data; +}; + +extern struct node_map_data node_data[]; + +#define NODE_DATA(nid) (&node_data[nid].pg_data) + +#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) +#define node_end_pfn(nid) \ +({ \ + pg_data_t *__pgdat = NODE_DATA(nid); \ + __pgdat->node_start_pfn + __pgdat->node_spanned_pages; \ +}) + +/* We have these possible memory map layouts: + * Astro: 0-3.75, 67.75-68, 4-64 + * zx1: 0-1, 257-260, 4-256 + * Stretch (N-class): 0-2, 4-32, 34-xxx + */ + +/* Since each 1GB can only belong to one region (node), we can create + * an index table for pfn to nid lookup; each entry in pfnnid_map + * represents 1GB, and contains the node that the memory belongs to. */ + +#define PFNNID_SHIFT (30 - PAGE_SHIFT) +#define PFNNID_MAP_MAX 512 /* support 512GB */ +extern unsigned char pfnnid_map[PFNNID_MAP_MAX]; + +#ifndef CONFIG_64BIT +#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT)) +#else +/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */ +#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT)) +#endif + +static inline int pfn_to_nid(unsigned long pfn) +{ + unsigned int i; + unsigned char r; + + if (unlikely(pfn_is_io(pfn))) + return 0; + + i = pfn >> PFNNID_SHIFT; + BUG_ON(i >= sizeof(pfnnid_map) / sizeof(pfnnid_map[0])); + r = pfnnid_map[i]; + BUG_ON(r == 0xff); + + return (int)r; +} + +static inline int pfn_valid(int pfn) +{ + int nid = pfn_to_nid(pfn); + + if (nid >= 0) + return (pfn < node_end_pfn(nid)); + return 0; +} + +#else /* !CONFIG_DISCONTIGMEM */ +#define MAX_PHYSMEM_RANGES 1 +#endif +#endif /* _PARISC_MMZONE_H */ diff --git a/arch/parisc/include/asm/module.h b/arch/parisc/include/asm/module.h new file mode 100644 index 00000000000..c2cb49e934c --- /dev/null +++ b/arch/parisc/include/asm/module.h @@ -0,0 +1,32 @@ +#ifndef _ASM_PARISC_MODULE_H +#define _ASM_PARISC_MODULE_H +/* + * This file contains the parisc architecture specific module code. + */ +#ifdef CONFIG_64BIT +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Addr Elf64_Addr +#define Elf_Rela Elf64_Rela +#else +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Addr Elf32_Addr +#define Elf_Rela Elf32_Rela +#endif + +struct unwind_table; + +struct mod_arch_specific +{ + unsigned long got_offset, got_count, got_max; + unsigned long fdesc_offset, fdesc_count, fdesc_max; + unsigned long stub_offset, stub_count, stub_max; + unsigned long init_stub_offset, init_stub_count, init_stub_max; + int unwind_section; + struct unwind_table *unwind; +}; + +#endif /* _ASM_PARISC_MODULE_H */ diff --git a/arch/parisc/include/asm/msgbuf.h b/arch/parisc/include/asm/msgbuf.h new file mode 100644 index 00000000000..fe88f264941 --- /dev/null +++ b/arch/parisc/include/asm/msgbuf.h @@ -0,0 +1,37 @@ +#ifndef _PARISC_MSGBUF_H +#define _PARISC_MSGBUF_H + +/* + * The msqid64_ds structure for parisc architecture, copied from sparc. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct msqid64_ds { + struct ipc64_perm msg_perm; +#ifndef CONFIG_64BIT + unsigned int __pad1; +#endif + __kernel_time_t msg_stime; /* last msgsnd time */ +#ifndef CONFIG_64BIT + unsigned int __pad2; +#endif + __kernel_time_t msg_rtime; /* last msgrcv time */ +#ifndef CONFIG_64BIT + unsigned int __pad3; +#endif + __kernel_time_t msg_ctime; /* last change time */ + unsigned int msg_cbytes; /* current number of bytes on queue */ + unsigned int msg_qnum; /* number of messages in queue */ + unsigned int msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned int __unused1; + unsigned int __unused2; +}; + +#endif /* _PARISC_MSGBUF_H */ diff --git a/arch/parisc/include/asm/mutex.h b/arch/parisc/include/asm/mutex.h new file mode 100644 index 00000000000..458c1f7fbc1 --- /dev/null +++ b/arch/parisc/include/asm/mutex.h @@ -0,0 +1,9 @@ +/* + * Pull in the generic implementation for the mutex fastpath. + * + * TODO: implement optimized primitives instead, or leave the generic + * implementation in place, or pick the atomic_xchg() based generic + * implementation. (see asm-generic/mutex-xchg.h for details) + */ + +#include diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h new file mode 100644 index 00000000000..c3941f09a87 --- /dev/null +++ b/arch/parisc/include/asm/page.h @@ -0,0 +1,173 @@ +#ifndef _PARISC_PAGE_H +#define _PARISC_PAGE_H + +#include + +#if defined(CONFIG_PARISC_PAGE_SIZE_4KB) +# define PAGE_SHIFT 12 +#elif defined(CONFIG_PARISC_PAGE_SIZE_16KB) +# define PAGE_SHIFT 14 +#elif defined(CONFIG_PARISC_PAGE_SIZE_64KB) +# define PAGE_SHIFT 16 +#else +# error "unknown default kernel page size" +#endif +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + + +#ifndef __ASSEMBLY__ + +#include +#include + +#define clear_page(page) memset((void *)(page), 0, PAGE_SIZE) +#define copy_page(to,from) copy_user_page_asm((void *)(to), (void *)(from)) + +struct page; + +void copy_user_page_asm(void *to, void *from); +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg); +void clear_user_page(void *page, unsigned long vaddr, struct page *pg); + +/* + * These are used to make use of C type-checking.. + */ +#define STRICT_MM_TYPECHECKS +#ifdef STRICT_MM_TYPECHECKS +typedef struct { unsigned long pte; +#if !defined(CONFIG_64BIT) + unsigned long future_flags; + /* XXX: it's possible to remove future_flags and change BITS_PER_PTE_ENTRY + to 2, but then strangely the identical 32bit kernel boots on a + c3000(pa20), but not any longer on a 715(pa11). + Still investigating... HelgeD. + */ +#endif +} pte_t; /* either 32 or 64bit */ + +/* NOTE: even on 64 bits, these entries are __u32 because we allocate + * the pmd and pgd in ZONE_DMA (i.e. under 4GB) */ +typedef struct { __u32 pmd; } pmd_t; +typedef struct { __u32 pgd; } pgd_t; +typedef struct { unsigned long pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +/* These do not work lvalues, so make sure we don't use them as such. */ +#define pmd_val(x) ((x).pmd + 0) +#define pgd_val(x) ((x).pgd + 0) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#define __pmd_val_set(x,n) (x).pmd = (n) +#define __pgd_val_set(x,n) (x).pgd = (n) + +#else +/* + * .. while these make it easier on the compiler + */ +typedef unsigned long pte_t; +typedef __u32 pmd_t; +typedef __u32 pgd_t; +typedef unsigned long pgprot_t; + +#define pte_val(x) (x) +#define pmd_val(x) (x) +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +#define __pte(x) (x) +#define __pmd(x) (x) +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#define __pmd_val_set(x,n) (x) = (n) +#define __pgd_val_set(x,n) (x) = (n) + +#endif /* STRICT_MM_TYPECHECKS */ + +typedef struct page *pgtable_t; + +typedef struct __physmem_range { + unsigned long start_pfn; + unsigned long pages; /* PAGE_SIZE pages */ +} physmem_range_t; + +extern physmem_range_t pmem_ranges[]; +extern int npmem_ranges; + +#endif /* !__ASSEMBLY__ */ + +/* WARNING: The definitions below must match exactly to sizeof(pte_t) + * etc + */ +#ifdef CONFIG_64BIT +#define BITS_PER_PTE_ENTRY 3 +#define BITS_PER_PMD_ENTRY 2 +#define BITS_PER_PGD_ENTRY 2 +#else +#define BITS_PER_PTE_ENTRY 3 +#define BITS_PER_PMD_ENTRY 2 +#define BITS_PER_PGD_ENTRY BITS_PER_PMD_ENTRY +#endif +#define PGD_ENTRY_SIZE (1UL << BITS_PER_PGD_ENTRY) +#define PMD_ENTRY_SIZE (1UL << BITS_PER_PMD_ENTRY) +#define PTE_ENTRY_SIZE (1UL << BITS_PER_PTE_ENTRY) + +#define LINUX_GATEWAY_SPACE 0 + +/* This governs the relationship between virtual and physical addresses. + * If you alter it, make sure to take care of our various fixed mapping + * segments in fixmap.h */ +#ifdef CONFIG_64BIT +#define __PAGE_OFFSET (0x40000000) /* 1GB */ +#else +#define __PAGE_OFFSET (0x10000000) /* 256MB */ +#endif + +#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + +/* The size of the gateway page (we leave lots of room for expansion) */ +#define GATEWAY_PAGE_SIZE 0x4000 + +/* The start of the actual kernel binary---used in vmlinux.lds.S + * Leave some space after __PAGE_OFFSET for detecting kernel null + * ptr derefs */ +#define KERNEL_BINARY_TEXT_START (__PAGE_OFFSET + 0x100000) + +/* These macros don't work for 64-bit C code -- don't allow in C at all */ +#ifdef __ASSEMBLY__ +# define PA(x) ((x)-__PAGE_OFFSET) +# define VA(x) ((x)+__PAGE_OFFSET) +#endif +#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) +#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) + +#ifndef CONFIG_DISCONTIGMEM +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif /* CONFIG_DISCONTIGMEM */ + +#ifdef CONFIG_HUGETLB_PAGE +#define HPAGE_SHIFT 22 /* 4MB (is this fixed?) */ +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#endif + +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) + +#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#include +#include + +#endif /* _PARISC_PAGE_H */ diff --git a/arch/parisc/include/asm/param.h b/arch/parisc/include/asm/param.h new file mode 100644 index 00000000000..32e03d87785 --- /dev/null +++ b/arch/parisc/include/asm/param.h @@ -0,0 +1,22 @@ +#ifndef _ASMPARISC_PARAM_H +#define _ASMPARISC_PARAM_H + +#ifdef __KERNEL__ +#define HZ CONFIG_HZ +#define USER_HZ 100 /* some user API use "ticks" */ +#define CLOCKS_PER_SEC (USER_HZ) /* like times() */ +#endif + +#ifndef HZ +#define HZ 100 +#endif + +#define EXEC_PAGESIZE 4096 + +#ifndef NOGROUP +#define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#endif diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h new file mode 100644 index 00000000000..7aa13f2add7 --- /dev/null +++ b/arch/parisc/include/asm/parisc-device.h @@ -0,0 +1,64 @@ +#ifndef _ASM_PARISC_PARISC_DEVICE_H_ +#define _ASM_PARISC_PARISC_DEVICE_H_ + +#include + +struct parisc_device { + struct resource hpa; /* Hard Physical Address */ + struct parisc_device_id id; + struct parisc_driver *driver; /* Driver for this device */ + char name[80]; /* The hardware description */ + int irq; + int aux_irq; /* Some devices have a second IRQ */ + + char hw_path; /* The module number on this bus */ + unsigned int num_addrs; /* some devices have additional address ranges. */ + unsigned long *addr; /* which will be stored here */ + +#ifdef CONFIG_64BIT + /* parms for pdc_pat_cell_module() call */ + unsigned long pcell_loc; /* Physical Cell location */ + unsigned long mod_index; /* PAT specific - Misc Module info */ + + /* generic info returned from pdc_pat_cell_module() */ + unsigned long mod_info; /* PAT specific - Misc Module info */ + unsigned long pmod_loc; /* physical Module location */ +#endif + u64 dma_mask; /* DMA mask for I/O */ + struct device dev; +}; + +struct parisc_driver { + struct parisc_driver *next; + char *name; + const struct parisc_device_id *id_table; + int (*probe) (struct parisc_device *dev); /* New device discovered */ + int (*remove) (struct parisc_device *dev); + struct device_driver drv; +}; + + +#define to_parisc_device(d) container_of(d, struct parisc_device, dev) +#define to_parisc_driver(d) container_of(d, struct parisc_driver, drv) +#define parisc_parent(d) to_parisc_device(d->dev.parent) + +static inline char *parisc_pathname(struct parisc_device *d) +{ + return d->dev.bus_id; +} + +static inline void +parisc_set_drvdata(struct parisc_device *d, void *p) +{ + dev_set_drvdata(&d->dev, p); +} + +static inline void * +parisc_get_drvdata(struct parisc_device *d) +{ + return dev_get_drvdata(&d->dev); +} + +extern struct bus_type parisc_bus_type; + +#endif /*_ASM_PARISC_PARISC_DEVICE_H_*/ diff --git a/arch/parisc/include/asm/parport.h b/arch/parisc/include/asm/parport.h new file mode 100644 index 00000000000..00d9cc3e7b9 --- /dev/null +++ b/arch/parisc/include/asm/parport.h @@ -0,0 +1,18 @@ +/* + * + * parport.h: ia32-compatible parport initialisation + * + * This file should only be included by drivers/parport/parport_pc.c. + */ +#ifndef _ASM_PARPORT_H +#define _ASM_PARPORT_H 1 + + +static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) +{ + /* nothing ! */ + return 0; +} + + +#endif /* !(_ASM_PARPORT_H) */ diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h new file mode 100644 index 00000000000..4ba868f44a5 --- /dev/null +++ b/arch/parisc/include/asm/pci.h @@ -0,0 +1,294 @@ +#ifndef __ASM_PARISC_PCI_H +#define __ASM_PARISC_PCI_H + +#include + + + +/* +** HP PCI platforms generally support multiple bus adapters. +** (workstations 1-~4, servers 2-~32) +** +** Newer platforms number the busses across PCI bus adapters *sparsely*. +** E.g. 0, 8, 16, ... +** +** Under a PCI bus, most HP platforms support PPBs up to two or three +** levels deep. See "Bit3" product line. +*/ +#define PCI_MAX_BUSSES 256 + + +/* To be used as: mdelay(pci_post_reset_delay); + * + * post_reset is the time the kernel should stall to prevent anyone from + * accessing the PCI bus once #RESET is de-asserted. + * PCI spec somewhere says 1 second but with multi-PCI bus systems, + * this makes the boot time much longer than necessary. + * 20ms seems to work for all the HP PCI implementations to date. + */ +#define pci_post_reset_delay 50 + + +/* +** pci_hba_data (aka H2P_OBJECT in HP/UX) +** +** This is the "common" or "base" data structure which HBA drivers +** (eg Dino or LBA) are required to place at the top of their own +** platform_data structure. I've heard this called "C inheritance" too. +** +** Data needed by pcibios layer belongs here. +*/ +struct pci_hba_data { + void __iomem *base_addr; /* aka Host Physical Address */ + const struct parisc_device *dev; /* device from PA bus walk */ + struct pci_bus *hba_bus; /* primary PCI bus below HBA */ + int hba_num; /* I/O port space access "key" */ + struct resource bus_num; /* PCI bus numbers */ + struct resource io_space; /* PIOP */ + struct resource lmmio_space; /* bus addresses < 4Gb */ + struct resource elmmio_space; /* additional bus addresses < 4Gb */ + struct resource gmmio_space; /* bus addresses > 4Gb */ + + /* NOTE: Dino code assumes it can use *all* of the lmmio_space, + * elmmio_space and gmmio_space as a contiguous array of + * resources. This #define represents the array size */ + #define DINO_MAX_LMMIO_RESOURCES 3 + + unsigned long lmmio_space_offset; /* CPU view - PCI view */ + void * iommu; /* IOMMU this device is under */ + /* REVISIT - spinlock to protect resources? */ + + #define HBA_NAME_SIZE 16 + char io_name[HBA_NAME_SIZE]; + char lmmio_name[HBA_NAME_SIZE]; + char elmmio_name[HBA_NAME_SIZE]; + char gmmio_name[HBA_NAME_SIZE]; +}; + +#define HBA_DATA(d) ((struct pci_hba_data *) (d)) + +/* +** We support 2^16 I/O ports per HBA. These are set up in the form +** 0xbbxxxx, where bb is the bus number and xxxx is the I/O port +** space address. +*/ +#define HBA_PORT_SPACE_BITS 16 + +#define HBA_PORT_BASE(h) ((h) << HBA_PORT_SPACE_BITS) +#define HBA_PORT_SPACE_SIZE (1UL << HBA_PORT_SPACE_BITS) + +#define PCI_PORT_HBA(a) ((a) >> HBA_PORT_SPACE_BITS) +#define PCI_PORT_ADDR(a) ((a) & (HBA_PORT_SPACE_SIZE - 1)) + +#ifdef CONFIG_64BIT +#define PCI_F_EXTEND 0xffffffff00000000UL +#define PCI_IS_LMMIO(hba,a) pci_is_lmmio(hba,a) + +/* We need to know if an address is LMMMIO or GMMIO. + * LMMIO requires mangling and GMMIO we must use as-is. + */ +static __inline__ int pci_is_lmmio(struct pci_hba_data *hba, unsigned long a) +{ + return(((a) & PCI_F_EXTEND) == PCI_F_EXTEND); +} + +/* +** Convert between PCI (IO_VIEW) addresses and processor (PA_VIEW) addresses. +** See pci.c for more conversions used by Generic PCI code. +** +** Platform characteristics/firmware guarantee that +** (1) PA_VIEW - IO_VIEW = lmmio_offset for both LMMIO and ELMMIO +** (2) PA_VIEW == IO_VIEW for GMMIO +*/ +#define PCI_BUS_ADDR(hba,a) (PCI_IS_LMMIO(hba,a) \ + ? ((a) - hba->lmmio_space_offset) /* mangle LMMIO */ \ + : (a)) /* GMMIO */ +#define PCI_HOST_ADDR(hba,a) (((a) & PCI_F_EXTEND) == 0 \ + ? (a) + hba->lmmio_space_offset \ + : (a)) + +#else /* !CONFIG_64BIT */ + +#define PCI_BUS_ADDR(hba,a) (a) +#define PCI_HOST_ADDR(hba,a) (a) +#define PCI_F_EXTEND 0UL +#define PCI_IS_LMMIO(hba,a) (1) /* 32-bit doesn't support GMMIO */ + +#endif /* !CONFIG_64BIT */ + +/* +** KLUGE: linux/pci.h include asm/pci.h BEFORE declaring struct pci_bus +** (This eliminates some of the warnings). +*/ +struct pci_bus; +struct pci_dev; + +/* + * If the PCI device's view of memory is the same as the CPU's view of memory, + * PCI_DMA_BUS_IS_PHYS is true. The networking and block device layers use + * this boolean for bounce buffer decisions. + */ +#ifdef CONFIG_PA20 +/* All PA-2.0 machines have an IOMMU. */ +#define PCI_DMA_BUS_IS_PHYS 0 +#define parisc_has_iommu() do { } while (0) +#else + +#if defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA) +extern int parisc_bus_is_phys; /* in arch/parisc/kernel/setup.c */ +#define PCI_DMA_BUS_IS_PHYS parisc_bus_is_phys +#define parisc_has_iommu() do { parisc_bus_is_phys = 0; } while (0) +#else +#define PCI_DMA_BUS_IS_PHYS 1 +#define parisc_has_iommu() do { } while (0) +#endif + +#endif /* !CONFIG_PA20 */ + + +/* +** Most PCI devices (eg Tulip, NCR720) also export the same registers +** to both MMIO and I/O port space. Due to poor performance of I/O Port +** access under HP PCI bus adapters, strongly recommend the use of MMIO +** address space. +** +** While I'm at it more PA programming notes: +** +** 1) MMIO stores (writes) are posted operations. This means the processor +** gets an "ACK" before the write actually gets to the device. A read +** to the same device (or typically the bus adapter above it) will +** force in-flight write transaction(s) out to the targeted device +** before the read can complete. +** +** 2) The Programmed I/O (PIO) data may not always be strongly ordered with +** respect to DMA on all platforms. Ie PIO data can reach the processor +** before in-flight DMA reaches memory. Since most SMP PA platforms +** are I/O coherent, it generally doesn't matter...but sometimes +** it does. +** +** I've helped device driver writers debug both types of problems. +*/ +struct pci_port_ops { + u8 (*inb) (struct pci_hba_data *hba, u16 port); + u16 (*inw) (struct pci_hba_data *hba, u16 port); + u32 (*inl) (struct pci_hba_data *hba, u16 port); + void (*outb) (struct pci_hba_data *hba, u16 port, u8 data); + void (*outw) (struct pci_hba_data *hba, u16 port, u16 data); + void (*outl) (struct pci_hba_data *hba, u16 port, u32 data); +}; + + +struct pci_bios_ops { + void (*init)(void); + void (*fixup_bus)(struct pci_bus *bus); +}; + +/* pci_unmap_{single,page} is not a nop, thus... */ +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME; +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + __u32 LEN_NAME; +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) + +/* +** Stuff declared in arch/parisc/kernel/pci.c +*/ +extern struct pci_port_ops *pci_port; +extern struct pci_bios_ops *pci_bios; + +#ifdef CONFIG_PCI +extern void pcibios_register_hba(struct pci_hba_data *); +extern void pcibios_set_master(struct pci_dev *); +#else +static inline void pcibios_register_hba(struct pci_hba_data *x) +{ +} +#endif + +/* + * pcibios_assign_all_busses() is used in drivers/pci/pci.c:pci_do_scan_bus() + * 0 == check if bridge is numbered before re-numbering. + * 1 == pci_do_scan_bus() should automatically number all PCI-PCI bridges. + * + * We *should* set this to zero for "legacy" platforms and one + * for PAT platforms. + * + * But legacy platforms also need to renumber the busses below a Host + * Bus controller. Adding a 4-port Tulip card on the first PCI root + * bus of a C200 resulted in the secondary bus being numbered as 1. + * The second PCI host bus controller's root bus had already been + * assigned bus number 1 by firmware and sysfs complained. + * + * Firmware isn't doing anything wrong here since each controller + * is its own PCI domain. It's simpler and easier for us to renumber + * the busses rather than treat each Dino as a separate PCI domain. + * Eventually, we may want to introduce PCI domains for Superdome or + * rp7420/8420 boxes and then revisit this issue. + */ +#define pcibios_assign_all_busses() (1) +#define pcibios_scan_all_fns(a, b) (0) + +#define PCIBIOS_MIN_IO 0x10 +#define PCIBIOS_MIN_MEM 0x1000 /* NBPG - but pci/setup-res.c dies */ + +/* export the pci_ DMA API in terms of the dma_ one */ +#include + +#ifdef CONFIG_PCI +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + unsigned long cacheline_size; + u8 byte; + + pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte); + if (byte == 0) + cacheline_size = 1024; + else + cacheline_size = (int) byte * 4; + + *strat = PCI_DMA_BURST_MULTIPLE; + *strategy_parameter = cacheline_size; +} +#endif + +extern void +pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, + struct resource *res); + +extern void +pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, + struct pci_bus_region *region); + +static inline struct resource * +pcibios_select_root(struct pci_dev *pdev, struct resource *res) +{ + struct resource *root = NULL; + + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + + return root; +} + +static inline void pcibios_penalize_isa_irq(int irq, int active) +{ + /* We don't need to penalize isa irq's */ +} + +static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) +{ + return channel ? 15 : 14; +} + +#endif /* __ASM_PARISC_PCI_H */ diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h new file mode 100644 index 00000000000..46b75f9cce5 --- /dev/null +++ b/arch/parisc/include/asm/pdc.h @@ -0,0 +1,760 @@ +#ifndef _PARISC_PDC_H +#define _PARISC_PDC_H + +/* + * PDC return values ... + * All PDC calls return a subset of these errors. + */ + +#define PDC_WARN 3 /* Call completed with a warning */ +#define PDC_REQ_ERR_1 2 /* See above */ +#define PDC_REQ_ERR_0 1 /* Call would generate a requestor error */ +#define PDC_OK 0 /* Call completed successfully */ +#define PDC_BAD_PROC -1 /* Called non-existent procedure*/ +#define PDC_BAD_OPTION -2 /* Called with non-existent option */ +#define PDC_ERROR -3 /* Call could not complete without an error */ +#define PDC_NE_MOD -5 /* Module not found */ +#define PDC_NE_CELL_MOD -7 /* Cell module not found */ +#define PDC_INVALID_ARG -10 /* Called with an invalid argument */ +#define PDC_BUS_POW_WARN -12 /* Call could not complete in allowed power budget */ +#define PDC_NOT_NARROW -17 /* Narrow mode not supported */ + +/* + * PDC entry points... + */ + +#define PDC_POW_FAIL 1 /* perform a power-fail */ +#define PDC_POW_FAIL_PREPARE 0 /* prepare for powerfail */ + +#define PDC_CHASSIS 2 /* PDC-chassis functions */ +#define PDC_CHASSIS_DISP 0 /* update chassis display */ +#define PDC_CHASSIS_WARN 1 /* return chassis warnings */ +#define PDC_CHASSIS_DISPWARN 2 /* update&return chassis status */ +#define PDC_RETURN_CHASSIS_INFO 128 /* HVERSION dependent: return chassis LED/LCD info */ + +#define PDC_PIM 3 /* Get PIM data */ +#define PDC_PIM_HPMC 0 /* Transfer HPMC data */ +#define PDC_PIM_RETURN_SIZE 1 /* Get Max buffer needed for PIM*/ +#define PDC_PIM_LPMC 2 /* Transfer HPMC data */ +#define PDC_PIM_SOFT_BOOT 3 /* Transfer Soft Boot data */ +#define PDC_PIM_TOC 4 /* Transfer TOC data */ + +#define PDC_MODEL 4 /* PDC model information call */ +#define PDC_MODEL_INFO 0 /* returns information */ +#define PDC_MODEL_BOOTID 1 /* set the BOOT_ID */ +#define PDC_MODEL_VERSIONS 2 /* returns cpu-internal versions*/ +#define PDC_MODEL_SYSMODEL 3 /* return system model info */ +#define PDC_MODEL_ENSPEC 4 /* enable specific option */ +#define PDC_MODEL_DISPEC 5 /* disable specific option */ +#define PDC_MODEL_CPU_ID 6 /* returns cpu-id (only newer machines!) */ +#define PDC_MODEL_CAPABILITIES 7 /* returns OS32/OS64-flags */ +/* Values for PDC_MODEL_CAPABILITIES non-equivalent virtual aliasing support */ +#define PDC_MODEL_IOPDIR_FDC (1 << 2) +#define PDC_MODEL_NVA_MASK (3 << 4) +#define PDC_MODEL_NVA_SUPPORTED (0 << 4) +#define PDC_MODEL_NVA_SLOW (1 << 4) +#define PDC_MODEL_NVA_UNSUPPORTED (3 << 4) +#define PDC_MODEL_GET_BOOT__OP 8 /* returns boot test options */ +#define PDC_MODEL_SET_BOOT__OP 9 /* set boot test options */ + +#define PA89_INSTRUCTION_SET 0x4 /* capatibilies returned */ +#define PA90_INSTRUCTION_SET 0x8 + +#define PDC_CACHE 5 /* return/set cache (& TLB) info*/ +#define PDC_CACHE_INFO 0 /* returns information */ +#define PDC_CACHE_SET_COH 1 /* set coherence state */ +#define PDC_CACHE_RET_SPID 2 /* returns space-ID bits */ + +#define PDC_HPA 6 /* return HPA of processor */ +#define PDC_HPA_PROCESSOR 0 +#define PDC_HPA_MODULES 1 + +#define PDC_COPROC 7 /* Co-Processor (usually FP unit(s)) */ +#define PDC_COPROC_CFG 0 /* Co-Processor Cfg (FP unit(s) enabled?) */ + +#define PDC_IODC 8 /* talk to IODC */ +#define PDC_IODC_READ 0 /* read IODC entry point */ +/* PDC_IODC_RI_ * INDEX parameter of PDC_IODC_READ */ +#define PDC_IODC_RI_DATA_BYTES 0 /* IODC Data Bytes */ +/* 1, 2 obsolete - HVERSION dependent*/ +#define PDC_IODC_RI_INIT 3 /* Initialize module */ +#define PDC_IODC_RI_IO 4 /* Module input/output */ +#define PDC_IODC_RI_SPA 5 /* Module input/output */ +#define PDC_IODC_RI_CONFIG 6 /* Module input/output */ +/* 7 obsolete - HVERSION dependent */ +#define PDC_IODC_RI_TEST 8 /* Module input/output */ +#define PDC_IODC_RI_TLB 9 /* Module input/output */ +#define PDC_IODC_NINIT 2 /* non-destructive init */ +#define PDC_IODC_DINIT 3 /* destructive init */ +#define PDC_IODC_MEMERR 4 /* check for memory errors */ +#define PDC_IODC_INDEX_DATA 0 /* get first 16 bytes from mod IODC */ +#define PDC_IODC_BUS_ERROR -4 /* bus error return value */ +#define PDC_IODC_INVALID_INDEX -5 /* invalid index return value */ +#define PDC_IODC_COUNT -6 /* count is too small */ + +#define PDC_TOD 9 /* time-of-day clock (TOD) */ +#define PDC_TOD_READ 0 /* read TOD */ +#define PDC_TOD_WRITE 1 /* write TOD */ + + +#define PDC_STABLE 10 /* stable storage (sprockets) */ +#define PDC_STABLE_READ 0 +#define PDC_STABLE_WRITE 1 +#define PDC_STABLE_RETURN_SIZE 2 +#define PDC_STABLE_VERIFY_CONTENTS 3 +#define PDC_STABLE_INITIALIZE 4 + +#define PDC_NVOLATILE 11 /* often not implemented */ + +#define PDC_ADD_VALID 12 /* Memory validation PDC call */ +#define PDC_ADD_VALID_VERIFY 0 /* Make PDC_ADD_VALID verify region */ + +#define PDC_INSTR 15 /* get instr to invoke PDCE_CHECK() */ + +#define PDC_PROC 16 /* (sprockets) */ + +#define PDC_CONFIG 16 /* (sprockets) */ +#define PDC_CONFIG_DECONFIG 0 +#define PDC_CONFIG_DRECONFIG 1 +#define PDC_CONFIG_DRETURN_CONFIG 2 + +#define PDC_BLOCK_TLB 18 /* manage hardware block-TLB */ +#define PDC_BTLB_INFO 0 /* returns parameter */ +#define PDC_BTLB_INSERT 1 /* insert BTLB entry */ +#define PDC_BTLB_PURGE 2 /* purge BTLB entries */ +#define PDC_BTLB_PURGE_ALL 3 /* purge all BTLB entries */ + +#define PDC_TLB 19 /* manage hardware TLB miss handling */ +#define PDC_TLB_INFO 0 /* returns parameter */ +#define PDC_TLB_SETUP 1 /* set up miss handling */ + +#define PDC_MEM 20 /* Manage memory */ +#define PDC_MEM_MEMINFO 0 +#define PDC_MEM_ADD_PAGE 1 +#define PDC_MEM_CLEAR_PDT 2 +#define PDC_MEM_READ_PDT 3 +#define PDC_MEM_RESET_CLEAR 4 +#define PDC_MEM_GOODMEM 5 +#define PDC_MEM_TABLE 128 /* Non contig mem map (sprockets) */ +#define PDC_MEM_RETURN_ADDRESS_TABLE PDC_MEM_TABLE +#define PDC_MEM_GET_MEMORY_SYSTEM_TABLES_SIZE 131 +#define PDC_MEM_GET_MEMORY_SYSTEM_TABLES 132 +#define PDC_MEM_GET_PHYSICAL_LOCATION_FROM_MEMORY_ADDRESS 133 + +#define PDC_MEM_RET_SBE_REPLACED 5 /* PDC_MEM return values */ +#define PDC_MEM_RET_DUPLICATE_ENTRY 4 +#define PDC_MEM_RET_BUF_SIZE_SMALL 1 +#define PDC_MEM_RET_PDT_FULL -11 +#define PDC_MEM_RET_INVALID_PHYSICAL_LOCATION ~0ULL + +#define PDC_PSW 21 /* Get/Set default System Mask */ +#define PDC_PSW_MASK 0 /* Return mask */ +#define PDC_PSW_GET_DEFAULTS 1 /* Return defaults */ +#define PDC_PSW_SET_DEFAULTS 2 /* Set default */ +#define PDC_PSW_ENDIAN_BIT 1 /* set for big endian */ +#define PDC_PSW_WIDE_BIT 2 /* set for wide mode */ + +#define PDC_SYSTEM_MAP 22 /* find system modules */ +#define PDC_FIND_MODULE 0 +#define PDC_FIND_ADDRESS 1 +#define PDC_TRANSLATE_PATH 2 + +#define PDC_SOFT_POWER 23 /* soft power switch */ +#define PDC_SOFT_POWER_INFO 0 /* return info about the soft power switch */ +#define PDC_SOFT_POWER_ENABLE 1 /* enable/disable soft power switch */ + + +/* HVERSION dependent */ + +/* The PDC_MEM_MAP calls */ +#define PDC_MEM_MAP 128 /* on s700: return page info */ +#define PDC_MEM_MAP_HPA 0 /* returns hpa of a module */ + +#define PDC_EEPROM 129 /* EEPROM access */ +#define PDC_EEPROM_READ_WORD 0 +#define PDC_EEPROM_WRITE_WORD 1 +#define PDC_EEPROM_READ_BYTE 2 +#define PDC_EEPROM_WRITE_BYTE 3 +#define PDC_EEPROM_EEPROM_PASSWORD -1000 + +#define PDC_NVM 130 /* NVM (non-volatile memory) access */ +#define PDC_NVM_READ_WORD 0 +#define PDC_NVM_WRITE_WORD 1 +#define PDC_NVM_READ_BYTE 2 +#define PDC_NVM_WRITE_BYTE 3 + +#define PDC_SEED_ERROR 132 /* (sprockets) */ + +#define PDC_IO 135 /* log error info, reset IO system */ +#define PDC_IO_READ_AND_CLEAR_ERRORS 0 +#define PDC_IO_RESET 1 +#define PDC_IO_RESET_DEVICES 2 +/* sets bits 6&7 (little endian) of the HcControl Register */ +#define PDC_IO_USB_SUSPEND 0xC000000000000000 +#define PDC_IO_EEPROM_IO_ERR_TABLE_FULL -5 /* return value */ +#define PDC_IO_NO_SUSPEND -6 /* return value */ + +#define PDC_BROADCAST_RESET 136 /* reset all processors */ +#define PDC_DO_RESET 0 /* option: perform a broadcast reset */ +#define PDC_DO_FIRM_TEST_RESET 1 /* Do broadcast reset with bitmap */ +#define PDC_BR_RECONFIGURATION 2 /* reset w/reconfiguration */ +#define PDC_FIRM_TEST_MAGIC 0xab9ec36fUL /* for this reboot only */ + +#define PDC_LAN_STATION_ID 138 /* Hversion dependent mechanism for */ +#define PDC_LAN_STATION_ID_READ 0 /* getting the lan station address */ + +#define PDC_LAN_STATION_ID_SIZE 6 + +#define PDC_CHECK_RANGES 139 /* (sprockets) */ + +#define PDC_NV_SECTIONS 141 /* (sprockets) */ + +#define PDC_PERFORMANCE 142 /* performance monitoring */ + +#define PDC_SYSTEM_INFO 143 /* system information */ +#define PDC_SYSINFO_RETURN_INFO_SIZE 0 +#define PDC_SYSINFO_RRETURN_SYS_INFO 1 +#define PDC_SYSINFO_RRETURN_ERRORS 2 +#define PDC_SYSINFO_RRETURN_WARNINGS 3 +#define PDC_SYSINFO_RETURN_REVISIONS 4 +#define PDC_SYSINFO_RRETURN_DIAGNOSE 5 +#define PDC_SYSINFO_RRETURN_HV_DIAGNOSE 1005 + +#define PDC_RDR 144 /* (sprockets) */ +#define PDC_RDR_READ_BUFFER 0 +#define PDC_RDR_READ_SINGLE 1 +#define PDC_RDR_WRITE_SINGLE 2 + +#define PDC_INTRIGUE 145 /* (sprockets) */ +#define PDC_INTRIGUE_WRITE_BUFFER 0 +#define PDC_INTRIGUE_GET_SCRATCH_BUFSIZE 1 +#define PDC_INTRIGUE_START_CPU_COUNTERS 2 +#define PDC_INTRIGUE_STOP_CPU_COUNTERS 3 + +#define PDC_STI 146 /* STI access */ +/* same as PDC_PCI_XXX values (see below) */ + +/* Legacy PDC definitions for same stuff */ +#define PDC_PCI_INDEX 147 +#define PDC_PCI_INTERFACE_INFO 0 +#define PDC_PCI_SLOT_INFO 1 +#define PDC_PCI_INFLIGHT_BYTES 2 +#define PDC_PCI_READ_CONFIG 3 +#define PDC_PCI_WRITE_CONFIG 4 +#define PDC_PCI_READ_PCI_IO 5 +#define PDC_PCI_WRITE_PCI_IO 6 +#define PDC_PCI_READ_CONFIG_DELAY 7 +#define PDC_PCI_UPDATE_CONFIG_DELAY 8 +#define PDC_PCI_PCI_PATH_TO_PCI_HPA 9 +#define PDC_PCI_PCI_HPA_TO_PCI_PATH 10 +#define PDC_PCI_PCI_PATH_TO_PCI_BUS 11 +#define PDC_PCI_PCI_RESERVED 12 +#define PDC_PCI_PCI_INT_ROUTE_SIZE 13 +#define PDC_PCI_GET_INT_TBL_SIZE PDC_PCI_PCI_INT_ROUTE_SIZE +#define PDC_PCI_PCI_INT_ROUTE 14 +#define PDC_PCI_GET_INT_TBL PDC_PCI_PCI_INT_ROUTE +#define PDC_PCI_READ_MON_TYPE 15 +#define PDC_PCI_WRITE_MON_TYPE 16 + + +/* Get SCSI Interface Card info: SDTR, SCSI ID, mode (SE vs LVD) */ +#define PDC_INITIATOR 163 +#define PDC_GET_INITIATOR 0 +#define PDC_SET_INITIATOR 1 +#define PDC_DELETE_INITIATOR 2 +#define PDC_RETURN_TABLE_SIZE 3 +#define PDC_RETURN_TABLE 4 + +#define PDC_LINK 165 /* (sprockets) */ +#define PDC_LINK_PCI_ENTRY_POINTS 0 /* list (Arg1) = 0 */ +#define PDC_LINK_USB_ENTRY_POINTS 1 /* list (Arg1) = 1 */ + +/* cl_class + * page 3-33 of IO-Firmware ARS + * IODC ENTRY_INIT(Search first) RET[1] + */ +#define CL_NULL 0 /* invalid */ +#define CL_RANDOM 1 /* random access (as disk) */ +#define CL_SEQU 2 /* sequential access (as tape) */ +#define CL_DUPLEX 7 /* full-duplex point-to-point (RS-232, Net) */ +#define CL_KEYBD 8 /* half-duplex console (HIL Keyboard) */ +#define CL_DISPL 9 /* half-duplex console (display) */ +#define CL_FC 10 /* FiberChannel access media */ + +/* IODC ENTRY_INIT() */ +#define ENTRY_INIT_SRCH_FRST 2 +#define ENTRY_INIT_SRCH_NEXT 3 +#define ENTRY_INIT_MOD_DEV 4 +#define ENTRY_INIT_DEV 5 +#define ENTRY_INIT_MOD 6 +#define ENTRY_INIT_MSG 9 + +/* IODC ENTRY_IO() */ +#define ENTRY_IO_BOOTIN 0 +#define ENTRY_IO_BOOTOUT 1 +#define ENTRY_IO_CIN 2 +#define ENTRY_IO_COUT 3 +#define ENTRY_IO_CLOSE 4 +#define ENTRY_IO_GETMSG 9 +#define ENTRY_IO_BBLOCK_IN 16 +#define ENTRY_IO_BBLOCK_OUT 17 + +/* IODC ENTRY_SPA() */ + +/* IODC ENTRY_CONFIG() */ + +/* IODC ENTRY_TEST() */ + +/* IODC ENTRY_TLB() */ + +/* constants for OS (NVM...) */ +#define OS_ID_NONE 0 /* Undefined OS ID */ +#define OS_ID_HPUX 1 /* HP-UX OS */ +#define OS_ID_MPEXL 2 /* MPE XL OS */ +#define OS_ID_OSF 3 /* OSF OS */ +#define OS_ID_HPRT 4 /* HP-RT OS */ +#define OS_ID_NOVEL 5 /* NOVELL OS */ +#define OS_ID_LINUX 6 /* Linux */ + + +/* constants for PDC_CHASSIS */ +#define OSTAT_OFF 0 +#define OSTAT_FLT 1 +#define OSTAT_TEST 2 +#define OSTAT_INIT 3 +#define OSTAT_SHUT 4 +#define OSTAT_WARN 5 +#define OSTAT_RUN 6 +#define OSTAT_ON 7 + +/* Page Zero constant offsets used by the HPMC handler */ +#define BOOT_CONSOLE_HPA_OFFSET 0x3c0 +#define BOOT_CONSOLE_SPA_OFFSET 0x3c4 +#define BOOT_CONSOLE_PATH_OFFSET 0x3a8 + +/* size of the pdc_result buffer for firmware.c */ +#define NUM_PDC_RESULT 32 + +#if !defined(__ASSEMBLY__) +#ifdef __KERNEL__ + +#include + +extern int pdc_type; + +/* Values for pdc_type */ +#define PDC_TYPE_ILLEGAL -1 +#define PDC_TYPE_PAT 0 /* 64-bit PAT-PDC */ +#define PDC_TYPE_SYSTEM_MAP 1 /* 32-bit, but supports PDC_SYSTEM_MAP */ +#define PDC_TYPE_SNAKE 2 /* Doesn't support SYSTEM_MAP */ + +struct pdc_chassis_info { /* for PDC_CHASSIS_INFO */ + unsigned long actcnt; /* actual number of bytes returned */ + unsigned long maxcnt; /* maximum number of bytes that could be returned */ +}; + +struct pdc_coproc_cfg { /* for PDC_COPROC_CFG */ + unsigned long ccr_functional; + unsigned long ccr_present; + unsigned long revision; + unsigned long model; +}; + +struct pdc_model { /* for PDC_MODEL */ + unsigned long hversion; + unsigned long sversion; + unsigned long hw_id; + unsigned long boot_id; + unsigned long sw_id; + unsigned long sw_cap; + unsigned long arch_rev; + unsigned long pot_key; + unsigned long curr_key; +}; + +struct pdc_cache_cf { /* for PDC_CACHE (I/D-caches) */ + unsigned long +#ifdef CONFIG_64BIT + cc_padW:32, +#endif + cc_alias: 4, /* alias boundaries for virtual addresses */ + cc_block: 4, /* to determine most efficient stride */ + cc_line : 3, /* maximum amount written back as a result of store (multiple of 16 bytes) */ + cc_shift: 2, /* how much to shift cc_block left */ + cc_wt : 1, /* 0 = WT-Dcache, 1 = WB-Dcache */ + cc_sh : 2, /* 0 = separate I/D-cache, else shared I/D-cache */ + cc_cst : 3, /* 0 = incoherent D-cache, 1=coherent D-cache */ + cc_pad1 : 10, /* reserved */ + cc_hv : 3; /* hversion dependent */ +}; + +struct pdc_tlb_cf { /* for PDC_CACHE (I/D-TLB's) */ + unsigned long tc_pad0:12, /* reserved */ +#ifdef CONFIG_64BIT + tc_padW:32, +#endif + tc_sh : 2, /* 0 = separate I/D-TLB, else shared I/D-TLB */ + tc_hv : 1, /* HV */ + tc_page : 1, /* 0 = 2K page-size-machine, 1 = 4k page size */ + tc_cst : 3, /* 0 = incoherent operations, else coherent operations */ + tc_aid : 5, /* ITLB: width of access ids of processor (encoded!) */ + tc_pad1 : 8; /* ITLB: width of space-registers (encoded) */ +}; + +struct pdc_cache_info { /* main-PDC_CACHE-structure (caches & TLB's) */ + /* I-cache */ + unsigned long ic_size; /* size in bytes */ + struct pdc_cache_cf ic_conf; /* configuration */ + unsigned long ic_base; /* base-addr */ + unsigned long ic_stride; + unsigned long ic_count; + unsigned long ic_loop; + /* D-cache */ + unsigned long dc_size; /* size in bytes */ + struct pdc_cache_cf dc_conf; /* configuration */ + unsigned long dc_base; /* base-addr */ + unsigned long dc_stride; + unsigned long dc_count; + unsigned long dc_loop; + /* Instruction-TLB */ + unsigned long it_size; /* number of entries in I-TLB */ + struct pdc_tlb_cf it_conf; /* I-TLB-configuration */ + unsigned long it_sp_base; + unsigned long it_sp_stride; + unsigned long it_sp_count; + unsigned long it_off_base; + unsigned long it_off_stride; + unsigned long it_off_count; + unsigned long it_loop; + /* data-TLB */ + unsigned long dt_size; /* number of entries in D-TLB */ + struct pdc_tlb_cf dt_conf; /* D-TLB-configuration */ + unsigned long dt_sp_base; + unsigned long dt_sp_stride; + unsigned long dt_sp_count; + unsigned long dt_off_base; + unsigned long dt_off_stride; + unsigned long dt_off_count; + unsigned long dt_loop; +}; + +#if 0 +/* If you start using the next struct, you'll have to adjust it to + * work with 64-bit firmware I think -PB + */ +struct pdc_iodc { /* PDC_IODC */ + unsigned char hversion_model; + unsigned char hversion; + unsigned char spa; + unsigned char type; + unsigned int sversion_rev:4; + unsigned int sversion_model:19; + unsigned int sversion_opt:8; + unsigned char rev; + unsigned char dep; + unsigned char features; + unsigned char pad1; + unsigned int checksum:16; + unsigned int length:16; + unsigned int pad[15]; +} __attribute__((aligned(8))) ; +#endif + +#ifndef CONFIG_PA20 +/* no BLTBs in pa2.0 processors */ +struct pdc_btlb_info_range { + __u8 res00; + __u8 num_i; + __u8 num_d; + __u8 num_comb; +}; + +struct pdc_btlb_info { /* PDC_BLOCK_TLB, return of PDC_BTLB_INFO */ + unsigned int min_size; /* minimum size of BTLB in pages */ + unsigned int max_size; /* maximum size of BTLB in pages */ + struct pdc_btlb_info_range fixed_range_info; + struct pdc_btlb_info_range variable_range_info; +}; + +#endif /* !CONFIG_PA20 */ + +#ifdef CONFIG_64BIT +struct pdc_memory_table_raddr { /* PDC_MEM/PDC_MEM_TABLE (return info) */ + unsigned long entries_returned; + unsigned long entries_total; +}; + +struct pdc_memory_table { /* PDC_MEM/PDC_MEM_TABLE (arguments) */ + unsigned long paddr; + unsigned int pages; + unsigned int reserved; +}; +#endif /* CONFIG_64BIT */ + +struct pdc_system_map_mod_info { /* PDC_SYSTEM_MAP/FIND_MODULE */ + unsigned long mod_addr; + unsigned long mod_pgs; + unsigned long add_addrs; +}; + +struct pdc_system_map_addr_info { /* PDC_SYSTEM_MAP/FIND_ADDRESS */ + unsigned long mod_addr; + unsigned long mod_pgs; +}; + +struct pdc_initiator { /* PDC_INITIATOR */ + int host_id; + int factor; + int width; + int mode; +}; + +struct hardware_path { + char flags; /* see bit definitions below */ + char bc[6]; /* Bus Converter routing info to a specific */ + /* I/O adaptor (< 0 means none, > 63 resvd) */ + char mod; /* fixed field of specified module */ +}; + +/* + * Device path specifications used by PDC. + */ +struct pdc_module_path { + struct hardware_path path; + unsigned int layers[6]; /* device-specific info (ctlr #, unit # ...) */ +}; + +#ifndef CONFIG_PA20 +/* Only used on some pre-PA2.0 boxes */ +struct pdc_memory_map { /* PDC_MEMORY_MAP */ + unsigned long hpa; /* mod's register set address */ + unsigned long more_pgs; /* number of additional I/O pgs */ +}; +#endif + +struct pdc_tod { + unsigned long tod_sec; + unsigned long tod_usec; +}; + +/* architected results from PDC_PIM/transfer hpmc on a PA1.1 machine */ + +struct pdc_hpmc_pim_11 { /* PDC_PIM */ + __u32 gr[32]; + __u32 cr[32]; + __u32 sr[8]; + __u32 iasq_back; + __u32 iaoq_back; + __u32 check_type; + __u32 cpu_state; + __u32 rsvd1; + __u32 cache_check; + __u32 tlb_check; + __u32 bus_check; + __u32 assists_check; + __u32 rsvd2; + __u32 assist_state; + __u32 responder_addr; + __u32 requestor_addr; + __u32 path_info; + __u64 fr[32]; +}; + +/* + * architected results from PDC_PIM/transfer hpmc on a PA2.0 machine + * + * Note that PDC_PIM doesn't care whether or not wide mode was enabled + * so the results are different on PA1.1 vs. PA2.0 when in narrow mode. + * + * Note also that there are unarchitected results available, which + * are hversion dependent. Do a "ser pim 0 hpmc" after rebooting, since + * the firmware is probably the best way of printing hversion dependent + * data. + */ + +struct pdc_hpmc_pim_20 { /* PDC_PIM */ + __u64 gr[32]; + __u64 cr[32]; + __u64 sr[8]; + __u64 iasq_back; + __u64 iaoq_back; + __u32 check_type; + __u32 cpu_state; + __u32 cache_check; + __u32 tlb_check; + __u32 bus_check; + __u32 assists_check; + __u32 assist_state; + __u32 path_info; + __u64 responder_addr; + __u64 requestor_addr; + __u64 fr[32]; +}; + +void pdc_console_init(void); /* in pdc_console.c */ +void pdc_console_restart(void); + +void setup_pdc(void); /* in inventory.c */ + +/* wrapper-functions from pdc.c */ + +int pdc_add_valid(unsigned long address); +int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsigned long len); +int pdc_chassis_disp(unsigned long disp); +int pdc_chassis_warn(unsigned long *warn); +int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info); +int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index, + void *iodc_data, unsigned int iodc_data_size); +int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info, + struct pdc_module_path *mod_path, long mod_index); +int pdc_system_map_find_addrs(struct pdc_system_map_addr_info *pdc_addr_info, + long mod_index, long addr_index); +int pdc_model_info(struct pdc_model *model); +int pdc_model_sysmodel(char *name); +int pdc_model_cpuid(unsigned long *cpu_id); +int pdc_model_versions(unsigned long *versions, int id); +int pdc_model_capabilities(unsigned long *capabilities); +int pdc_cache_info(struct pdc_cache_info *cache); +int pdc_spaceid_bits(unsigned long *space_bits); +#ifndef CONFIG_PA20 +int pdc_btlb_info(struct pdc_btlb_info *btlb); +int pdc_mem_map_hpa(struct pdc_memory_map *r_addr, struct pdc_module_path *mod_path); +#endif /* !CONFIG_PA20 */ +int pdc_lan_station_id(char *lan_addr, unsigned long net_hpa); + +int pdc_stable_read(unsigned long staddr, void *memaddr, unsigned long count); +int pdc_stable_write(unsigned long staddr, void *memaddr, unsigned long count); +int pdc_stable_get_size(unsigned long *size); +int pdc_stable_verify_contents(void); +int pdc_stable_initialize(void); + +int pdc_pci_irt_size(unsigned long *num_entries, unsigned long hpa); +int pdc_pci_irt(unsigned long num_entries, unsigned long hpa, void *tbl); + +int pdc_get_initiator(struct hardware_path *, struct pdc_initiator *); +int pdc_tod_read(struct pdc_tod *tod); +int pdc_tod_set(unsigned long sec, unsigned long usec); + +#ifdef CONFIG_64BIT +int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr, + struct pdc_memory_table *tbl, unsigned long entries); +#endif + +void set_firmware_width(void); +int pdc_do_firm_test_reset(unsigned long ftc_bitmap); +int pdc_do_reset(void); +int pdc_soft_power_info(unsigned long *power_reg); +int pdc_soft_power_button(int sw_control); +void pdc_io_reset(void); +void pdc_io_reset_devices(void); +int pdc_iodc_getc(void); +int pdc_iodc_print(const unsigned char *str, unsigned count); + +void pdc_emergency_unlock(void); +int pdc_sti_call(unsigned long func, unsigned long flags, + unsigned long inptr, unsigned long outputr, + unsigned long glob_cfg); + +static inline char * os_id_to_string(u16 os_id) { + switch(os_id) { + case OS_ID_NONE: return "No OS"; + case OS_ID_HPUX: return "HP-UX"; + case OS_ID_MPEXL: return "MPE-iX"; + case OS_ID_OSF: return "OSF"; + case OS_ID_HPRT: return "HP-RT"; + case OS_ID_NOVEL: return "Novell Netware"; + case OS_ID_LINUX: return "Linux"; + default: return "Unknown"; + } +} + +#endif /* __KERNEL__ */ + +#define PAGE0 ((struct zeropage *)__PAGE_OFFSET) + +/* DEFINITION OF THE ZERO-PAGE (PAG0) */ +/* based on work by Jason Eckhardt (jason@equator.com) */ + +/* flags of the device_path */ +#define PF_AUTOBOOT 0x80 +#define PF_AUTOSEARCH 0x40 +#define PF_TIMER 0x0F + +struct device_path { /* page 1-69 */ + unsigned char flags; /* flags see above! */ + unsigned char bc[6]; /* bus converter routing info */ + unsigned char mod; + unsigned int layers[6];/* device-specific layer-info */ +} __attribute__((aligned(8))) ; + +struct pz_device { + struct device_path dp; /* see above */ + /* struct iomod *hpa; */ + unsigned int hpa; /* HPA base address */ + /* char *spa; */ + unsigned int spa; /* SPA base address */ + /* int (*iodc_io)(struct iomod*, ...); */ + unsigned int iodc_io; /* device entry point */ + short pad; /* reserved */ + unsigned short cl_class;/* see below */ +} __attribute__((aligned(8))) ; + +struct zeropage { + /* [0x000] initialize vectors (VEC) */ + unsigned int vec_special; /* must be zero */ + /* int (*vec_pow_fail)(void);*/ + unsigned int vec_pow_fail; /* power failure handler */ + /* int (*vec_toc)(void); */ + unsigned int vec_toc; + unsigned int vec_toclen; + /* int (*vec_rendz)(void); */ + unsigned int vec_rendz; + int vec_pow_fail_flen; + int vec_pad[10]; + + /* [0x040] reserved processor dependent */ + int pad0[112]; + + /* [0x200] reserved */ + int pad1[84]; + + /* [0x350] memory configuration (MC) */ + int memc_cont; /* contiguous mem size (bytes) */ + int memc_phsize; /* physical memory size */ + int memc_adsize; /* additional mem size, bytes of SPA space used by PDC */ + unsigned int mem_pdc_hi; /* used for 64-bit */ + + /* [0x360] various parameters for the boot-CPU */ + /* unsigned int *mem_booterr[8]; */ + unsigned int mem_booterr[8]; /* ptr to boot errors */ + unsigned int mem_free; /* first location, where OS can be loaded */ + /* struct iomod *mem_hpa; */ + unsigned int mem_hpa; /* HPA of the boot-CPU */ + /* int (*mem_pdc)(int, ...); */ + unsigned int mem_pdc; /* PDC entry point */ + unsigned int mem_10msec; /* number of clock ticks in 10msec */ + + /* [0x390] initial memory module (IMM) */ + /* struct iomod *imm_hpa; */ + unsigned int imm_hpa; /* HPA of the IMM */ + int imm_soft_boot; /* 0 = was hard boot, 1 = was soft boot */ + unsigned int imm_spa_size; /* SPA size of the IMM in bytes */ + unsigned int imm_max_mem; /* bytes of mem in IMM */ + + /* [0x3A0] boot console, display device and keyboard */ + struct pz_device mem_cons; /* description of console device */ + struct pz_device mem_boot; /* description of boot device */ + struct pz_device mem_kbd; /* description of keyboard device */ + + /* [0x430] reserved */ + int pad430[116]; + + /* [0x600] processor dependent */ + __u32 pad600[1]; + __u32 proc_sti; /* pointer to STI ROM */ + __u32 pad608[126]; +}; + +#endif /* !defined(__ASSEMBLY__) */ + +#endif /* _PARISC_PDC_H */ diff --git a/arch/parisc/include/asm/pdc_chassis.h b/arch/parisc/include/asm/pdc_chassis.h new file mode 100644 index 00000000000..a609273dc6b --- /dev/null +++ b/arch/parisc/include/asm/pdc_chassis.h @@ -0,0 +1,381 @@ +/* + * include/asm-parisc/pdc_chassis.h + * + * Copyright (C) 2002 Laurent Canet + * Copyright (C) 2002 Thibaut Varene + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * TODO: - handle processor number on SMP systems (Reporting Entity ID) + * - handle message ID + * - handle timestamps + */ + + +#ifndef _PARISC_PDC_CHASSIS_H +#define _PARISC_PDC_CHASSIS_H + +/* + * ---------- + * Prototypes + * ---------- + */ + +int pdc_chassis_send_status(int message); +void parisc_pdc_chassis_init(void); + + +/* + * ----------------- + * Direct call names + * ----------------- + * They setup everything for you, the Log message and the corresponding LED state + */ + +#define PDC_CHASSIS_DIRECT_BSTART 0 +#define PDC_CHASSIS_DIRECT_BCOMPLETE 1 +#define PDC_CHASSIS_DIRECT_SHUTDOWN 2 +#define PDC_CHASSIS_DIRECT_PANIC 3 +#define PDC_CHASSIS_DIRECT_HPMC 4 +#define PDC_CHASSIS_DIRECT_LPMC 5 +#define PDC_CHASSIS_DIRECT_DUMP 6 /* not yet implemented */ +#define PDC_CHASSIS_DIRECT_OOPS 7 /* not yet implemented */ + + +/* + * ------------ + * LEDs control + * ------------ + * Set the three LEDs -- Run, Attn, and Fault. + */ + +/* Old PDC LED control */ +#define PDC_CHASSIS_DISP_DATA(v) ((unsigned long)(v) << 17) + +/* + * Available PDC PAT LED states + */ + +#define PDC_CHASSIS_LED_RUN_OFF (0ULL << 4) +#define PDC_CHASSIS_LED_RUN_FLASH (1ULL << 4) +#define PDC_CHASSIS_LED_RUN_ON (2ULL << 4) +#define PDC_CHASSIS_LED_RUN_NC (3ULL << 4) +#define PDC_CHASSIS_LED_ATTN_OFF (0ULL << 6) +#define PDC_CHASSIS_LED_ATTN_FLASH (1ULL << 6) +#define PDC_CHASSIS_LED_ATTN_NC (3ULL << 6) /* ATTN ON is invalid */ +#define PDC_CHASSIS_LED_FAULT_OFF (0ULL << 8) +#define PDC_CHASSIS_LED_FAULT_FLASH (1ULL << 8) +#define PDC_CHASSIS_LED_FAULT_ON (2ULL << 8) +#define PDC_CHASSIS_LED_FAULT_NC (3ULL << 8) +#define PDC_CHASSIS_LED_VALID (1ULL << 10) + +/* + * Valid PDC PAT LED states combinations + */ + +/* System running normally */ +#define PDC_CHASSIS_LSTATE_RUN_NORMAL (PDC_CHASSIS_LED_RUN_ON | \ + PDC_CHASSIS_LED_ATTN_OFF | \ + PDC_CHASSIS_LED_FAULT_OFF | \ + PDC_CHASSIS_LED_VALID ) +/* System crashed and rebooted itself successfully */ +#define PDC_CHASSIS_LSTATE_RUN_CRASHREC (PDC_CHASSIS_LED_RUN_ON | \ + PDC_CHASSIS_LED_ATTN_OFF | \ + PDC_CHASSIS_LED_FAULT_FLASH | \ + PDC_CHASSIS_LED_VALID ) +/* There was a system interruption that did not take the system down */ +#define PDC_CHASSIS_LSTATE_RUN_SYSINT (PDC_CHASSIS_LED_RUN_ON | \ + PDC_CHASSIS_LED_ATTN_FLASH | \ + PDC_CHASSIS_LED_FAULT_OFF | \ + PDC_CHASSIS_LED_VALID ) +/* System running and unexpected reboot or non-critical error detected */ +#define PDC_CHASSIS_LSTATE_RUN_NCRIT (PDC_CHASSIS_LED_RUN_ON | \ + PDC_CHASSIS_LED_ATTN_FLASH | \ + PDC_CHASSIS_LED_FAULT_FLASH | \ + PDC_CHASSIS_LED_VALID ) +/* Executing non-OS code */ +#define PDC_CHASSIS_LSTATE_NONOS (PDC_CHASSIS_LED_RUN_FLASH | \ + PDC_CHASSIS_LED_ATTN_OFF | \ + PDC_CHASSIS_LED_FAULT_OFF | \ + PDC_CHASSIS_LED_VALID ) +/* Boot failed - Executing non-OS code */ +#define PDC_CHASSIS_LSTATE_NONOS_BFAIL (PDC_CHASSIS_LED_RUN_FLASH | \ + PDC_CHASSIS_LED_ATTN_OFF | \ + PDC_CHASSIS_LED_FAULT_ON | \ + PDC_CHASSIS_LED_VALID ) +/* Unexpected reboot occurred - Executing non-OS code */ +#define PDC_CHASSIS_LSTATE_NONOS_UNEXP (PDC_CHASSIS_LED_RUN_FLASH | \ + PDC_CHASSIS_LED_ATTN_OFF | \ + PDC_CHASSIS_LED_FAULT_FLASH | \ + PDC_CHASSIS_LED_VALID ) +/* Executing non-OS code - Non-critical error detected */ +#define PDC_CHASSIS_LSTATE_NONOS_NCRIT (PDC_CHASSIS_LED_RUN_FLASH | \ + PDC_CHASSIS_LED_ATTN_FLASH | \ + PDC_CHASSIS_LED_FAULT_OFF | \ + PDC_CHASSIS_LED_VALID ) +/* Boot failed - Executing non-OS code - Non-critical error detected */ +#define PDC_CHASSIS_LSTATE_BFAIL_NCRIT (PDC_CHASSIS_LED_RUN_FLASH | \ + PDC_CHASSIS_LED_ATTN_FLASH | \ + PDC_CHASSIS_LED_FAULT_ON | \ + PDC_CHASSIS_LED_VALID ) +/* Unexpected reboot/recovering - Executing non-OS code - Non-critical error detected */ +#define PDC_CHASSIS_LSTATE_UNEXP_NCRIT (PDC_CHASSIS_LED_RUN_FLASH | \ + PDC_CHASSIS_LED_ATTN_FLASH | \ + PDC_CHASSIS_LED_FAULT_FLASH | \ + PDC_CHASSIS_LED_VALID ) +/* Cannot execute PDC */ +#define PDC_CHASSIS_LSTATE_CANNOT_PDC (PDC_CHASSIS_LED_RUN_OFF | \ + PDC_CHASSIS_LED_ATTN_OFF | \ + PDC_CHASSIS_LED_FAULT_OFF | \ + PDC_CHASSIS_LED_VALID ) +/* Boot failed - OS not up - PDC has detected a failure that prevents boot */ +#define PDC_CHASSIS_LSTATE_FATAL_BFAIL (PDC_CHASSIS_LED_RUN_OFF | \ + PDC_CHASSIS_LED_ATTN_OFF | \ + PDC_CHASSIS_LED_FAULT_ON | \ + PDC_CHASSIS_LED_VALID ) +/* No code running - Non-critical error detected (double fault situation) */ +#define PDC_CHASSIS_LSTATE_NOCODE_NCRIT (PDC_CHASSIS_LED_RUN_OFF | \ + PDC_CHASSIS_LED_ATTN_FLASH | \ + PDC_CHASSIS_LED_FAULT_OFF | \ + PDC_CHASSIS_LED_VALID ) +/* Boot failed - OS not up - Fatal failure detected - Non-critical error detected */ +#define PDC_CHASSIS_LSTATE_FATAL_NCRIT (PDC_CHASSIS_LED_RUN_OFF | \ + PDC_CHASSIS_LED_ATTN_FLASH | \ + PDC_CHASSIS_LED_FAULT_ON | \ + PDC_CHASSIS_LED_VALID ) +/* All other states are invalid */ + + +/* + * -------------- + * PDC Log events + * -------------- + * Here follows bits needed to fill up the log event sent to PDC_CHASSIS + * The log message contains: Alert level, Source, Source detail, + * Source ID, Problem detail, Caller activity, Activity status, + * Caller subactivity, Reporting entity type, Reporting entity ID, + * Data type, Unique message ID and EOM. + */ + +/* Alert level */ +#define PDC_CHASSIS_ALERT_FORWARD (0ULL << 36) /* no failure detected */ +#define PDC_CHASSIS_ALERT_SERPROC (1ULL << 36) /* service proc - no failure */ +#define PDC_CHASSIS_ALERT_NURGENT (2ULL << 36) /* non-urgent operator attn */ +#define PDC_CHASSIS_ALERT_BLOCKED (3ULL << 36) /* system blocked */ +#define PDC_CHASSIS_ALERT_CONF_CHG (4ULL << 36) /* unexpected configuration change */ +#define PDC_CHASSIS_ALERT_ENV_PB (5ULL << 36) /* boot possible, environmental pb */ +#define PDC_CHASSIS_ALERT_PENDING (6ULL << 36) /* boot possible, pending failure */ +#define PDC_CHASSIS_ALERT_PERF_IMP (8ULL << 36) /* boot possible, performance impaired */ +#define PDC_CHASSIS_ALERT_FUNC_IMP (10ULL << 36) /* boot possible, functionality impaired */ +#define PDC_CHASSIS_ALERT_SOFT_FAIL (12ULL << 36) /* software failure */ +#define PDC_CHASSIS_ALERT_HANG (13ULL << 36) /* system hang */ +#define PDC_CHASSIS_ALERT_ENV_FATAL (14ULL << 36) /* fatal power or environmental pb */ +#define PDC_CHASSIS_ALERT_HW_FATAL (15ULL << 36) /* fatal hardware problem */ + +/* Source */ +#define PDC_CHASSIS_SRC_NONE (0ULL << 28) /* unknown, no source stated */ +#define PDC_CHASSIS_SRC_PROC (1ULL << 28) /* processor */ +/* For later use ? */ +#define PDC_CHASSIS_SRC_PROC_CACHE (2ULL << 28) /* processor cache*/ +#define PDC_CHASSIS_SRC_PDH (3ULL << 28) /* processor dependent hardware */ +#define PDC_CHASSIS_SRC_PWR (4ULL << 28) /* power */ +#define PDC_CHASSIS_SRC_FAB (5ULL << 28) /* fabric connector */ +#define PDC_CHASSIS_SRC_PLATi (6ULL << 28) /* platform */ +#define PDC_CHASSIS_SRC_MEM (7ULL << 28) /* memory */ +#define PDC_CHASSIS_SRC_IO (8ULL << 28) /* I/O */ +#define PDC_CHASSIS_SRC_CELL (9ULL << 28) /* cell */ +#define PDC_CHASSIS_SRC_PD (10ULL << 28) /* protected domain */ + +/* Source detail field */ +#define PDC_CHASSIS_SRC_D_PROC (1ULL << 24) /* processor general */ + +/* Source ID - platform dependent */ +#define PDC_CHASSIS_SRC_ID_UNSPEC (0ULL << 16) + +/* Problem detail - problem source dependent */ +#define PDC_CHASSIS_PB_D_PROC_NONE (0ULL << 32) /* no problem detail */ +#define PDC_CHASSIS_PB_D_PROC_TIMEOUT (4ULL << 32) /* timeout */ + +/* Caller activity */ +#define PDC_CHASSIS_CALL_ACT_HPUX_BL (7ULL << 12) /* Boot Loader */ +#define PDC_CHASSIS_CALL_ACT_HPUX_PD (8ULL << 12) /* SAL_PD activities */ +#define PDC_CHASSIS_CALL_ACT_HPUX_EVENT (9ULL << 12) /* SAL_EVENTS activities */ +#define PDC_CHASSIS_CALL_ACT_HPUX_IO (10ULL << 12) /* SAL_IO activities */ +#define PDC_CHASSIS_CALL_ACT_HPUX_PANIC (11ULL << 12) /* System panic */ +#define PDC_CHASSIS_CALL_ACT_HPUX_INIT (12ULL << 12) /* System initialization */ +#define PDC_CHASSIS_CALL_ACT_HPUX_SHUT (13ULL << 12) /* System shutdown */ +#define PDC_CHASSIS_CALL_ACT_HPUX_WARN (14ULL << 12) /* System warning */ +#define PDC_CHASSIS_CALL_ACT_HPUX_DU (15ULL << 12) /* Display_Activity() update */ + +/* Activity status - implementation dependent */ +#define PDC_CHASSIS_ACT_STATUS_UNSPEC (0ULL << 0) + +/* Caller subactivity - implementation dependent */ +/* FIXME: other subactivities ? */ +#define PDC_CHASSIS_CALL_SACT_UNSPEC (0ULL << 4) /* implementation dependent */ + +/* Reporting entity type */ +#define PDC_CHASSIS_RET_GENERICOS (12ULL << 52) /* generic OSes */ +#define PDC_CHASSIS_RET_IA64_NT (13ULL << 52) /* IA-64 NT */ +#define PDC_CHASSIS_RET_HPUX (14ULL << 52) /* HP-UX */ +#define PDC_CHASSIS_RET_DIAG (15ULL << 52) /* offline diagnostics & utilities */ + +/* Reporting entity ID */ +#define PDC_CHASSIS_REID_UNSPEC (0ULL << 44) + +/* Data type */ +#define PDC_CHASSIS_DT_NONE (0ULL << 59) /* data field unused */ +/* For later use ? Do we need these ? */ +#define PDC_CHASSIS_DT_PHYS_ADDR (1ULL << 59) /* physical address */ +#define PDC_CHASSIS_DT_DATA_EXPECT (2ULL << 59) /* expected data */ +#define PDC_CHASSIS_DT_ACTUAL (3ULL << 59) /* actual data */ +#define PDC_CHASSIS_DT_PHYS_LOC (4ULL << 59) /* physical location */ +#define PDC_CHASSIS_DT_PHYS_LOC_EXT (5ULL << 59) /* physical location extension */ +#define PDC_CHASSIS_DT_TAG (6ULL << 59) /* tag */ +#define PDC_CHASSIS_DT_SYNDROME (7ULL << 59) /* syndrome */ +#define PDC_CHASSIS_DT_CODE_ADDR (8ULL << 59) /* code address */ +#define PDC_CHASSIS_DT_ASCII_MSG (9ULL << 59) /* ascii message */ +#define PDC_CHASSIS_DT_POST (10ULL << 59) /* POST code */ +#define PDC_CHASSIS_DT_TIMESTAMP (11ULL << 59) /* timestamp */ +#define PDC_CHASSIS_DT_DEV_STAT (12ULL << 59) /* device status */ +#define PDC_CHASSIS_DT_DEV_TYPE (13ULL << 59) /* device type */ +#define PDC_CHASSIS_DT_PB_DET (14ULL << 59) /* problem detail */ +#define PDC_CHASSIS_DT_ACT_LEV (15ULL << 59) /* activity level/timeout */ +#define PDC_CHASSIS_DT_SER_NUM (16ULL << 59) /* serial number */ +#define PDC_CHASSIS_DT_REV_NUM (17ULL << 59) /* revision number */ +#define PDC_CHASSIS_DT_INTERRUPT (18ULL << 59) /* interruption information */ +#define PDC_CHASSIS_DT_TEST_NUM (19ULL << 59) /* test number */ +#define PDC_CHASSIS_DT_STATE_CHG (20ULL << 59) /* major changes in system state */ +#define PDC_CHASSIS_DT_PROC_DEALLOC (21ULL << 59) /* processor deallocate */ +#define PDC_CHASSIS_DT_RESET (30ULL << 59) /* reset type and cause */ +#define PDC_CHASSIS_DT_PA_LEGACY (31ULL << 59) /* legacy PA hex chassis code */ + +/* System states - part of major changes in system state data field */ +#define PDC_CHASSIS_SYSTATE_BSTART (0ULL << 0) /* boot start */ +#define PDC_CHASSIS_SYSTATE_BCOMP (1ULL << 0) /* boot complete */ +#define PDC_CHASSIS_SYSTATE_CHANGE (2ULL << 0) /* major change */ +#define PDC_CHASSIS_SYSTATE_LED (3ULL << 0) /* LED change */ +#define PDC_CHASSIS_SYSTATE_PANIC (9ULL << 0) /* OS Panic */ +#define PDC_CHASSIS_SYSTATE_DUMP (10ULL << 0) /* memory dump */ +#define PDC_CHASSIS_SYSTATE_HPMC (11ULL << 0) /* processing HPMC */ +#define PDC_CHASSIS_SYSTATE_HALT (15ULL << 0) /* system halted */ + +/* Message ID */ +#define PDC_CHASSIS_MSG_ID (0ULL << 40) /* we do not handle msg IDs atm */ + +/* EOM - separates log entries */ +#define PDC_CHASSIS_EOM_CLEAR (0ULL << 43) +#define PDC_CHASSIS_EOM_SET (1ULL << 43) + +/* + * Preformated well known messages + */ + +/* Boot started */ +#define PDC_CHASSIS_PMSG_BSTART (PDC_CHASSIS_ALERT_SERPROC | \ + PDC_CHASSIS_SRC_PROC | \ + PDC_CHASSIS_SRC_D_PROC | \ + PDC_CHASSIS_SRC_ID_UNSPEC | \ + PDC_CHASSIS_PB_D_PROC_NONE | \ + PDC_CHASSIS_CALL_ACT_HPUX_INIT | \ + PDC_CHASSIS_ACT_STATUS_UNSPEC | \ + PDC_CHASSIS_CALL_SACT_UNSPEC | \ + PDC_CHASSIS_RET_HPUX | \ + PDC_CHASSIS_REID_UNSPEC | \ + PDC_CHASSIS_DT_STATE_CHG | \ + PDC_CHASSIS_SYSTATE_BSTART | \ + PDC_CHASSIS_MSG_ID | \ + PDC_CHASSIS_EOM_SET ) + +/* Boot complete */ +#define PDC_CHASSIS_PMSG_BCOMPLETE (PDC_CHASSIS_ALERT_SERPROC | \ + PDC_CHASSIS_SRC_PROC | \ + PDC_CHASSIS_SRC_D_PROC | \ + PDC_CHASSIS_SRC_ID_UNSPEC | \ + PDC_CHASSIS_PB_D_PROC_NONE | \ + PDC_CHASSIS_CALL_ACT_HPUX_INIT | \ + PDC_CHASSIS_ACT_STATUS_UNSPEC | \ + PDC_CHASSIS_CALL_SACT_UNSPEC | \ + PDC_CHASSIS_RET_HPUX | \ + PDC_CHASSIS_REID_UNSPEC | \ + PDC_CHASSIS_DT_STATE_CHG | \ + PDC_CHASSIS_SYSTATE_BCOMP | \ + PDC_CHASSIS_MSG_ID | \ + PDC_CHASSIS_EOM_SET ) + +/* Shutdown */ +#define PDC_CHASSIS_PMSG_SHUTDOWN (PDC_CHASSIS_ALERT_SERPROC | \ + PDC_CHASSIS_SRC_PROC | \ + PDC_CHASSIS_SRC_D_PROC | \ + PDC_CHASSIS_SRC_ID_UNSPEC | \ + PDC_CHASSIS_PB_D_PROC_NONE | \ + PDC_CHASSIS_CALL_ACT_HPUX_SHUT | \ + PDC_CHASSIS_ACT_STATUS_UNSPEC | \ + PDC_CHASSIS_CALL_SACT_UNSPEC | \ + PDC_CHASSIS_RET_HPUX | \ + PDC_CHASSIS_REID_UNSPEC | \ + PDC_CHASSIS_DT_STATE_CHG | \ + PDC_CHASSIS_SYSTATE_HALT | \ + PDC_CHASSIS_MSG_ID | \ + PDC_CHASSIS_EOM_SET ) + +/* Panic */ +#define PDC_CHASSIS_PMSG_PANIC (PDC_CHASSIS_ALERT_SOFT_FAIL | \ + PDC_CHASSIS_SRC_PROC | \ + PDC_CHASSIS_SRC_D_PROC | \ + PDC_CHASSIS_SRC_ID_UNSPEC | \ + PDC_CHASSIS_PB_D_PROC_NONE | \ + PDC_CHASSIS_CALL_ACT_HPUX_PANIC| \ + PDC_CHASSIS_ACT_STATUS_UNSPEC | \ + PDC_CHASSIS_CALL_SACT_UNSPEC | \ + PDC_CHASSIS_RET_HPUX | \ + PDC_CHASSIS_REID_UNSPEC | \ + PDC_CHASSIS_DT_STATE_CHG | \ + PDC_CHASSIS_SYSTATE_PANIC | \ + PDC_CHASSIS_MSG_ID | \ + PDC_CHASSIS_EOM_SET ) + +// FIXME: extrapolated data +/* HPMC */ +#define PDC_CHASSIS_PMSG_HPMC (PDC_CHASSIS_ALERT_CONF_CHG /*?*/ | \ + PDC_CHASSIS_SRC_PROC | \ + PDC_CHASSIS_SRC_D_PROC | \ + PDC_CHASSIS_SRC_ID_UNSPEC | \ + PDC_CHASSIS_PB_D_PROC_NONE | \ + PDC_CHASSIS_CALL_ACT_HPUX_WARN | \ + PDC_CHASSIS_RET_HPUX | \ + PDC_CHASSIS_DT_STATE_CHG | \ + PDC_CHASSIS_SYSTATE_HPMC | \ + PDC_CHASSIS_MSG_ID | \ + PDC_CHASSIS_EOM_SET ) + +/* LPMC */ +#define PDC_CHASSIS_PMSG_LPMC (PDC_CHASSIS_ALERT_BLOCKED /*?*/| \ + PDC_CHASSIS_SRC_PROC | \ + PDC_CHASSIS_SRC_D_PROC | \ + PDC_CHASSIS_SRC_ID_UNSPEC | \ + PDC_CHASSIS_PB_D_PROC_NONE | \ + PDC_CHASSIS_CALL_ACT_HPUX_WARN | \ + PDC_CHASSIS_ACT_STATUS_UNSPEC | \ + PDC_CHASSIS_CALL_SACT_UNSPEC | \ + PDC_CHASSIS_RET_HPUX | \ + PDC_CHASSIS_REID_UNSPEC | \ + PDC_CHASSIS_DT_STATE_CHG | \ + PDC_CHASSIS_SYSTATE_CHANGE | \ + PDC_CHASSIS_MSG_ID | \ + PDC_CHASSIS_EOM_SET ) + +#endif /* _PARISC_PDC_CHASSIS_H */ +/* vim: set ts=8 */ diff --git a/arch/parisc/include/asm/pdcpat.h b/arch/parisc/include/asm/pdcpat.h new file mode 100644 index 00000000000..47539f11795 --- /dev/null +++ b/arch/parisc/include/asm/pdcpat.h @@ -0,0 +1,308 @@ +#ifndef __PARISC_PATPDC_H +#define __PARISC_PATPDC_H + +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright 2000 (c) Hewlett Packard (Paul Bame ) + * Copyright 2000,2004 (c) Grant Grundler + */ + + +#define PDC_PAT_CELL 64L /* Interface for gaining and + * manipulatin g cell state within PD */ +#define PDC_PAT_CELL_GET_NUMBER 0L /* Return Cell number */ +#define PDC_PAT_CELL_GET_INFO 1L /* Returns info about Cell */ +#define PDC_PAT_CELL_MODULE 2L /* Returns info about Module */ +#define PDC_PAT_CELL_SET_ATTENTION 9L /* Set Cell Attention indicator */ +#define PDC_PAT_CELL_NUMBER_TO_LOC 10L /* Cell Number -> Location */ +#define PDC_PAT_CELL_WALK_FABRIC 11L /* Walk the Fabric */ +#define PDC_PAT_CELL_GET_RDT_SIZE 12L /* Return Route Distance Table Sizes */ +#define PDC_PAT_CELL_GET_RDT 13L /* Return Route Distance Tables */ +#define PDC_PAT_CELL_GET_LOCAL_PDH_SZ 14L /* Read Local PDH Buffer Size */ +#define PDC_PAT_CELL_SET_LOCAL_PDH 15L /* Write Local PDH Buffer */ +#define PDC_PAT_CELL_GET_REMOTE_PDH_SZ 16L /* Return Remote PDH Buffer Size */ +#define PDC_PAT_CELL_GET_REMOTE_PDH 17L /* Read Remote PDH Buffer */ +#define PDC_PAT_CELL_GET_DBG_INFO 128L /* Return DBG Buffer Info */ +#define PDC_PAT_CELL_CHANGE_ALIAS 129L /* Change Non-Equivalent Alias Chacking */ + + +/* +** Arg to PDC_PAT_CELL_MODULE memaddr[4] +** +** Addresses on the Merced Bus != all Runway Bus addresses. +** This is intended for programming SBA/LBA chips range registers. +*/ +#define IO_VIEW 0UL +#define PA_VIEW 1UL + +/* PDC_PAT_CELL_MODULE entity type values */ +#define PAT_ENTITY_CA 0 /* central agent */ +#define PAT_ENTITY_PROC 1 /* processor */ +#define PAT_ENTITY_MEM 2 /* memory controller */ +#define PAT_ENTITY_SBA 3 /* system bus adapter */ +#define PAT_ENTITY_LBA 4 /* local bus adapter */ +#define PAT_ENTITY_PBC 5 /* processor bus converter */ +#define PAT_ENTITY_XBC 6 /* crossbar fabric connect */ +#define PAT_ENTITY_RC 7 /* fabric interconnect */ + +/* PDC_PAT_CELL_MODULE address range type values */ +#define PAT_PBNUM 0 /* PCI Bus Number */ +#define PAT_LMMIO 1 /* < 4G MMIO Space */ +#define PAT_GMMIO 2 /* > 4G MMIO Space */ +#define PAT_NPIOP 3 /* Non Postable I/O Port Space */ +#define PAT_PIOP 4 /* Postable I/O Port Space */ +#define PAT_AHPA 5 /* Addional HPA Space */ +#define PAT_UFO 6 /* HPA Space (UFO for Mariposa) */ +#define PAT_GNIP 7 /* GNI Reserved Space */ + + + +/* PDC PAT CHASSIS LOG -- Platform logging & forward progress functions */ + +#define PDC_PAT_CHASSIS_LOG 65L +#define PDC_PAT_CHASSIS_WRITE_LOG 0L /* Write Log Entry */ +#define PDC_PAT_CHASSIS_READ_LOG 1L /* Read Log Entry */ + + +/* PDC PAT CPU -- CPU configuration within the protection domain */ + +#define PDC_PAT_CPU 67L +#define PDC_PAT_CPU_INFO 0L /* Return CPU config info */ +#define PDC_PAT_CPU_DELETE 1L /* Delete CPU */ +#define PDC_PAT_CPU_ADD 2L /* Add CPU */ +#define PDC_PAT_CPU_GET_NUMBER 3L /* Return CPU Number */ +#define PDC_PAT_CPU_GET_HPA 4L /* Return CPU HPA */ +#define PDC_PAT_CPU_STOP 5L /* Stop CPU */ +#define PDC_PAT_CPU_RENDEZVOUS 6L /* Rendezvous CPU */ +#define PDC_PAT_CPU_GET_CLOCK_INFO 7L /* Return CPU Clock info */ +#define PDC_PAT_CPU_GET_RENDEZVOUS_STATE 8L /* Return Rendezvous State */ +#define PDC_PAT_CPU_PLUNGE_FABRIC 128L /* Plunge Fabric */ +#define PDC_PAT_CPU_UPDATE_CACHE_CLEANSING 129L /* Manipulate Cache + * Cleansing Mode */ +/* PDC PAT EVENT -- Platform Events */ + +#define PDC_PAT_EVENT 68L +#define PDC_PAT_EVENT_GET_CAPS 0L /* Get Capabilities */ +#define PDC_PAT_EVENT_SET_MODE 1L /* Set Notification Mode */ +#define PDC_PAT_EVENT_SCAN 2L /* Scan Event */ +#define PDC_PAT_EVENT_HANDLE 3L /* Handle Event */ +#define PDC_PAT_EVENT_GET_NB_CALL 4L /* Get Non-Blocking call Args */ + +/* PDC PAT HPMC -- Cause processor to go into spin loop, and wait + * for wake up from Monarch Processor. + */ + +#define PDC_PAT_HPMC 70L +#define PDC_PAT_HPMC_RENDEZ_CPU 0L /* go into spin loop */ +#define PDC_PAT_HPMC_SET_PARAMS 1L /* Allows OS to specify intr which PDC + * will use to interrupt OS during + * machine check rendezvous */ + +/* parameters for PDC_PAT_HPMC_SET_PARAMS: */ +#define HPMC_SET_PARAMS_INTR 1L /* Rendezvous Interrupt */ +#define HPMC_SET_PARAMS_WAKE 2L /* Wake up processor */ + + +/* PDC PAT IO -- On-line services for I/O modules */ + +#define PDC_PAT_IO 71L +#define PDC_PAT_IO_GET_SLOT_STATUS 5L /* Get Slot Status Info*/ +#define PDC_PAT_IO_GET_LOC_FROM_HARDWARE 6L /* Get Physical Location from */ + /* Hardware Path */ +#define PDC_PAT_IO_GET_HARDWARE_FROM_LOC 7L /* Get Hardware Path from + * Physical Location */ +#define PDC_PAT_IO_GET_PCI_CONFIG_FROM_HW 11L /* Get PCI Configuration + * Address from Hardware Path */ +#define PDC_PAT_IO_GET_HW_FROM_PCI_CONFIG 12L /* Get Hardware Path + * from PCI Configuration Address */ +#define PDC_PAT_IO_READ_HOST_BRIDGE_INFO 13L /* Read Host Bridge State Info */ +#define PDC_PAT_IO_CLEAR_HOST_BRIDGE_INFO 14L /* Clear Host Bridge State Info*/ +#define PDC_PAT_IO_GET_PCI_ROUTING_TABLE_SIZE 15L /* Get PCI INT Routing Table + * Size */ +#define PDC_PAT_IO_GET_PCI_ROUTING_TABLE 16L /* Get PCI INT Routing Table */ +#define PDC_PAT_IO_GET_HINT_TABLE_SIZE 17L /* Get Hint Table Size */ +#define PDC_PAT_IO_GET_HINT_TABLE 18L /* Get Hint Table */ +#define PDC_PAT_IO_PCI_CONFIG_READ 19L /* PCI Config Read */ +#define PDC_PAT_IO_PCI_CONFIG_WRITE 20L /* PCI Config Write */ +#define PDC_PAT_IO_GET_NUM_IO_SLOTS 21L /* Get Number of I/O Bay Slots in + * Cabinet */ +#define PDC_PAT_IO_GET_LOC_IO_SLOTS 22L /* Get Physical Location of I/O */ + /* Bay Slots in Cabinet */ +#define PDC_PAT_IO_BAY_STATUS_INFO 28L /* Get I/O Bay Slot Status Info */ +#define PDC_PAT_IO_GET_PROC_VIEW 29L /* Get Processor view of IO address */ +#define PDC_PAT_IO_PROG_SBA_DIR_RANGE 30L /* Program directed range */ + + +/* PDC PAT MEM -- Manage memory page deallocation */ + +#define PDC_PAT_MEM 72L +#define PDC_PAT_MEM_PD_INFO 0L /* Return PDT info for PD */ +#define PDC_PAT_MEM_PD_CLEAR 1L /* Clear PDT for PD */ +#define PDC_PAT_MEM_PD_READ 2L /* Read PDT entries for PD */ +#define PDC_PAT_MEM_PD_RESET 3L /* Reset clear bit for PD */ +#define PDC_PAT_MEM_CELL_INFO 5L /* Return PDT info For Cell */ +#define PDC_PAT_MEM_CELL_CLEAR 6L /* Clear PDT For Cell */ +#define PDC_PAT_MEM_CELL_READ 7L /* Read PDT entries For Cell */ +#define PDC_PAT_MEM_CELL_RESET 8L /* Reset clear bit For Cell */ +#define PDC_PAT_MEM_SETGM 9L /* Set Golden Memory value */ +#define PDC_PAT_MEM_ADD_PAGE 10L /* ADDs a page to the cell */ +#define PDC_PAT_MEM_ADDRESS 11L /* Get Physical Location From */ + /* Memory Address */ +#define PDC_PAT_MEM_GET_TXT_SIZE 12L /* Get Formatted Text Size */ +#define PDC_PAT_MEM_GET_PD_TXT 13L /* Get PD Formatted Text */ +#define PDC_PAT_MEM_GET_CELL_TXT 14L /* Get Cell Formatted Text */ +#define PDC_PAT_MEM_RD_STATE_INFO 15L /* Read Mem Module State Info*/ +#define PDC_PAT_MEM_CLR_STATE_INFO 16L /*Clear Mem Module State Info*/ +#define PDC_PAT_MEM_CLEAN_RANGE 128L /*Clean Mem in specific range*/ +#define PDC_PAT_MEM_GET_TBL_SIZE 131L /* Get Memory Table Size */ +#define PDC_PAT_MEM_GET_TBL 132L /* Get Memory Table */ + + +/* PDC PAT NVOLATILE -- Access Non-Volatile Memory */ + +#define PDC_PAT_NVOLATILE 73L +#define PDC_PAT_NVOLATILE_READ 0L /* Read Non-Volatile Memory */ +#define PDC_PAT_NVOLATILE_WRITE 1L /* Write Non-Volatile Memory */ +#define PDC_PAT_NVOLATILE_GET_SIZE 2L /* Return size of NVM */ +#define PDC_PAT_NVOLATILE_VERIFY 3L /* Verify contents of NVM */ +#define PDC_PAT_NVOLATILE_INIT 4L /* Initialize NVM */ + +/* PDC PAT PD */ +#define PDC_PAT_PD 74L /* Protection Domain Info */ +#define PDC_PAT_PD_GET_ADDR_MAP 0L /* Get Address Map */ + +/* PDC_PAT_PD_GET_ADDR_MAP entry types */ +#define PAT_MEMORY_DESCRIPTOR 1 + +/* PDC_PAT_PD_GET_ADDR_MAP memory types */ +#define PAT_MEMTYPE_MEMORY 0 +#define PAT_MEMTYPE_FIRMWARE 4 + +/* PDC_PAT_PD_GET_ADDR_MAP memory usage */ +#define PAT_MEMUSE_GENERAL 0 +#define PAT_MEMUSE_GI 128 +#define PAT_MEMUSE_GNI 129 + + +#ifndef __ASSEMBLY__ +#include + +#ifdef CONFIG_64BIT +#define is_pdc_pat() (PDC_TYPE_PAT == pdc_type) +extern int pdc_pat_get_irt_size(unsigned long *num_entries, unsigned long cell_num); +extern int pdc_pat_get_irt(void *r_addr, unsigned long cell_num); +#else /* ! CONFIG_64BIT */ +/* No PAT support for 32-bit kernels...sorry */ +#define is_pdc_pat() (0) +#define pdc_pat_get_irt_size(num_entries, cell_numn) PDC_BAD_PROC +#define pdc_pat_get_irt(r_addr, cell_num) PDC_BAD_PROC +#endif /* ! CONFIG_64BIT */ + + +struct pdc_pat_cell_num { + unsigned long cell_num; + unsigned long cell_loc; +}; + +struct pdc_pat_cpu_num { + unsigned long cpu_num; + unsigned long cpu_loc; +}; + +struct pdc_pat_pd_addr_map_entry { + unsigned char entry_type; /* 1 = Memory Descriptor Entry Type */ + unsigned char reserve1[5]; + unsigned char memory_type; + unsigned char memory_usage; + unsigned long paddr; + unsigned int pages; /* Length in 4K pages */ + unsigned int reserve2; + unsigned long cell_map; +}; + +/******************************************************************** +* PDC_PAT_CELL[Return Cell Module] memaddr[0] conf_base_addr +* ---------------------------------------------------------- +* Bit 0 to 51 - conf_base_addr +* Bit 52 to 62 - reserved +* Bit 63 - endianess bit +********************************************************************/ +#define PAT_GET_CBA(value) ((value) & 0xfffffffffffff000UL) + +/******************************************************************** +* PDC_PAT_CELL[Return Cell Module] memaddr[1] mod_info +* ---------------------------------------------------- +* Bit 0 to 7 - entity type +* 0 = central agent, 1 = processor, +* 2 = memory controller, 3 = system bus adapter, +* 4 = local bus adapter, 5 = processor bus converter, +* 6 = crossbar fabric connect, 7 = fabric interconnect, +* 8 to 254 reserved, 255 = unknown. +* Bit 8 to 15 - DVI +* Bit 16 to 23 - IOC functions +* Bit 24 to 39 - reserved +* Bit 40 to 63 - mod_pages +* number of 4K pages a module occupies starting at conf_base_addr +********************************************************************/ +#define PAT_GET_ENTITY(value) (((value) >> 56) & 0xffUL) +#define PAT_GET_DVI(value) (((value) >> 48) & 0xffUL) +#define PAT_GET_IOC(value) (((value) >> 40) & 0xffUL) +#define PAT_GET_MOD_PAGES(value) ((value) & 0xffffffUL) + + +/* +** PDC_PAT_CELL_GET_INFO return block +*/ +typedef struct pdc_pat_cell_info_rtn_block { + unsigned long cpu_info; + unsigned long cell_info; + unsigned long cell_location; + unsigned long reo_location; + unsigned long mem_size; + unsigned long dimm_status; + unsigned long pdc_rev; + unsigned long fabric_info0; + unsigned long fabric_info1; + unsigned long fabric_info2; + unsigned long fabric_info3; + unsigned long reserved[21]; +} pdc_pat_cell_info_rtn_block_t; + + +/* FIXME: mod[508] should really be a union of the various mod components */ +struct pdc_pat_cell_mod_maddr_block { /* PDC_PAT_CELL_MODULE */ + unsigned long cba; /* func 0 cfg space address */ + unsigned long mod_info; /* module information */ + unsigned long mod_location; /* physical location of the module */ + struct hardware_path mod_path; /* module path (device path - layers) */ + unsigned long mod[508]; /* PAT cell module components */ +} __attribute__((aligned(8))) ; + +typedef struct pdc_pat_cell_mod_maddr_block pdc_pat_cell_mod_maddr_block_t; + + +extern int pdc_pat_chassis_send_log(unsigned long status, unsigned long data); +extern int pdc_pat_cell_get_number(struct pdc_pat_cell_num *cell_info); +extern int pdc_pat_cell_module(unsigned long *actcnt, unsigned long ploc, unsigned long mod, unsigned long view_type, void *mem_addr); +extern int pdc_pat_cell_num_to_loc(void *, unsigned long); + +extern int pdc_pat_cpu_get_number(struct pdc_pat_cpu_num *cpu_info, void *hpa); + +extern int pdc_pat_pd_get_addr_map(unsigned long *actual_len, void *mem_addr, unsigned long count, unsigned long offset); + + +extern int pdc_pat_io_pci_cfg_read(unsigned long pci_addr, int pci_size, u32 *val); +extern int pdc_pat_io_pci_cfg_write(unsigned long pci_addr, int pci_size, u32 val); + + +/* Flag to indicate this is a PAT box...don't use this unless you +** really have to...it might go away some day. +*/ +extern int pdc_pat; /* arch/parisc/kernel/inventory.c */ + +#endif /* __ASSEMBLY__ */ + +#endif /* ! __PARISC_PATPDC_H */ diff --git a/arch/parisc/include/asm/percpu.h b/arch/parisc/include/asm/percpu.h new file mode 100644 index 00000000000..a0dcd197012 --- /dev/null +++ b/arch/parisc/include/asm/percpu.h @@ -0,0 +1,7 @@ +#ifndef _PARISC_PERCPU_H +#define _PARISC_PERCPU_H + +#include + +#endif + diff --git a/arch/parisc/include/asm/perf.h b/arch/parisc/include/asm/perf.h new file mode 100644 index 00000000000..a18e11972c0 --- /dev/null +++ b/arch/parisc/include/asm/perf.h @@ -0,0 +1,74 @@ +#ifndef _ASM_PERF_H_ +#define _ASM_PERF_H_ + +/* ioctls */ +#define PA_PERF_ON _IO('p', 1) +#define PA_PERF_OFF _IOR('p', 2, unsigned int) +#define PA_PERF_VERSION _IOR('p', 3, int) + +#define PA_PERF_DEV "perf" +#define PA_PERF_MINOR 146 + +/* Interface types */ +#define UNKNOWN_INTF 255 +#define ONYX_INTF 0 +#define CUDA_INTF 1 + +/* Common Onyx and Cuda images */ +#define CPI 0 +#define BUSUTIL 1 +#define TLBMISS 2 +#define TLBHANDMISS 3 +#define PTKN 4 +#define PNTKN 5 +#define IMISS 6 +#define DMISS 7 +#define DMISS_ACCESS 8 +#define BIG_CPI 9 +#define BIG_LS 10 +#define BR_ABORT 11 +#define ISNT 12 +#define QUADRANT 13 +#define RW_PDFET 14 +#define RW_WDFET 15 +#define SHLIB_CPI 16 + +/* Cuda only Images */ +#define FLOPS 17 +#define CACHEMISS 18 +#define BRANCHES 19 +#define CRSTACK 20 +#define I_CACHE_SPEC 21 +#define MAX_CUDA_IMAGES 22 + +/* Onyx only Images */ +#define ADDR_INV_ABORT_ALU 17 +#define BRAD_STALL 18 +#define CNTL_IN_PIPEL 19 +#define DSNT_XFH 20 +#define FET_SIG1 21 +#define FET_SIG2 22 +#define G7_1 23 +#define G7_2 24 +#define G7_3 25 +#define G7_4 26 +#define MPB_LABORT 27 +#define PANIC 28 +#define RARE_INST 29 +#define RW_DFET 30 +#define RW_IFET 31 +#define RW_SDFET 32 +#define SPEC_IFET 33 +#define ST_COND0 34 +#define ST_COND1 35 +#define ST_COND2 36 +#define ST_COND3 37 +#define ST_COND4 38 +#define ST_UNPRED0 39 +#define ST_UNPRED1 40 +#define UNPRED 41 +#define GO_STORE 42 +#define SHLIB_CALL 43 +#define MAX_ONYX_IMAGES 44 + +#endif diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h new file mode 100644 index 00000000000..fc987a1c12a --- /dev/null +++ b/arch/parisc/include/asm/pgalloc.h @@ -0,0 +1,149 @@ +#ifndef _ASM_PGALLOC_H +#define _ASM_PGALLOC_H + +#include +#include +#include +#include +#include + +#include + +/* Allocate the top level pgd (page directory) + * + * Here (for 64 bit kernels) we implement a Hybrid L2/L3 scheme: we + * allocate the first pmd adjacent to the pgd. This means that we can + * subtract a constant offset to get to it. The pmd and pgd sizes are + * arranged so that a single pmd covers 4GB (giving a full 64-bit + * process access to 8TB) so our lookups are effectively L2 for the + * first 4GB of the kernel (i.e. for all ILP32 processes and all the + * kernel for machines with under 4GB of memory) */ +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, + PGD_ALLOC_ORDER); + pgd_t *actual_pgd = pgd; + + if (likely(pgd != NULL)) { + memset(pgd, 0, PAGE_SIZE<> PxD_VALUE_SHIFT)); + /* The first pmd entry also is marked with _PAGE_GATEWAY as + * a signal that this pmd may not be freed */ + __pgd_val_set(*pgd, PxD_FLAG_ATTACHED); +#endif + } + return actual_pgd; +} + +static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ +#ifdef CONFIG_64BIT + pgd -= PTRS_PER_PGD; +#endif + free_pages((unsigned long)pgd, PGD_ALLOC_ORDER); +} + +#if PT_NLEVELS == 3 + +/* Three Level Page Table Support for pmd's */ + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmd) +{ + __pgd_val_set(*pgd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) + + (__u32)(__pa((unsigned long)pmd) >> PxD_VALUE_SHIFT)); +} + +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address) +{ + pmd_t *pmd = (pmd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT, + PMD_ORDER); + if (pmd) + memset(pmd, 0, PAGE_SIZE<> PxD_VALUE_SHIFT)); + else +#endif + __pmd_val_set(*pmd, (PxD_FLAG_PRESENT | PxD_FLAG_VALID) + + (__u32)(__pa((unsigned long)pte) >> PxD_VALUE_SHIFT)); +} + +#define pmd_populate(mm, pmd, pte_page) \ + pmd_populate_kernel(mm, pmd, page_address(pte_page)) +#define pmd_pgtable(pmd) pmd_page(pmd) + +static inline pgtable_t +pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *page = alloc_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + if (page) + pgtable_page_ctor(page); + return page; +} + +static inline pte_t * +pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr) +{ + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + return pte; +} + +static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) +{ + free_page((unsigned long)pte); +} + +static inline void pte_free(struct mm_struct *mm, struct page *pte) +{ + pgtable_page_dtor(pte); + pte_free_kernel(mm, page_address(pte)); +} + +#define check_pgt_cache() do { } while (0) + +#endif diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h new file mode 100644 index 00000000000..470a4b88124 --- /dev/null +++ b/arch/parisc/include/asm/pgtable.h @@ -0,0 +1,508 @@ +#ifndef _PARISC_PGTABLE_H +#define _PARISC_PGTABLE_H + +#include + +#include + +#ifndef __ASSEMBLY__ +/* + * we simulate an x86-style page table for the linux mm code + */ + +#include /* for vm_area_struct */ +#include +#include +#include + +/* + * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel + * memory. For the return value to be meaningful, ADDR must be >= + * PAGE_OFFSET. This operation can be relatively expensive (e.g., + * require a hash-, or multi-level tree-lookup or something of that + * sort) but it guarantees to return TRUE only if accessing the page + * at that address does not cause an error. Note that there may be + * addresses for which kern_addr_valid() returns FALSE even though an + * access would not cause an error (e.g., this is typically true for + * memory mapped I/O regions. + * + * XXX Need to implement this for parisc. + */ +#define kern_addr_valid(addr) (1) + +/* Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +#define set_pte(pteptr, pteval) \ + do{ \ + *(pteptr) = (pteval); \ + } while(0) +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval) + +#endif /* !__ASSEMBLY__ */ + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, (unsigned long)pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, (unsigned long)pgd_val(e)) + +/* This is the size of the initially mapped kernel memory */ +#ifdef CONFIG_64BIT +#define KERNEL_INITIAL_ORDER 24 /* 0 to 1<<24 = 16MB */ +#else +#define KERNEL_INITIAL_ORDER 23 /* 0 to 1<<23 = 8MB */ +#endif +#define KERNEL_INITIAL_SIZE (1 << KERNEL_INITIAL_ORDER) + +#if defined(CONFIG_64BIT) && defined(CONFIG_PARISC_PAGE_SIZE_4KB) +#define PT_NLEVELS 3 +#define PGD_ORDER 1 /* Number of pages per pgd */ +#define PMD_ORDER 1 /* Number of pages per pmd */ +#define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */ +#else +#define PT_NLEVELS 2 +#define PGD_ORDER 1 /* Number of pages per pgd */ +#define PGD_ALLOC_ORDER PGD_ORDER +#endif + +/* Definitions for 3rd level (we use PLD here for Page Lower directory + * because PTE_SHIFT is used lower down to mean shift that has to be + * done to get usable bits out of the PTE) */ +#define PLD_SHIFT PAGE_SHIFT +#define PLD_SIZE PAGE_SIZE +#define BITS_PER_PTE (PAGE_SHIFT - BITS_PER_PTE_ENTRY) +#define PTRS_PER_PTE (1UL << BITS_PER_PTE) + +/* Definitions for 2nd level */ +#define pgtable_cache_init() do { } while (0) + +#define PMD_SHIFT (PLD_SHIFT + BITS_PER_PTE) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#if PT_NLEVELS == 3 +#define BITS_PER_PMD (PAGE_SHIFT + PMD_ORDER - BITS_PER_PMD_ENTRY) +#else +#define BITS_PER_PMD 0 +#endif +#define PTRS_PER_PMD (1UL << BITS_PER_PMD) + +/* Definitions for 1st level */ +#define PGDIR_SHIFT (PMD_SHIFT + BITS_PER_PMD) +#define BITS_PER_PGD (PAGE_SHIFT + PGD_ORDER - BITS_PER_PGD_ENTRY) +#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD (1UL << BITS_PER_PGD) +#define USER_PTRS_PER_PGD PTRS_PER_PGD + +#define MAX_ADDRBITS (PGDIR_SHIFT + BITS_PER_PGD) +#define MAX_ADDRESS (1UL << MAX_ADDRBITS) + +#define SPACEID_SHIFT (MAX_ADDRBITS - 32) + +/* This calculates the number of initial pages we need for the initial + * page tables */ +#if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT) +# define PT_INITIAL (1 << (KERNEL_INITIAL_ORDER - PMD_SHIFT)) +#else +# define PT_INITIAL (1) /* all initial PTEs fit into one page */ +#endif + +/* + * pgd entries used up by user/kernel: + */ + +#define FIRST_USER_ADDRESS 0 + +/* NB: The tlb miss handlers make certain assumptions about the order */ +/* of the following bits, so be careful (One example, bits 25-31 */ +/* are moved together in one instruction). */ + +#define _PAGE_READ_BIT 31 /* (0x001) read access allowed */ +#define _PAGE_WRITE_BIT 30 /* (0x002) write access allowed */ +#define _PAGE_EXEC_BIT 29 /* (0x004) execute access allowed */ +#define _PAGE_GATEWAY_BIT 28 /* (0x008) privilege promotion allowed */ +#define _PAGE_DMB_BIT 27 /* (0x010) Data Memory Break enable (B bit) */ +#define _PAGE_DIRTY_BIT 26 /* (0x020) Page Dirty (D bit) */ +#define _PAGE_FILE_BIT _PAGE_DIRTY_BIT /* overload this bit */ +#define _PAGE_REFTRAP_BIT 25 /* (0x040) Page Ref. Trap enable (T bit) */ +#define _PAGE_NO_CACHE_BIT 24 /* (0x080) Uncached Page (U bit) */ +#define _PAGE_ACCESSED_BIT 23 /* (0x100) Software: Page Accessed */ +#define _PAGE_PRESENT_BIT 22 /* (0x200) Software: translation valid */ +#define _PAGE_FLUSH_BIT 21 /* (0x400) Software: translation valid */ + /* for cache flushing only */ +#define _PAGE_USER_BIT 20 /* (0x800) Software: User accessible page */ + +/* N.B. The bits are defined in terms of a 32 bit word above, so the */ +/* following macro is ok for both 32 and 64 bit. */ + +#define xlate_pabit(x) (31 - x) + +/* this defines the shift to the usable bits in the PTE it is set so + * that the valid bits _PAGE_PRESENT_BIT and _PAGE_USER_BIT are set + * to zero */ +#define PTE_SHIFT xlate_pabit(_PAGE_USER_BIT) + +/* PFN_PTE_SHIFT defines the shift of a PTE value to access the PFN field */ +#define PFN_PTE_SHIFT 12 + + +/* this is how many bits may be used by the file functions */ +#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_SHIFT) + +#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_SHIFT) +#define pgoff_to_pte(off) ((pte_t) { ((off) << PTE_SHIFT) | _PAGE_FILE }) + +#define _PAGE_READ (1 << xlate_pabit(_PAGE_READ_BIT)) +#define _PAGE_WRITE (1 << xlate_pabit(_PAGE_WRITE_BIT)) +#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE) +#define _PAGE_EXEC (1 << xlate_pabit(_PAGE_EXEC_BIT)) +#define _PAGE_GATEWAY (1 << xlate_pabit(_PAGE_GATEWAY_BIT)) +#define _PAGE_DMB (1 << xlate_pabit(_PAGE_DMB_BIT)) +#define _PAGE_DIRTY (1 << xlate_pabit(_PAGE_DIRTY_BIT)) +#define _PAGE_REFTRAP (1 << xlate_pabit(_PAGE_REFTRAP_BIT)) +#define _PAGE_NO_CACHE (1 << xlate_pabit(_PAGE_NO_CACHE_BIT)) +#define _PAGE_ACCESSED (1 << xlate_pabit(_PAGE_ACCESSED_BIT)) +#define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT)) +#define _PAGE_FLUSH (1 << xlate_pabit(_PAGE_FLUSH_BIT)) +#define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT)) +#define _PAGE_FILE (1 << xlate_pabit(_PAGE_FILE_BIT)) + +#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_KERNEL (_PAGE_PRESENT | _PAGE_EXEC | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) + +/* The pgd/pmd contains a ptr (in phys addr space); since all pgds/pmds + * are page-aligned, we don't care about the PAGE_OFFSET bits, except + * for a few meta-information bits, so we shift the address to be + * able to effectively address 40/42/44-bits of physical address space + * depending on 4k/16k/64k PAGE_SIZE */ +#define _PxD_PRESENT_BIT 31 +#define _PxD_ATTACHED_BIT 30 +#define _PxD_VALID_BIT 29 + +#define PxD_FLAG_PRESENT (1 << xlate_pabit(_PxD_PRESENT_BIT)) +#define PxD_FLAG_ATTACHED (1 << xlate_pabit(_PxD_ATTACHED_BIT)) +#define PxD_FLAG_VALID (1 << xlate_pabit(_PxD_VALID_BIT)) +#define PxD_FLAG_MASK (0xf) +#define PxD_FLAG_SHIFT (4) +#define PxD_VALUE_SHIFT (8) /* (PAGE_SHIFT-PxD_FLAG_SHIFT) */ + +#ifndef __ASSEMBLY__ + +#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) +#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_ACCESSED) +/* Others seem to make this executable, I don't know if that's correct + or not. The stack is mapped this way though so this is necessary + in the short term - dhd@linuxcare.com, 2000-08-08 */ +#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_ACCESSED) +#define PAGE_WRITEONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_WRITE | _PAGE_ACCESSED) +#define PAGE_EXECREAD __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_EXEC |_PAGE_ACCESSED) +#define PAGE_COPY PAGE_EXECREAD +#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_READ | _PAGE_WRITE | _PAGE_EXEC |_PAGE_ACCESSED) +#define PAGE_KERNEL __pgprot(_PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot(_PAGE_KERNEL & ~_PAGE_WRITE) +#define PAGE_KERNEL_UNC __pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE) +#define PAGE_GATEWAY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED | _PAGE_GATEWAY| _PAGE_READ) +#define PAGE_FLUSH __pgprot(_PAGE_FLUSH) + + +/* + * We could have an execute only page using "gateway - promote to priv + * level 3", but that is kind of silly. So, the way things are defined + * now, we must always have read permission for pages with execute + * permission. For the fun of it we'll go ahead and support write only + * pages. + */ + + /*xwr*/ +#define __P000 PAGE_NONE +#define __P001 PAGE_READONLY +#define __P010 __P000 /* copy on write */ +#define __P011 __P001 /* copy on write */ +#define __P100 PAGE_EXECREAD +#define __P101 PAGE_EXECREAD +#define __P110 __P100 /* copy on write */ +#define __P111 __P101 /* copy on write */ + +#define __S000 PAGE_NONE +#define __S001 PAGE_READONLY +#define __S010 PAGE_WRITEONLY +#define __S011 PAGE_SHARED +#define __S100 PAGE_EXECREAD +#define __S101 PAGE_EXECREAD +#define __S110 PAGE_RWX +#define __S111 PAGE_RWX + + +extern pgd_t swapper_pg_dir[]; /* declared in init_task.c */ + +/* initial page tables for 0-8MB for kernel */ + +extern pte_t pg0[]; + +/* zero page used for uninitialized stuff */ + +extern unsigned long *empty_zero_page; + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ + +#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) + +#define pte_none(x) ((pte_val(x) == 0) || (pte_val(x) & _PAGE_FLUSH)) +#define pte_present(x) (pte_val(x) & _PAGE_PRESENT) +#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0) + +#define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) +#define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) +#define pgd_flag(x) (pgd_val(x) & PxD_FLAG_MASK) +#define pgd_address(x) ((unsigned long)(pgd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) + +#if PT_NLEVELS == 3 +/* The first entry of the permanent pmd is not there if it contains + * the gateway marker */ +#define pmd_none(x) (!pmd_val(x) || pmd_flag(x) == PxD_FLAG_ATTACHED) +#else +#define pmd_none(x) (!pmd_val(x)) +#endif +#define pmd_bad(x) (!(pmd_flag(x) & PxD_FLAG_VALID)) +#define pmd_present(x) (pmd_flag(x) & PxD_FLAG_PRESENT) +static inline void pmd_clear(pmd_t *pmd) { +#if PT_NLEVELS == 3 + if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) + /* This is the entry pointing to the permanent pmd + * attached to the pgd; cannot clear it */ + __pmd_val_set(*pmd, PxD_FLAG_ATTACHED); + else +#endif + __pmd_val_set(*pmd, 0); +} + + + +#if PT_NLEVELS == 3 +#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_address(pgd))) +#define pgd_page(pgd) virt_to_page((void *)pgd_page_vaddr(pgd)) + +/* For 64 bit we have three level tables */ + +#define pgd_none(x) (!pgd_val(x)) +#define pgd_bad(x) (!(pgd_flag(x) & PxD_FLAG_VALID)) +#define pgd_present(x) (pgd_flag(x) & PxD_FLAG_PRESENT) +static inline void pgd_clear(pgd_t *pgd) { +#if PT_NLEVELS == 3 + if(pgd_flag(*pgd) & PxD_FLAG_ATTACHED) + /* This is the permanent pmd attached to the pgd; cannot + * free it */ + return; +#endif + __pgd_val_set(*pgd, 0); +} +#else +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +static inline int pgd_none(pgd_t pgd) { return 0; } +static inline int pgd_bad(pgd_t pgd) { return 0; } +static inline int pgd_present(pgd_t pgd) { return 1; } +static inline void pgd_clear(pgd_t * pgdp) { } +#endif + +/* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ +static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } +static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } +static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; } +static inline int pte_special(pte_t pte) { return 0; } + +static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } +static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } +static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; return pte; } +static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } +static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } +static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { return pte; } + +/* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. + */ +#define __mk_pte(addr,pgprot) \ +({ \ + pte_t __pte; \ + \ + pte_val(__pte) = ((((addr)>>PAGE_SHIFT)<> PFN_PTE_SHIFT) + +#define pte_page(pte) (pfn_to_page(pte_pfn(pte))) + +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_address(pmd))) + +#define __pmd_page(pmd) ((unsigned long) __va(pmd_address(pmd))) +#define pmd_page(pmd) virt_to_page((void *)__pmd_page(pmd)) + +#define pgd_index(address) ((address) >> PGDIR_SHIFT) + +/* to find an entry in a page-table-directory */ +#define pgd_offset(mm, address) \ +((mm)->pgd + ((address) >> PGDIR_SHIFT)) + +/* to find an entry in a kernel page-table-directory */ +#define pgd_offset_k(address) pgd_offset(&init_mm, address) + +/* Find an entry in the second-level page table.. */ + +#if PT_NLEVELS == 3 +#define pmd_offset(dir,address) \ +((pmd_t *) pgd_page_vaddr(*(dir)) + (((address)>>PMD_SHIFT) & (PTRS_PER_PMD-1))) +#else +#define pmd_offset(dir,addr) ((pmd_t *) dir) +#endif + +/* Find an entry in the third-level page table.. */ +#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1)) +#define pte_offset_kernel(pmd, address) \ + ((pte_t *) pmd_page_vaddr(*(pmd)) + pte_index(address)) +#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address) +#define pte_offset_map_nested(pmd, address) pte_offset_kernel(pmd, address) +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +#define pte_unmap(pte) do { } while (0) +#define pte_unmap_nested(pte) do { } while (0) + +extern void paging_init (void); + +/* Used for deferring calls to flush_dcache_page() */ + +#define PG_dcache_dirty PG_arch_1 + +extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t); + +/* Encode and de-code a swap entry */ + +#define __swp_type(x) ((x).val & 0x1f) +#define __swp_offset(x) ( (((x).val >> 6) & 0x7) | \ + (((x).val >> 8) & ~0x7) ) +#define __swp_entry(type, offset) ((swp_entry_t) { (type) | \ + ((offset & 0x7) << 6) | \ + ((offset & ~0x7) << 8) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + if (!pte_young(*ptep)) + return 0; + return test_and_clear_bit(xlate_pabit(_PAGE_ACCESSED_BIT), &pte_val(*ptep)); +#else + pte_t pte = *ptep; + if (!pte_young(pte)) + return 0; + set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return 1; +#endif +} + +extern spinlock_t pa_dbit_lock; + +struct mm_struct; +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pte_t old_pte; + pte_t pte; + + spin_lock(&pa_dbit_lock); + pte = old_pte = *ptep; + pte_val(pte) &= ~_PAGE_PRESENT; + pte_val(pte) |= _PAGE_FLUSH; + set_pte_at(mm,addr,ptep,pte); + spin_unlock(&pa_dbit_lock); + + return old_pte; +} + +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + unsigned long new, old; + + do { + old = pte_val(*ptep); + new = pte_val(pte_wrprotect(__pte (old))); + } while (cmpxchg((unsigned long *) ptep, old, new) != old); +#else + pte_t old_pte = *ptep; + set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); +#endif +} + +#define pte_same(A,B) (pte_val(A) == pte_val(B)) + +#endif /* !__ASSEMBLY__ */ + + +/* TLB page size encoding - see table 3-1 in parisc20.pdf */ +#define _PAGE_SIZE_ENCODING_4K 0 +#define _PAGE_SIZE_ENCODING_16K 1 +#define _PAGE_SIZE_ENCODING_64K 2 +#define _PAGE_SIZE_ENCODING_256K 3 +#define _PAGE_SIZE_ENCODING_1M 4 +#define _PAGE_SIZE_ENCODING_4M 5 +#define _PAGE_SIZE_ENCODING_16M 6 +#define _PAGE_SIZE_ENCODING_64M 7 + +#if defined(CONFIG_PARISC_PAGE_SIZE_4KB) +# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_4K +#elif defined(CONFIG_PARISC_PAGE_SIZE_16KB) +# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_16K +#elif defined(CONFIG_PARISC_PAGE_SIZE_64KB) +# define _PAGE_SIZE_ENCODING_DEFAULT _PAGE_SIZE_ENCODING_64K +#endif + + +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ + remap_pfn_range(vma, vaddr, pfn, size, prot) + +#define pgprot_noncached(prot) __pgprot(pgprot_val(prot) | _PAGE_NO_CACHE) + +/* We provide our own get_unmapped_area to provide cache coherency */ + +#define HAVE_ARCH_UNMAPPED_AREA + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define __HAVE_ARCH_PTE_SAME +#include + +#endif /* _PARISC_PGTABLE_H */ diff --git a/arch/parisc/include/asm/poll.h b/arch/parisc/include/asm/poll.h new file mode 100644 index 00000000000..c98509d3149 --- /dev/null +++ b/arch/parisc/include/asm/poll.h @@ -0,0 +1 @@ +#include diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h new file mode 100644 index 00000000000..bb725a6630b --- /dev/null +++ b/arch/parisc/include/asm/posix_types.h @@ -0,0 +1,129 @@ +#ifndef __ARCH_PARISC_POSIX_TYPES_H +#define __ARCH_PARISC_POSIX_TYPES_H + +/* + * This file is generally used by user-level software, so you need to + * be a little careful about namespace pollution etc. Also, we cannot + * assume GCC is being used. + */ +typedef unsigned long __kernel_ino_t; +typedef unsigned short __kernel_mode_t; +typedef unsigned short __kernel_nlink_t; +typedef long __kernel_off_t; +typedef int __kernel_pid_t; +typedef unsigned short __kernel_ipc_pid_t; +typedef unsigned int __kernel_uid_t; +typedef unsigned int __kernel_gid_t; +typedef int __kernel_suseconds_t; +typedef long __kernel_clock_t; +typedef int __kernel_timer_t; +typedef int __kernel_clockid_t; +typedef int __kernel_daddr_t; +/* Note these change from narrow to wide kernels */ +#ifdef CONFIG_64BIT +typedef unsigned long __kernel_size_t; +typedef long __kernel_ssize_t; +typedef long __kernel_ptrdiff_t; +typedef long __kernel_time_t; +#else +typedef unsigned int __kernel_size_t; +typedef int __kernel_ssize_t; +typedef int __kernel_ptrdiff_t; +typedef long __kernel_time_t; +#endif +typedef char * __kernel_caddr_t; + +typedef unsigned short __kernel_uid16_t; +typedef unsigned short __kernel_gid16_t; +typedef unsigned int __kernel_uid32_t; +typedef unsigned int __kernel_gid32_t; + +#ifdef __GNUC__ +typedef long long __kernel_loff_t; +typedef long long __kernel_off64_t; +typedef unsigned long long __kernel_ino64_t; +#endif + +typedef unsigned int __kernel_old_dev_t; + +typedef struct { + int val[2]; +} __kernel_fsid_t; + +/* compatibility stuff */ +typedef __kernel_uid_t __kernel_old_uid_t; +typedef __kernel_gid_t __kernel_old_gid_t; + +#if defined(__KERNEL__) + +#undef __FD_SET +static __inline__ void __FD_SET(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] |= (1UL<<__rem); +} + +#undef __FD_CLR +static __inline__ void __FD_CLR(unsigned long __fd, __kernel_fd_set *__fdsetp) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + __fdsetp->fds_bits[__tmp] &= ~(1UL<<__rem); +} + +#undef __FD_ISSET +static __inline__ int __FD_ISSET(unsigned long __fd, const __kernel_fd_set *__p) +{ + unsigned long __tmp = __fd / __NFDBITS; + unsigned long __rem = __fd % __NFDBITS; + return (__p->fds_bits[__tmp] & (1UL<<__rem)) != 0; +} + +/* + * This will unroll the loop for the normal constant case (8 ints, + * for a 256-bit fd_set) + */ +#undef __FD_ZERO +static __inline__ void __FD_ZERO(__kernel_fd_set *__p) +{ + unsigned long *__tmp = __p->fds_bits; + int __i; + + if (__builtin_constant_p(__FDSET_LONGS)) { + switch (__FDSET_LONGS) { + case 16: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + __tmp[ 8] = 0; __tmp[ 9] = 0; + __tmp[10] = 0; __tmp[11] = 0; + __tmp[12] = 0; __tmp[13] = 0; + __tmp[14] = 0; __tmp[15] = 0; + return; + + case 8: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + __tmp[ 4] = 0; __tmp[ 5] = 0; + __tmp[ 6] = 0; __tmp[ 7] = 0; + return; + + case 4: + __tmp[ 0] = 0; __tmp[ 1] = 0; + __tmp[ 2] = 0; __tmp[ 3] = 0; + return; + } + } + __i = __FDSET_LONGS; + while (__i) { + __i--; + *__tmp = 0; + __tmp++; + } +} + +#endif /* defined(__KERNEL__) */ + +#endif diff --git a/arch/parisc/include/asm/prefetch.h b/arch/parisc/include/asm/prefetch.h new file mode 100644 index 00000000000..c5edc60c059 --- /dev/null +++ b/arch/parisc/include/asm/prefetch.h @@ -0,0 +1,39 @@ +/* + * include/asm-parisc/prefetch.h + * + * PA 2.0 defines data prefetch instructions on page 6-11 of the Kane book. + * In addition, many implementations do hardware prefetching of both + * instructions and data. + * + * PA7300LC (page 14-4 of the ERS) also implements prefetching by a load + * to gr0 but not in a way that Linux can use. If the load would cause an + * interruption (eg due to prefetching 0), it is suppressed on PA2.0 + * processors, but not on 7300LC. + * + */ + +#ifndef __ASM_PARISC_PREFETCH_H +#define __ASM_PARISC_PREFETCH_H + +#ifndef __ASSEMBLY__ +#ifdef CONFIG_PREFETCH + +#define ARCH_HAS_PREFETCH +static inline void prefetch(const void *addr) +{ + __asm__("ldw 0(%0), %%r0" : : "r" (addr)); +} + +/* LDD is a PA2.0 addition. */ +#ifdef CONFIG_PA20 +#define ARCH_HAS_PREFETCHW +static inline void prefetchw(const void *addr) +{ + __asm__("ldd 0(%0), %%r0" : : "r" (addr)); +} +#endif /* CONFIG_PA20 */ + +#endif /* CONFIG_PREFETCH */ +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_PARISC_PROCESSOR_H */ diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h new file mode 100644 index 00000000000..3c9d34844c8 --- /dev/null +++ b/arch/parisc/include/asm/processor.h @@ -0,0 +1,357 @@ +/* + * include/asm-parisc/processor.h + * + * Copyright (C) 1994 Linus Torvalds + * Copyright (C) 2001 Grant Grundler + */ + +#ifndef __ASM_PARISC_PROCESSOR_H +#define __ASM_PARISC_PROCESSOR_H + +#ifndef __ASSEMBLY__ +#include + +#include +#include +#include +#include +#include +#include +#endif /* __ASSEMBLY__ */ + +#define KERNEL_STACK_SIZE (4*PAGE_SIZE) + +/* + * Default implementation of macro that returns current + * instruction pointer ("program counter"). + */ +#ifdef CONFIG_PA20 +#define current_ia(x) __asm__("mfia %0" : "=r"(x)) +#else /* mfia added in pa2.0 */ +#define current_ia(x) __asm__("blr 0,%0\n\tnop" : "=r"(x)) +#endif +#define current_text_addr() ({ void *pc; current_ia(pc); pc; }) + +#define TASK_SIZE_OF(tsk) ((tsk)->thread.task_size) +#define TASK_SIZE TASK_SIZE_OF(current) +#define TASK_UNMAPPED_BASE (current->thread.map_base) + +#define DEFAULT_TASK_SIZE32 (0xFFF00000UL) +#define DEFAULT_MAP_BASE32 (0x40000000UL) + +#ifdef CONFIG_64BIT +#define DEFAULT_TASK_SIZE (MAX_ADDRESS-0xf000000) +#define DEFAULT_MAP_BASE (0x200000000UL) +#else +#define DEFAULT_TASK_SIZE DEFAULT_TASK_SIZE32 +#define DEFAULT_MAP_BASE DEFAULT_MAP_BASE32 +#endif + +#ifdef __KERNEL__ + +/* XXX: STACK_TOP actually should be STACK_BOTTOM for parisc. + * prumpf */ + +#define STACK_TOP TASK_SIZE +#define STACK_TOP_MAX DEFAULT_TASK_SIZE + +#endif + +#ifndef __ASSEMBLY__ + +/* + * Data detected about CPUs at boot time which is the same for all CPU's. + * HP boxes are SMP - ie identical processors. + * + * FIXME: some CPU rev info may be processor specific... + */ +struct system_cpuinfo_parisc { + unsigned int cpu_count; + unsigned int cpu_hz; + unsigned int hversion; + unsigned int sversion; + enum cpu_type cpu_type; + + struct { + struct pdc_model model; + unsigned long versions; + unsigned long cpuid; + unsigned long capabilities; + char sys_model_name[81]; /* PDC-ROM returnes this model name */ + } pdc; + + const char *cpu_name; /* e.g. "PA7300LC (PCX-L2)" */ + const char *family_name; /* e.g. "1.1e" */ +}; + + +/* Per CPU data structure - ie varies per CPU. */ +struct cpuinfo_parisc { + unsigned long it_value; /* Interval Timer at last timer Intr */ + unsigned long it_delta; /* Interval delta (tic_10ms / HZ * 100) */ + unsigned long irq_count; /* number of IRQ's since boot */ + unsigned long irq_max_cr16; /* longest time to handle a single IRQ */ + unsigned long cpuid; /* aka slot_number or set to NO_PROC_ID */ + unsigned long hpa; /* Host Physical address */ + unsigned long txn_addr; /* MMIO addr of EIR or id_eid */ +#ifdef CONFIG_SMP + unsigned long pending_ipi; /* bitmap of type ipi_message_type */ + unsigned long ipi_count; /* number ipi Interrupts */ +#endif + unsigned long bh_count; /* number of times bh was invoked */ + unsigned long prof_counter; /* per CPU profiling support */ + unsigned long prof_multiplier; /* per CPU profiling support */ + unsigned long fp_rev; + unsigned long fp_model; + unsigned int state; + struct parisc_device *dev; + unsigned long loops_per_jiffy; +}; + +extern struct system_cpuinfo_parisc boot_cpu_data; +extern struct cpuinfo_parisc cpu_data[NR_CPUS]; +#define current_cpu_data cpu_data[smp_processor_id()] + +#define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF) + +typedef struct { + int seg; +} mm_segment_t; + +#define ARCH_MIN_TASKALIGN 8 + +struct thread_struct { + struct pt_regs regs; + unsigned long task_size; + unsigned long map_base; + unsigned long flags; +}; + +/* Thread struct flags. */ +#define PARISC_UAC_NOPRINT (1UL << 0) /* see prctl and unaligned.c */ +#define PARISC_UAC_SIGBUS (1UL << 1) +#define PARISC_KERNEL_DEATH (1UL << 31) /* see die_if_kernel()... */ + +#define PARISC_UAC_SHIFT 0 +#define PARISC_UAC_MASK (PARISC_UAC_NOPRINT|PARISC_UAC_SIGBUS) + +#define SET_UNALIGN_CTL(task,value) \ + ({ \ + (task)->thread.flags = (((task)->thread.flags & ~PARISC_UAC_MASK) \ + | (((value) << PARISC_UAC_SHIFT) & \ + PARISC_UAC_MASK)); \ + 0; \ + }) + +#define GET_UNALIGN_CTL(task,addr) \ + ({ \ + put_user(((task)->thread.flags & PARISC_UAC_MASK) \ + >> PARISC_UAC_SHIFT, (int __user *) (addr)); \ + }) + +#define INIT_THREAD { \ + .regs = { .gr = { 0, }, \ + .fr = { 0, }, \ + .sr = { 0, }, \ + .iasq = { 0, }, \ + .iaoq = { 0, }, \ + .cr27 = 0, \ + }, \ + .task_size = DEFAULT_TASK_SIZE, \ + .map_base = DEFAULT_MAP_BASE, \ + .flags = 0 \ + } + +/* + * Return saved PC of a blocked thread. This is used by ps mostly. + */ + +unsigned long thread_saved_pc(struct task_struct *t); +void show_trace(struct task_struct *task, unsigned long *stack); + +/* + * Start user thread in another space. + * + * Note that we set both the iaoq and r31 to the new pc. When + * the kernel initially calls execve it will return through an + * rfi path that will use the values in the iaoq. The execve + * syscall path will return through the gateway page, and + * that uses r31 to branch to. + * + * For ELF we clear r23, because the dynamic linker uses it to pass + * the address of the finalizer function. + * + * We also initialize sr3 to an illegal value (illegal for our + * implementation, not for the architecture). + */ +typedef unsigned int elf_caddr_t; + +#define start_thread_som(regs, new_pc, new_sp) do { \ + unsigned long *sp = (unsigned long *)new_sp; \ + __u32 spaceid = (__u32)current->mm->context; \ + unsigned long pc = (unsigned long)new_pc; \ + /* offset pc for priv. level */ \ + pc |= 3; \ + \ + set_fs(USER_DS); \ + regs->iasq[0] = spaceid; \ + regs->iasq[1] = spaceid; \ + regs->iaoq[0] = pc; \ + regs->iaoq[1] = pc + 4; \ + regs->sr[2] = LINUX_GATEWAY_SPACE; \ + regs->sr[3] = 0xffff; \ + regs->sr[4] = spaceid; \ + regs->sr[5] = spaceid; \ + regs->sr[6] = spaceid; \ + regs->sr[7] = spaceid; \ + regs->gr[ 0] = USER_PSW; \ + regs->gr[30] = ((new_sp)+63)&~63; \ + regs->gr[31] = pc; \ + \ + get_user(regs->gr[26],&sp[0]); \ + get_user(regs->gr[25],&sp[-1]); \ + get_user(regs->gr[24],&sp[-2]); \ + get_user(regs->gr[23],&sp[-3]); \ +} while(0) + +/* The ELF abi wants things done a "wee bit" differently than + * som does. Supporting this behavior here avoids + * having our own version of create_elf_tables. + * + * Oh, and yes, that is not a typo, we are really passing argc in r25 + * and argv in r24 (rather than r26 and r25). This is because that's + * where __libc_start_main wants them. + * + * Duplicated from dl-machine.h for the benefit of readers: + * + * Our initial stack layout is rather different from everyone else's + * due to the unique PA-RISC ABI. As far as I know it looks like + * this: + + ----------------------------------- (user startup code creates this frame) + | 32 bytes of magic | + |---------------------------------| + | 32 bytes argument/sp save area | + |---------------------------------| (bprm->p) + | ELF auxiliary info | + | (up to 28 words) | + |---------------------------------| + | NULL | + |---------------------------------| + | Environment pointers | + |---------------------------------| + | NULL | + |---------------------------------| + | Argument pointers | + |---------------------------------| <- argv + | argc (1 word) | + |---------------------------------| <- bprm->exec (HACK!) + | N bytes of slack | + |---------------------------------| + | filename passed to execve | + |---------------------------------| (mm->env_end) + | env strings | + |---------------------------------| (mm->env_start, mm->arg_end) + | arg strings | + |---------------------------------| + | additional faked arg strings if | + | we're invoked via binfmt_script | + |---------------------------------| (mm->arg_start) + stack base is at TASK_SIZE - rlim_max. + +on downward growing arches, it looks like this: + stack base at TASK_SIZE + | filename passed to execve + | env strings + | arg strings + | faked arg strings + | slack + | ELF + | envps + | argvs + | argc + + * The pleasant part of this is that if we need to skip arguments we + * can just decrement argc and move argv, because the stack pointer + * is utterly unrelated to the location of the environment and + * argument vectors. + * + * Note that the S/390 people took the easy way out and hacked their + * GCC to make the stack grow downwards. + * + * Final Note: For entry from syscall, the W (wide) bit of the PSW + * is stuffed into the lowest bit of the user sp (%r30), so we fill + * it in here from the current->personality + */ + +#ifdef CONFIG_64BIT +#define USER_WIDE_MODE (!test_thread_flag(TIF_32BIT)) +#else +#define USER_WIDE_MODE 0 +#endif + +#define start_thread(regs, new_pc, new_sp) do { \ + elf_addr_t *sp = (elf_addr_t *)new_sp; \ + __u32 spaceid = (__u32)current->mm->context; \ + elf_addr_t pc = (elf_addr_t)new_pc | 3; \ + elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1; \ + \ + set_fs(USER_DS); \ + regs->iasq[0] = spaceid; \ + regs->iasq[1] = spaceid; \ + regs->iaoq[0] = pc; \ + regs->iaoq[1] = pc + 4; \ + regs->sr[2] = LINUX_GATEWAY_SPACE; \ + regs->sr[3] = 0xffff; \ + regs->sr[4] = spaceid; \ + regs->sr[5] = spaceid; \ + regs->sr[6] = spaceid; \ + regs->sr[7] = spaceid; \ + regs->gr[ 0] = USER_PSW | (USER_WIDE_MODE ? PSW_W : 0); \ + regs->fr[ 0] = 0LL; \ + regs->fr[ 1] = 0LL; \ + regs->fr[ 2] = 0LL; \ + regs->fr[ 3] = 0LL; \ + regs->gr[30] = (((unsigned long)sp + 63) &~ 63) | (USER_WIDE_MODE ? 1 : 0); \ + regs->gr[31] = pc; \ + \ + get_user(regs->gr[25], (argv - 1)); \ + regs->gr[24] = (long) argv; \ + regs->gr[23] = 0; \ +} while(0) + +struct task_struct; +struct mm_struct; + +/* Free all resources held by a thread. */ +extern void release_thread(struct task_struct *); +extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + +/* Prepare to copy thread state - unlazy all lazy status */ +#define prepare_to_copy(tsk) do { } while (0) + +extern void map_hpux_gateway_page(struct task_struct *tsk, struct mm_struct *mm); + +extern unsigned long get_wchan(struct task_struct *p); + +#define KSTK_EIP(tsk) ((tsk)->thread.regs.iaoq[0]) +#define KSTK_ESP(tsk) ((tsk)->thread.regs.gr[30]) + +#define cpu_relax() barrier() + +/* Used as a macro to identify the combined VIPT/PIPT cached + * CPUs which require a guarantee of coherency (no inequivalent + * aliases with different data, whether clean or not) to operate */ +static inline int parisc_requires_coherency(void) +{ +#ifdef CONFIG_PA8X00 + return (boot_cpu_data.cpu_type == mako) || + (boot_cpu_data.cpu_type == mako2); +#else + return 0; +#endif +} + +#endif /* __ASSEMBLY__ */ + +#endif /* __ASM_PARISC_PROCESSOR_H */ diff --git a/arch/parisc/include/asm/psw.h b/arch/parisc/include/asm/psw.h new file mode 100644 index 00000000000..5a3e23c9ce6 --- /dev/null +++ b/arch/parisc/include/asm/psw.h @@ -0,0 +1,62 @@ +#ifndef _PARISC_PSW_H + + +#define PSW_I 0x00000001 +#define PSW_D 0x00000002 +#define PSW_P 0x00000004 +#define PSW_Q 0x00000008 + +#define PSW_R 0x00000010 +#define PSW_F 0x00000020 +#define PSW_G 0x00000040 /* PA1.x only */ +#define PSW_O 0x00000080 /* PA2.0 only */ + +/* ssm/rsm instructions number PSW_W and PSW_E differently */ +#define PSW_SM_I PSW_I /* Enable External Interrupts */ +#define PSW_SM_D PSW_D +#define PSW_SM_P PSW_P +#define PSW_SM_Q PSW_Q /* Enable Interrupt State Collection */ +#define PSW_SM_R PSW_R /* Enable Recover Counter Trap */ +#define PSW_SM_W 0x200 /* PA2.0 only : Enable Wide Mode */ + +#define PSW_SM_QUIET PSW_SM_R+PSW_SM_Q+PSW_SM_P+PSW_SM_D+PSW_SM_I + +#define PSW_CB 0x0000ff00 + +#define PSW_M 0x00010000 +#define PSW_V 0x00020000 +#define PSW_C 0x00040000 +#define PSW_B 0x00080000 + +#define PSW_X 0x00100000 +#define PSW_N 0x00200000 +#define PSW_L 0x00400000 +#define PSW_H 0x00800000 + +#define PSW_T 0x01000000 +#define PSW_S 0x02000000 +#define PSW_E 0x04000000 +#define PSW_W 0x08000000 /* PA2.0 only */ +#define PSW_W_BIT 36 /* PA2.0 only */ + +#define PSW_Z 0x40000000 /* PA1.x only */ +#define PSW_Y 0x80000000 /* PA1.x only */ + +#ifdef CONFIG_64BIT +# define PSW_HI_CB 0x000000ff /* PA2.0 only */ +#endif + +#ifdef CONFIG_64BIT +# define USER_PSW_HI_MASK PSW_HI_CB +# define WIDE_PSW PSW_W +#else +# define WIDE_PSW 0 +#endif + +/* Used when setting up for rfi */ +#define KERNEL_PSW (WIDE_PSW | PSW_C | PSW_Q | PSW_P | PSW_D) +#define REAL_MODE_PSW (WIDE_PSW | PSW_Q) +#define USER_PSW_MASK (WIDE_PSW | PSW_T | PSW_N | PSW_X | PSW_B | PSW_V | PSW_CB) +#define USER_PSW (PSW_C | PSW_Q | PSW_P | PSW_D | PSW_I) + +#endif diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h new file mode 100644 index 00000000000..3e94c5d85ff --- /dev/null +++ b/arch/parisc/include/asm/ptrace.h @@ -0,0 +1,58 @@ +#ifndef _PARISC_PTRACE_H +#define _PARISC_PTRACE_H + +/* written by Philipp Rumpf, Copyright (C) 1999 SuSE GmbH Nuernberg +** Copyright (C) 2000 Grant Grundler, Hewlett-Packard +*/ + +#include + +/* This struct defines the way the registers are stored on the + * stack during a system call. + * + * N.B. gdb/strace care about the size and offsets within this + * structure. If you change things, you may break object compatibility + * for those applications. + */ + +struct pt_regs { + unsigned long gr[32]; /* PSW is in gr[0] */ + __u64 fr[32]; + unsigned long sr[ 8]; + unsigned long iasq[2]; + unsigned long iaoq[2]; + unsigned long cr27; + unsigned long pad0; /* available for other uses */ + unsigned long orig_r28; + unsigned long ksp; + unsigned long kpc; + unsigned long sar; /* CR11 */ + unsigned long iir; /* CR19 */ + unsigned long isr; /* CR20 */ + unsigned long ior; /* CR21 */ + unsigned long ipsw; /* CR22 */ +}; + +/* + * The numbers chosen here are somewhat arbitrary but absolutely MUST + * not overlap with any of the number assigned in . + * + * These ones are taken from IA-64 on the assumption that theirs are + * the most correct (and we also want to support PTRACE_SINGLEBLOCK + * since we have taken branch traps too) + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ + +#ifdef __KERNEL__ + +#define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS)) + +/* XXX should we use iaoq[1] or iaoq[0] ? */ +#define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0) +#define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0) +#define instruction_pointer(regs) ((regs)->iaoq[0] & ~3) +unsigned long profile_pc(struct pt_regs *); +extern void show_regs(struct pt_regs *); +#endif + +#endif diff --git a/arch/parisc/include/asm/real.h b/arch/parisc/include/asm/real.h new file mode 100644 index 00000000000..82acb25db39 --- /dev/null +++ b/arch/parisc/include/asm/real.h @@ -0,0 +1,5 @@ +#ifndef _PARISC_REAL_H +#define _PARISC_REAL_H + + +#endif diff --git a/arch/parisc/include/asm/resource.h b/arch/parisc/include/asm/resource.h new file mode 100644 index 00000000000..8b06343b62e --- /dev/null +++ b/arch/parisc/include/asm/resource.h @@ -0,0 +1,7 @@ +#ifndef _ASM_PARISC_RESOURCE_H +#define _ASM_PARISC_RESOURCE_H + +#define _STK_LIM_MAX 10 * _STK_LIM +#include + +#endif diff --git a/arch/parisc/include/asm/ropes.h b/arch/parisc/include/asm/ropes.h new file mode 100644 index 00000000000..09f51d5ab57 --- /dev/null +++ b/arch/parisc/include/asm/ropes.h @@ -0,0 +1,322 @@ +#ifndef _ASM_PARISC_ROPES_H_ +#define _ASM_PARISC_ROPES_H_ + +#include + +#ifdef CONFIG_64BIT +/* "low end" PA8800 machines use ZX1 chipset: PAT PDC and only run 64-bit */ +#define ZX1_SUPPORT +#endif + +#ifdef CONFIG_PROC_FS +/* depends on proc fs support. But costs CPU performance */ +#undef SBA_COLLECT_STATS +#endif + +/* +** The number of pdir entries to "free" before issuing +** a read to PCOM register to flush out PCOM writes. +** Interacts with allocation granularity (ie 4 or 8 entries +** allocated and free'd/purged at a time might make this +** less interesting). +*/ +#define DELAYED_RESOURCE_CNT 16 + +#define MAX_IOC 2 /* per Ike. Pluto/Astro only have 1. */ +#define ROPES_PER_IOC 8 /* per Ike half or Pluto/Astro */ + +struct ioc { + void __iomem *ioc_hpa; /* I/O MMU base address */ + char *res_map; /* resource map, bit == pdir entry */ + u64 *pdir_base; /* physical base address */ + unsigned long ibase; /* pdir IOV Space base - shared w/lba_pci */ + unsigned long imask; /* pdir IOV Space mask - shared w/lba_pci */ +#ifdef ZX1_SUPPORT + unsigned long iovp_mask; /* help convert IOVA to IOVP */ +#endif + unsigned long *res_hint; /* next avail IOVP - circular search */ + spinlock_t res_lock; + unsigned int res_bitshift; /* from the LEFT! */ + unsigned int res_size; /* size of resource map in bytes */ +#ifdef SBA_HINT_SUPPORT +/* FIXME : DMA HINTs not used */ + unsigned long hint_mask_pdir; /* bits used for DMA hints */ + unsigned int hint_shift_pdir; +#endif +#if DELAYED_RESOURCE_CNT > 0 + int saved_cnt; + struct sba_dma_pair { + dma_addr_t iova; + size_t size; + } saved[DELAYED_RESOURCE_CNT]; +#endif + +#ifdef SBA_COLLECT_STATS +#define SBA_SEARCH_SAMPLE 0x100 + unsigned long avg_search[SBA_SEARCH_SAMPLE]; + unsigned long avg_idx; /* current index into avg_search */ + unsigned long used_pages; + unsigned long msingle_calls; + unsigned long msingle_pages; + unsigned long msg_calls; + unsigned long msg_pages; + unsigned long usingle_calls; + unsigned long usingle_pages; + unsigned long usg_calls; + unsigned long usg_pages; +#endif + /* STUFF We don't need in performance path */ + unsigned int pdir_size; /* in bytes, determined by IOV Space size */ +}; + +struct sba_device { + struct sba_device *next; /* list of SBA's in system */ + struct parisc_device *dev; /* dev found in bus walk */ + const char *name; + void __iomem *sba_hpa; /* base address */ + spinlock_t sba_lock; + unsigned int flags; /* state/functionality enabled */ + unsigned int hw_rev; /* HW revision of chip */ + + struct resource chip_resv; /* MMIO reserved for chip */ + struct resource iommu_resv; /* MMIO reserved for iommu */ + + unsigned int num_ioc; /* number of on-board IOC's */ + struct ioc ioc[MAX_IOC]; +}; + +#define ASTRO_RUNWAY_PORT 0x582 +#define IKE_MERCED_PORT 0x803 +#define REO_MERCED_PORT 0x804 +#define REOG_MERCED_PORT 0x805 +#define PLUTO_MCKINLEY_PORT 0x880 + +static inline int IS_ASTRO(struct parisc_device *d) { + return d->id.hversion == ASTRO_RUNWAY_PORT; +} + +static inline int IS_IKE(struct parisc_device *d) { + return d->id.hversion == IKE_MERCED_PORT; +} + +static inline int IS_PLUTO(struct parisc_device *d) { + return d->id.hversion == PLUTO_MCKINLEY_PORT; +} + +#define PLUTO_IOVA_BASE (1UL*1024*1024*1024) /* 1GB */ +#define PLUTO_IOVA_SIZE (1UL*1024*1024*1024) /* 1GB */ +#define PLUTO_GART_SIZE (PLUTO_IOVA_SIZE / 2) + +#define SBA_PDIR_VALID_BIT 0x8000000000000000ULL + +#define SBA_AGPGART_COOKIE 0x0000badbadc0ffeeULL + +#define SBA_FUNC_ID 0x0000 /* function id */ +#define SBA_FCLASS 0x0008 /* function class, bist, header, rev... */ + +#define SBA_FUNC_SIZE 4096 /* SBA configuration function reg set */ + +#define ASTRO_IOC_OFFSET (32 * SBA_FUNC_SIZE) +#define PLUTO_IOC_OFFSET (1 * SBA_FUNC_SIZE) +/* Ike's IOC's occupy functions 2 and 3 */ +#define IKE_IOC_OFFSET(p) ((p+2) * SBA_FUNC_SIZE) + +#define IOC_CTRL 0x8 /* IOC_CTRL offset */ +#define IOC_CTRL_TC (1 << 0) /* TOC Enable */ +#define IOC_CTRL_CE (1 << 1) /* Coalesce Enable */ +#define IOC_CTRL_DE (1 << 2) /* Dillon Enable */ +#define IOC_CTRL_RM (1 << 8) /* Real Mode */ +#define IOC_CTRL_NC (1 << 9) /* Non Coherent Mode */ +#define IOC_CTRL_D4 (1 << 11) /* Disable 4-byte coalescing */ +#define IOC_CTRL_DD (1 << 13) /* Disable distr. LMMIO range coalescing */ + +/* +** Offsets into MBIB (Function 0 on Ike and hopefully Astro) +** Firmware programs this stuff. Don't touch it. +*/ +#define LMMIO_DIRECT0_BASE 0x300 +#define LMMIO_DIRECT0_MASK 0x308 +#define LMMIO_DIRECT0_ROUTE 0x310 + +#define LMMIO_DIST_BASE 0x360 +#define LMMIO_DIST_MASK 0x368 +#define LMMIO_DIST_ROUTE 0x370 + +#define IOS_DIST_BASE 0x390 +#define IOS_DIST_MASK 0x398 +#define IOS_DIST_ROUTE 0x3A0 + +#define IOS_DIRECT_BASE 0x3C0 +#define IOS_DIRECT_MASK 0x3C8 +#define IOS_DIRECT_ROUTE 0x3D0 + +/* +** Offsets into I/O TLB (Function 2 and 3 on Ike) +*/ +#define ROPE0_CTL 0x200 /* "regbus pci0" */ +#define ROPE1_CTL 0x208 +#define ROPE2_CTL 0x210 +#define ROPE3_CTL 0x218 +#define ROPE4_CTL 0x220 +#define ROPE5_CTL 0x228 +#define ROPE6_CTL 0x230 +#define ROPE7_CTL 0x238 + +#define IOC_ROPE0_CFG 0x500 /* pluto only */ +#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */ + +#define HF_ENABLE 0x40 + +#define IOC_IBASE 0x300 /* IO TLB */ +#define IOC_IMASK 0x308 +#define IOC_PCOM 0x310 +#define IOC_TCNFG 0x318 +#define IOC_PDIR_BASE 0x320 + +/* +** IOC supports 4/8/16/64KB page sizes (see TCNFG register) +** It's safer (avoid memory corruption) to keep DMA page mappings +** equivalently sized to VM PAGE_SIZE. +** +** We really can't avoid generating a new mapping for each +** page since the Virtual Coherence Index has to be generated +** and updated for each page. +** +** PAGE_SIZE could be greater than IOVP_SIZE. But not the inverse. +*/ +#define IOVP_SIZE PAGE_SIZE +#define IOVP_SHIFT PAGE_SHIFT +#define IOVP_MASK PAGE_MASK + +#define SBA_PERF_CFG 0x708 /* Performance Counter stuff */ +#define SBA_PERF_MASK1 0x718 +#define SBA_PERF_MASK2 0x730 + +/* +** Offsets into PCI Performance Counters (functions 12 and 13) +** Controlled by PERF registers in function 2 & 3 respectively. +*/ +#define SBA_PERF_CNT1 0x200 +#define SBA_PERF_CNT2 0x208 +#define SBA_PERF_CNT3 0x210 + +/* +** lba_device: Per instance Elroy data structure +*/ +struct lba_device { + struct pci_hba_data hba; + + spinlock_t lba_lock; + void *iosapic_obj; + +#ifdef CONFIG_64BIT + void __iomem *iop_base; /* PA_VIEW - for IO port accessor funcs */ +#endif + + int flags; /* state/functionality enabled */ + int hw_rev; /* HW revision of chip */ +}; + +#define ELROY_HVERS 0x782 +#define MERCURY_HVERS 0x783 +#define QUICKSILVER_HVERS 0x784 + +static inline int IS_ELROY(struct parisc_device *d) { + return (d->id.hversion == ELROY_HVERS); +} + +static inline int IS_MERCURY(struct parisc_device *d) { + return (d->id.hversion == MERCURY_HVERS); +} + +static inline int IS_QUICKSILVER(struct parisc_device *d) { + return (d->id.hversion == QUICKSILVER_HVERS); +} + +static inline int agp_mode_mercury(void __iomem *hpa) { + u64 bus_mode; + + bus_mode = readl(hpa + 0x0620); + if (bus_mode & 1) + return 1; + + return 0; +} + +/* +** I/O SAPIC init function +** Caller knows where an I/O SAPIC is. LBA has an integrated I/O SAPIC. +** Call setup as part of per instance initialization. +** (ie *not* init_module() function unless only one is present.) +** fixup_irq is to initialize PCI IRQ line support and +** virtualize pcidev->irq value. To be called by pci_fixup_bus(). +*/ +extern void *iosapic_register(unsigned long hpa); +extern int iosapic_fixup_irq(void *obj, struct pci_dev *pcidev); + +#define LBA_FUNC_ID 0x0000 /* function id */ +#define LBA_FCLASS 0x0008 /* function class, bist, header, rev... */ +#define LBA_CAPABLE 0x0030 /* capabilities register */ + +#define LBA_PCI_CFG_ADDR 0x0040 /* poke CFG address here */ +#define LBA_PCI_CFG_DATA 0x0048 /* read or write data here */ + +#define LBA_PMC_MTLT 0x0050 /* Firmware sets this - read only. */ +#define LBA_FW_SCRATCH 0x0058 /* Firmware writes the PCI bus number here. */ +#define LBA_ERROR_ADDR 0x0070 /* On error, address gets logged here */ + +#define LBA_ARB_MASK 0x0080 /* bit 0 enable arbitration. PAT/PDC enables */ +#define LBA_ARB_PRI 0x0088 /* firmware sets this. */ +#define LBA_ARB_MODE 0x0090 /* firmware sets this. */ +#define LBA_ARB_MTLT 0x0098 /* firmware sets this. */ + +#define LBA_MOD_ID 0x0100 /* Module ID. PDC_PAT_CELL reports 4 */ + +#define LBA_STAT_CTL 0x0108 /* Status & Control */ +#define LBA_BUS_RESET 0x01 /* Deassert PCI Bus Reset Signal */ +#define CLEAR_ERRLOG 0x10 /* "Clear Error Log" cmd */ +#define CLEAR_ERRLOG_ENABLE 0x20 /* "Clear Error Log" Enable */ +#define HF_ENABLE 0x40 /* enable HF mode (default is -1 mode) */ + +#define LBA_LMMIO_BASE 0x0200 /* < 4GB I/O address range */ +#define LBA_LMMIO_MASK 0x0208 + +#define LBA_GMMIO_BASE 0x0210 /* > 4GB I/O address range */ +#define LBA_GMMIO_MASK 0x0218 + +#define LBA_WLMMIO_BASE 0x0220 /* All < 4GB ranges under the same *SBA* */ +#define LBA_WLMMIO_MASK 0x0228 + +#define LBA_WGMMIO_BASE 0x0230 /* All > 4GB ranges under the same *SBA* */ +#define LBA_WGMMIO_MASK 0x0238 + +#define LBA_IOS_BASE 0x0240 /* I/O port space for this LBA */ +#define LBA_IOS_MASK 0x0248 + +#define LBA_ELMMIO_BASE 0x0250 /* Extra LMMIO range */ +#define LBA_ELMMIO_MASK 0x0258 + +#define LBA_EIOS_BASE 0x0260 /* Extra I/O port space */ +#define LBA_EIOS_MASK 0x0268 + +#define LBA_GLOBAL_MASK 0x0270 /* Mercury only: Global Address Mask */ +#define LBA_DMA_CTL 0x0278 /* firmware sets this */ + +#define LBA_IBASE 0x0300 /* SBA DMA support */ +#define LBA_IMASK 0x0308 + +/* FIXME: ignore DMA Hint stuff until we can measure performance */ +#define LBA_HINT_CFG 0x0310 +#define LBA_HINT_BASE 0x0380 /* 14 registers at every 8 bytes. */ + +#define LBA_BUS_MODE 0x0620 + +/* ERROR regs are needed for config cycle kluges */ +#define LBA_ERROR_CONFIG 0x0680 +#define LBA_SMART_MODE 0x20 +#define LBA_ERROR_STATUS 0x0688 +#define LBA_ROPE_CTL 0x06A0 + +#define LBA_IOSAPIC_BASE 0x800 /* Offset of IRQ logic */ + +#endif /*_ASM_PARISC_ROPES_H_*/ diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h new file mode 100644 index 00000000000..f0dd3b30f6c --- /dev/null +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -0,0 +1,23 @@ +#ifndef _ASM_PARISC_RT_SIGFRAME_H +#define _ASM_PARISC_RT_SIGFRAME_H + +#define SIGRETURN_TRAMP 4 +#define SIGRESTARTBLOCK_TRAMP 5 +#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) + +struct rt_sigframe { + /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c + Secondary to that it must protect the ERESTART_RESTARTBLOCK + trampoline we left on the stack (we were bad and didn't + change sp so we could run really fast.) */ + unsigned int tramp[TRAMP_SIZE]; + struct siginfo info; + struct ucontext uc; +}; + +#define SIGFRAME 128 +#define FUNCTIONCALLFRAME 96 +#define PARISC_RT_SIGFRAME_SIZE \ + (((sizeof(struct rt_sigframe) + FUNCTIONCALLFRAME) + SIGFRAME) & -SIGFRAME) + +#endif diff --git a/arch/parisc/include/asm/rtc.h b/arch/parisc/include/asm/rtc.h new file mode 100644 index 00000000000..099d641a42c --- /dev/null +++ b/arch/parisc/include/asm/rtc.h @@ -0,0 +1,131 @@ +/* + * include/asm-parisc/rtc.h + * + * Copyright 2002 Randolph CHung + * + * Based on: include/asm-ppc/rtc.h and the genrtc driver in the + * 2.4 parisc linux tree + */ + +#ifndef __ASM_RTC_H__ +#define __ASM_RTC_H__ + +#ifdef __KERNEL__ + +#include + +#include + +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) + + +#define RTC_PIE 0x40 /* periodic interrupt enable */ +#define RTC_AIE 0x20 /* alarm interrupt enable */ +#define RTC_UIE 0x10 /* update-finished interrupt enable */ + +#define RTC_BATT_BAD 0x100 /* battery bad */ + +/* some dummy definitions */ +#define RTC_SQWE 0x08 /* enable square-wave output */ +#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */ +#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */ +#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */ + +# define __isleap(year) \ + ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0)) + +/* How many days come before each month (0-12). */ +static const unsigned short int __mon_yday[2][13] = +{ + /* Normal years. */ + { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + /* Leap years. */ + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } +}; + +static inline unsigned int get_rtc_time(struct rtc_time *wtime) +{ + struct pdc_tod tod_data; + long int days, rem, y; + const unsigned short int *ip; + + memset(wtime, 0, sizeof(*wtime)); + if (pdc_tod_read(&tod_data) < 0) + return RTC_24H | RTC_BATT_BAD; + + // most of the remainder of this function is: +// Copyright (C) 1991, 1993, 1997, 1998 Free Software Foundation, Inc. +// This was originally a part of the GNU C Library. +// It is distributed under the GPL, and was swiped from offtime.c + + + days = tod_data.tod_sec / SECS_PER_DAY; + rem = tod_data.tod_sec % SECS_PER_DAY; + + wtime->tm_hour = rem / SECS_PER_HOUR; + rem %= SECS_PER_HOUR; + wtime->tm_min = rem / 60; + wtime->tm_sec = rem % 60; + + y = 1970; + +#define DIV(a, b) ((a) / (b) - ((a) % (b) < 0)) +#define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400)) + + while (days < 0 || days >= (__isleap (y) ? 366 : 365)) + { + /* Guess a corrected year, assuming 365 days per year. */ + long int yg = y + days / 365 - (days % 365 < 0); + + /* Adjust DAYS and Y to match the guessed year. */ + days -= ((yg - y) * 365 + + LEAPS_THRU_END_OF (yg - 1) + - LEAPS_THRU_END_OF (y - 1)); + y = yg; + } + wtime->tm_year = y - 1900; + + ip = __mon_yday[__isleap(y)]; + for (y = 11; days < (long int) ip[y]; --y) + continue; + days -= ip[y]; + wtime->tm_mon = y; + wtime->tm_mday = days + 1; + + return RTC_24H; +} + +static int set_rtc_time(struct rtc_time *wtime) +{ + u_int32_t secs; + + secs = mktime(wtime->tm_year + 1900, wtime->tm_mon + 1, wtime->tm_mday, + wtime->tm_hour, wtime->tm_min, wtime->tm_sec); + + if(pdc_tod_set(secs, 0) < 0) + return -1; + else + return 0; + +} + +static inline unsigned int get_rtc_ss(void) +{ + struct rtc_time h; + + get_rtc_time(&h); + return h.tm_sec; +} + +static inline int get_rtc_pll(struct rtc_pll_info *pll) +{ + return -EINVAL; +} +static inline int set_rtc_pll(struct rtc_pll_info *pll) +{ + return -EINVAL; +} + +#endif /* __KERNEL__ */ +#endif /* __ASM_RTC_H__ */ diff --git a/arch/parisc/include/asm/runway.h b/arch/parisc/include/asm/runway.h new file mode 100644 index 00000000000..5bea02da7e2 --- /dev/null +++ b/arch/parisc/include/asm/runway.h @@ -0,0 +1,12 @@ +#ifndef ASM_PARISC_RUNWAY_H +#define ASM_PARISC_RUNWAY_H +#ifdef __KERNEL__ + +/* declared in arch/parisc/kernel/setup.c */ +extern struct proc_dir_entry * proc_runway_root; + +#define RUNWAY_STATUS 0x10 +#define RUNWAY_DEBUG 0x40 + +#endif /* __KERNEL__ */ +#endif /* ASM_PARISC_RUNWAY_H */ diff --git a/arch/parisc/include/asm/scatterlist.h b/arch/parisc/include/asm/scatterlist.h new file mode 100644 index 00000000000..62269b31ebf --- /dev/null +++ b/arch/parisc/include/asm/scatterlist.h @@ -0,0 +1,27 @@ +#ifndef _ASM_PARISC_SCATTERLIST_H +#define _ASM_PARISC_SCATTERLIST_H + +#include +#include + +struct scatterlist { +#ifdef CONFIG_DEBUG_SG + unsigned long sg_magic; +#endif + unsigned long page_link; + unsigned int offset; + + unsigned int length; + + /* an IOVA can be 64-bits on some PA-Risc platforms. */ + dma_addr_t iova; /* I/O Virtual Address */ + __u32 iova_length; /* bytes mapped */ +}; + +#define sg_virt_addr(sg) ((unsigned long)sg_virt(sg)) +#define sg_dma_address(sg) ((sg)->iova) +#define sg_dma_len(sg) ((sg)->iova_length) + +#define ISA_DMA_THRESHOLD (~0UL) + +#endif /* _ASM_PARISC_SCATTERLIST_H */ diff --git a/arch/parisc/include/asm/sections.h b/arch/parisc/include/asm/sections.h new file mode 100644 index 00000000000..9d13c3507ad --- /dev/null +++ b/arch/parisc/include/asm/sections.h @@ -0,0 +1,12 @@ +#ifndef _PARISC_SECTIONS_H +#define _PARISC_SECTIONS_H + +/* nothing to see, move along */ +#include + +#ifdef CONFIG_64BIT +#undef dereference_function_descriptor +void *dereference_function_descriptor(void *); +#endif + +#endif diff --git a/arch/parisc/include/asm/segment.h b/arch/parisc/include/asm/segment.h new file mode 100644 index 00000000000..26794ddb652 --- /dev/null +++ b/arch/parisc/include/asm/segment.h @@ -0,0 +1,6 @@ +#ifndef __PARISC_SEGMENT_H +#define __PARISC_SEGMENT_H + +/* Only here because we have some old header files that expect it.. */ + +#endif diff --git a/arch/parisc/include/asm/sembuf.h b/arch/parisc/include/asm/sembuf.h new file mode 100644 index 00000000000..1e59ffd3bd1 --- /dev/null +++ b/arch/parisc/include/asm/sembuf.h @@ -0,0 +1,29 @@ +#ifndef _PARISC_SEMBUF_H +#define _PARISC_SEMBUF_H + +/* + * The semid64_ds structure for parisc architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ +#ifndef CONFIG_64BIT + unsigned int __pad1; +#endif + __kernel_time_t sem_otime; /* last semop time */ +#ifndef CONFIG_64BIT + unsigned int __pad2; +#endif + __kernel_time_t sem_ctime; /* last change time */ + unsigned int sem_nsems; /* no. of semaphores in array */ + unsigned int __unused1; + unsigned int __unused2; +}; + +#endif /* _PARISC_SEMBUF_H */ diff --git a/arch/parisc/include/asm/serial.h b/arch/parisc/include/asm/serial.h new file mode 100644 index 00000000000..d7e3cc60dbc --- /dev/null +++ b/arch/parisc/include/asm/serial.h @@ -0,0 +1,10 @@ +/* + * include/asm-parisc/serial.h + */ + +/* + * This is used for 16550-compatible UARTs + */ +#define BASE_BAUD ( 1843200 / 16 ) + +#define SERIAL_PORT_DFNS diff --git a/arch/parisc/include/asm/setup.h b/arch/parisc/include/asm/setup.h new file mode 100644 index 00000000000..7da2e5b8747 --- /dev/null +++ b/arch/parisc/include/asm/setup.h @@ -0,0 +1,6 @@ +#ifndef _PARISC_SETUP_H +#define _PARISC_SETUP_H + +#define COMMAND_LINE_SIZE 1024 + +#endif /* _PARISC_SETUP_H */ diff --git a/arch/parisc/include/asm/shmbuf.h b/arch/parisc/include/asm/shmbuf.h new file mode 100644 index 00000000000..0a3eada1863 --- /dev/null +++ b/arch/parisc/include/asm/shmbuf.h @@ -0,0 +1,58 @@ +#ifndef _PARISC_SHMBUF_H +#define _PARISC_SHMBUF_H + +/* + * The shmid64_ds structure for parisc architecture. + * Note extra padding because this structure is passed back and forth + * between kernel and user space. + * + * Pad space is left for: + * - 64-bit time_t to solve y2038 problem + * - 2 miscellaneous 32-bit values + */ + +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ +#ifndef CONFIG_64BIT + unsigned int __pad1; +#endif + __kernel_time_t shm_atime; /* last attach time */ +#ifndef CONFIG_64BIT + unsigned int __pad2; +#endif + __kernel_time_t shm_dtime; /* last detach time */ +#ifndef CONFIG_64BIT + unsigned int __pad3; +#endif + __kernel_time_t shm_ctime; /* last change time */ +#ifndef CONFIG_64BIT + unsigned int __pad4; +#endif + size_t shm_segsz; /* size of segment (bytes) */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned int shm_nattch; /* no. of current attaches */ + unsigned int __unused1; + unsigned int __unused2; +}; + +#ifdef CONFIG_64BIT +/* The 'unsigned int' (formerly 'unsigned long') data types below will + * ensure that a 32-bit app calling shmctl(*,IPC_INFO,*) will work on + * a wide kernel, but if some of these values are meant to contain pointers + * they may need to be 'long long' instead. -PB XXX FIXME + */ +#endif +struct shminfo64 { + unsigned int shmmax; + unsigned int shmmin; + unsigned int shmmni; + unsigned int shmseg; + unsigned int shmall; + unsigned int __unused1; + unsigned int __unused2; + unsigned int __unused3; + unsigned int __unused4; +}; + +#endif /* _PARISC_SHMBUF_H */ diff --git a/arch/parisc/include/asm/shmparam.h b/arch/parisc/include/asm/shmparam.h new file mode 100644 index 00000000000..628ddc22faa --- /dev/null +++ b/arch/parisc/include/asm/shmparam.h @@ -0,0 +1,8 @@ +#ifndef _ASMPARISC_SHMPARAM_H +#define _ASMPARISC_SHMPARAM_H + +#define __ARCH_FORCE_SHMLBA 1 + +#define SHMLBA 0x00400000 /* attach addr needs to be 4 Mb aligned */ + +#endif /* _ASMPARISC_SHMPARAM_H */ diff --git a/arch/parisc/include/asm/sigcontext.h b/arch/parisc/include/asm/sigcontext.h new file mode 100644 index 00000000000..27ef31bb3b6 --- /dev/null +++ b/arch/parisc/include/asm/sigcontext.h @@ -0,0 +1,20 @@ +#ifndef _ASMPARISC_SIGCONTEXT_H +#define _ASMPARISC_SIGCONTEXT_H + +#define PARISC_SC_FLAG_ONSTACK 1<<0 +#define PARISC_SC_FLAG_IN_SYSCALL 1<<1 + +/* We will add more stuff here as it becomes necessary, until we know + it works. */ +struct sigcontext { + unsigned long sc_flags; + + unsigned long sc_gr[32]; /* PSW in sc_gr[0] */ + unsigned long long sc_fr[32]; /* FIXME, do we need other state info? */ + unsigned long sc_iasq[2]; + unsigned long sc_iaoq[2]; + unsigned long sc_sar; /* cr11 */ +}; + + +#endif diff --git a/arch/parisc/include/asm/siginfo.h b/arch/parisc/include/asm/siginfo.h new file mode 100644 index 00000000000..d4909f55fe3 --- /dev/null +++ b/arch/parisc/include/asm/siginfo.h @@ -0,0 +1,14 @@ +#ifndef _PARISC_SIGINFO_H +#define _PARISC_SIGINFO_H + +#include + +/* + * SIGTRAP si_codes + */ +#define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */ +#define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint or watchpoint */ +#undef NSIGTRAP +#define NSIGTRAP 4 + +#endif diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h new file mode 100644 index 00000000000..c20356375d1 --- /dev/null +++ b/arch/parisc/include/asm/signal.h @@ -0,0 +1,153 @@ +#ifndef _ASM_PARISC_SIGNAL_H +#define _ASM_PARISC_SIGNAL_H + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGEMT 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGBUS 10 +#define SIGSEGV 11 +#define SIGSYS 12 /* Linux doesn't use this */ +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGUSR1 16 +#define SIGUSR2 17 +#define SIGCHLD 18 +#define SIGPWR 19 +#define SIGVTALRM 20 +#define SIGPROF 21 +#define SIGIO 22 +#define SIGPOLL SIGIO +#define SIGWINCH 23 +#define SIGSTOP 24 +#define SIGTSTP 25 +#define SIGCONT 26 +#define SIGTTIN 27 +#define SIGTTOU 28 +#define SIGURG 29 +#define SIGLOST 30 /* Linux doesn't use this either */ +#define SIGUNUSED 31 +#define SIGRESERVE SIGUNUSED + +#define SIGXCPU 33 +#define SIGXFSZ 34 +#define SIGSTKFLT 36 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 37 +#define SIGRTMAX _NSIG /* it's 44 under HP/UX */ + +/* + * SA_FLAGS values: + * + * SA_ONSTACK indicates that a registered stack_t will be used. + * SA_RESTART flag to get restarting signals (which were the default long ago) + * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop. + * SA_RESETHAND clears the handler when the signal is delivered. + * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies. + * SA_NODEFER prevents the current signal from being masked in the handler. + * + * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single + * Unix names RESETHAND and NODEFER respectively. + */ +#define SA_ONSTACK 0x00000001 +#define SA_RESETHAND 0x00000004 +#define SA_NOCLDSTOP 0x00000008 +#define SA_SIGINFO 0x00000010 +#define SA_NODEFER 0x00000020 +#define SA_RESTART 0x00000040 +#define SA_NOCLDWAIT 0x00000080 +#define _SA_SIGGFAULT 0x00000100 /* HPUX */ + +#define SA_NOMASK SA_NODEFER +#define SA_ONESHOT SA_RESETHAND + +#define SA_RESTORER 0x04000000 /* obsolete -- ignored */ + +/* + * sigaltstack controls + */ +#define SS_ONSTACK 1 +#define SS_DISABLE 2 + +#define MINSIGSTKSZ 2048 +#define SIGSTKSZ 8192 + +#ifdef __KERNEL__ + +#define _NSIG 64 +/* bits-per-word, where word apparently means 'long' not 'int' */ +#define _NSIG_BPW BITS_PER_LONG +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +#endif /* __KERNEL__ */ + +#define SIG_BLOCK 0 /* for blocking signals */ +#define SIG_UNBLOCK 1 /* for unblocking signals */ +#define SIG_SETMASK 2 /* for setting the signal mask */ + +#define SIG_DFL ((__sighandler_t)0) /* default signal handling */ +#define SIG_IGN ((__sighandler_t)1) /* ignore signal */ +#define SIG_ERR ((__sighandler_t)-1) /* error return from signal */ + +# ifndef __ASSEMBLY__ + +# include + +/* Avoid too many header ordering problems. */ +struct siginfo; + +/* Type of a signal handler. */ +#ifdef CONFIG_64BIT +/* function pointers on 64-bit parisc are pointers to little structs and the + * compiler doesn't support code which changes or tests the address of + * the function in the little struct. This is really ugly -PB + */ +typedef char __user *__sighandler_t; +#else +typedef void __signalfn_t(int); +typedef __signalfn_t __user *__sighandler_t; +#endif + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + size_t ss_size; +} stack_t; + +#ifdef __KERNEL__ + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +typedef unsigned long old_sigset_t; /* at least 32 bits */ + +typedef struct { + /* next_signal() assumes this is a long - no choice */ + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +struct sigaction { + __sighandler_t sa_handler; + unsigned long sa_flags; + sigset_t sa_mask; /* mask last for extensibility */ +}; + +struct k_sigaction { + struct sigaction sa; +}; + +#define ptrace_signal_deliver(regs, cookie) do { } while (0) + +#include + +#endif /* __KERNEL__ */ +#endif /* !__ASSEMBLY */ +#endif /* _ASM_PARISC_SIGNAL_H */ diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h new file mode 100644 index 00000000000..398cdbaf4e5 --- /dev/null +++ b/arch/parisc/include/asm/smp.h @@ -0,0 +1,68 @@ +#ifndef __ASM_SMP_H +#define __ASM_SMP_H + + +#if defined(CONFIG_SMP) + +/* Page Zero Location PDC will look for the address to branch to when we poke +** slave CPUs still in "Icache loop". +*/ +#define PDC_OS_BOOT_RENDEZVOUS 0x10 +#define PDC_OS_BOOT_RENDEZVOUS_HI 0x28 + +#ifndef ASSEMBLY +#include +#include /* for NR_CPUS */ +#include +typedef unsigned long address_t; + +extern cpumask_t cpu_online_map; + + +/* + * Private routines/data + * + * physical and logical are equivalent until we support CPU hotplug. + */ +#define cpu_number_map(cpu) (cpu) +#define cpu_logical_map(cpu) (cpu) + +extern void smp_send_reschedule(int cpu); +extern void smp_send_all_nop(void); + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi(cpumask_t mask); + +#endif /* !ASSEMBLY */ + +/* + * This magic constant controls our willingness to transfer + * a process across CPUs. Such a transfer incurs cache and tlb + * misses. The current value is inherited from i386. Still needs + * to be tuned for parisc. + */ + +#define PROC_CHANGE_PENALTY 15 /* Schedule penalty */ + +extern unsigned long cpu_present_mask; + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +#else /* CONFIG_SMP */ + +static inline void smp_send_all_nop(void) { return; } + +#endif + +#define NO_PROC_ID 0xFF /* No processor magic marker */ +#define ANY_PROC_ID 0xFF /* Any processor magic marker */ +static inline int __cpu_disable (void) { + return 0; +} +static inline void __cpu_die (unsigned int cpu) { + while(1) + ; +} +extern int __cpu_up (unsigned int cpu); + +#endif /* __ASM_SMP_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h new file mode 100644 index 00000000000..fba402c95ac --- /dev/null +++ b/arch/parisc/include/asm/socket.h @@ -0,0 +1,62 @@ +#ifndef _ASM_SOCKET_H +#define _ASM_SOCKET_H + +#include + +/* For setsockopt(2) */ +#define SOL_SOCKET 0xffff + +#define SO_DEBUG 0x0001 +#define SO_REUSEADDR 0x0004 +#define SO_KEEPALIVE 0x0008 +#define SO_DONTROUTE 0x0010 +#define SO_BROADCAST 0x0020 +#define SO_LINGER 0x0080 +#define SO_OOBINLINE 0x0100 +/* To add :#define SO_REUSEPORT 0x0200 */ +#define SO_SNDBUF 0x1001 +#define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b +#define SO_SNDLOWAT 0x1003 +#define SO_RCVLOWAT 0x1004 +#define SO_SNDTIMEO 0x1005 +#define SO_RCVTIMEO 0x1006 +#define SO_ERROR 0x1007 +#define SO_TYPE 0x1008 +#define SO_PEERNAME 0x2000 + +#define SO_NO_CHECK 0x400b +#define SO_PRIORITY 0x400c +#define SO_BSDCOMPAT 0x400e +#define SO_PASSCRED 0x4010 +#define SO_PEERCRED 0x4011 +#define SO_TIMESTAMP 0x4012 +#define SCM_TIMESTAMP SO_TIMESTAMP +#define SO_TIMESTAMPNS 0x4013 +#define SCM_TIMESTAMPNS SO_TIMESTAMPNS + +/* Security levels - as per NRL IPv6 - don't actually do anything */ +#define SO_SECURITY_AUTHENTICATION 0x4016 +#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x4017 +#define SO_SECURITY_ENCRYPTION_NETWORK 0x4018 + +#define SO_BINDTODEVICE 0x4019 + +/* Socket filtering */ +#define SO_ATTACH_FILTER 0x401a +#define SO_DETACH_FILTER 0x401b + +#define SO_ACCEPTCONN 0x401c + +#define SO_PEERSEC 0x401d +#define SO_PASSSEC 0x401e + +#define SO_MARK 0x401f + +/* O_NONBLOCK clashes with the bits used for socket types. Therefore we + * have to define SOCK_NONBLOCK to a different value here. + */ +#define SOCK_NONBLOCK 0x40000000 + +#endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/sockios.h b/arch/parisc/include/asm/sockios.h new file mode 100644 index 00000000000..dabfbc7483f --- /dev/null +++ b/arch/parisc/include/asm/sockios.h @@ -0,0 +1,13 @@ +#ifndef __ARCH_PARISC_SOCKIOS__ +#define __ARCH_PARISC_SOCKIOS__ + +/* Socket-level I/O control calls. */ +#define FIOSETOWN 0x8901 +#define SIOCSPGRP 0x8902 +#define FIOGETOWN 0x8903 +#define SIOCGPGRP 0x8904 +#define SIOCATMARK 0x8905 +#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ +#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ + +#endif diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h new file mode 100644 index 00000000000..f3d2090a18d --- /dev/null +++ b/arch/parisc/include/asm/spinlock.h @@ -0,0 +1,194 @@ +#ifndef __ASM_SPINLOCK_H +#define __ASM_SPINLOCK_H + +#include +#include +#include + +static inline int __raw_spin_is_locked(raw_spinlock_t *x) +{ + volatile unsigned int *a = __ldcw_align(x); + return *a == 0; +} + +#define __raw_spin_lock(lock) __raw_spin_lock_flags(lock, 0) +#define __raw_spin_unlock_wait(x) \ + do { cpu_relax(); } while (__raw_spin_is_locked(x)) + +static inline void __raw_spin_lock_flags(raw_spinlock_t *x, + unsigned long flags) +{ + volatile unsigned int *a; + + mb(); + a = __ldcw_align(x); + while (__ldcw(a) == 0) + while (*a == 0) + if (flags & PSW_SM_I) { + local_irq_enable(); + cpu_relax(); + local_irq_disable(); + } else + cpu_relax(); + mb(); +} + +static inline void __raw_spin_unlock(raw_spinlock_t *x) +{ + volatile unsigned int *a; + mb(); + a = __ldcw_align(x); + *a = 1; + mb(); +} + +static inline int __raw_spin_trylock(raw_spinlock_t *x) +{ + volatile unsigned int *a; + int ret; + + mb(); + a = __ldcw_align(x); + ret = __ldcw(a) != 0; + mb(); + + return ret; +} + +/* + * Read-write spinlocks, allowing multiple readers but only one writer. + * Linux rwlocks are unfair to writers; they can be starved for an indefinite + * time by readers. With care, they can also be taken in interrupt context. + * + * In the PA-RISC implementation, we have a spinlock and a counter. + * Readers use the lock to serialise their access to the counter (which + * records how many readers currently hold the lock). + * Writers hold the spinlock, preventing any readers or other writers from + * grabbing the rwlock. + */ + +/* Note that we have to ensure interrupts are disabled in case we're + * interrupted by some other code that wants to grab the same read lock */ +static __inline__ void __raw_read_lock(raw_rwlock_t *rw) +{ + unsigned long flags; + local_irq_save(flags); + __raw_spin_lock_flags(&rw->lock, flags); + rw->counter++; + __raw_spin_unlock(&rw->lock); + local_irq_restore(flags); +} + +/* Note that we have to ensure interrupts are disabled in case we're + * interrupted by some other code that wants to grab the same read lock */ +static __inline__ void __raw_read_unlock(raw_rwlock_t *rw) +{ + unsigned long flags; + local_irq_save(flags); + __raw_spin_lock_flags(&rw->lock, flags); + rw->counter--; + __raw_spin_unlock(&rw->lock); + local_irq_restore(flags); +} + +/* Note that we have to ensure interrupts are disabled in case we're + * interrupted by some other code that wants to grab the same read lock */ +static __inline__ int __raw_read_trylock(raw_rwlock_t *rw) +{ + unsigned long flags; + retry: + local_irq_save(flags); + if (__raw_spin_trylock(&rw->lock)) { + rw->counter++; + __raw_spin_unlock(&rw->lock); + local_irq_restore(flags); + return 1; + } + + local_irq_restore(flags); + /* If write-locked, we fail to acquire the lock */ + if (rw->counter < 0) + return 0; + + /* Wait until we have a realistic chance at the lock */ + while (__raw_spin_is_locked(&rw->lock) && rw->counter >= 0) + cpu_relax(); + + goto retry; +} + +/* Note that we have to ensure interrupts are disabled in case we're + * interrupted by some other code that wants to read_trylock() this lock */ +static __inline__ void __raw_write_lock(raw_rwlock_t *rw) +{ + unsigned long flags; +retry: + local_irq_save(flags); + __raw_spin_lock_flags(&rw->lock, flags); + + if (rw->counter != 0) { + __raw_spin_unlock(&rw->lock); + local_irq_restore(flags); + + while (rw->counter != 0) + cpu_relax(); + + goto retry; + } + + rw->counter = -1; /* mark as write-locked */ + mb(); + local_irq_restore(flags); +} + +static __inline__ void __raw_write_unlock(raw_rwlock_t *rw) +{ + rw->counter = 0; + __raw_spin_unlock(&rw->lock); +} + +/* Note that we have to ensure interrupts are disabled in case we're + * interrupted by some other code that wants to read_trylock() this lock */ +static __inline__ int __raw_write_trylock(raw_rwlock_t *rw) +{ + unsigned long flags; + int result = 0; + + local_irq_save(flags); + if (__raw_spin_trylock(&rw->lock)) { + if (rw->counter == 0) { + rw->counter = -1; + result = 1; + } else { + /* Read-locked. Oh well. */ + __raw_spin_unlock(&rw->lock); + } + } + local_irq_restore(flags); + + return result; +} + +/* + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +static __inline__ int __raw_read_can_lock(raw_rwlock_t *rw) +{ + return rw->counter >= 0; +} + +/* + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. + */ +static __inline__ int __raw_write_can_lock(raw_rwlock_t *rw) +{ + return !rw->counter; +} + +#define _raw_spin_relax(lock) cpu_relax() +#define _raw_read_relax(lock) cpu_relax() +#define _raw_write_relax(lock) cpu_relax() + +#endif /* __ASM_SPINLOCK_H */ diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h new file mode 100644 index 00000000000..3f72f47cf4b --- /dev/null +++ b/arch/parisc/include/asm/spinlock_types.h @@ -0,0 +1,21 @@ +#ifndef __ASM_SPINLOCK_TYPES_H +#define __ASM_SPINLOCK_TYPES_H + +typedef struct { +#ifdef CONFIG_PA20 + volatile unsigned int slock; +# define __RAW_SPIN_LOCK_UNLOCKED { 1 } +#else + volatile unsigned int lock[4]; +# define __RAW_SPIN_LOCK_UNLOCKED { { 1, 1, 1, 1 } } +#endif +} raw_spinlock_t; + +typedef struct { + raw_spinlock_t lock; + volatile int counter; +} raw_rwlock_t; + +#define __RAW_RW_LOCK_UNLOCKED { __RAW_SPIN_LOCK_UNLOCKED, 0 } + +#endif diff --git a/arch/parisc/include/asm/stat.h b/arch/parisc/include/asm/stat.h new file mode 100644 index 00000000000..9d5fbbc5c31 --- /dev/null +++ b/arch/parisc/include/asm/stat.h @@ -0,0 +1,100 @@ +#ifndef _PARISC_STAT_H +#define _PARISC_STAT_H + +#include + +struct stat { + unsigned int st_dev; /* dev_t is 32 bits on parisc */ + ino_t st_ino; /* 32 bits */ + mode_t st_mode; /* 16 bits */ + nlink_t st_nlink; /* 16 bits */ + unsigned short st_reserved1; /* old st_uid */ + unsigned short st_reserved2; /* old st_gid */ + unsigned int st_rdev; + off_t st_size; + time_t st_atime; + unsigned int st_atime_nsec; + time_t st_mtime; + unsigned int st_mtime_nsec; + time_t st_ctime; + unsigned int st_ctime_nsec; + int st_blksize; + int st_blocks; + unsigned int __unused1; /* ACL stuff */ + unsigned int __unused2; /* network */ + ino_t __unused3; /* network */ + unsigned int __unused4; /* cnodes */ + unsigned short __unused5; /* netsite */ + short st_fstype; + unsigned int st_realdev; + unsigned short st_basemode; + unsigned short st_spareshort; + uid_t st_uid; + gid_t st_gid; + unsigned int st_spare4[3]; +}; + +#define STAT_HAVE_NSEC + +typedef __kernel_off64_t off64_t; + +struct hpux_stat64 { + unsigned int st_dev; /* dev_t is 32 bits on parisc */ + ino_t st_ino; /* 32 bits */ + mode_t st_mode; /* 16 bits */ + nlink_t st_nlink; /* 16 bits */ + unsigned short st_reserved1; /* old st_uid */ + unsigned short st_reserved2; /* old st_gid */ + unsigned int st_rdev; + off64_t st_size; + time_t st_atime; + unsigned int st_spare1; + time_t st_mtime; + unsigned int st_spare2; + time_t st_ctime; + unsigned int st_spare3; + int st_blksize; + __u64 st_blocks; + unsigned int __unused1; /* ACL stuff */ + unsigned int __unused2; /* network */ + ino_t __unused3; /* network */ + unsigned int __unused4; /* cnodes */ + unsigned short __unused5; /* netsite */ + short st_fstype; + unsigned int st_realdev; + unsigned short st_basemode; + unsigned short st_spareshort; + uid_t st_uid; + gid_t st_gid; + unsigned int st_spare4[3]; +}; + +/* This is the struct that 32-bit userspace applications are expecting. + * How 64-bit apps are going to be compiled, I have no idea. But at least + * this way, we don't have a wrapper in the kernel. + */ +struct stat64 { + unsigned long long st_dev; + unsigned int __pad1; + + unsigned int __st_ino; /* Not actually filled in */ + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned long long st_rdev; + unsigned int __pad2; + signed long long st_size; + signed int st_blksize; + + signed long long st_blocks; + signed int st_atime; + unsigned int st_atime_nsec; + signed int st_mtime; + unsigned int st_mtime_nsec; + signed int st_ctime; + unsigned int st_ctime_nsec; + unsigned long long st_ino; +}; + +#endif diff --git a/arch/parisc/include/asm/statfs.h b/arch/parisc/include/asm/statfs.h new file mode 100644 index 00000000000..1d2b8130b23 --- /dev/null +++ b/arch/parisc/include/asm/statfs.h @@ -0,0 +1,58 @@ +#ifndef _PARISC_STATFS_H +#define _PARISC_STATFS_H + +#ifndef __KERNEL_STRICT_NAMES + +#include + +typedef __kernel_fsid_t fsid_t; + +#endif + +/* + * It appears that PARISC could be 64 _or_ 32 bit. + * 64-bit fields must be explicitly 64-bit in statfs64. + */ +struct statfs { + long f_type; + long f_bsize; + long f_blocks; + long f_bfree; + long f_bavail; + long f_files; + long f_ffree; + __kernel_fsid_t f_fsid; + long f_namelen; + long f_frsize; + long f_spare[5]; +}; + +struct statfs64 { + long f_type; + long f_bsize; + __u64 f_blocks; + __u64 f_bfree; + __u64 f_bavail; + __u64 f_files; + __u64 f_ffree; + __kernel_fsid_t f_fsid; + long f_namelen; + long f_frsize; + long f_spare[5]; +}; + +struct compat_statfs64 { + __u32 f_type; + __u32 f_bsize; + __u64 f_blocks; + __u64 f_bfree; + __u64 f_bavail; + __u64 f_files; + __u64 f_ffree; + __kernel_fsid_t f_fsid; + __u32 f_namelen; + __u32 f_frsize; + __u32 f_spare[5]; +}; + +#endif diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h new file mode 100644 index 00000000000..eda01be65e3 --- /dev/null +++ b/arch/parisc/include/asm/string.h @@ -0,0 +1,10 @@ +#ifndef _PA_STRING_H_ +#define _PA_STRING_H_ + +#define __HAVE_ARCH_MEMSET +extern void * memset(void *, int, size_t); + +#define __HAVE_ARCH_MEMCPY +void * memcpy(void * dest,const void *src,size_t count); + +#endif diff --git a/arch/parisc/include/asm/superio.h b/arch/parisc/include/asm/superio.h new file mode 100644 index 00000000000..6598acb4d46 --- /dev/null +++ b/arch/parisc/include/asm/superio.h @@ -0,0 +1,85 @@ +#ifndef _PARISC_SUPERIO_H +#define _PARISC_SUPERIO_H + +#define IC_PIC1 0x20 /* PCI I/O address of master 8259 */ +#define IC_PIC2 0xA0 /* PCI I/O address of slave */ + +/* Config Space Offsets to configuration and base address registers */ +#define SIO_CR 0x5A /* Configuration Register */ +#define SIO_ACPIBAR 0x88 /* ACPI BAR */ +#define SIO_FDCBAR 0x90 /* Floppy Disk Controller BAR */ +#define SIO_SP1BAR 0x94 /* Serial 1 BAR */ +#define SIO_SP2BAR 0x98 /* Serial 2 BAR */ +#define SIO_PPBAR 0x9C /* Parallel BAR */ + +#define TRIGGER_1 0x67 /* Edge/level trigger register 1 */ +#define TRIGGER_2 0x68 /* Edge/level trigger register 2 */ + +/* Interrupt Routing Control registers */ +#define CFG_IR_SER 0x69 /* Serial 1 [0:3] and Serial 2 [4:7] */ +#define CFG_IR_PFD 0x6a /* Parallel [0:3] and Floppy [4:7] */ +#define CFG_IR_IDE 0x6b /* IDE1 [0:3] and IDE2 [4:7] */ +#define CFG_IR_INTAB 0x6c /* PCI INTA [0:3] and INT B [4:7] */ +#define CFG_IR_INTCD 0x6d /* PCI INTC [0:3] and INT D [4:7] */ +#define CFG_IR_PS2 0x6e /* PS/2 KBINT [0:3] and Mouse [4:7] */ +#define CFG_IR_FXBUS 0x6f /* FXIRQ[0] [0:3] and FXIRQ[1] [4:7] */ +#define CFG_IR_USB 0x70 /* FXIRQ[2] [0:3] and USB [4:7] */ +#define CFG_IR_ACPI 0x71 /* ACPI SCI [0:3] and reserved [4:7] */ + +#define CFG_IR_LOW CFG_IR_SER /* Lowest interrupt routing reg */ +#define CFG_IR_HIGH CFG_IR_ACPI /* Highest interrupt routing reg */ + +/* 8259 operational control words */ +#define OCW2_EOI 0x20 /* Non-specific EOI */ +#define OCW2_SEOI 0x60 /* Specific EOI */ +#define OCW3_IIR 0x0A /* Read request register */ +#define OCW3_ISR 0x0B /* Read service register */ +#define OCW3_POLL 0x0C /* Poll the PIC for an interrupt vector */ + +/* Interrupt lines. Only PIC1 is used */ +#define USB_IRQ 1 /* USB */ +#define SP1_IRQ 3 /* Serial port 1 */ +#define SP2_IRQ 4 /* Serial port 2 */ +#define PAR_IRQ 5 /* Parallel port */ +#define FDC_IRQ 6 /* Floppy controller */ +#define IDE_IRQ 7 /* IDE (pri+sec) */ + +/* ACPI registers */ +#define USB_REG_CR 0x1f /* USB Regulator Control Register */ + +#define SUPERIO_NIRQS 8 + +struct superio_device { + u32 fdc_base; + u32 sp1_base; + u32 sp2_base; + u32 pp_base; + u32 acpi_base; + int suckyio_irq_enabled; + struct pci_dev *lio_pdev; /* pci device for legacy IO (fn 1) */ + struct pci_dev *usb_pdev; /* pci device for USB (fn 2) */ +}; + +/* + * Does NS make a 87415 based plug in PCI card? If so, because of this + * macro we currently don't support it being plugged into a machine + * that contains a SuperIO chip AND has CONFIG_SUPERIO enabled. + * + * This could be fixed by checking to see if function 1 exists, and + * if it is SuperIO Legacy IO; but really now, is this combination + * going to EVER happen? + */ + +#define SUPERIO_IDE_FN 0 /* Function number of IDE controller */ +#define SUPERIO_LIO_FN 1 /* Function number of Legacy IO controller */ +#define SUPERIO_USB_FN 2 /* Function number of USB controller */ + +#define is_superio_device(x) \ + (((x)->vendor == PCI_VENDOR_ID_NS) && \ + ( ((x)->device == PCI_DEVICE_ID_NS_87415) \ + || ((x)->device == PCI_DEVICE_ID_NS_87560_LIO) \ + || ((x)->device == PCI_DEVICE_ID_NS_87560_USB) ) ) + +extern int superio_fixup_irq(struct pci_dev *pcidev); /* called by iosapic */ + +#endif /* _PARISC_SUPERIO_H */ diff --git a/arch/parisc/include/asm/system.h b/arch/parisc/include/asm/system.h new file mode 100644 index 00000000000..ee80c920b46 --- /dev/null +++ b/arch/parisc/include/asm/system.h @@ -0,0 +1,182 @@ +#ifndef __PARISC_SYSTEM_H +#define __PARISC_SYSTEM_H + +#include + +/* The program status word as bitfields. */ +struct pa_psw { + unsigned int y:1; + unsigned int z:1; + unsigned int rv:2; + unsigned int w:1; + unsigned int e:1; + unsigned int s:1; + unsigned int t:1; + + unsigned int h:1; + unsigned int l:1; + unsigned int n:1; + unsigned int x:1; + unsigned int b:1; + unsigned int c:1; + unsigned int v:1; + unsigned int m:1; + + unsigned int cb:8; + + unsigned int o:1; + unsigned int g:1; + unsigned int f:1; + unsigned int r:1; + unsigned int q:1; + unsigned int p:1; + unsigned int d:1; + unsigned int i:1; +}; + +#ifdef CONFIG_64BIT +#define pa_psw(task) ((struct pa_psw *) ((char *) (task) + TASK_PT_PSW + 4)) +#else +#define pa_psw(task) ((struct pa_psw *) ((char *) (task) + TASK_PT_PSW)) +#endif + +struct task_struct; + +extern struct task_struct *_switch_to(struct task_struct *, struct task_struct *); + +#define switch_to(prev, next, last) do { \ + (last) = _switch_to(prev, next); \ +} while(0) + +/* interrupt control */ +#define local_save_flags(x) __asm__ __volatile__("ssm 0, %0" : "=r" (x) : : "memory") +#define local_irq_disable() __asm__ __volatile__("rsm %0,%%r0\n" : : "i" (PSW_I) : "memory" ) +#define local_irq_enable() __asm__ __volatile__("ssm %0,%%r0\n" : : "i" (PSW_I) : "memory" ) + +#define local_irq_save(x) \ + __asm__ __volatile__("rsm %1,%0" : "=r" (x) :"i" (PSW_I) : "memory" ) +#define local_irq_restore(x) \ + __asm__ __volatile__("mtsm %0" : : "r" (x) : "memory" ) + +#define irqs_disabled() \ +({ \ + unsigned long flags; \ + local_save_flags(flags); \ + (flags & PSW_I) == 0; \ +}) + +#define mfctl(reg) ({ \ + unsigned long cr; \ + __asm__ __volatile__( \ + "mfctl " #reg ",%0" : \ + "=r" (cr) \ + ); \ + cr; \ +}) + +#define mtctl(gr, cr) \ + __asm__ __volatile__("mtctl %0,%1" \ + : /* no outputs */ \ + : "r" (gr), "i" (cr) : "memory") + +/* these are here to de-mystefy the calling code, and to provide hooks */ +/* which I needed for debugging EIEM problems -PB */ +#define get_eiem() mfctl(15) +static inline void set_eiem(unsigned long val) +{ + mtctl(val, 15); +} + +#define mfsp(reg) ({ \ + unsigned long cr; \ + __asm__ __volatile__( \ + "mfsp " #reg ",%0" : \ + "=r" (cr) \ + ); \ + cr; \ +}) + +#define mtsp(gr, cr) \ + __asm__ __volatile__("mtsp %0,%1" \ + : /* no outputs */ \ + : "r" (gr), "i" (cr) : "memory") + + +/* +** This is simply the barrier() macro from linux/kernel.h but when serial.c +** uses tqueue.h uses smp_mb() defined using barrier(), linux/kernel.h +** hasn't yet been included yet so it fails, thus repeating the macro here. +** +** PA-RISC architecture allows for weakly ordered memory accesses although +** none of the processors use it. There is a strong ordered bit that is +** set in the O-bit of the page directory entry. Operating systems that +** can not tolerate out of order accesses should set this bit when mapping +** pages. The O-bit of the PSW should also be set to 1 (I don't believe any +** of the processor implemented the PSW O-bit). The PCX-W ERS states that +** the TLB O-bit is not implemented so the page directory does not need to +** have the O-bit set when mapping pages (section 3.1). This section also +** states that the PSW Y, Z, G, and O bits are not implemented. +** So it looks like nothing needs to be done for parisc-linux (yet). +** (thanks to chada for the above comment -ggg) +** +** The __asm__ op below simple prevents gcc/ld from reordering +** instructions across the mb() "call". +*/ +#define mb() __asm__ __volatile__("":::"memory") /* barrier() */ +#define rmb() mb() +#define wmb() mb() +#define smp_mb() mb() +#define smp_rmb() mb() +#define smp_wmb() mb() +#define smp_read_barrier_depends() do { } while(0) +#define read_barrier_depends() do { } while(0) + +#define set_mb(var, value) do { var = value; mb(); } while (0) + +#ifndef CONFIG_PA20 +/* Because kmalloc only guarantees 8-byte alignment for kmalloc'd data, + and GCC only guarantees 8-byte alignment for stack locals, we can't + be assured of 16-byte alignment for atomic lock data even if we + specify "__attribute ((aligned(16)))" in the type declaration. So, + we use a struct containing an array of four ints for the atomic lock + type and dynamically select the 16-byte aligned int from the array + for the semaphore. */ + +#define __PA_LDCW_ALIGNMENT 16 +#define __ldcw_align(a) ({ \ + unsigned long __ret = (unsigned long) &(a)->lock[0]; \ + __ret = (__ret + __PA_LDCW_ALIGNMENT - 1) \ + & ~(__PA_LDCW_ALIGNMENT - 1); \ + (volatile unsigned int *) __ret; \ +}) +#define __LDCW "ldcw" + +#else /*CONFIG_PA20*/ +/* From: "Jim Hull" + I've attached a summary of the change, but basically, for PA 2.0, as + long as the ",CO" (coherent operation) completer is specified, then the + 16-byte alignment requirement for ldcw and ldcd is relaxed, and instead + they only require "natural" alignment (4-byte for ldcw, 8-byte for + ldcd). */ + +#define __PA_LDCW_ALIGNMENT 4 +#define __ldcw_align(a) ((volatile unsigned int *)a) +#define __LDCW "ldcw,co" + +#endif /*!CONFIG_PA20*/ + +/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*. */ +#define __ldcw(a) ({ \ + unsigned __ret; \ + __asm__ __volatile__(__LDCW " 0(%1),%0" \ + : "=r" (__ret) : "r" (a)); \ + __ret; \ +}) + +#ifdef CONFIG_SMP +# define __lock_aligned __attribute__((__section__(".data.lock_aligned"))) +#endif + +#define arch_align_stack(x) (x) + +#endif diff --git a/arch/parisc/include/asm/termbits.h b/arch/parisc/include/asm/termbits.h new file mode 100644 index 00000000000..d8bbc73b16b --- /dev/null +++ b/arch/parisc/include/asm/termbits.h @@ -0,0 +1,200 @@ +#ifndef __ARCH_PARISC_TERMBITS_H__ +#define __ARCH_PARISC_TERMBITS_H__ + +#include + +typedef unsigned char cc_t; +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; + +#define NCCS 19 +struct termios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ +}; + +struct termios2 { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +struct ktermios { + tcflag_t c_iflag; /* input mode flags */ + tcflag_t c_oflag; /* output mode flags */ + tcflag_t c_cflag; /* control mode flags */ + tcflag_t c_lflag; /* local mode flags */ + cc_t c_line; /* line discipline */ + cc_t c_cc[NCCS]; /* control characters */ + speed_t c_ispeed; /* input speed */ + speed_t c_ospeed; /* output speed */ +}; + +/* c_cc characters */ +#define VINTR 0 +#define VQUIT 1 +#define VERASE 2 +#define VKILL 3 +#define VEOF 4 +#define VTIME 5 +#define VMIN 6 +#define VSWTC 7 +#define VSTART 8 +#define VSTOP 9 +#define VSUSP 10 +#define VEOL 11 +#define VREPRINT 12 +#define VDISCARD 13 +#define VWERASE 14 +#define VLNEXT 15 +#define VEOL2 16 + + +/* c_iflag bits */ +#define IGNBRK 0000001 +#define BRKINT 0000002 +#define IGNPAR 0000004 +#define PARMRK 0000010 +#define INPCK 0000020 +#define ISTRIP 0000040 +#define INLCR 0000100 +#define IGNCR 0000200 +#define ICRNL 0000400 +#define IUCLC 0001000 +#define IXON 0002000 +#define IXANY 0004000 +#define IXOFF 0010000 +#define IMAXBEL 0040000 +#define IUTF8 0100000 + +/* c_oflag bits */ +#define OPOST 0000001 +#define OLCUC 0000002 +#define ONLCR 0000004 +#define OCRNL 0000010 +#define ONOCR 0000020 +#define ONLRET 0000040 +#define OFILL 0000100 +#define OFDEL 0000200 +#define NLDLY 0000400 +#define NL0 0000000 +#define NL1 0000400 +#define CRDLY 0003000 +#define CR0 0000000 +#define CR1 0001000 +#define CR2 0002000 +#define CR3 0003000 +#define TABDLY 0014000 +#define TAB0 0000000 +#define TAB1 0004000 +#define TAB2 0010000 +#define TAB3 0014000 +#define XTABS 0014000 +#define BSDLY 0020000 +#define BS0 0000000 +#define BS1 0020000 +#define VTDLY 0040000 +#define VT0 0000000 +#define VT1 0040000 +#define FFDLY 0100000 +#define FF0 0000000 +#define FF1 0100000 + +/* c_cflag bit meaning */ +#define CBAUD 0010017 +#define B0 0000000 /* hang up */ +#define B50 0000001 +#define B75 0000002 +#define B110 0000003 +#define B134 0000004 +#define B150 0000005 +#define B200 0000006 +#define B300 0000007 +#define B600 0000010 +#define B1200 0000011 +#define B1800 0000012 +#define B2400 0000013 +#define B4800 0000014 +#define B9600 0000015 +#define B19200 0000016 +#define B38400 0000017 +#define EXTA B19200 +#define EXTB B38400 +#define CSIZE 0000060 +#define CS5 0000000 +#define CS6 0000020 +#define CS7 0000040 +#define CS8 0000060 +#define CSTOPB 0000100 +#define CREAD 0000200 +#define PARENB 0000400 +#define PARODD 0001000 +#define HUPCL 0002000 +#define CLOCAL 0004000 +#define CBAUDEX 0010000 +#define BOTHER 0010000 +#define B57600 0010001 +#define B115200 0010002 +#define B230400 0010003 +#define B460800 0010004 +#define B500000 0010005 +#define B576000 0010006 +#define B921600 0010007 +#define B1000000 0010010 +#define B1152000 0010011 +#define B1500000 0010012 +#define B2000000 0010013 +#define B2500000 0010014 +#define B3000000 0010015 +#define B3500000 0010016 +#define B4000000 0010017 +#define CIBAUD 002003600000 /* input baud rate */ +#define CMSPAR 010000000000 /* mark or space (stick) parity */ +#define CRTSCTS 020000000000 /* flow control */ + +#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */ + + +/* c_lflag bits */ +#define ISIG 0000001 +#define ICANON 0000002 +#define XCASE 0000004 +#define ECHO 0000010 +#define ECHOE 0000020 +#define ECHOK 0000040 +#define ECHONL 0000100 +#define NOFLSH 0000200 +#define TOSTOP 0000400 +#define ECHOCTL 0001000 +#define ECHOPRT 0002000 +#define ECHOKE 0004000 +#define FLUSHO 0010000 +#define PENDIN 0040000 +#define IEXTEN 0100000 + +/* tcflow() and TCXONC use these */ +#define TCOOFF 0 +#define TCOON 1 +#define TCIOFF 2 +#define TCION 3 + +/* tcflush() and TCFLSH use these */ +#define TCIFLUSH 0 +#define TCOFLUSH 1 +#define TCIOFLUSH 2 + +/* tcsetattr uses these */ +#define TCSANOW 0 +#define TCSADRAIN 1 +#define TCSAFLUSH 2 + +#endif diff --git a/arch/parisc/include/asm/termios.h b/arch/parisc/include/asm/termios.h new file mode 100644 index 00000000000..a2a57a4548a --- /dev/null +++ b/arch/parisc/include/asm/termios.h @@ -0,0 +1,90 @@ +#ifndef _PARISC_TERMIOS_H +#define _PARISC_TERMIOS_H + +#include +#include + +struct winsize { + unsigned short ws_row; + unsigned short ws_col; + unsigned short ws_xpixel; + unsigned short ws_ypixel; +}; + +#define NCC 8 +struct termio { + unsigned short c_iflag; /* input mode flags */ + unsigned short c_oflag; /* output mode flags */ + unsigned short c_cflag; /* control mode flags */ + unsigned short c_lflag; /* local mode flags */ + unsigned char c_line; /* line discipline */ + unsigned char c_cc[NCC]; /* control characters */ +}; + +/* modem lines */ +#define TIOCM_LE 0x001 +#define TIOCM_DTR 0x002 +#define TIOCM_RTS 0x004 +#define TIOCM_ST 0x008 +#define TIOCM_SR 0x010 +#define TIOCM_CTS 0x020 +#define TIOCM_CAR 0x040 +#define TIOCM_RNG 0x080 +#define TIOCM_DSR 0x100 +#define TIOCM_CD TIOCM_CAR +#define TIOCM_RI TIOCM_RNG +#define TIOCM_OUT1 0x2000 +#define TIOCM_OUT2 0x4000 +#define TIOCM_LOOP 0x8000 + +/* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ + +#ifdef __KERNEL__ + +/* intr=^C quit=^\ erase=del kill=^U + eof=^D vtime=\0 vmin=\1 sxtc=\0 + start=^Q stop=^S susp=^Z eol=\0 + reprint=^R discard=^U werase=^W lnext=^V + eol2=\0 +*/ +#define INIT_C_CC "\003\034\177\025\004\0\1\0\021\023\032\0\022\017\027\026\0" + +/* + * Translate a "termio" structure into a "termios". Ugh. + */ +#define SET_LOW_TERMIOS_BITS(termios, termio, x) { \ + unsigned short __tmp; \ + get_user(__tmp,&(termio)->x); \ + *(unsigned short *) &(termios)->x = __tmp; \ +} + +#define user_termio_to_kernel_termios(termios, termio) \ +({ \ + SET_LOW_TERMIOS_BITS(termios, termio, c_iflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); \ + SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); \ + copy_from_user((termios)->c_cc, (termio)->c_cc, NCC); \ +}) + +/* + * Translate a "termios" structure into a "termio". Ugh. + */ +#define kernel_termios_to_user_termio(termio, termios) \ +({ \ + put_user((termios)->c_iflag, &(termio)->c_iflag); \ + put_user((termios)->c_oflag, &(termio)->c_oflag); \ + put_user((termios)->c_cflag, &(termio)->c_cflag); \ + put_user((termios)->c_lflag, &(termio)->c_lflag); \ + put_user((termios)->c_line, &(termio)->c_line); \ + copy_to_user((termio)->c_cc, (termios)->c_cc, NCC); \ +}) + +#define user_termios_to_kernel_termios(k, u) copy_from_user(k, u, sizeof(struct termios2)) +#define kernel_termios_to_user_termios(u, k) copy_to_user(u, k, sizeof(struct termios2)) +#define user_termios_to_kernel_termios_1(k, u) copy_from_user(k, u, sizeof(struct termios)) +#define kernel_termios_to_user_termios_1(u, k) copy_to_user(u, k, sizeof(struct termios)) + +#endif /* __KERNEL__ */ + +#endif /* _PARISC_TERMIOS_H */ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h new file mode 100644 index 00000000000..9f812741c35 --- /dev/null +++ b/arch/parisc/include/asm/thread_info.h @@ -0,0 +1,74 @@ +#ifndef _ASM_PARISC_THREAD_INFO_H +#define _ASM_PARISC_THREAD_INFO_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ +#include + +struct thread_info { + struct task_struct *task; /* main task structure */ + struct exec_domain *exec_domain;/* execution domain */ + unsigned long flags; /* thread_info flags (see TIF_*) */ + mm_segment_t addr_limit; /* user-level address space limit */ + __u32 cpu; /* current CPU */ + int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ + struct restart_block restart_block; +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .exec_domain = &default_exec_domain, \ + .flags = 0, \ + .cpu = 0, \ + .addr_limit = KERNEL_DS, \ + .preempt_count = 1, \ + .restart_block = { \ + .fn = do_no_restart_syscall \ + } \ +} + +#define init_thread_info (init_thread_union.thread_info) +#define init_stack (init_thread_union.stack) + +/* thread information allocation */ + +#define THREAD_SIZE_ORDER 2 +/* Be sure to hunt all references to this down when you change the size of + * the kernel stack */ +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) +#define THREAD_SHIFT (PAGE_SHIFT + THREAD_SIZE_ORDER) + +/* how to get the thread information struct from C */ +#define current_thread_info() ((struct thread_info *)mfctl(30)) + +#endif /* !__ASSEMBLY */ + +#define PREEMPT_ACTIVE_BIT 28 +#define PREEMPT_ACTIVE (1 << PREEMPT_ACTIVE_BIT) + +/* + * thread information flags + */ +#define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_SIGPENDING 1 /* signal pending */ +#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ +#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling TIF_NEED_RESCHED */ +#define TIF_32BIT 4 /* 32 bit binary */ +#define TIF_MEMDIE 5 +#define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) +#define _TIF_32BIT (1 << TIF_32BIT) +#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) + +#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \ + _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK) + +#endif /* __KERNEL__ */ + +#endif /* _ASM_PARISC_THREAD_INFO_H */ diff --git a/arch/parisc/include/asm/timex.h b/arch/parisc/include/asm/timex.h new file mode 100644 index 00000000000..3b68d77273d --- /dev/null +++ b/arch/parisc/include/asm/timex.h @@ -0,0 +1,20 @@ +/* + * linux/include/asm-parisc/timex.h + * + * PARISC architecture timex specifications + */ +#ifndef _ASMPARISC_TIMEX_H +#define _ASMPARISC_TIMEX_H + +#include + +#define CLOCK_TICK_RATE 1193180 /* Underlying HZ */ + +typedef unsigned long cycles_t; + +static inline cycles_t get_cycles (void) +{ + return mfctl(16); +} + +#endif diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h new file mode 100644 index 00000000000..383b1db310e --- /dev/null +++ b/arch/parisc/include/asm/tlb.h @@ -0,0 +1,27 @@ +#ifndef _PARISC_TLB_H +#define _PARISC_TLB_H + +#define tlb_flush(tlb) \ +do { if ((tlb)->fullmm) \ + flush_tlb_mm((tlb)->mm);\ +} while (0) + +#define tlb_start_vma(tlb, vma) \ +do { if (!(tlb)->fullmm) \ + flush_cache_range(vma, vma->vm_start, vma->vm_end); \ +} while (0) + +#define tlb_end_vma(tlb, vma) \ +do { if (!(tlb)->fullmm) \ + flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ +} while (0) + +#define __tlb_remove_tlb_entry(tlb, pte, address) \ + do { } while (0) + +#include + +#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) +#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) + +#endif diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h new file mode 100644 index 00000000000..b72ec66db69 --- /dev/null +++ b/arch/parisc/include/asm/tlbflush.h @@ -0,0 +1,80 @@ +#ifndef _PARISC_TLBFLUSH_H +#define _PARISC_TLBFLUSH_H + +/* TLB flushing routines.... */ + +#include +#include +#include + + +/* This is for the serialisation of PxTLB broadcasts. At least on the + * N class systems, only one PxTLB inter processor broadcast can be + * active at any one time on the Merced bus. This tlb purge + * synchronisation is fairly lightweight and harmless so we activate + * it on all SMP systems not just the N class. We also need to have + * preemption disabled on uniprocessor machines, and spin_lock does that + * nicely. + */ +extern spinlock_t pa_tlb_lock; + +#define purge_tlb_start(x) spin_lock(&pa_tlb_lock) +#define purge_tlb_end(x) spin_unlock(&pa_tlb_lock) + +extern void flush_tlb_all(void); +extern void flush_tlb_all_local(void *); + +/* + * flush_tlb_mm() + * + * XXX This code is NOT valid for HP-UX compatibility processes, + * (although it will probably work 99% of the time). HP-UX + * processes are free to play with the space id's and save them + * over long periods of time, etc. so we have to preserve the + * space and just flush the entire tlb. We need to check the + * personality in order to do that, but the personality is not + * currently being set correctly. + * + * Of course, Linux processes could do the same thing, but + * we don't support that (and the compilers, dynamic linker, + * etc. do not do that). + */ + +static inline void flush_tlb_mm(struct mm_struct *mm) +{ + BUG_ON(mm == &init_mm); /* Should never happen */ + +#ifdef CONFIG_SMP + flush_tlb_all(); +#else + if (mm) { + if (mm->context != 0) + free_sid(mm->context); + mm->context = alloc_sid(); + if (mm == current->active_mm) + load_context(mm->context); + } +#endif +} + +static inline void flush_tlb_page(struct vm_area_struct *vma, + unsigned long addr) +{ + /* For one page, it's not worth testing the split_tlb variable */ + + mb(); + mtsp(vma->vm_mm->context,1); + purge_tlb_start(); + pdtlb(addr); + pitlb(addr); + purge_tlb_end(); +} + +void __flush_tlb_range(unsigned long sid, + unsigned long start, unsigned long end); + +#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end) + +#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end) + +#endif diff --git a/arch/parisc/include/asm/topology.h b/arch/parisc/include/asm/topology.h new file mode 100644 index 00000000000..d8133eb0b1e --- /dev/null +++ b/arch/parisc/include/asm/topology.h @@ -0,0 +1,6 @@ +#ifndef _ASM_PARISC_TOPOLOGY_H +#define _ASM_PARISC_TOPOLOGY_H + +#include + +#endif /* _ASM_PARISC_TOPOLOGY_H */ diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h new file mode 100644 index 00000000000..1945f995f2d --- /dev/null +++ b/arch/parisc/include/asm/traps.h @@ -0,0 +1,16 @@ +#ifndef __ASM_TRAPS_H +#define __ASM_TRAPS_H + +#ifdef __KERNEL__ +struct pt_regs; + +/* traps.c */ +void parisc_terminate(char *msg, struct pt_regs *regs, + int code, unsigned long offset); + +/* mm/fault.c */ +void do_page_fault(struct pt_regs *regs, unsigned long code, + unsigned long address); +#endif + +#endif diff --git a/arch/parisc/include/asm/types.h b/arch/parisc/include/asm/types.h new file mode 100644 index 00000000000..7f5a39bfb4c --- /dev/null +++ b/arch/parisc/include/asm/types.h @@ -0,0 +1,36 @@ +#ifndef _PARISC_TYPES_H +#define _PARISC_TYPES_H + +#include + +#ifndef __ASSEMBLY__ + +typedef unsigned short umode_t; + +#endif /* __ASSEMBLY__ */ + +/* + * These aren't exported outside the kernel to avoid name space clashes + */ +#ifdef __KERNEL__ + +#ifdef CONFIG_64BIT +#define BITS_PER_LONG 64 +#define SHIFT_PER_LONG 6 +#else +#define BITS_PER_LONG 32 +#define SHIFT_PER_LONG 5 +#endif + +#ifndef __ASSEMBLY__ + +/* Dma addresses are 32-bits wide. */ + +typedef u32 dma_addr_t; +typedef u64 dma64_addr_t; + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h new file mode 100644 index 00000000000..4878b9501f2 --- /dev/null +++ b/arch/parisc/include/asm/uaccess.h @@ -0,0 +1,244 @@ +#ifndef __PARISC_UACCESS_H +#define __PARISC_UACCESS_H + +/* + * User space memory access functions + */ +#include +#include +#include +#include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + +#define KERNEL_DS ((mm_segment_t){0}) +#define USER_DS ((mm_segment_t){1}) + +#define segment_eq(a,b) ((a).seg == (b).seg) + +#define get_ds() (KERNEL_DS) +#define get_fs() (current_thread_info()->addr_limit) +#define set_fs(x) (current_thread_info()->addr_limit = (x)) + +/* + * Note that since kernel addresses are in a separate address space on + * parisc, we don't need to do anything for access_ok(). + * We just let the page fault handler do the right thing. This also means + * that put_user is the same as __put_user, etc. + */ + +extern int __get_kernel_bad(void); +extern int __get_user_bad(void); +extern int __put_kernel_bad(void); +extern int __put_user_bad(void); + +static inline long access_ok(int type, const void __user * addr, + unsigned long size) +{ + return 1; +} + +#define put_user __put_user +#define get_user __get_user + +#if !defined(CONFIG_64BIT) +#define LDD_KERNEL(ptr) __get_kernel_bad(); +#define LDD_USER(ptr) __get_user_bad(); +#define STD_KERNEL(x, ptr) __put_kernel_asm64(x,ptr) +#define STD_USER(x, ptr) __put_user_asm64(x,ptr) +#define ASM_WORD_INSN ".word\t" +#else +#define LDD_KERNEL(ptr) __get_kernel_asm("ldd",ptr) +#define LDD_USER(ptr) __get_user_asm("ldd",ptr) +#define STD_KERNEL(x, ptr) __put_kernel_asm("std",x,ptr) +#define STD_USER(x, ptr) __put_user_asm("std",x,ptr) +#define ASM_WORD_INSN ".dword\t" +#endif + +/* + * The exception table contains two values: the first is an address + * for an instruction that is allowed to fault, and the second is + * the address to the fixup routine. + */ + +struct exception_table_entry { + unsigned long insn; /* address of insn that is allowed to fault. */ + long fixup; /* fixup routine */ +}; + +#define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ + ".section __ex_table,\"aw\"\n" \ + ASM_WORD_INSN #fault_addr ", " #except_addr "\n\t" \ + ".previous\n" + +/* + * The page fault handler stores, in a per-cpu area, the following information + * if a fixup routine is available. + */ +struct exception_data { + unsigned long fault_ip; + unsigned long fault_space; + unsigned long fault_addr; +}; + +#define __get_user(x,ptr) \ +({ \ + register long __gu_err __asm__ ("r8") = 0; \ + register long __gu_val __asm__ ("r9") = 0; \ + \ + if (segment_eq(get_fs(),KERNEL_DS)) { \ + switch (sizeof(*(ptr))) { \ + case 1: __get_kernel_asm("ldb",ptr); break; \ + case 2: __get_kernel_asm("ldh",ptr); break; \ + case 4: __get_kernel_asm("ldw",ptr); break; \ + case 8: LDD_KERNEL(ptr); break; \ + default: __get_kernel_bad(); break; \ + } \ + } \ + else { \ + switch (sizeof(*(ptr))) { \ + case 1: __get_user_asm("ldb",ptr); break; \ + case 2: __get_user_asm("ldh",ptr); break; \ + case 4: __get_user_asm("ldw",ptr); break; \ + case 8: LDD_USER(ptr); break; \ + default: __get_user_bad(); break; \ + } \ + } \ + \ + (x) = (__typeof__(*(ptr))) __gu_val; \ + __gu_err; \ +}) + +#define __get_kernel_asm(ldx,ptr) \ + __asm__("\n1:\t" ldx "\t0(%2),%0\n\t" \ + ASM_EXCEPTIONTABLE_ENTRY(1b, fixup_get_user_skip_1)\ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "r"(ptr), "1"(__gu_err) \ + : "r1"); + +#define __get_user_asm(ldx,ptr) \ + __asm__("\n1:\t" ldx "\t0(%%sr3,%2),%0\n\t" \ + ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_get_user_skip_1)\ + : "=r"(__gu_val), "=r"(__gu_err) \ + : "r"(ptr), "1"(__gu_err) \ + : "r1"); + +#define __put_user(x,ptr) \ +({ \ + register long __pu_err __asm__ ("r8") = 0; \ + __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \ + \ + if (segment_eq(get_fs(),KERNEL_DS)) { \ + switch (sizeof(*(ptr))) { \ + case 1: __put_kernel_asm("stb",__x,ptr); break; \ + case 2: __put_kernel_asm("sth",__x,ptr); break; \ + case 4: __put_kernel_asm("stw",__x,ptr); break; \ + case 8: STD_KERNEL(__x,ptr); break; \ + default: __put_kernel_bad(); break; \ + } \ + } \ + else { \ + switch (sizeof(*(ptr))) { \ + case 1: __put_user_asm("stb",__x,ptr); break; \ + case 2: __put_user_asm("sth",__x,ptr); break; \ + case 4: __put_user_asm("stw",__x,ptr); break; \ + case 8: STD_USER(__x,ptr); break; \ + default: __put_user_bad(); break; \ + } \ + } \ + \ + __pu_err; \ +}) + +/* + * The "__put_user/kernel_asm()" macros tell gcc they read from memory + * instead of writing. This is because they do not write to any memory + * gcc knows about, so there are no aliasing issues. These macros must + * also be aware that "fixup_put_user_skip_[12]" are executed in the + * context of the fault, and any registers used there must be listed + * as clobbers. In this case only "r1" is used by the current routines. + * r8/r9 are already listed as err/val. + */ + +#define __put_kernel_asm(stx,x,ptr) \ + __asm__ __volatile__ ( \ + "\n1:\t" stx "\t%2,0(%1)\n\t" \ + ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_1)\ + : "=r"(__pu_err) \ + : "r"(ptr), "r"(x), "0"(__pu_err) \ + : "r1") + +#define __put_user_asm(stx,x,ptr) \ + __asm__ __volatile__ ( \ + "\n1:\t" stx "\t%2,0(%%sr3,%1)\n\t" \ + ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_1)\ + : "=r"(__pu_err) \ + : "r"(ptr), "r"(x), "0"(__pu_err) \ + : "r1") + + +#if !defined(CONFIG_64BIT) + +#define __put_kernel_asm64(__val,ptr) do { \ + u64 __val64 = (u64)(__val); \ + u32 hi = (__val64) >> 32; \ + u32 lo = (__val64) & 0xffffffff; \ + __asm__ __volatile__ ( \ + "\n1:\tstw %2,0(%1)" \ + "\n2:\tstw %3,4(%1)\n\t" \ + ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_2)\ + ASM_EXCEPTIONTABLE_ENTRY(2b,fixup_put_user_skip_1)\ + : "=r"(__pu_err) \ + : "r"(ptr), "r"(hi), "r"(lo), "0"(__pu_err) \ + : "r1"); \ +} while (0) + +#define __put_user_asm64(__val,ptr) do { \ + u64 __val64 = (u64)(__val); \ + u32 hi = (__val64) >> 32; \ + u32 lo = (__val64) & 0xffffffff; \ + __asm__ __volatile__ ( \ + "\n1:\tstw %2,0(%%sr3,%1)" \ + "\n2:\tstw %3,4(%%sr3,%1)\n\t" \ + ASM_EXCEPTIONTABLE_ENTRY(1b,fixup_put_user_skip_2)\ + ASM_EXCEPTIONTABLE_ENTRY(2b,fixup_put_user_skip_1)\ + : "=r"(__pu_err) \ + : "r"(ptr), "r"(hi), "r"(lo), "0"(__pu_err) \ + : "r1"); \ +} while (0) + +#endif /* !defined(CONFIG_64BIT) */ + + +/* + * Complex access routines -- external declarations + */ + +extern unsigned long lcopy_to_user(void __user *, const void *, unsigned long); +extern unsigned long lcopy_from_user(void *, const void __user *, unsigned long); +extern unsigned long lcopy_in_user(void __user *, const void __user *, unsigned long); +extern long lstrncpy_from_user(char *, const char __user *, long); +extern unsigned lclear_user(void __user *,unsigned long); +extern long lstrnlen_user(const char __user *,long); + +/* + * Complex access routines -- macros + */ + +#define strncpy_from_user lstrncpy_from_user +#define strnlen_user lstrnlen_user +#define strlen_user(str) lstrnlen_user(str, 0x7fffffffL) +#define clear_user lclear_user +#define __clear_user lclear_user + +unsigned long copy_to_user(void __user *dst, const void *src, unsigned long len); +#define __copy_to_user copy_to_user +unsigned long copy_from_user(void *dst, const void __user *src, unsigned long len); +#define __copy_from_user copy_from_user +unsigned long copy_in_user(void __user *dst, const void __user *src, unsigned long len); +#define __copy_in_user copy_in_user +#define __copy_to_user_inatomic __copy_to_user +#define __copy_from_user_inatomic __copy_from_user + +#endif /* __PARISC_UACCESS_H */ diff --git a/arch/parisc/include/asm/ucontext.h b/arch/parisc/include/asm/ucontext.h new file mode 100644 index 00000000000..6c8883e4b0b --- /dev/null +++ b/arch/parisc/include/asm/ucontext.h @@ -0,0 +1,12 @@ +#ifndef _ASM_PARISC_UCONTEXT_H +#define _ASM_PARISC_UCONTEXT_H + +struct ucontext { + unsigned int uc_flags; + struct ucontext *uc_link; + stack_t uc_stack; + struct sigcontext uc_mcontext; + sigset_t uc_sigmask; /* mask last for extensibility */ +}; + +#endif /* !_ASM_PARISC_UCONTEXT_H */ diff --git a/arch/parisc/include/asm/unaligned.h b/arch/parisc/include/asm/unaligned.h new file mode 100644 index 00000000000..dfc5d3321a5 --- /dev/null +++ b/arch/parisc/include/asm/unaligned.h @@ -0,0 +1,16 @@ +#ifndef _ASM_PARISC_UNALIGNED_H +#define _ASM_PARISC_UNALIGNED_H + +#include +#include +#include +#define get_unaligned __get_unaligned_be +#define put_unaligned __put_unaligned_be + +#ifdef __KERNEL__ +struct pt_regs; +void handle_unaligned(struct pt_regs *regs); +int check_unaligned(struct pt_regs *regs); +#endif + +#endif /* _ASM_PARISC_UNALIGNED_H */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h new file mode 100644 index 00000000000..a7d857f0e4f --- /dev/null +++ b/arch/parisc/include/asm/unistd.h @@ -0,0 +1,991 @@ +#ifndef _ASM_PARISC_UNISTD_H_ +#define _ASM_PARISC_UNISTD_H_ + +/* + * This file contains the system call numbers. + */ + +/* + * HP-UX system calls get their native numbers for binary compatibility. + */ + +#define __NR_HPUX_exit 1 +#define __NR_HPUX_fork 2 +#define __NR_HPUX_read 3 +#define __NR_HPUX_write 4 +#define __NR_HPUX_open 5 +#define __NR_HPUX_close 6 +#define __NR_HPUX_wait 7 +#define __NR_HPUX_creat 8 +#define __NR_HPUX_link 9 +#define __NR_HPUX_unlink 10 +#define __NR_HPUX_execv 11 +#define __NR_HPUX_chdir 12 +#define __NR_HPUX_time 13 +#define __NR_HPUX_mknod 14 +#define __NR_HPUX_chmod 15 +#define __NR_HPUX_chown 16 +#define __NR_HPUX_break 17 +#define __NR_HPUX_lchmod 18 +#define __NR_HPUX_lseek 19 +#define __NR_HPUX_getpid 20 +#define __NR_HPUX_mount 21 +#define __NR_HPUX_umount 22 +#define __NR_HPUX_setuid 23 +#define __NR_HPUX_getuid 24 +#define __NR_HPUX_stime 25 +#define __NR_HPUX_ptrace 26 +#define __NR_HPUX_alarm 27 +#define __NR_HPUX_oldfstat 28 +#define __NR_HPUX_pause 29 +#define __NR_HPUX_utime 30 +#define __NR_HPUX_stty 31 +#define __NR_HPUX_gtty 32 +#define __NR_HPUX_access 33 +#define __NR_HPUX_nice 34 +#define __NR_HPUX_ftime 35 +#define __NR_HPUX_sync 36 +#define __NR_HPUX_kill 37 +#define __NR_HPUX_stat 38 +#define __NR_HPUX_setpgrp3 39 +#define __NR_HPUX_lstat 40 +#define __NR_HPUX_dup 41 +#define __NR_HPUX_pipe 42 +#define __NR_HPUX_times 43 +#define __NR_HPUX_profil 44 +#define __NR_HPUX_ki_call 45 +#define __NR_HPUX_setgid 46 +#define __NR_HPUX_getgid 47 +#define __NR_HPUX_sigsys 48 +#define __NR_HPUX_reserved1 49 +#define __NR_HPUX_reserved2 50 +#define __NR_HPUX_acct 51 +#define __NR_HPUX_set_userthreadid 52 +#define __NR_HPUX_oldlock 53 +#define __NR_HPUX_ioctl 54 +#define __NR_HPUX_reboot 55 +#define __NR_HPUX_symlink 56 +#define __NR_HPUX_utssys 57 +#define __NR_HPUX_readlink 58 +#define __NR_HPUX_execve 59 +#define __NR_HPUX_umask 60 +#define __NR_HPUX_chroot 61 +#define __NR_HPUX_fcntl 62 +#define __NR_HPUX_ulimit 63 +#define __NR_HPUX_getpagesize 64 +#define __NR_HPUX_mremap 65 +#define __NR_HPUX_vfork 66 +#define __NR_HPUX_vread 67 +#define __NR_HPUX_vwrite 68 +#define __NR_HPUX_sbrk 69 +#define __NR_HPUX_sstk 70 +#define __NR_HPUX_mmap 71 +#define __NR_HPUX_vadvise 72 +#define __NR_HPUX_munmap 73 +#define __NR_HPUX_mprotect 74 +#define __NR_HPUX_madvise 75 +#define __NR_HPUX_vhangup 76 +#define __NR_HPUX_swapoff 77 +#define __NR_HPUX_mincore 78 +#define __NR_HPUX_getgroups 79 +#define __NR_HPUX_setgroups 80 +#define __NR_HPUX_getpgrp2 81 +#define __NR_HPUX_setpgrp2 82 +#define __NR_HPUX_setitimer 83 +#define __NR_HPUX_wait3 84 +#define __NR_HPUX_swapon 85 +#define __NR_HPUX_getitimer 86 +#define __NR_HPUX_gethostname42 87 +#define __NR_HPUX_sethostname42 88 +#define __NR_HPUX_getdtablesize 89 +#define __NR_HPUX_dup2 90 +#define __NR_HPUX_getdopt 91 +#define __NR_HPUX_fstat 92 +#define __NR_HPUX_select 93 +#define __NR_HPUX_setdopt 94 +#define __NR_HPUX_fsync 95 +#define __NR_HPUX_setpriority 96 +#define __NR_HPUX_socket_old 97 +#define __NR_HPUX_connect_old 98 +#define __NR_HPUX_accept_old 99 +#define __NR_HPUX_getpriority 100 +#define __NR_HPUX_send_old 101 +#define __NR_HPUX_recv_old 102 +#define __NR_HPUX_socketaddr_old 103 +#define __NR_HPUX_bind_old 104 +#define __NR_HPUX_setsockopt_old 105 +#define __NR_HPUX_listen_old 106 +#define __NR_HPUX_vtimes_old 107 +#define __NR_HPUX_sigvector 108 +#define __NR_HPUX_sigblock 109 +#define __NR_HPUX_siggetmask 110 +#define __NR_HPUX_sigpause 111 +#define __NR_HPUX_sigstack 112 +#define __NR_HPUX_recvmsg_old 113 +#define __NR_HPUX_sendmsg_old 114 +#define __NR_HPUX_vtrace_old 115 +#define __NR_HPUX_gettimeofday 116 +#define __NR_HPUX_getrusage 117 +#define __NR_HPUX_getsockopt_old 118 +#define __NR_HPUX_resuba_old 119 +#define __NR_HPUX_readv 120 +#define __NR_HPUX_writev 121 +#define __NR_HPUX_settimeofday 122 +#define __NR_HPUX_fchown 123 +#define __NR_HPUX_fchmod 124 +#define __NR_HPUX_recvfrom_old 125 +#define __NR_HPUX_setresuid 126 +#define __NR_HPUX_setresgid 127 +#define __NR_HPUX_rename 128 +#define __NR_HPUX_truncate 129 +#define __NR_HPUX_ftruncate 130 +#define __NR_HPUX_flock_old 131 +#define __NR_HPUX_sysconf 132 +#define __NR_HPUX_sendto_old 133 +#define __NR_HPUX_shutdown_old 134 +#define __NR_HPUX_socketpair_old 135 +#define __NR_HPUX_mkdir 136 +#define __NR_HPUX_rmdir 137 +#define __NR_HPUX_utimes_old 138 +#define __NR_HPUX_sigcleanup_old 139 +#define __NR_HPUX_setcore 140 +#define __NR_HPUX_getpeername_old 141 +#define __NR_HPUX_gethostid 142 +#define __NR_HPUX_sethostid 143 +#define __NR_HPUX_getrlimit 144 +#define __NR_HPUX_setrlimit 145 +#define __NR_HPUX_killpg_old 146 +#define __NR_HPUX_cachectl 147 +#define __NR_HPUX_quotactl 148 +#define __NR_HPUX_get_sysinfo 149 +#define __NR_HPUX_getsockname_old 150 +#define __NR_HPUX_privgrp 151 +#define __NR_HPUX_rtprio 152 +#define __NR_HPUX_plock 153 +#define __NR_HPUX_reserved3 154 +#define __NR_HPUX_lockf 155 +#define __NR_HPUX_semget 156 +#define __NR_HPUX_osemctl 157 +#define __NR_HPUX_semop 158 +#define __NR_HPUX_msgget 159 +#define __NR_HPUX_omsgctl 160 +#define __NR_HPUX_msgsnd 161 +#define __NR_HPUX_msgrecv 162 +#define __NR_HPUX_shmget 163 +#define __NR_HPUX_oshmctl 164 +#define __NR_HPUX_shmat 165 +#define __NR_HPUX_shmdt 166 +#define __NR_HPUX_m68020_advise 167 +/* [168,189] are for Discless/DUX */ +#define __NR_HPUX_csp 168 +#define __NR_HPUX_cluster 169 +#define __NR_HPUX_mkrnod 170 +#define __NR_HPUX_test 171 +#define __NR_HPUX_unsp_open 172 +#define __NR_HPUX_reserved4 173 +#define __NR_HPUX_getcontext_old 174 +#define __NR_HPUX_osetcontext 175 +#define __NR_HPUX_bigio 176 +#define __NR_HPUX_pipenode 177 +#define __NR_HPUX_lsync 178 +#define __NR_HPUX_getmachineid 179 +#define __NR_HPUX_cnodeid 180 +#define __NR_HPUX_cnodes 181 +#define __NR_HPUX_swapclients 182 +#define __NR_HPUX_rmt_process 183 +#define __NR_HPUX_dskless_stats 184 +#define __NR_HPUX_sigprocmask 185 +#define __NR_HPUX_sigpending 186 +#define __NR_HPUX_sigsuspend 187 +#define __NR_HPUX_sigaction 188 +#define __NR_HPUX_reserved5 189 +#define __NR_HPUX_nfssvc 190 +#define __NR_HPUX_getfh 191 +#define __NR_HPUX_getdomainname 192 +#define __NR_HPUX_setdomainname 193 +#define __NR_HPUX_async_daemon 194 +#define __NR_HPUX_getdirentries 195 +#define __NR_HPUX_statfs 196 +#define __NR_HPUX_fstatfs 197 +#define __NR_HPUX_vfsmount 198 +#define __NR_HPUX_reserved6 199 +#define __NR_HPUX_waitpid 200 +/* 201 - 223 missing */ +#define __NR_HPUX_sigsetreturn 224 +#define __NR_HPUX_sigsetstatemask 225 +/* 226 missing */ +#define __NR_HPUX_cs 227 +#define __NR_HPUX_cds 228 +#define __NR_HPUX_set_no_trunc 229 +#define __NR_HPUX_pathconf 230 +#define __NR_HPUX_fpathconf 231 +/* 232, 233 missing */ +#define __NR_HPUX_nfs_fcntl 234 +#define __NR_HPUX_ogetacl 235 +#define __NR_HPUX_ofgetacl 236 +#define __NR_HPUX_osetacl 237 +#define __NR_HPUX_ofsetacl 238 +#define __NR_HPUX_pstat 239 +#define __NR_HPUX_getaudid 240 +#define __NR_HPUX_setaudid 241 +#define __NR_HPUX_getaudproc 242 +#define __NR_HPUX_setaudproc 243 +#define __NR_HPUX_getevent 244 +#define __NR_HPUX_setevent 245 +#define __NR_HPUX_audwrite 246 +#define __NR_HPUX_audswitch 247 +#define __NR_HPUX_audctl 248 +#define __NR_HPUX_ogetaccess 249 +#define __NR_HPUX_fsctl 250 +/* 251 - 258 missing */ +#define __NR_HPUX_swapfs 259 +#define __NR_HPUX_fss 260 +/* 261 - 266 missing */ +#define __NR_HPUX_tsync 267 +#define __NR_HPUX_getnumfds 268 +#define __NR_HPUX_poll 269 +#define __NR_HPUX_getmsg 270 +#define __NR_HPUX_putmsg 271 +#define __NR_HPUX_fchdir 272 +#define __NR_HPUX_getmount_cnt 273 +#define __NR_HPUX_getmount_entry 274 +#define __NR_HPUX_accept 275 +#define __NR_HPUX_bind 276 +#define __NR_HPUX_connect 277 +#define __NR_HPUX_getpeername 278 +#define __NR_HPUX_getsockname 279 +#define __NR_HPUX_getsockopt 280 +#define __NR_HPUX_listen 281 +#define __NR_HPUX_recv 282 +#define __NR_HPUX_recvfrom 283 +#define __NR_HPUX_recvmsg 284 +#define __NR_HPUX_send 285 +#define __NR_HPUX_sendmsg 286 +#define __NR_HPUX_sendto 287 +#define __NR_HPUX_setsockopt 288 +#define __NR_HPUX_shutdown 289 +#define __NR_HPUX_socket 290 +#define __NR_HPUX_socketpair 291 +#define __NR_HPUX_proc_open 292 +#define __NR_HPUX_proc_close 293 +#define __NR_HPUX_proc_send 294 +#define __NR_HPUX_proc_recv 295 +#define __NR_HPUX_proc_sendrecv 296 +#define __NR_HPUX_proc_syscall 297 +/* 298 - 311 missing */ +#define __NR_HPUX_semctl 312 +#define __NR_HPUX_msgctl 313 +#define __NR_HPUX_shmctl 314 +#define __NR_HPUX_mpctl 315 +#define __NR_HPUX_exportfs 316 +#define __NR_HPUX_getpmsg 317 +#define __NR_HPUX_putpmsg 318 +/* 319 missing */ +#define __NR_HPUX_msync 320 +#define __NR_HPUX_msleep 321 +#define __NR_HPUX_mwakeup 322 +#define __NR_HPUX_msem_init 323 +#define __NR_HPUX_msem_remove 324 +#define __NR_HPUX_adjtime 325 +#define __NR_HPUX_kload 326 +#define __NR_HPUX_fattach 327 +#define __NR_HPUX_fdetach 328 +#define __NR_HPUX_serialize 329 +#define __NR_HPUX_statvfs 330 +#define __NR_HPUX_fstatvfs 331 +#define __NR_HPUX_lchown 332 +#define __NR_HPUX_getsid 333 +#define __NR_HPUX_sysfs 334 +/* 335, 336 missing */ +#define __NR_HPUX_sched_setparam 337 +#define __NR_HPUX_sched_getparam 338 +#define __NR_HPUX_sched_setscheduler 339 +#define __NR_HPUX_sched_getscheduler 340 +#define __NR_HPUX_sched_yield 341 +#define __NR_HPUX_sched_get_priority_max 342 +#define __NR_HPUX_sched_get_priority_min 343 +#define __NR_HPUX_sched_rr_get_interval 344 +#define __NR_HPUX_clock_settime 345 +#define __NR_HPUX_clock_gettime 346 +#define __NR_HPUX_clock_getres 347 +#define __NR_HPUX_timer_create 348 +#define __NR_HPUX_timer_delete 349 +#define __NR_HPUX_timer_settime 350 +#define __NR_HPUX_timer_gettime 351 +#define __NR_HPUX_timer_getoverrun 352 +#define __NR_HPUX_nanosleep 353 +#define __NR_HPUX_toolbox 354 +/* 355 missing */ +#define __NR_HPUX_getdents 356 +#define __NR_HPUX_getcontext 357 +#define __NR_HPUX_sysinfo 358 +#define __NR_HPUX_fcntl64 359 +#define __NR_HPUX_ftruncate64 360 +#define __NR_HPUX_fstat64 361 +#define __NR_HPUX_getdirentries64 362 +#define __NR_HPUX_getrlimit64 363 +#define __NR_HPUX_lockf64 364 +#define __NR_HPUX_lseek64 365 +#define __NR_HPUX_lstat64 366 +#define __NR_HPUX_mmap64 367 +#define __NR_HPUX_setrlimit64 368 +#define __NR_HPUX_stat64 369 +#define __NR_HPUX_truncate64 370 +#define __NR_HPUX_ulimit64 371 +#define __NR_HPUX_pread 372 +#define __NR_HPUX_preadv 373 +#define __NR_HPUX_pwrite 374 +#define __NR_HPUX_pwritev 375 +#define __NR_HPUX_pread64 376 +#define __NR_HPUX_preadv64 377 +#define __NR_HPUX_pwrite64 378 +#define __NR_HPUX_pwritev64 379 +#define __NR_HPUX_setcontext 380 +#define __NR_HPUX_sigaltstack 381 +#define __NR_HPUX_waitid 382 +#define __NR_HPUX_setpgrp 383 +#define __NR_HPUX_recvmsg2 384 +#define __NR_HPUX_sendmsg2 385 +#define __NR_HPUX_socket2 386 +#define __NR_HPUX_socketpair2 387 +#define __NR_HPUX_setregid 388 +#define __NR_HPUX_lwp_create 389 +#define __NR_HPUX_lwp_terminate 390 +#define __NR_HPUX_lwp_wait 391 +#define __NR_HPUX_lwp_suspend 392 +#define __NR_HPUX_lwp_resume 393 +/* 394 missing */ +#define __NR_HPUX_lwp_abort_syscall 395 +#define __NR_HPUX_lwp_info 396 +#define __NR_HPUX_lwp_kill 397 +#define __NR_HPUX_ksleep 398 +#define __NR_HPUX_kwakeup 399 +/* 400 missing */ +#define __NR_HPUX_pstat_getlwp 401 +#define __NR_HPUX_lwp_exit 402 +#define __NR_HPUX_lwp_continue 403 +#define __NR_HPUX_getacl 404 +#define __NR_HPUX_fgetacl 405 +#define __NR_HPUX_setacl 406 +#define __NR_HPUX_fsetacl 407 +#define __NR_HPUX_getaccess 408 +#define __NR_HPUX_lwp_mutex_init 409 +#define __NR_HPUX_lwp_mutex_lock_sys 410 +#define __NR_HPUX_lwp_mutex_unlock 411 +#define __NR_HPUX_lwp_cond_init 412 +#define __NR_HPUX_lwp_cond_signal 413 +#define __NR_HPUX_lwp_cond_broadcast 414 +#define __NR_HPUX_lwp_cond_wait_sys 415 +#define __NR_HPUX_lwp_getscheduler 416 +#define __NR_HPUX_lwp_setscheduler 417 +#define __NR_HPUX_lwp_getstate 418 +#define __NR_HPUX_lwp_setstate 419 +#define __NR_HPUX_lwp_detach 420 +#define __NR_HPUX_mlock 421 +#define __NR_HPUX_munlock 422 +#define __NR_HPUX_mlockall 423 +#define __NR_HPUX_munlockall 424 +#define __NR_HPUX_shm_open 425 +#define __NR_HPUX_shm_unlink 426 +#define __NR_HPUX_sigqueue 427 +#define __NR_HPUX_sigwaitinfo 428 +#define __NR_HPUX_sigtimedwait 429 +#define __NR_HPUX_sigwait 430 +#define __NR_HPUX_aio_read 431 +#define __NR_HPUX_aio_write 432 +#define __NR_HPUX_lio_listio 433 +#define __NR_HPUX_aio_error 434 +#define __NR_HPUX_aio_return 435 +#define __NR_HPUX_aio_cancel 436 +#define __NR_HPUX_aio_suspend 437 +#define __NR_HPUX_aio_fsync 438 +#define __NR_HPUX_mq_open 439 +#define __NR_HPUX_mq_close 440 +#define __NR_HPUX_mq_unlink 441 +#define __NR_HPUX_mq_send 442 +#define __NR_HPUX_mq_receive 443 +#define __NR_HPUX_mq_notify 444 +#define __NR_HPUX_mq_setattr 445 +#define __NR_HPUX_mq_getattr 446 +#define __NR_HPUX_ksem_open 447 +#define __NR_HPUX_ksem_unlink 448 +#define __NR_HPUX_ksem_close 449 +#define __NR_HPUX_ksem_post 450 +#define __NR_HPUX_ksem_wait 451 +#define __NR_HPUX_ksem_read 452 +#define __NR_HPUX_ksem_trywait 453 +#define __NR_HPUX_lwp_rwlock_init 454 +#define __NR_HPUX_lwp_rwlock_destroy 455 +#define __NR_HPUX_lwp_rwlock_rdlock_sys 456 +#define __NR_HPUX_lwp_rwlock_wrlock_sys 457 +#define __NR_HPUX_lwp_rwlock_tryrdlock 458 +#define __NR_HPUX_lwp_rwlock_trywrlock 459 +#define __NR_HPUX_lwp_rwlock_unlock 460 +#define __NR_HPUX_ttrace 461 +#define __NR_HPUX_ttrace_wait 462 +#define __NR_HPUX_lf_wire_mem 463 +#define __NR_HPUX_lf_unwire_mem 464 +#define __NR_HPUX_lf_send_pin_map 465 +#define __NR_HPUX_lf_free_buf 466 +#define __NR_HPUX_lf_wait_nq 467 +#define __NR_HPUX_lf_wakeup_conn_q 468 +#define __NR_HPUX_lf_unused 469 +#define __NR_HPUX_lwp_sema_init 470 +#define __NR_HPUX_lwp_sema_post 471 +#define __NR_HPUX_lwp_sema_wait 472 +#define __NR_HPUX_lwp_sema_trywait 473 +#define __NR_HPUX_lwp_sema_destroy 474 +#define __NR_HPUX_statvfs64 475 +#define __NR_HPUX_fstatvfs64 476 +#define __NR_HPUX_msh_register 477 +#define __NR_HPUX_ptrace64 478 +#define __NR_HPUX_sendfile 479 +#define __NR_HPUX_sendpath 480 +#define __NR_HPUX_sendfile64 481 +#define __NR_HPUX_sendpath64 482 +#define __NR_HPUX_modload 483 +#define __NR_HPUX_moduload 484 +#define __NR_HPUX_modpath 485 +#define __NR_HPUX_getksym 486 +#define __NR_HPUX_modadm 487 +#define __NR_HPUX_modstat 488 +#define __NR_HPUX_lwp_detached_exit 489 +#define __NR_HPUX_crashconf 490 +#define __NR_HPUX_siginhibit 491 +#define __NR_HPUX_sigenable 492 +#define __NR_HPUX_spuctl 493 +#define __NR_HPUX_zerokernelsum 494 +#define __NR_HPUX_nfs_kstat 495 +#define __NR_HPUX_aio_read64 496 +#define __NR_HPUX_aio_write64 497 +#define __NR_HPUX_aio_error64 498 +#define __NR_HPUX_aio_return64 499 +#define __NR_HPUX_aio_cancel64 500 +#define __NR_HPUX_aio_suspend64 501 +#define __NR_HPUX_aio_fsync64 502 +#define __NR_HPUX_lio_listio64 503 +#define __NR_HPUX_recv2 504 +#define __NR_HPUX_recvfrom2 505 +#define __NR_HPUX_send2 506 +#define __NR_HPUX_sendto2 507 +#define __NR_HPUX_acl 508 +#define __NR_HPUX___cnx_p2p_ctl 509 +#define __NR_HPUX___cnx_gsched_ctl 510 +#define __NR_HPUX___cnx_pmon_ctl 511 + +#define __NR_HPUX_syscalls 512 + +/* + * Linux system call numbers. + * + * Cary Coutant says that we should just use another syscall gateway + * page to avoid clashing with the HPUX space, and I think he's right: + * it will would keep a branch out of our syscall entry path, at the + * very least. If we decide to change it later, we can ``just'' tweak + * the LINUX_GATEWAY_ADDR define at the bottom and make __NR_Linux be + * 1024 or something. Oh, and recompile libc. =) + * + * 64-bit HPUX binaries get the syscall gateway address passed in a register + * from the kernel at startup, which seems a sane strategy. + */ + +#define __NR_Linux 0 +#define __NR_restart_syscall (__NR_Linux + 0) +#define __NR_exit (__NR_Linux + 1) +#define __NR_fork (__NR_Linux + 2) +#define __NR_read (__NR_Linux + 3) +#define __NR_write (__NR_Linux + 4) +#define __NR_open (__NR_Linux + 5) +#define __NR_close (__NR_Linux + 6) +#define __NR_waitpid (__NR_Linux + 7) +#define __NR_creat (__NR_Linux + 8) +#define __NR_link (__NR_Linux + 9) +#define __NR_unlink (__NR_Linux + 10) +#define __NR_execve (__NR_Linux + 11) +#define __NR_chdir (__NR_Linux + 12) +#define __NR_time (__NR_Linux + 13) +#define __NR_mknod (__NR_Linux + 14) +#define __NR_chmod (__NR_Linux + 15) +#define __NR_lchown (__NR_Linux + 16) +#define __NR_socket (__NR_Linux + 17) +#define __NR_stat (__NR_Linux + 18) +#define __NR_lseek (__NR_Linux + 19) +#define __NR_getpid (__NR_Linux + 20) +#define __NR_mount (__NR_Linux + 21) +#define __NR_bind (__NR_Linux + 22) +#define __NR_setuid (__NR_Linux + 23) +#define __NR_getuid (__NR_Linux + 24) +#define __NR_stime (__NR_Linux + 25) +#define __NR_ptrace (__NR_Linux + 26) +#define __NR_alarm (__NR_Linux + 27) +#define __NR_fstat (__NR_Linux + 28) +#define __NR_pause (__NR_Linux + 29) +#define __NR_utime (__NR_Linux + 30) +#define __NR_connect (__NR_Linux + 31) +#define __NR_listen (__NR_Linux + 32) +#define __NR_access (__NR_Linux + 33) +#define __NR_nice (__NR_Linux + 34) +#define __NR_accept (__NR_Linux + 35) +#define __NR_sync (__NR_Linux + 36) +#define __NR_kill (__NR_Linux + 37) +#define __NR_rename (__NR_Linux + 38) +#define __NR_mkdir (__NR_Linux + 39) +#define __NR_rmdir (__NR_Linux + 40) +#define __NR_dup (__NR_Linux + 41) +#define __NR_pipe (__NR_Linux + 42) +#define __NR_times (__NR_Linux + 43) +#define __NR_getsockname (__NR_Linux + 44) +#define __NR_brk (__NR_Linux + 45) +#define __NR_setgid (__NR_Linux + 46) +#define __NR_getgid (__NR_Linux + 47) +#define __NR_signal (__NR_Linux + 48) +#define __NR_geteuid (__NR_Linux + 49) +#define __NR_getegid (__NR_Linux + 50) +#define __NR_acct (__NR_Linux + 51) +#define __NR_umount2 (__NR_Linux + 52) +#define __NR_getpeername (__NR_Linux + 53) +#define __NR_ioctl (__NR_Linux + 54) +#define __NR_fcntl (__NR_Linux + 55) +#define __NR_socketpair (__NR_Linux + 56) +#define __NR_setpgid (__NR_Linux + 57) +#define __NR_send (__NR_Linux + 58) +#define __NR_uname (__NR_Linux + 59) +#define __NR_umask (__NR_Linux + 60) +#define __NR_chroot (__NR_Linux + 61) +#define __NR_ustat (__NR_Linux + 62) +#define __NR_dup2 (__NR_Linux + 63) +#define __NR_getppid (__NR_Linux + 64) +#define __NR_getpgrp (__NR_Linux + 65) +#define __NR_setsid (__NR_Linux + 66) +#define __NR_pivot_root (__NR_Linux + 67) +#define __NR_sgetmask (__NR_Linux + 68) +#define __NR_ssetmask (__NR_Linux + 69) +#define __NR_setreuid (__NR_Linux + 70) +#define __NR_setregid (__NR_Linux + 71) +#define __NR_mincore (__NR_Linux + 72) +#define __NR_sigpending (__NR_Linux + 73) +#define __NR_sethostname (__NR_Linux + 74) +#define __NR_setrlimit (__NR_Linux + 75) +#define __NR_getrlimit (__NR_Linux + 76) +#define __NR_getrusage (__NR_Linux + 77) +#define __NR_gettimeofday (__NR_Linux + 78) +#define __NR_settimeofday (__NR_Linux + 79) +#define __NR_getgroups (__NR_Linux + 80) +#define __NR_setgroups (__NR_Linux + 81) +#define __NR_sendto (__NR_Linux + 82) +#define __NR_symlink (__NR_Linux + 83) +#define __NR_lstat (__NR_Linux + 84) +#define __NR_readlink (__NR_Linux + 85) +#define __NR_uselib (__NR_Linux + 86) +#define __NR_swapon (__NR_Linux + 87) +#define __NR_reboot (__NR_Linux + 88) +#define __NR_mmap2 (__NR_Linux + 89) +#define __NR_mmap (__NR_Linux + 90) +#define __NR_munmap (__NR_Linux + 91) +#define __NR_truncate (__NR_Linux + 92) +#define __NR_ftruncate (__NR_Linux + 93) +#define __NR_fchmod (__NR_Linux + 94) +#define __NR_fchown (__NR_Linux + 95) +#define __NR_getpriority (__NR_Linux + 96) +#define __NR_setpriority (__NR_Linux + 97) +#define __NR_recv (__NR_Linux + 98) +#define __NR_statfs (__NR_Linux + 99) +#define __NR_fstatfs (__NR_Linux + 100) +#define __NR_stat64 (__NR_Linux + 101) +/* #define __NR_socketcall (__NR_Linux + 102) */ +#define __NR_syslog (__NR_Linux + 103) +#define __NR_setitimer (__NR_Linux + 104) +#define __NR_getitimer (__NR_Linux + 105) +#define __NR_capget (__NR_Linux + 106) +#define __NR_capset (__NR_Linux + 107) +#define __NR_pread64 (__NR_Linux + 108) +#define __NR_pwrite64 (__NR_Linux + 109) +#define __NR_getcwd (__NR_Linux + 110) +#define __NR_vhangup (__NR_Linux + 111) +#define __NR_fstat64 (__NR_Linux + 112) +#define __NR_vfork (__NR_Linux + 113) +#define __NR_wait4 (__NR_Linux + 114) +#define __NR_swapoff (__NR_Linux + 115) +#define __NR_sysinfo (__NR_Linux + 116) +#define __NR_shutdown (__NR_Linux + 117) +#define __NR_fsync (__NR_Linux + 118) +#define __NR_madvise (__NR_Linux + 119) +#define __NR_clone (__NR_Linux + 120) +#define __NR_setdomainname (__NR_Linux + 121) +#define __NR_sendfile (__NR_Linux + 122) +#define __NR_recvfrom (__NR_Linux + 123) +#define __NR_adjtimex (__NR_Linux + 124) +#define __NR_mprotect (__NR_Linux + 125) +#define __NR_sigprocmask (__NR_Linux + 126) +#define __NR_create_module (__NR_Linux + 127) +#define __NR_init_module (__NR_Linux + 128) +#define __NR_delete_module (__NR_Linux + 129) +#define __NR_get_kernel_syms (__NR_Linux + 130) +#define __NR_quotactl (__NR_Linux + 131) +#define __NR_getpgid (__NR_Linux + 132) +#define __NR_fchdir (__NR_Linux + 133) +#define __NR_bdflush (__NR_Linux + 134) +#define __NR_sysfs (__NR_Linux + 135) +#define __NR_personality (__NR_Linux + 136) +#define __NR_afs_syscall (__NR_Linux + 137) /* Syscall for Andrew File System */ +#define __NR_setfsuid (__NR_Linux + 138) +#define __NR_setfsgid (__NR_Linux + 139) +#define __NR__llseek (__NR_Linux + 140) +#define __NR_getdents (__NR_Linux + 141) +#define __NR__newselect (__NR_Linux + 142) +#define __NR_flock (__NR_Linux + 143) +#define __NR_msync (__NR_Linux + 144) +#define __NR_readv (__NR_Linux + 145) +#define __NR_writev (__NR_Linux + 146) +#define __NR_getsid (__NR_Linux + 147) +#define __NR_fdatasync (__NR_Linux + 148) +#define __NR__sysctl (__NR_Linux + 149) +#define __NR_mlock (__NR_Linux + 150) +#define __NR_munlock (__NR_Linux + 151) +#define __NR_mlockall (__NR_Linux + 152) +#define __NR_munlockall (__NR_Linux + 153) +#define __NR_sched_setparam (__NR_Linux + 154) +#define __NR_sched_getparam (__NR_Linux + 155) +#define __NR_sched_setscheduler (__NR_Linux + 156) +#define __NR_sched_getscheduler (__NR_Linux + 157) +#define __NR_sched_yield (__NR_Linux + 158) +#define __NR_sched_get_priority_max (__NR_Linux + 159) +#define __NR_sched_get_priority_min (__NR_Linux + 160) +#define __NR_sched_rr_get_interval (__NR_Linux + 161) +#define __NR_nanosleep (__NR_Linux + 162) +#define __NR_mremap (__NR_Linux + 163) +#define __NR_setresuid (__NR_Linux + 164) +#define __NR_getresuid (__NR_Linux + 165) +#define __NR_sigaltstack (__NR_Linux + 166) +#define __NR_query_module (__NR_Linux + 167) +#define __NR_poll (__NR_Linux + 168) +#define __NR_nfsservctl (__NR_Linux + 169) +#define __NR_setresgid (__NR_Linux + 170) +#define __NR_getresgid (__NR_Linux + 171) +#define __NR_prctl (__NR_Linux + 172) +#define __NR_rt_sigreturn (__NR_Linux + 173) +#define __NR_rt_sigaction (__NR_Linux + 174) +#define __NR_rt_sigprocmask (__NR_Linux + 175) +#define __NR_rt_sigpending (__NR_Linux + 176) +#define __NR_rt_sigtimedwait (__NR_Linux + 177) +#define __NR_rt_sigqueueinfo (__NR_Linux + 178) +#define __NR_rt_sigsuspend (__NR_Linux + 179) +#define __NR_chown (__NR_Linux + 180) +#define __NR_setsockopt (__NR_Linux + 181) +#define __NR_getsockopt (__NR_Linux + 182) +#define __NR_sendmsg (__NR_Linux + 183) +#define __NR_recvmsg (__NR_Linux + 184) +#define __NR_semop (__NR_Linux + 185) +#define __NR_semget (__NR_Linux + 186) +#define __NR_semctl (__NR_Linux + 187) +#define __NR_msgsnd (__NR_Linux + 188) +#define __NR_msgrcv (__NR_Linux + 189) +#define __NR_msgget (__NR_Linux + 190) +#define __NR_msgctl (__NR_Linux + 191) +#define __NR_shmat (__NR_Linux + 192) +#define __NR_shmdt (__NR_Linux + 193) +#define __NR_shmget (__NR_Linux + 194) +#define __NR_shmctl (__NR_Linux + 195) + +#define __NR_getpmsg (__NR_Linux + 196) /* Somebody *wants* streams? */ +#define __NR_putpmsg (__NR_Linux + 197) + +#define __NR_lstat64 (__NR_Linux + 198) +#define __NR_truncate64 (__NR_Linux + 199) +#define __NR_ftruncate64 (__NR_Linux + 200) +#define __NR_getdents64 (__NR_Linux + 201) +#define __NR_fcntl64 (__NR_Linux + 202) +#define __NR_attrctl (__NR_Linux + 203) +#define __NR_acl_get (__NR_Linux + 204) +#define __NR_acl_set (__NR_Linux + 205) +#define __NR_gettid (__NR_Linux + 206) +#define __NR_readahead (__NR_Linux + 207) +#define __NR_tkill (__NR_Linux + 208) +#define __NR_sendfile64 (__NR_Linux + 209) +#define __NR_futex (__NR_Linux + 210) +#define __NR_sched_setaffinity (__NR_Linux + 211) +#define __NR_sched_getaffinity (__NR_Linux + 212) +#define __NR_set_thread_area (__NR_Linux + 213) +#define __NR_get_thread_area (__NR_Linux + 214) +#define __NR_io_setup (__NR_Linux + 215) +#define __NR_io_destroy (__NR_Linux + 216) +#define __NR_io_getevents (__NR_Linux + 217) +#define __NR_io_submit (__NR_Linux + 218) +#define __NR_io_cancel (__NR_Linux + 219) +#define __NR_alloc_hugepages (__NR_Linux + 220) +#define __NR_free_hugepages (__NR_Linux + 221) +#define __NR_exit_group (__NR_Linux + 222) +#define __NR_lookup_dcookie (__NR_Linux + 223) +#define __NR_epoll_create (__NR_Linux + 224) +#define __NR_epoll_ctl (__NR_Linux + 225) +#define __NR_epoll_wait (__NR_Linux + 226) +#define __NR_remap_file_pages (__NR_Linux + 227) +#define __NR_semtimedop (__NR_Linux + 228) +#define __NR_mq_open (__NR_Linux + 229) +#define __NR_mq_unlink (__NR_Linux + 230) +#define __NR_mq_timedsend (__NR_Linux + 231) +#define __NR_mq_timedreceive (__NR_Linux + 232) +#define __NR_mq_notify (__NR_Linux + 233) +#define __NR_mq_getsetattr (__NR_Linux + 234) +#define __NR_waitid (__NR_Linux + 235) +#define __NR_fadvise64_64 (__NR_Linux + 236) +#define __NR_set_tid_address (__NR_Linux + 237) +#define __NR_setxattr (__NR_Linux + 238) +#define __NR_lsetxattr (__NR_Linux + 239) +#define __NR_fsetxattr (__NR_Linux + 240) +#define __NR_getxattr (__NR_Linux + 241) +#define __NR_lgetxattr (__NR_Linux + 242) +#define __NR_fgetxattr (__NR_Linux + 243) +#define __NR_listxattr (__NR_Linux + 244) +#define __NR_llistxattr (__NR_Linux + 245) +#define __NR_flistxattr (__NR_Linux + 246) +#define __NR_removexattr (__NR_Linux + 247) +#define __NR_lremovexattr (__NR_Linux + 248) +#define __NR_fremovexattr (__NR_Linux + 249) +#define __NR_timer_create (__NR_Linux + 250) +#define __NR_timer_settime (__NR_Linux + 251) +#define __NR_timer_gettime (__NR_Linux + 252) +#define __NR_timer_getoverrun (__NR_Linux + 253) +#define __NR_timer_delete (__NR_Linux + 254) +#define __NR_clock_settime (__NR_Linux + 255) +#define __NR_clock_gettime (__NR_Linux + 256) +#define __NR_clock_getres (__NR_Linux + 257) +#define __NR_clock_nanosleep (__NR_Linux + 258) +#define __NR_tgkill (__NR_Linux + 259) +#define __NR_mbind (__NR_Linux + 260) +#define __NR_get_mempolicy (__NR_Linux + 261) +#define __NR_set_mempolicy (__NR_Linux + 262) +#define __NR_vserver (__NR_Linux + 263) +#define __NR_add_key (__NR_Linux + 264) +#define __NR_request_key (__NR_Linux + 265) +#define __NR_keyctl (__NR_Linux + 266) +#define __NR_ioprio_set (__NR_Linux + 267) +#define __NR_ioprio_get (__NR_Linux + 268) +#define __NR_inotify_init (__NR_Linux + 269) +#define __NR_inotify_add_watch (__NR_Linux + 270) +#define __NR_inotify_rm_watch (__NR_Linux + 271) +#define __NR_migrate_pages (__NR_Linux + 272) +#define __NR_pselect6 (__NR_Linux + 273) +#define __NR_ppoll (__NR_Linux + 274) +#define __NR_openat (__NR_Linux + 275) +#define __NR_mkdirat (__NR_Linux + 276) +#define __NR_mknodat (__NR_Linux + 277) +#define __NR_fchownat (__NR_Linux + 278) +#define __NR_futimesat (__NR_Linux + 279) +#define __NR_fstatat64 (__NR_Linux + 280) +#define __NR_unlinkat (__NR_Linux + 281) +#define __NR_renameat (__NR_Linux + 282) +#define __NR_linkat (__NR_Linux + 283) +#define __NR_symlinkat (__NR_Linux + 284) +#define __NR_readlinkat (__NR_Linux + 285) +#define __NR_fchmodat (__NR_Linux + 286) +#define __NR_faccessat (__NR_Linux + 287) +#define __NR_unshare (__NR_Linux + 288) +#define __NR_set_robust_list (__NR_Linux + 289) +#define __NR_get_robust_list (__NR_Linux + 290) +#define __NR_splice (__NR_Linux + 291) +#define __NR_sync_file_range (__NR_Linux + 292) +#define __NR_tee (__NR_Linux + 293) +#define __NR_vmsplice (__NR_Linux + 294) +#define __NR_move_pages (__NR_Linux + 295) +#define __NR_getcpu (__NR_Linux + 296) +#define __NR_epoll_pwait (__NR_Linux + 297) +#define __NR_statfs64 (__NR_Linux + 298) +#define __NR_fstatfs64 (__NR_Linux + 299) +#define __NR_kexec_load (__NR_Linux + 300) +#define __NR_utimensat (__NR_Linux + 301) +#define __NR_signalfd (__NR_Linux + 302) +#define __NR_timerfd (__NR_Linux + 303) +#define __NR_eventfd (__NR_Linux + 304) +#define __NR_fallocate (__NR_Linux + 305) +#define __NR_timerfd_create (__NR_Linux + 306) +#define __NR_timerfd_settime (__NR_Linux + 307) +#define __NR_timerfd_gettime (__NR_Linux + 308) + +#define __NR_Linux_syscalls (__NR_timerfd_gettime + 1) + + +#define __IGNORE_select /* newselect */ +#define __IGNORE_fadvise64 /* fadvise64_64 */ +#define __IGNORE_utimes /* utime */ + + +#define HPUX_GATEWAY_ADDR 0xC0000004 +#define LINUX_GATEWAY_ADDR 0x100 + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#define SYS_ify(syscall_name) __NR_##syscall_name + +#ifndef ASM_LINE_SEP +# define ASM_LINE_SEP ; +#endif + +/* Definition taken from glibc 2.3.3 + * sysdeps/unix/sysv/linux/hppa/sysdep.h + */ + +#ifdef PIC +/* WARNING: CANNOT BE USED IN A NOP! */ +# define K_STW_ASM_PIC " copy %%r19, %%r4\n" +# define K_LDW_ASM_PIC " copy %%r4, %%r19\n" +# define K_USING_GR4 "%r4", +#else +# define K_STW_ASM_PIC " \n" +# define K_LDW_ASM_PIC " \n" +# define K_USING_GR4 +#endif + +/* GCC has to be warned that a syscall may clobber all the ABI + registers listed as "caller-saves", see page 8, Table 2 + in section 2.2.6 of the PA-RISC RUN-TIME architecture + document. However! r28 is the result and will conflict with + the clobber list so it is left out. Also the input arguments + registers r20 -> r26 will conflict with the list so they + are treated specially. Although r19 is clobbered by the syscall + we cannot say this because it would violate ABI, thus we say + r4 is clobbered and use that register to save/restore r19 + across the syscall. */ + +#define K_CALL_CLOB_REGS "%r1", "%r2", K_USING_GR4 \ + "%r20", "%r29", "%r31" + +#undef K_INLINE_SYSCALL +#define K_INLINE_SYSCALL(name, nr, args...) ({ \ + long __sys_res; \ + { \ + register unsigned long __res __asm__("r28"); \ + K_LOAD_ARGS_##nr(args) \ + /* FIXME: HACK stw/ldw r19 around syscall */ \ + __asm__ volatile( \ + K_STW_ASM_PIC \ + " ble 0x100(%%sr2, %%r0)\n" \ + " ldi %1, %%r20\n" \ + K_LDW_ASM_PIC \ + : "=r" (__res) \ + : "i" (SYS_ify(name)) K_ASM_ARGS_##nr \ + : "memory", K_CALL_CLOB_REGS K_CLOB_ARGS_##nr \ + ); \ + __sys_res = (long)__res; \ + } \ + if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){ \ + errno = -__sys_res; \ + __sys_res = -1; \ + } \ + __sys_res; \ +}) + +#define K_LOAD_ARGS_0() +#define K_LOAD_ARGS_1(r26) \ + register unsigned long __r26 __asm__("r26") = (unsigned long)(r26); \ + K_LOAD_ARGS_0() +#define K_LOAD_ARGS_2(r26,r25) \ + register unsigned long __r25 __asm__("r25") = (unsigned long)(r25); \ + K_LOAD_ARGS_1(r26) +#define K_LOAD_ARGS_3(r26,r25,r24) \ + register unsigned long __r24 __asm__("r24") = (unsigned long)(r24); \ + K_LOAD_ARGS_2(r26,r25) +#define K_LOAD_ARGS_4(r26,r25,r24,r23) \ + register unsigned long __r23 __asm__("r23") = (unsigned long)(r23); \ + K_LOAD_ARGS_3(r26,r25,r24) +#define K_LOAD_ARGS_5(r26,r25,r24,r23,r22) \ + register unsigned long __r22 __asm__("r22") = (unsigned long)(r22); \ + K_LOAD_ARGS_4(r26,r25,r24,r23) +#define K_LOAD_ARGS_6(r26,r25,r24,r23,r22,r21) \ + register unsigned long __r21 __asm__("r21") = (unsigned long)(r21); \ + K_LOAD_ARGS_5(r26,r25,r24,r23,r22) + +/* Even with zero args we use r20 for the syscall number */ +#define K_ASM_ARGS_0 +#define K_ASM_ARGS_1 K_ASM_ARGS_0, "r" (__r26) +#define K_ASM_ARGS_2 K_ASM_ARGS_1, "r" (__r25) +#define K_ASM_ARGS_3 K_ASM_ARGS_2, "r" (__r24) +#define K_ASM_ARGS_4 K_ASM_ARGS_3, "r" (__r23) +#define K_ASM_ARGS_5 K_ASM_ARGS_4, "r" (__r22) +#define K_ASM_ARGS_6 K_ASM_ARGS_5, "r" (__r21) + +/* The registers not listed as inputs but clobbered */ +#define K_CLOB_ARGS_6 +#define K_CLOB_ARGS_5 K_CLOB_ARGS_6, "%r21" +#define K_CLOB_ARGS_4 K_CLOB_ARGS_5, "%r22" +#define K_CLOB_ARGS_3 K_CLOB_ARGS_4, "%r23" +#define K_CLOB_ARGS_2 K_CLOB_ARGS_3, "%r24" +#define K_CLOB_ARGS_1 K_CLOB_ARGS_2, "%r25" +#define K_CLOB_ARGS_0 K_CLOB_ARGS_1, "%r26" + +#define _syscall0(type,name) \ +type name(void) \ +{ \ + return K_INLINE_SYSCALL(name, 0); \ +} + +#define _syscall1(type,name,type1,arg1) \ +type name(type1 arg1) \ +{ \ + return K_INLINE_SYSCALL(name, 1, arg1); \ +} + +#define _syscall2(type,name,type1,arg1,type2,arg2) \ +type name(type1 arg1, type2 arg2) \ +{ \ + return K_INLINE_SYSCALL(name, 2, arg1, arg2); \ +} + +#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ +type name(type1 arg1, type2 arg2, type3 arg3) \ +{ \ + return K_INLINE_SYSCALL(name, 3, arg1, arg2, arg3); \ +} + +#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ +{ \ + return K_INLINE_SYSCALL(name, 4, arg1, arg2, arg3, arg4); \ +} + +/* select takes 5 arguments */ +#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ +type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ +{ \ + return K_INLINE_SYSCALL(name, 5, arg1, arg2, arg3, arg4, arg5); \ +} + +#define __ARCH_WANT_OLD_READDIR +#define __ARCH_WANT_STAT64 +#define __ARCH_WANT_SYS_ALARM +#define __ARCH_WANT_SYS_GETHOSTNAME +#define __ARCH_WANT_SYS_PAUSE +#define __ARCH_WANT_SYS_SGETMASK +#define __ARCH_WANT_SYS_SIGNAL +#define __ARCH_WANT_SYS_TIME +#define __ARCH_WANT_COMPAT_SYS_TIME +#define __ARCH_WANT_SYS_UTIME +#define __ARCH_WANT_SYS_WAITPID +#define __ARCH_WANT_SYS_SOCKETCALL +#define __ARCH_WANT_SYS_FADVISE64 +#define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK +#define __ARCH_WANT_SYS_NICE +#define __ARCH_WANT_SYS_OLD_GETRLIMIT +#define __ARCH_WANT_SYS_OLDUMOUNT +#define __ARCH_WANT_SYS_SIGPENDING +#define __ARCH_WANT_SYS_SIGPROCMASK +#define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND +#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND + +#endif /* __ASSEMBLY__ */ + +#undef STR + +/* + * "Conditional" syscalls + * + * What we want is __attribute__((weak,alias("sys_ni_syscall"))), + * but it doesn't work on all toolchains, so we just do it by hand + */ +#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") + +#endif /* __KERNEL__ */ +#endif /* _ASM_PARISC_UNISTD_H_ */ diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h new file mode 100644 index 00000000000..2f7e6e50a15 --- /dev/null +++ b/arch/parisc/include/asm/unwind.h @@ -0,0 +1,77 @@ +#ifndef _UNWIND_H_ +#define _UNWIND_H_ + +#include + +/* From ABI specifications */ +struct unwind_table_entry { + unsigned int region_start; + unsigned int region_end; + unsigned int Cannot_unwind:1; /* 0 */ + unsigned int Millicode:1; /* 1 */ + unsigned int Millicode_save_sr0:1; /* 2 */ + unsigned int Region_description:2; /* 3..4 */ + unsigned int reserved1:1; /* 5 */ + unsigned int Entry_SR:1; /* 6 */ + unsigned int Entry_FR:4; /* number saved *//* 7..10 */ + unsigned int Entry_GR:5; /* number saved *//* 11..15 */ + unsigned int Args_stored:1; /* 16 */ + unsigned int Variable_Frame:1; /* 17 */ + unsigned int Separate_Package_Body:1; /* 18 */ + unsigned int Frame_Extension_Millicode:1; /* 19 */ + unsigned int Stack_Overflow_Check:1; /* 20 */ + unsigned int Two_Instruction_SP_Increment:1; /* 21 */ + unsigned int Ada_Region:1; /* 22 */ + unsigned int cxx_info:1; /* 23 */ + unsigned int cxx_try_catch:1; /* 24 */ + unsigned int sched_entry_seq:1; /* 25 */ + unsigned int reserved2:1; /* 26 */ + unsigned int Save_SP:1; /* 27 */ + unsigned int Save_RP:1; /* 28 */ + unsigned int Save_MRP_in_frame:1; /* 29 */ + unsigned int extn_ptr_defined:1; /* 30 */ + unsigned int Cleanup_defined:1; /* 31 */ + + unsigned int MPE_XL_interrupt_marker:1; /* 0 */ + unsigned int HP_UX_interrupt_marker:1; /* 1 */ + unsigned int Large_frame:1; /* 2 */ + unsigned int Pseudo_SP_Set:1; /* 3 */ + unsigned int reserved4:1; /* 4 */ + unsigned int Total_frame_size:27; /* 5..31 */ +}; + +struct unwind_table { + struct list_head list; + const char *name; + unsigned long gp; + unsigned long base_addr; + unsigned long start; + unsigned long end; + const struct unwind_table_entry *table; + unsigned long length; +}; + +struct unwind_frame_info { + struct task_struct *t; + /* Eventually we would like to be able to get at any of the registers + available; but for now we only try to get the sp and ip for each + frame */ + /* struct pt_regs regs; */ + unsigned long sp, ip, rp, r31; + unsigned long prev_sp, prev_ip; +}; + +struct unwind_table * +unwind_table_add(const char *name, unsigned long base_addr, + unsigned long gp, void *start, void *end); +void +unwind_table_remove(struct unwind_table *table); + +void unwind_frame_init(struct unwind_frame_info *info, struct task_struct *t, + struct pt_regs *regs); +void unwind_frame_init_from_blocked_task(struct unwind_frame_info *info, struct task_struct *t); +void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *regs); +int unwind_once(struct unwind_frame_info *info); +int unwind_to_user(struct unwind_frame_info *info); + +#endif diff --git a/arch/parisc/include/asm/user.h b/arch/parisc/include/asm/user.h new file mode 100644 index 00000000000..80224753e50 --- /dev/null +++ b/arch/parisc/include/asm/user.h @@ -0,0 +1,5 @@ +/* This file should not exist, but lots of generic code still includes + it. It's a hangover from old a.out days and the traditional core + dump format. We are ELF-only, and so are our core dumps. If we + need to support HP/UX core format then we'll do it here + eventually. */ diff --git a/arch/parisc/include/asm/vga.h b/arch/parisc/include/asm/vga.h new file mode 100644 index 00000000000..171399a88ca --- /dev/null +++ b/arch/parisc/include/asm/vga.h @@ -0,0 +1,6 @@ +#ifndef __ASM_PARISC_VGA_H__ +#define __ASM_PARISC_VGA_H__ + +/* nothing */ + +#endif /* __ASM_PARISC_VGA_H__ */ diff --git a/arch/parisc/include/asm/xor.h b/arch/parisc/include/asm/xor.h new file mode 100644 index 00000000000..c82eb12a5b1 --- /dev/null +++ b/arch/parisc/include/asm/xor.h @@ -0,0 +1 @@ +#include -- cgit v1.2.3 From 24b574d052a1996bac42fbd56715ab602092c291 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 29 Jul 2008 00:09:22 -0400 Subject: parisc: add pdc_coproc_cfg_unlocked and set_firmware_width_unlocked These functions are called only when bringing up the monarch cpu, so it is safe to call them without taking the pdc spinlock. In the future, this may become relevant for lockdep, since these functions were taking spinlocks before start_kernel called the lockdep initializers. --- arch/parisc/include/asm/pdc.h | 2 ++ arch/parisc/kernel/firmware.c | 65 +++++++++++++++++++++++++++++-------------- 2 files changed, 46 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index 46b75f9cce5..c584b00c607 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -603,6 +603,7 @@ int pdc_chassis_info(struct pdc_chassis_info *chassis_info, void *led_info, unsi int pdc_chassis_disp(unsigned long disp); int pdc_chassis_warn(unsigned long *warn); int pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info); +int pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info); int pdc_iodc_read(unsigned long *actcnt, unsigned long hpa, unsigned int index, void *iodc_data, unsigned int iodc_data_size); int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info, @@ -641,6 +642,7 @@ int pdc_mem_mem_table(struct pdc_memory_table_raddr *r_addr, #endif void set_firmware_width(void); +void set_firmware_width_unlocked(void); int pdc_do_firm_test_reset(unsigned long ftc_bitmap); int pdc_do_reset(void); int pdc_soft_power_info(unsigned long *power_reg); diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 99a9e505edf..03f26bd75bd 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -150,26 +150,40 @@ static void convert_to_wide(unsigned long *addr) #endif } +#ifdef CONFIG_64BIT +void __init set_firmware_width_unlocked(void) +{ + int ret; + + ret = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, + __pa(pdc_result), 0); + convert_to_wide(pdc_result); + if (pdc_result[0] != NARROW_FIRMWARE) + parisc_narrow_firmware = 0; +} + /** * set_firmware_width - Determine if the firmware is wide or narrow. * - * This function must be called before any pdc_* function that uses the convert_to_wide - * function. + * This function must be called before any pdc_* function that uses the + * convert_to_wide function. */ void __init set_firmware_width(void) { -#ifdef CONFIG_64BIT - int retval; unsigned long flags; + spin_lock_irqsave(&pdc_lock, flags); + set_firmware_width_unlocked(); + spin_unlock_irqrestore(&pdc_lock, flags); +} +#else +void __init set_firmware_width_unlocked(void) { + return; +} - spin_lock_irqsave(&pdc_lock, flags); - retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_CAPABILITIES, __pa(pdc_result), 0); - convert_to_wide(pdc_result); - if(pdc_result[0] != NARROW_FIRMWARE) - parisc_narrow_firmware = 0; - spin_unlock_irqrestore(&pdc_lock, flags); -#endif +void __init set_firmware_width(void) { + return; } +#endif /*CONFIG_64BIT*/ /** * pdc_emergency_unlock - Unlock the linux pdc lock @@ -288,6 +302,20 @@ int pdc_chassis_warn(unsigned long *warn) return retval; } +int __init pdc_coproc_cfg_unlocked(struct pdc_coproc_cfg *pdc_coproc_info) +{ + int ret; + + ret = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result)); + convert_to_wide(pdc_result); + pdc_coproc_info->ccr_functional = pdc_result[0]; + pdc_coproc_info->ccr_present = pdc_result[1]; + pdc_coproc_info->revision = pdc_result[17]; + pdc_coproc_info->model = pdc_result[18]; + + return ret; +} + /** * pdc_coproc_cfg - To identify coprocessors attached to the processor. * @pdc_coproc_info: Return buffer address. @@ -297,19 +325,14 @@ int pdc_chassis_warn(unsigned long *warn) */ int __init pdc_coproc_cfg(struct pdc_coproc_cfg *pdc_coproc_info) { - int retval; + int ret; unsigned long flags; - spin_lock_irqsave(&pdc_lock, flags); - retval = mem_pdc_call(PDC_COPROC, PDC_COPROC_CFG, __pa(pdc_result)); - convert_to_wide(pdc_result); - pdc_coproc_info->ccr_functional = pdc_result[0]; - pdc_coproc_info->ccr_present = pdc_result[1]; - pdc_coproc_info->revision = pdc_result[17]; - pdc_coproc_info->model = pdc_result[18]; - spin_unlock_irqrestore(&pdc_lock, flags); + spin_lock_irqsave(&pdc_lock, flags); + ret = pdc_coproc_cfg_unlocked(pdc_coproc_info); + spin_unlock_irqrestore(&pdc_lock, flags); - return retval; + return ret; } /** -- cgit v1.2.3 From 089d55289db5d58d938d73b47a415b2b82ee19ac Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Tue, 29 Jul 2008 00:11:13 -0400 Subject: parisc: hijack jump to start_kernel Bang in our own start_parisc call, which initializes the PDC width, and turns on the FPU. Previously, if CONFIG_PRINTK_TIME was on, we'd attempt to use the FPU before we had enabled it, resulting in a difficult to diagnose panic. This patch causes init_per_cpu to redundantly set these for cpu0, but this is harmless. --- arch/parisc/kernel/head.S | 2 +- arch/parisc/kernel/setup.c | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S index a84e31e8287..0e3d9f9b9e3 100644 --- a/arch/parisc/kernel/head.S +++ b/arch/parisc/kernel/head.S @@ -121,7 +121,7 @@ $pgt_fill_loop: copy %r0,%r2 /* And the RFI Target address too */ - load32 start_kernel,%r11 + load32 start_parisc,%r11 /* And the initial task pointer */ load32 init_thread_union,%r6 diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 39e7c5a5946..a59b71efdbe 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -368,6 +368,31 @@ static int __init parisc_init(void) return 0; } - arch_initcall(parisc_init); +void start_parisc(void) +{ + extern void start_kernel(void); + + int ret, cpunum; + struct pdc_coproc_cfg coproc_cfg; + + cpunum = smp_processor_id(); + + set_firmware_width_unlocked(); + + ret = pdc_coproc_cfg_unlocked(&coproc_cfg); + if (ret >= 0 && coproc_cfg.ccr_functional) { + mtctl(coproc_cfg.ccr_functional, 10); + + cpu_data[cpunum].fp_rev = coproc_cfg.revision; + cpu_data[cpunum].fp_model = coproc_cfg.model; + + asm volatile ("fstd %fr0,8(%sp)"); + } else { + panic("must have an fpu to boot linux"); + } + + start_kernel(); + // not reached +} -- cgit v1.2.3 From 0be7d1fe4361bb9f2ebbd6fa394687cbe4bea950 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Sat, 9 Aug 2008 14:38:18 -0400 Subject: parisc: add new syscalls Signed-off-by: Kyle McMartin --- arch/parisc/include/asm/unistd.h | 10 ++++++++-- arch/parisc/kernel/syscall_table.S | 6 ++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index a7d857f0e4f..ef26b009dc5 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -801,8 +801,14 @@ #define __NR_timerfd_create (__NR_Linux + 306) #define __NR_timerfd_settime (__NR_Linux + 307) #define __NR_timerfd_gettime (__NR_Linux + 308) - -#define __NR_Linux_syscalls (__NR_timerfd_gettime + 1) +#define __NR_signalfd4 (__NR_Linux + 309) +#define __NR_eventfd2 (__NR_Linux + 310) +#define __NR_epoll_create1 (__NR_Linux + 311) +#define __NR_dup3 (__NR_Linux + 312) +#define __NR_pipe2 (__NR_Linux + 313) +#define __NR_inotify_init1 (__NR_Linux + 314) + +#define __NR_Linux_syscalls (__NR_inotify_init1 + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 6b5ac38f5a9..6084667eacf 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -407,6 +407,12 @@ ENTRY_SAME(timerfd_create) ENTRY_COMP(timerfd_settime) ENTRY_COMP(timerfd_gettime) + ENTRY_COMP(signalfd4) + ENTRY_SAME(eventfd2) /* 310 */ + ENTRY_SAME(epoll_create1) + ENTRY_SAME(dup3) + ENTRY_SAME(pipe2) + ENTRY_SAME(inotify_init1) /* Nothing yet */ -- cgit v1.2.3 From f0514ae323f19ba1ad4bea4174ea274c812f7eee Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 11 Sep 2008 10:17:23 -0400 Subject: parisc: initialize unwinder much earlier The unwinder was being initialized way too late to be any use debugging early boot crashes. Instead of relying on module_init initcalls to initialize it, let's do it explicitly as early as we can. Signed-off-by: James Bottomley Signed-off-by: Kyle McMartin --- arch/parisc/include/asm/unwind.h | 2 ++ arch/parisc/kernel/setup.c | 2 ++ arch/parisc/kernel/unwind.c | 4 +--- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/unwind.h b/arch/parisc/include/asm/unwind.h index 2f7e6e50a15..52482e4fc20 100644 --- a/arch/parisc/include/asm/unwind.h +++ b/arch/parisc/include/asm/unwind.h @@ -74,4 +74,6 @@ void unwind_frame_init_running(struct unwind_frame_info *info, struct pt_regs *r int unwind_once(struct unwind_frame_info *info); int unwind_to_user(struct unwind_frame_info *info); +int unwind_init(void); + #endif diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index a59b71efdbe..7d27853ff8c 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -44,6 +44,7 @@ #include #include #include +#include static char __initdata command_line[COMMAND_LINE_SIZE]; @@ -123,6 +124,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_64BIT extern int parisc_narrow_firmware; #endif + unwind_init(); init_per_cpu(smp_processor_id()); /* Set Modes & Enable FP */ diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 701b2d2d888..6773c582e45 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -170,7 +170,7 @@ void unwind_table_remove(struct unwind_table *table) } /* Called from setup_arch to import the kernel unwind info */ -static int unwind_init(void) +int unwind_init(void) { long start, stop; register unsigned long gp __asm__ ("r27"); @@ -417,5 +417,3 @@ int unwind_to_user(struct unwind_frame_info *info) return ret; } - -module_init(unwind_init); -- cgit v1.2.3 From 9eb1686423756f4dfb0ad8bfb02bb8bf1b89e50a Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 10 Sep 2008 14:24:07 +0000 Subject: parisc: add rtc platform driver Signed-off-by: Kyle McMartin --- arch/parisc/Kconfig | 2 ++ arch/parisc/kernel/time.c | 20 +++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a7d4fd353c2..0a8ac703dbe 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -9,6 +9,8 @@ config PARISC def_bool y select HAVE_IDE select HAVE_OPROFILE + select RTC_CLASS + select RTC_DRV_PARISC help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 24be86bba94..4d09203bc69 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -215,6 +216,24 @@ void __init start_cpu_itimer(void) cpu_data[cpu].it_value = next_tick; } +struct platform_device rtc_parisc_dev = { + .name = "rtc-parisc", + .id = -1, +}; + +static int __init rtc_init(void) +{ + int ret; + + ret = platform_device_register(&rtc_parisc_dev); + if (ret < 0) + printk(KERN_ERR "unable to register rtc device...\n"); + + /* not necessarily an error */ + return 0; +} +module_init(rtc_init); + void __init time_init(void) { static struct pdc_tod tod_data; @@ -245,4 +264,3 @@ void __init time_init(void) xtime.tv_nsec = 0; } } - -- cgit v1.2.3 From 611b1597680dd4a57896bcca1af0484be463c55e Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 21 Jul 2008 18:49:56 +0300 Subject: x86: fix mmiotrace 8-bit register decoding When SIL, DIL, BPL or SPL registers were used in MMIO, the datum was extracted from AH, BH, CH, or DH, which are incorrect. Signed-off-by: Pekka Paalanen Cc: "Vegard Nossum" Cc: "Steven Rostedt" Cc: proski@gnu.org Cc: "Pekka Enberg" Signed-off-by: Ingo Molnar --- arch/x86/mm/pf_in.c | 121 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 37 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index efa1911e20c..df3d5c861cd 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 }; static unsigned int mw64[] = { 0x89, 0x8B }; #endif /* not __i386__ */ -static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, - int *rexr) +struct prefix_bits { + unsigned shorted:1; + unsigned enlarged:1; + unsigned rexr:1; + unsigned rex:1; +}; + +static int skip_prefix(unsigned char *addr, struct prefix_bits *prf) { int i; unsigned char *p = addr; - *shorted = 0; - *enlarged = 0; - *rexr = 0; + prf->shorted = 0; + prf->enlarged = 0; + prf->rexr = 0; + prf->rex = 0; restart: for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { if (*p == prefix_codes[i]) { if (*p == 0x66) - *shorted = 1; + prf->shorted = 1; #ifdef __amd64__ if ((*p & 0xf8) == 0x48) - *enlarged = 1; + prf->enlarged = 1; if ((*p & 0xf4) == 0x44) - *rexr = 1; + prf->rexr = 1; + if ((*p & 0xf0) == 0x40) + prf->rex = 1; #endif p++; goto restart; @@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int shorted, enlarged, rexr; + struct prefix_bits prf; int i; enum reason_type rv = OTHERS; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); CHECK_OP_TYPE(opcode, reg_rop, REG_READ); @@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(rw8); i++) @@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) for (i = 0; i < ARRAY_SIZE(rw32); i++) if (rw32[i] == opcode) - return (shorted ? 2 : (enlarged ? 8 : 4)); + return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); return 0; @@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(mw8); i++) @@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) for (i = 0; i < ARRAY_SIZE(mw32); i++) if (mw32[i] == opcode) - return shorted ? 2 : 4; + return prf.shorted ? 2 : 4; for (i = 0; i < ARRAY_SIZE(mw64); i++) if (mw64[i] == opcode) - return shorted ? 2 : (enlarged ? 8 : 4); + return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); return 0; @@ -238,7 +249,7 @@ enum { #endif }; -static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs) { unsigned char *rv = NULL; @@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs) case arg_DL: rv = (unsigned char *)®s->dx; break; - case arg_AH: - rv = 1 + (unsigned char *)®s->ax; - break; - case arg_BH: - rv = 1 + (unsigned char *)®s->bx; - break; - case arg_CH: - rv = 1 + (unsigned char *)®s->cx; - break; - case arg_DH: - rv = 1 + (unsigned char *)®s->dx; - break; #ifdef __amd64__ case arg_R8: rv = (unsigned char *)®s->r8; @@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs) break; #endif default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); break; } + + if (rv) + return rv; + + if (rex) { + /* + * If REX prefix exists, access low bytes of SI etc. + * instead of AH etc. + */ + switch (no) { + case arg_SI: + rv = (unsigned char *)®s->si; + break; + case arg_DI: + rv = (unsigned char *)®s->di; + break; + case arg_BP: + rv = (unsigned char *)®s->bp; + break; + case arg_SP: + rv = (unsigned char *)®s->sp; + break; + default: + break; + } + } else { + switch (no) { + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; + default: + break; + } + } + + if (!rv) + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + return rv; } @@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) unsigned char mod_rm; int reg; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; unsigned long rv; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(reg_rop); i++) if (reg_rop[i] == opcode) { @@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) do_work: mod_rm = *p; - reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); switch (get_ins_reg_width(ins_addr)) { case 1: - return *get_reg_w8(reg, regs); + return *get_reg_w8(reg, prf.rex, regs); case 2: return *(unsigned short *)get_reg_w32(reg, regs); @@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr) unsigned char mod_rm; unsigned char mod; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; unsigned long rv; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(imm_wop); i++) if (imm_wop[i] == opcode) { -- cgit v1.2.3 From 8b1fa1d7b22f386747c7b78b918d4c680c16066f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 12:36:02 +0200 Subject: ftrace: mark lapic_wd_event() notrace it can be called in the NMI path: [ 0.645999] calling ftrace_dynamic_init+0x0/0xd6 [ 0.647521] ------------[ cut here ]------------ [ 0.647521] WARNING: at kernel/trace/ftrace.c:348 ftrace_record_ip+0x4e/0x252() [ 0.647521] Modules linked in: [ 0.647521] Pid: 15, comm: kstop1 Not tainted 2.6.27-rc1-tip #22686 [ 0.647521] [ 0.647521] Call Trace: [ 0.647521] [] warn_on_slowpath+0x5d/0x84 [ 0.647521] [] ? lapic_wd_event+0xb/0x5c [ 0.647521] [] ftrace_record_ip+0x4e/0x252 [ 0.647521] [] mcount_call+0x5/0x31 [ 0.647521] [] ? lapic_wd_event+0x10/0x5c [ 0.647521] [] nmi_watchdog_tick+0x19d/0x1ad [ 0.647521] [] default_do_nmi+0x75/0x1e3 [ 0.647521] [] do_nmi+0x5d/0x94 [ 0.647521] [] nmi+0xa2/0xc2 [ 0.647521] [] ? check_bytes_and_report+0x11/0xcc [ 0.647521] <> [] ? mcount_call+0x5/0x31 [ 0.647521] [] check_object+0x61/0x1b0 [ 0.647521] [] __slab_free+0x169/0x2ae [ 0.647521] [] ? __cleanup_sighand+0x25/0x27 [ 0.647521] [] ? __cleanup_sighand+0x25/0x27 [ 0.647521] [] kmem_cache_free+0x85/0xb9 [ 0.647521] [] __cleanup_sighand+0x25/0x27 [ 0.647521] [] release_task+0x256/0x339 [ 0.647521] [] do_exit+0x764/0x7ef [ 0.647521] [] __xchg+0x0/0x38 [ 0.647521] [] ? stop_cpu+0x0/0xb2 [ 0.647521] [] ? stop_cpu+0x0/0xb2 [ 0.647521] [] kthread+0x4e/0x7b [ 0.647521] [] child_rip+0xa/0x11 [ 0.647521] [] ? restore_args+0x0/0x30 [ 0.647521] [] ? native_load_tls+0x14/0x2e [ 0.647521] [] ? kthread+0x0/0x7b [ 0.647521] [] ? child_rip+0x0/0x11 [ 0.647521] [ 0.647521] ---[ end trace 4eaa2a86a8e2da22 ]--- [ 0.672032] initcall ftrace_dynamic_init+0x0/0xd6 returned 0 after 19 msecs also mark it no-kprobes while at it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 6bff382094f..9abd48b2267 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -17,6 +17,8 @@ #include #include #include +#include + #include #include @@ -336,7 +338,8 @@ static void single_msr_unreserve(void) release_perfctr_nmi(wd_ops->perfctr); } -static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes +single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { /* start the cycle over again */ write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); @@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz) return 1; } -static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { /* * P6 based Pentium M need to re-unmask @@ -605,7 +608,7 @@ static void p4_unreserve(void) release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); } -static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { unsigned dummy; /* @@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz) return hz; } -int lapic_wd_event(unsigned nmi_hz) +int __kprobes lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; -- cgit v1.2.3 From e4b2b8866121bd06d5f3d9de0f79a04339ffa252 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 15:45:11 -0400 Subject: ftrace: enable using mcount recording on x86 Enable the use of the __mcount_loc infrastructure on x86_64 and i386. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fc8351f374f..5a34c5427a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -25,6 +25,7 @@ config X86 select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_KRETPROBES + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) -- cgit v1.2.3 From 0a37605c2261a06d8cafc62dee11374ad676c8c4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 15:45:12 -0400 Subject: ftrace: x86 mcount stub x86 now sets up the mcount locations through the build and no longer needs to record the ip when the function is executed. This patch changes the initial mcount to simply return. There's no need to do any other work. If the ftrace start up test fails, the original mcount will be what everything will use, so having this as fast as possible is a good thing. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 14 -------------- arch/x86/kernel/entry_64.S | 26 -------------------------- arch/x86/kernel/ftrace.c | 14 ++------------ 3 files changed, 2 insertions(+), 52 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index b21fbfaffe3..4e4269c73bb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - subl $MCOUNT_INSN_SIZE, %eax - -.globl mcount_call -mcount_call: - call ftrace_stub - - popl %edx - popl %ecx - popl %eax - ret END(mcount) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1db6ce4314e..09e7145484c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -64,32 +64,6 @@ #ifdef CONFIG_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) - - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - - movq 0x38(%rsp), %rdi - subq $MCOUNT_INSN_SIZE, %rdi - -.globl mcount_call -mcount_call: - call ftrace_stub - - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp - retq END(mcount) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ab115cd15fd..96aadbfedcc 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -112,18 +112,8 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) notrace int ftrace_mcount_set(unsigned long *data) { - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - + /* mcount is initialized as a nop */ + *data = 0; return 0; } -- cgit v1.2.3 From 732f3ca7d4ba3c1be8d051d52302ef441ee7748b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 18:05:05 -0400 Subject: ftrace: use only 5 byte nops for x86 Mathieu Desnoyers revealed a bug in the original code. The nop that is used to relpace the mcount caller can be a two part nop. This runs the risk where a process can be preempted after executing the first nop, but before the second part of the nop. The ftrace code calls kstop_machine to keep multiple CPUs from executing code that is being modified, but it does not protect against a task preempting in the middle of a two part nop. If the above preemption happens and the tracer is enabled, after the kstop_machine runs, all those nops will be calls to the trace function. If the preempted process that was preempted between the two nops is executed again, it will execute half of the call to the trace function, and this might crash the system. This patch instead uses what both the latest Intel and AMD spec suggests. That is the P6_NOP5 sequence of "0x0f 0x1f 0x44 0x00 0x00". Note, some older CPUs and QEMU might fault on this nop, so this nop is executed with fault handling first. If it detects a fault, it will then use the code "0x66 0x66 0x66 0x66 0x90". If that faults, it will then default to a simple "jmp 1f; .byte 0x00 0x00 0x00; 1:". The jmp is not optimal but will do if the first two can not be executed. TODO: Examine the cpuid to determine the nop to use. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 68 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 96aadbfedcc..4151c91254e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -16,8 +16,8 @@ #include #include -#include #include +#include /* Long is fine, even if it is only 4 bytes ;-) */ @@ -119,13 +119,67 @@ notrace int ftrace_mcount_set(unsigned long *data) int __init ftrace_dyn_arch_init(void *data) { - const unsigned char *const *noptable = find_nop_table(); - - /* This is running in kstop_machine */ - - ftrace_mcount_set(data); + extern const unsigned char ftrace_test_p6nop[]; + extern const unsigned char ftrace_test_nop5[]; + extern const unsigned char ftrace_test_jmp[]; + int faulted = 0; - ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; + /* + * There is no good nop for all x86 archs. + * We will default to using the P6_NOP5, but first we + * will test to make sure that the nop will actually + * work on this CPU. If it faults, we will then + * go to a lesser efficient 5 byte nop. If that fails + * we then just use a jmp as our nop. This isn't the most + * efficient nop, but we can not use a multi part nop + * since we would then risk being preempted in the middle + * of that nop, and if we enabled tracing then, it might + * cause a system crash. + * + * TODO: check the cpuid to determine the best nop. + */ + asm volatile ( + "jmp ftrace_test_jmp\n" + /* This code needs to stay around */ + ".section .text, \"ax\"\n" + "ftrace_test_jmp:" + "jmp ftrace_test_p6nop\n" + ".byte 0x00,0x00,0x00\n" /* 2 byte jmp + 3 bytes */ + "ftrace_test_p6nop:" + P6_NOP5 + "jmp 1f\n" + "ftrace_test_nop5:" + ".byte 0x66,0x66,0x66,0x66,0x90\n" + "jmp 1f\n" + ".previous\n" + "1:" + ".section .fixup, \"ax\"\n" + "2: movl $1, %0\n" + " jmp ftrace_test_nop5\n" + "3: movl $2, %0\n" + " jmp 1b\n" + ".previous\n" + _ASM_EXTABLE(ftrace_test_p6nop, 2b) + _ASM_EXTABLE(ftrace_test_nop5, 3b) + : "=r"(faulted) : "0" (faulted)); + + switch (faulted) { + case 0: + pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); + ftrace_nop = (unsigned long *)ftrace_test_p6nop; + break; + case 1: + pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); + ftrace_nop = (unsigned long *)ftrace_test_nop5; + break; + case 2: + pr_info("ftrace: converting mcount calls to jmp 1f\n"); + ftrace_nop = (unsigned long *)ftrace_test_jmp; + break; + } + + /* The return code is retured via data */ + *(unsigned long *)data = 0; return 0; } -- cgit v1.2.3 From 6f93fc076a464bfe24e8d4c5fea3f6ca5bdb264d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 Aug 2008 12:55:07 -0400 Subject: ftrace: x86 use copy to and from user functions The modification of code is performed either by kstop_machine, before SMP starts, or on module code before the module is executed. There is no reason to do the modifications from assembly. The copy to and from user functions are sufficient and produces cleaner and easier to read code. Thanks to Benjamin Herrenschmidt for suggesting the idea. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4151c91254e..082d9964262 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -60,11 +61,7 @@ notrace int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned replaced; - unsigned old = *(unsigned *)old_code; /* 4 bytes */ - unsigned new = *(unsigned *)new_code; /* 4 bytes */ - unsigned char newch = new_code[4]; - int faulted = 0; + unsigned char replaced[MCOUNT_INSN_SIZE]; /* * Note: Due to modules and __init, code can @@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * as well as code changing. * * No real locking needed, this code is run through - * kstop_machine. + * kstop_machine, or before SMP starts. */ - asm volatile ( - "1: lock\n" - " cmpxchg %3, (%2)\n" - " jnz 2f\n" - " movb %b4, 4(%2)\n" - "2:\n" - ".section .fixup, \"ax\"\n" - "3: movl $1, %0\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : "=r"(faulted), "=a"(replaced) - : "r"(ip), "r"(new), "c"(newch), - "0"(faulted), "a"(old) - : "memory"); - sync_core(); + if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) + return 1; - if (replaced != old && replaced != new) - faulted = 2; + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return 2; - return faulted; + WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code, + MCOUNT_INSN_SIZE)); + + sync_core(); + + return 0; } notrace int ftrace_update_ftrace_func(ftrace_func_t func) -- cgit v1.2.3 From bbe5c7830c6dbde58726d44ec0337bc8b2d95d37 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 21:54:16 +0300 Subject: x86 mmiotrace: fix a rare memory leak Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 635b50e8558..754bd1eaf4f 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -307,8 +307,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, map.map_id = trace->id; spin_lock_irq(&trace_lock); - if (!is_enabled()) + if (!is_enabled()) { + kfree(trace); goto not_enabled; + } mmio_trace_mapping(&map); list_add_tail(&trace->list, &trace_list); -- cgit v1.2.3 From 9e57fb35d711331a9b1410c5c56ebeb3733428a0 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:00:34 +0300 Subject: x86 mmiotrace: implement mmiotrace_printk() Offer mmiotrace users a function to inject markers from inside the kernel. This depends on the trace_vprintk() patch. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 19 ++++++++++++++++++- arch/x86/mm/testmmiotrace.c | 4 ++++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 754bd1eaf4f..5e2e2e72ee8 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -75,7 +75,7 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ * and trace_lock. * - Routines depending on is_enabled() must take trace_lock. * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that mmio_trace_record is allowed. + * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed. * - pre/post callbacks assume the effect of is_enabled() being true. */ @@ -379,6 +379,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr) iounmap_trace_core(addr); } +int mmiotrace_printk(const char *fmt, ...) +{ + int ret = 0; + va_list args; + unsigned long flags; + va_start(args, fmt); + + spin_lock_irqsave(&trace_lock, flags); + if (is_enabled()) + ret = mmio_trace_printk(fmt, args); + spin_unlock_irqrestore(&trace_lock, flags); + + va_end(args); + return ret; +} +EXPORT_SYMBOL(mmiotrace_printk); + static void clear_trace_list(void) { struct remap_trace *trace; diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index d877c5b423e..ab50a8d7402 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -3,6 +3,7 @@ */ #include #include +#include #define MODULE_NAME "testmmiotrace" @@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); static void do_write_test(void __iomem *p) { unsigned int i; + mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) iowrite8(i, p + i); for (i = 1024; i < (5 * 1024); i += 2) @@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p) static void do_read_test(void __iomem *p) { unsigned int i; + mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) ioread8(p + i); for (i = 1024; i < (5 * 1024); i += 2) @@ -39,6 +42,7 @@ static void do_test(void) pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } + mmiotrace_printk("ioremap returned %p.\n", p); do_write_test(p); do_read_test(p); iounmap(p); -- cgit v1.2.3 From 4427414170a63331a9cc36b9598502c5cdfe453b Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:03:56 +0300 Subject: mmiotrace: remove left-over marker cruft Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 64 -------------------------------------------------- 1 file changed, 64 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 5e2e2e72ee8..2c4baa88f2c 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -56,13 +56,6 @@ struct remap_trace { static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); -#if 0 /* XXX: no way gather this info anymore */ -/* Access to this is not per-cpu. */ -static DEFINE_PER_CPU(atomic_t, dropped); -#endif - -static struct dentry *marker_file; - static DEFINE_MUTEX(mmiotrace_mutex); static DEFINE_SPINLOCK(trace_lock); static atomic_t mmiotrace_enabled; @@ -97,44 +90,6 @@ static bool is_enabled(void) return atomic_read(&mmiotrace_enabled); } -#if 0 /* XXX: needs rewrite */ -/* - * Write callback for the debugfs entry: - * Read a marker and write it to the mmio trace log - */ -static ssize_t write_marker(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - char *event = NULL; - struct mm_io_header *headp; - ssize_t len = (count > 65535) ? 65535 : count; - - event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); - if (!event) - return -ENOMEM; - - headp = (struct mm_io_header *)event; - headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); - headp->data_len = len; - - if (copy_from_user(event + sizeof(*headp), buffer, len)) { - kfree(event); - return -EFAULT; - } - - spin_lock_irq(&trace_lock); -#if 0 /* XXX: convert this to use tracing */ - if (is_enabled()) - relay_write(chan, event, sizeof(*headp) + len); - else -#endif - len = -EINVAL; - spin_unlock_irq(&trace_lock); - kfree(event); - return len; -} -#endif - static void print_pte(unsigned long address) { unsigned int level; @@ -481,26 +436,12 @@ static void leave_uniprocessor(void) } #endif -#if 0 /* XXX: out of order */ -static struct file_operations fops_marker = { - .owner = THIS_MODULE, - .write = write_marker -}; -#endif - void enable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (is_enabled()) goto out; -#if 0 /* XXX: tracing does not support text entries */ - marker_file = debugfs_create_file("marker", 0660, dir, NULL, - &fops_marker); - if (!marker_file) - pr_err(NAME "marker file creation failed.\n"); -#endif - if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); enter_uniprocessor(); @@ -525,11 +466,6 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); - if (marker_file) { - debugfs_remove(marker_file); - marker_file = NULL; - } - pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); -- cgit v1.2.3 From 37a52f5ef120b93734bb2461744512b55695f69c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 15:05:46 -0700 Subject: x86: suppress trivial sparse signedness warnings Could just as easily change the three casts to cast to the correct type...this patch changes the type of ftrace_nop instead. Supresses sparse warnings: arch/x86/kernel/ftrace.c:157:14: warning: incorrect type in assignment (different signedness) arch/x86/kernel/ftrace.c:157:14: expected long *static [toplevel] ftrace_nop arch/x86/kernel/ftrace.c:157:14: got unsigned long * arch/x86/kernel/ftrace.c:161:14: warning: incorrect type in assignment (different signedness) arch/x86/kernel/ftrace.c:161:14: expected long *static [toplevel] ftrace_nop arch/x86/kernel/ftrace.c:161:14: got unsigned long * arch/x86/kernel/ftrace.c:165:14: warning: incorrect type in assignment (different signedness) arch/x86/kernel/ftrace.c:165:14: expected long *static [toplevel] ftrace_nop arch/x86/kernel/ftrace.c:165:14: got unsigned long * Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 082d9964262..66d900248fc 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -22,7 +22,7 @@ /* Long is fine, even if it is only 4 bytes ;-) */ -static long *ftrace_nop; +static unsigned long *ftrace_nop; union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; -- cgit v1.2.3 From ac2b86fdef5b44f194eefaa6b7b6aea9423d1bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= Date: Wed, 24 Sep 2008 16:31:56 +0100 Subject: x86/ftrace: use uaccess in atomic context With latest -tip I get this bug: [ 49.439988] in_atomic():0, irqs_disabled():1 [ 49.440118] INFO: lockdep is turned off. [ 49.440118] Pid: 2814, comm: modprobe Tainted: G W 2.6.27-rc7 #4 [ 49.440118] [] __might_sleep+0xe1/0x120 [ 49.440118] [] ftrace_modify_code+0x2a/0xd0 [ 49.440118] [] ? ftrace_test_p6nop+0x0/0xa [ 49.440118] [] __ftrace_update_code+0xfe/0x2f0 [ 49.440118] [] ? ftrace_test_p6nop+0x0/0xa [ 49.440118] [] ftrace_convert_nops+0x50/0x80 [ 49.440118] [] ftrace_init_module+0x16/0x20 [ 49.440118] [] load_module+0x185b/0x1d30 [ 49.440118] [] ? find_get_page+0x0/0xf0 [ 49.440118] [] ? sprintf+0x0/0x30 [ 49.440118] [] ? mutex_lock_interruptible_nested+0x1f2/0x350 [ 49.440118] [] sys_init_module+0x53/0x1b0 [ 49.440118] [] ? do_page_fault+0x0/0x740 [ 49.440118] [] syscall_call+0x7/0xb [ 49.440118] ======================= It is because ftrace_modify_code() calls copy_to_user and copy_from_user. These functions have been inserted after guessing that there couldn't be any race condition but copy_[to/from]_user might sleep and __ftrace_update_code is called with local_irq_saved. These function have been inserted since this commit: d5e92e8978fd2574e415dc2792c5eb592978243d: "ftrace: x86 use copy from user function" Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 66d900248fc..222507e8157 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -71,13 +71,13 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ - if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) + if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) return 1; if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) return 2; - WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code, + WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code, MCOUNT_INSN_SIZE)); sync_core(); -- cgit v1.2.3 From 5b9261d93e5fa3db4995d5b77b5ed365166e001c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 29 Sep 2008 11:11:47 -0400 Subject: sputrace: use marker_synchronize_unregister() We need a marker_synchronize_unregister() before the end of exit() to make sure every probe callers have exited the non preemptible section and thus are not executing the probe code anymore. Signed-off-by: Mathieu Desnoyers Acked-by: Jeremy Kerr Signed-off-by: Ingo Molnar --- arch/powerpc/platforms/cell/spufs/sputrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c index 92d20e993ed..2ece399f286 100644 --- a/arch/powerpc/platforms/cell/spufs/sputrace.c +++ b/arch/powerpc/platforms/cell/spufs/sputrace.c @@ -232,6 +232,7 @@ static void __exit sputrace_exit(void) remove_proc_entry("sputrace", NULL); kfree(sputrace_log); + marker_synchronize_unregister(); } module_init(sputrace_init); -- cgit v1.2.3 From 8b27386a9ce9c7f0f8702cff7565a46802ad57d1 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Thu, 9 Oct 2008 22:19:08 -0400 Subject: ftrace: make ftrace_test_p6nop disassembler-friendly Commit 4c3dc21b136f8cb4b72afee16c3ba7e961656c0b in tip introduced the 5-byte NOP ftrace_test_p6nop: jmp . + 5 .byte 0x00, 0x00, 0x00 This is not friendly to disassemblers because an odd number of 0x00s ends in the middle of an instruction boundary. This changes the 0x00s to 1-byte NOPs (0x90). Signed-off-by: Anders Kaseorg Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 222507e8157..d073d981a73 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -132,7 +132,9 @@ int __init ftrace_dyn_arch_init(void *data) ".section .text, \"ax\"\n" "ftrace_test_jmp:" "jmp ftrace_test_p6nop\n" - ".byte 0x00,0x00,0x00\n" /* 2 byte jmp + 3 bytes */ + "nop\n" + "nop\n" + "nop\n" /* 2 byte jmp + 3 bytes */ "ftrace_test_p6nop:" P6_NOP5 "jmp 1f\n" @@ -161,7 +163,7 @@ int __init ftrace_dyn_arch_init(void *data) ftrace_nop = (unsigned long *)ftrace_test_nop5; break; case 2: - pr_info("ftrace: converting mcount calls to jmp 1f\n"); + pr_info("ftrace: converting mcount calls to jmp . + 5\n"); ftrace_nop = (unsigned long *)ftrace_test_jmp; break; } -- cgit v1.2.3 From 463baa8a0947f858d6db1c56d87eeaf1176ba7bb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 16 Oct 2008 20:29:07 +1100 Subject: powerpc: fix linux-next build failure Today's linux-next build (powerpc allyesconfig) failed like this: In file included from arch/powerpc/include/asm/mmu-hash64.h:17, from arch/powerpc/include/asm/mmu.h:8, from arch/powerpc/include/asm/pgtable.h:8, from arch/powerpc/mm/slb.c:20: arch/powerpc/include/asm/page.h:76: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'memstart_addr' arch/powerpc/include/asm/page.h:77: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'kernstart_addr' Caused by commit 600715dcdf567c86f8b2c6173fcfb4b873e25a19 ("generic: add phys_addr_t for holding physical addresses") from the tip-core tree. This only fails if CONFIG_RELOCATABLE is set. So include that instead of asm/types.h in asm/page.h for the CONFIG_RELOCATABLE case. Signed-off-by: Stephen Rothwell Cc: ppc-dev Cc: Andrew Morton Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/page.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index e088545cb3f..94fe5138b30 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -10,9 +10,13 @@ * 2 of the License, or (at your option) any later version. */ +#ifndef __ASSEMBLY__ +#include +#else +#include +#endif #include #include -#include /* * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software -- cgit v1.2.3 From fe648be019119722ec0ac54e3a4b2e5bf5168589 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:41 -0700 Subject: x86: add after_bootmem flag for 32bit to prepare to use dyn_array support etc. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8396868e82c..91343c2694b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -66,6 +66,7 @@ static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; static int __initdata after_init_bootmem; +int after_bootmem; static __init void *alloc_low_page(unsigned long *phys) { @@ -988,6 +989,8 @@ void __init mem_init(void) set_highmem_pages_init(); + after_bootmem = 1; + codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; -- cgit v1.2.3 From 1f3fcd4b1adc972d5c6a34cfed98931c46575b49 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:44 -0700 Subject: add per_cpu_dyn_array support allow dyn-array in per_cpu area, allocated dynamically. usage: | /* in .h */ | struct kernel_stat { | struct cpu_usage_stat cpustat; | unsigned int *irqs; | }; | | /* in .c */ | DEFINE_PER_CPU(struct kernel_stat, kstat); | | DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL); after setup_percpu()/per_cpu_alloc_dyn_array(), the dyn_array in per_cpu area is ready to use. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0e67f72d931..13ba7a83808 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size = PERCPU_ENOUGH_ROOM; + ssize_t size, old_size; char *ptr; int cpu; @@ -148,7 +148,8 @@ void __init setup_per_cpu_areas(void) setup_cpu_pda_map(); /* Copy section for each CPU (we discard the original) */ - size = PERCPU_ENOUGH_ROOM; + old_size = PERCPU_ENOUGH_ROOM; + size = old_size + per_cpu_dyn_array_size(); printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -176,6 +177,8 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + per_cpu_alloc_dyn_array(cpu, ptr + old_size); + } printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", -- cgit v1.2.3 From 1f8ff037a871690c762d267d8a052529d3102fc9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:45 -0700 Subject: x86: alloc dyn_array all together so could spare some memory with small alignment in bootmem also tighten the alignment checking, and make print out less debug info. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 13ba7a83808..2b7dab699e8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -140,26 +140,31 @@ static void __init setup_cpu_pda_map(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size; + ssize_t size, old_size, da_size; char *ptr; int cpu; + unsigned long align = 1; /* Setup cpu_pda map */ setup_cpu_pda_map(); /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; - size = old_size + per_cpu_dyn_array_size(); + da_size = per_cpu_dyn_array_size(&align); + align = max_t(unsigned long, PAGE_SIZE, align); + size = roundup(old_size + da_size, align); printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = alloc_bootmem_pages(size); + ptr = __alloc_bootmem(size, align, + __pa(MAX_DMA_ADDRESS)); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { - ptr = alloc_bootmem_pages(size); + ptr = __alloc_bootmem(size, align, + __pa(MAX_DMA_ADDRESS)); printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); @@ -168,7 +173,8 @@ void __init setup_per_cpu_areas(void) cpu, __pa(ptr)); } else { - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, + __pa(MAX_DMA_ADDRESS)); if (ptr) printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", cpu, node, __pa(ptr)); -- cgit v1.2.3 From 6da55c3e8da88e8a7cb6452160776ad6706798ad Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:46 -0700 Subject: x86: enable dyn_array support Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/Kconfig | 2 ++ arch/x86/Kconfig | 1 + arch/x86/kernel/vmlinux_32.lds.S | 1 + arch/x86/kernel/vmlinux_64.lds.S | 3 +++ 4 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 0267babe5eb..c1f9febb404 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -103,3 +103,5 @@ config HAVE_CLK The calls support software clock gating and thus are a key power management tool on many systems. +config HAVE_DYN_ARRAY + def_bool n diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f65c2744d57..42f98009d75 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -33,6 +33,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_DYN_ARRAY config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index a9b8560adbc..c36007ab394 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -145,6 +145,7 @@ SECTIONS *(.x86_cpu_dev.init) __x86_cpu_dev_end = .; } + DYN_ARRAY_INIT(8) SECURITY_INIT . = ALIGN(4); .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 46e05447405..30973dbac8c 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -173,6 +173,9 @@ SECTIONS *(.x86_cpu_dev.init) } __x86_cpu_dev_end = .; + + DYN_ARRAY_INIT(8) + SECURITY_INIT . = ALIGN(8); -- cgit v1.2.3 From 85c0f90978bf50596dbd23854648020f1f9b5bfd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:47 -0700 Subject: irq: introduce nr_irqs at this point nr_irqs is equal NR_IRQS convert a few easy users from NR_IRQS to dynamic nr_irqs. v2: according to Eric, we need to take care of arch without generic_hardirqs Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/m68k/kernel/ints.c | 2 ++ arch/s390/kernel/irq.c | 2 ++ arch/sparc/kernel/irq.c | 3 +++ 3 files changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c index 7e8a0d394e6..74453d15692 100644 --- a/arch/m68k/kernel/ints.c +++ b/arch/m68k/kernel/ints.c @@ -46,6 +46,8 @@ #include #endif +int nr_irqs = NR_IRQS; + extern u32 auto_irqhandler_fixup[]; extern u32 user_irqhandler_fixup[]; extern u16 user_irqvec_fixup[]; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index e7c5bfb7c75..14eb5496c8a 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -17,6 +17,8 @@ #include #include +int nr_irqs = NR_IRQS; + /* * show_interrupts is needed by /proc/interrupts. */ diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c index 93e1d1c6529..059598b7e0f 100644 --- a/arch/sparc/kernel/irq.c +++ b/arch/sparc/kernel/irq.c @@ -55,6 +55,9 @@ #define SMP_NOP2 #define SMP_NOP3 #endif /* SMP */ + +int nr_irqs = NR_IRQS; + unsigned long __raw_local_irq_save(void) { unsigned long retval; -- cgit v1.2.3 From 0799e432acfda879eaeef9622426bfa1434f3786 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:48 -0700 Subject: x86: use nr_irqs also add first_free_entry and pin_map_size, which were NR_IRQS derived constants. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 26 ++++++++++++++------------ arch/x86/kernel/io_apic_64.c | 33 +++++++++++++++++---------------- arch/x86/kernel/irq_32.c | 8 ++++---- arch/x86/kernel/irq_64.c | 8 ++++---- arch/x86/kernel/irqinit_32.c | 2 +- arch/x86/kernel/irqinit_64.c | 2 +- 6 files changed, 41 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index e710289f673..d382990244f 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -70,6 +70,7 @@ int timer_through_8259 __initdata; */ int sis_apic_bug = -1; +int first_free_entry = NR_IRQS; /* * # of IRQ routing registers */ @@ -100,6 +101,8 @@ static int disable_timer_pin_1 __initdata; #define MAX_PLUS_SHARED_IRQS NR_IRQS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +int pin_map_size = PIN_MAP_SIZE; + /* * This is performance-critical, we want to do it O(1) * @@ -213,7 +216,6 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; while (entry->next) @@ -222,7 +224,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) if (entry->pin != -1) { entry->next = first_free_entry; entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) + if (++first_free_entry >= pin_map_size) panic("io_apic.c: whoops"); } entry->apic = apic; @@ -457,7 +459,7 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) int i, j; for_each_online_cpu(i) { - for (j = 0; j < NR_IRQS; j++) { + for (j = 0; j < nr_irqs; j++) { if (!irq_desc[j].action) continue; /* Is it a significant load ? */ @@ -492,7 +494,7 @@ static void do_irq_balance(void) if (!cpu_online(i)) continue; package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < NR_IRQS; j++) { + for (j = 0; j < nr_irqs; j++) { unsigned long value_now, delta; /* Is this an active IRQ or balancing disabled ? */ if (!irq_desc[j].action || irq_balancing_disabled(j)) @@ -587,7 +589,7 @@ tryanotherirq: */ move_this_load = 0; selected_irq = -1; - for (j = 0; j < NR_IRQS; j++) { + for (j = 0; j < nr_irqs; j++) { /* Is this an active IRQ? */ if (!irq_desc[j].action) continue; @@ -664,7 +666,7 @@ static int balanced_irq(void *unused) long time_remaining = balanced_irq_interval; /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < NR_IRQS ; i++) { + for (i = 0 ; i < nr_irqs ; i++) { irq_desc[i].pending_mask = cpumask_of_cpu(0); set_pending_irq(i, cpumask_of_cpu(0)); } @@ -712,8 +714,8 @@ static int __init balanced_irq_init(void) physical_balance = 1; for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); + irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { printk(KERN_ERR "balanced_irq_init: out of memory"); goto failed; @@ -1441,7 +1443,7 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { + for (i = 0; i < nr_irqs; i++) { struct irq_pin_list *entry = irq_2_pin + i; if (entry->pin < 0) continue; @@ -1621,7 +1623,7 @@ static void __init enable_IO_APIC(void) int i, apic; unsigned long flags; - for (i = 0; i < PIN_MAP_SIZE; i++) { + for (i = 0; i < pin_map_size; i++) { irq_2_pin[i].pin = -1; irq_2_pin[i].next = 0; } @@ -2005,7 +2007,7 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < NR_IRQS ; irq++) { + for (irq = 0; irq < nr_irqs ; irq++) { if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { /* * Hmm.. We don't have an entry for this, @@ -2449,7 +2451,7 @@ int create_irq(void) irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { + for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; if (irq_vector[new] != 0) diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 02063ae042f..448384c7c1e 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -132,6 +132,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); #define MAX_PLUS_SHARED_IRQS NR_IRQS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +int pin_map_size = PIN_MAP_SIZE; /* * This is performance-critical, we want to do it O(1) * @@ -224,7 +225,7 @@ static inline void io_apic_sync(unsigned int apic) int pin; \ struct irq_pin_list *entry = irq_2_pin + irq; \ \ - BUG_ON(irq >= NR_IRQS); \ + BUG_ON(irq >= nr_irqs); \ for (;;) { \ unsigned int reg; \ pin = entry->pin; \ @@ -301,7 +302,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) int apic, pin; struct irq_pin_list *entry = irq_2_pin + irq; - BUG_ON(irq >= NR_IRQS); + BUG_ON(irq >= nr_irqs); for (;;) { unsigned int reg; apic = entry->apic; @@ -358,19 +359,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ +int first_free_entry = NR_IRQS; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; - BUG_ON(irq >= NR_IRQS); + BUG_ON(irq >= nr_irqs); while (entry->next) entry = irq_2_pin + entry->next; if (entry->pin != -1) { entry->next = first_free_entry; entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) + if (++first_free_entry >= pin_map_size) panic("io_apic.c: ran out of irq_2_pin entries!"); } entry->apic = apic; @@ -634,7 +635,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) best_guess = irq; } } - BUG_ON(best_guess >= NR_IRQS); + BUG_ON(best_guess >= nr_irqs); return best_guess; } @@ -766,7 +767,7 @@ static int pin_2_irq(int idx, int apic, int pin) irq += nr_ioapic_registers[i++]; irq += pin; } - BUG_ON(irq >= NR_IRQS); + BUG_ON(irq >= nr_irqs); return irq; } @@ -801,7 +802,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) int cpu; struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON((unsigned)irq >= nr_irqs); cfg = &irq_cfg[irq]; /* Only try and allocate irqs on cpus that are present */ @@ -875,7 +876,7 @@ static void __clear_irq_vector(int irq) cpumask_t mask; int cpu, vector; - BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON((unsigned)irq >= nr_irqs); cfg = &irq_cfg[irq]; BUG_ON(!cfg->vector); @@ -895,7 +896,7 @@ void __setup_vector_irq(int cpu) int irq, vector; /* Mark the inuse vectors */ - for (irq = 0; irq < NR_IRQS; ++irq) { + for (irq = 0; irq < nr_irqs; ++irq) { if (!cpu_isset(cpu, irq_cfg[irq].domain)) continue; vector = irq_cfg[irq].vector; @@ -1193,7 +1194,7 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { + for (i = 0; i < nr_irqs; i++) { struct irq_pin_list *entry = irq_2_pin + i; if (entry->pin < 0) continue; @@ -1366,7 +1367,7 @@ void __init enable_IO_APIC(void) int i, apic; unsigned long flags; - for (i = 0; i < PIN_MAP_SIZE; i++) { + for (i = 0; i < pin_map_size; i++) { irq_2_pin[i].pin = -1; irq_2_pin[i].next = 0; } @@ -1658,7 +1659,7 @@ static void ir_irq_migration(struct work_struct *work) { int irq; - for (irq = 0; irq < NR_IRQS; irq++) { + for (irq = 0; irq < nr_irqs; irq++) { struct irq_desc *desc = irq_desc + irq; if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -1707,7 +1708,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_desc *desc; struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; - if (irq >= NR_IRQS) + if (irq >= nr_irqs) continue; desc = irq_desc + irq; @@ -1865,7 +1866,7 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < NR_IRQS ; irq++) { + for (irq = 0; irq < nr_irqs ; irq++) { if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { /* * Hmm.. We don't have an entry for this, @@ -2279,7 +2280,7 @@ int create_irq(void) irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { + for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; if (irq_cfg[new].vector != 0) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index b71e02d42f4..4c7ffb32854 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -226,7 +226,7 @@ unsigned int do_IRQ(struct pt_regs *regs) int overflow, irq = ~regs->orig_ax; struct irq_desc *desc = irq_desc + irq; - if (unlikely((unsigned)irq >= NR_IRQS)) { + if (unlikely((unsigned)irq >= nr_irqs)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", __func__, irq); BUG(); @@ -271,7 +271,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < NR_IRQS) { + if (i < nr_irqs) { unsigned any_count = 0; spin_lock_irqsave(&irq_desc[i].lock, flags); @@ -303,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { + } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); @@ -396,7 +396,7 @@ void fixup_irqs(cpumask_t map) unsigned int irq; static int warned; - for (irq = 0; irq < NR_IRQS; irq++) { + for (irq = 0; irq < nr_irqs; irq++) { cpumask_t mask; if (irq == 2) continue; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f065fe9071b..e1f0839430d 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -81,7 +81,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < NR_IRQS) { + if (i < nr_irqs) { unsigned any_count = 0; spin_lock_irqsave(&irq_desc[i].lock, flags); @@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { + } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); @@ -201,7 +201,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - if (likely(irq < NR_IRQS)) + if (likely(irq < nr_irqs)) generic_handle_irq(irq); else { if (!disable_apic) @@ -224,7 +224,7 @@ void fixup_irqs(cpumask_t map) unsigned int irq; static int warned; - for (irq = 0; irq < NR_IRQS; irq++) { + for (irq = 0; irq < nr_irqs; irq++) { cpumask_t mask; int break_affinity = 0; int set_affinity = 1; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 9200a1e2752..65c1c950770 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -100,7 +100,7 @@ void __init native_init_IRQ(void) */ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) + if (i >= nr_irqs) break; /* SYSCALL_VECTOR was reserved in trap_init. */ if (!test_bit(vector, used_vectors)) diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 5b5be9d43c2..165c5d9b0d1 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -142,7 +142,7 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < NR_IRQS; i++) { + for (i = 0; i < nr_irqs; i++) { irq_desc[i].status = IRQ_DISABLED; irq_desc[i].action = NULL; irq_desc[i].depth = 1; -- cgit v1.2.3 From 301e619020dd67bde7e7e64bb9ffb7f30d26c979 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:02 -0700 Subject: x86: use dyn_array in io_apic_xx.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 54 +++++++++++++++++++++++++++++++++++--------- arch/x86/kernel/io_apic_64.c | 28 +++++++++++++++++------ arch/x86/kernel/setup.c | 6 +++++ 3 files changed, 70 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index d382990244f..7f2bcc3dad8 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -70,7 +70,7 @@ int timer_through_8259 __initdata; */ int sis_apic_bug = -1; -int first_free_entry = NR_IRQS; +int first_free_entry; /* * # of IRQ routing registers */ @@ -98,10 +98,7 @@ static int disable_timer_pin_1 __initdata; * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) - -int pin_map_size = PIN_MAP_SIZE; +int pin_map_size; /* * This is performance-critical, we want to do it O(1) @@ -112,7 +109,9 @@ int pin_map_size = PIN_MAP_SIZE; static struct irq_pin_list { int apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; +} *irq_2_pin; + +DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL); struct io_apic { unsigned int index; @@ -403,9 +402,28 @@ static struct irq_cpu_info { #define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) -static cpumask_t balance_irq_affinity[NR_IRQS] = { - [0 ... NR_IRQS-1] = CPU_MASK_ALL -}; +static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL; + +static cpumask_t *balance_irq_affinity; + + +static void __init irq_affinity_init_work(void *data) +{ + struct dyn_array *da = data; + + int i; + struct balance_irq_affinity *affinity; + + affinity = *da->name; + + for (i = 0; i < *da->nr; i++) + memcpy(&affinity[i], &balance_irq_affinity_init, + sizeof(struct balance_irq_affinity)); + +} + +DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work); + void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) { @@ -1170,14 +1188,28 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; +static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR; +static u8 *irq_vector; + +static void __init irq_vector_init_work(void *data) +{ + struct dyn_array *da = data; + + u8 *irq_vec; + + irq_vec = *da->name; + + irq_vec[0] = irq_vector_init_first; +} + +DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work); static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, current_offset; int vector, offset; - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); + BUG_ON((unsigned)irq >= nr_irqs); if (irq_vector[irq] > 0) return irq_vector[irq]; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 448384c7c1e..93a3ffabfe6 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -66,7 +66,7 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { +static struct irq_cfg irq_cfg_legacy[] __initdata = { [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, @@ -85,6 +85,17 @@ static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; +static struct irq_cfg *irq_cfg; + +static void __init init_work(void *data) +{ + struct dyn_array *da = data; + + memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy)); +} + +DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); + static int assign_irq_vector(int irq, cpumask_t mask); int first_system_vector = 0xfe; @@ -129,10 +140,9 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) -int pin_map_size = PIN_MAP_SIZE; +int pin_map_size; + /* * This is performance-critical, we want to do it O(1) * @@ -141,8 +151,12 @@ int pin_map_size = PIN_MAP_SIZE; */ static struct irq_pin_list { - short apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; + short apic, pin; + int next; +} *irq_2_pin; + +DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL); + struct io_apic { unsigned int index; @@ -359,7 +373,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -int first_free_entry = NR_IRQS; +int first_free_entry; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { struct irq_pin_list *entry = irq_2_pin + irq; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2255782e8d4..558ec26b08e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1067,9 +1067,15 @@ void __init setup_arch(char **cmdline_p) #endif prefill_possible_map(); + #ifdef CONFIG_X86_64 + /* need to wait for nr_cpu_ids settle down */ + if (nr_irqs == NR_IRQS) + nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif + pin_map_size = nr_irqs * 2; + first_free_entry = nr_irqs; init_apic_mappings(); ioapic_init_mappings(); -- cgit v1.2.3 From a84488c213a8cfc29200344a6fb6357d48c8ed85 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Aug 2008 20:50:31 -0700 Subject: irq: sparse irqs, fix #3 fix non-APIC UP build: arch/x86/kernel/built-in.o: In function `setup_arch': : undefined reference to `pin_map_size' arch/x86/kernel/built-in.o: In function `setup_arch': : undefined reference to `first_free_entry' Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 558ec26b08e..02e3a669797 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1074,8 +1074,10 @@ void __init setup_arch(char **cmdline_p) nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif +#ifdef CONFIG_X86_IO_APIC pin_map_size = nr_irqs * 2; first_free_entry = nr_irqs; +#endif init_apic_mappings(); ioapic_init_mappings(); -- cgit v1.2.3 From 71f521bbaf375b685aeea20c6b0ed8600cd6edfe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:03 -0700 Subject: x86, irq: get nr_irqs from madt Until now, NR_IRQS was derived from black magic defines that had to be "large enough" to both accomodate NR_CPUS and MAX_NR_IO_APICs. This resulted in a way too large irq_desc[] array on most x86 systems. Especially with larger CPU masks, the size of irq_desc can spiral out of control quickly. So be smarter about it and use precise allocation instead: determine the default maximum possible IRQ number from the ACPI MADT. Use a minimum limit of at least 32 IRQs for broken BIOSes. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index eb875cdc736..3e9d163fd92 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -957,6 +957,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int get_nr_irqs_via_madt(void) +{ + int idx; + int nr = 0; + + for (idx = 0; idx < nr_ioapics; idx++) { + if (mp_ioapic_routing[idx].gsi_end > nr) + nr = mp_ioapic_routing[idx].gsi_end; + } + + nr++; + + /* double it for hotplug and msi and nmi */ + nr <<= 1; + + /* something wrong ? */ + if (nr < 32) + nr = 32; + + return nr; + +} + static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { @@ -1254,9 +1277,12 @@ static int __init acpi_parse_madt_ioapic_entries(void) return count; } + + nr_irqs = get_nr_irqs_via_madt(); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, - NR_IRQ_VECTORS); + nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); @@ -1276,7 +1302,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, - NR_IRQ_VECTORS); + nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ -- cgit v1.2.3 From 08678b0841267c1d00d771fe01548d86043d065e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:05 -0700 Subject: generic: sparse irqs: use irq_desc() together with dyn_array, instead of irq_desc[] add CONFIG_HAVE_SPARSE_IRQ to for use condensed array. Get rid of irq_desc[] array assumptions. Preallocate 32 irq_desc, and irq_desc() will try to get more. ( No change in functionality is expected anywhere, except the odd build failure where we missed a code site or where a crossing commit itroduces new irq_desc[] usage. ) v2: according to Eric, change get_irq_desc() to irq_desc() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/Kconfig | 4 ++ arch/x86/Kconfig | 1 + arch/x86/kernel/io_apic_32.c | 46 +++++++++++++++++------ arch/x86/kernel/io_apic_64.c | 75 ++++++++++++++++++++++++------------- arch/x86/kernel/irq_32.c | 24 +++++++----- arch/x86/kernel/irq_64.c | 35 +++++++++-------- arch/x86/kernel/irqinit_64.c | 10 +++-- arch/x86/kernel/visws_quirks.c | 30 ++++++++------- arch/x86/mach-voyager/voyager_smp.c | 4 +- 9 files changed, 146 insertions(+), 83 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index c1f9febb404..b3676224626 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -105,3 +105,7 @@ config HAVE_CLK config HAVE_DYN_ARRAY def_bool n + +config HAVE_SPARSE_IRQ + def_bool n + diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 42f98009d75..1004888e9b1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,6 +34,7 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_DYN_ARRAY + select HAVE_SPARSE_IRQ if X86_64 config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 7f2bcc3dad8..c2160cfdec9 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) @@ -365,7 +366,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = irq_2_pin + entry->next; } - irq_desc[irq].affinity = cpumask; + desc = irq_to_desc(irq); + desc->affinity = cpumask; spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -475,10 +477,12 @@ static inline void balance_irq(int cpu, int irq) static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) { int i, j; + struct irq_desc *desc; for_each_online_cpu(i) { for (j = 0; j < nr_irqs; j++) { - if (!irq_desc[j].action) + desc = irq_to_desc(j); + if (!desc->action) continue; /* Is it a significant load ? */ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < @@ -505,6 +509,7 @@ static void do_irq_balance(void) unsigned long tmp_cpu_irq; unsigned long imbalance = 0; cpumask_t allowed_mask, target_cpu_mask, tmp; + struct irq_desc *desc; for_each_possible_cpu(i) { int package_index; @@ -515,7 +520,8 @@ static void do_irq_balance(void) for (j = 0; j < nr_irqs; j++) { unsigned long value_now, delta; /* Is this an active IRQ or balancing disabled ? */ - if (!irq_desc[j].action || irq_balancing_disabled(j)) + desc = irq_to_desc(j); + if (!desc->action || irq_balancing_disabled(j)) continue; if (package_index == i) IRQ_DELTA(package_index, j) = 0; @@ -609,7 +615,8 @@ tryanotherirq: selected_irq = -1; for (j = 0; j < nr_irqs; j++) { /* Is this an active IRQ? */ - if (!irq_desc[j].action) + desc = irq_to_desc(j); + if (!desc->action) continue; if (imbalance <= IRQ_DELTA(max_loaded, j)) continue; @@ -682,10 +689,12 @@ static int balanced_irq(void *unused) int i; unsigned long prev_balance_time = jiffies; long time_remaining = balanced_irq_interval; + struct irq_desc *desc; /* push everything to CPU 0 to give us a starting point. */ for (i = 0 ; i < nr_irqs ; i++) { - irq_desc[i].pending_mask = cpumask_of_cpu(0); + desc = irq_to_desc(i); + desc->pending_mask = cpumask_of_cpu(0); set_pending_irq(i, cpumask_of_cpu(0)); } @@ -1254,13 +1263,16 @@ static struct irq_chip ioapic_chip; static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { + struct irq_desc *desc; + + desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) { - irq_desc[irq].status |= IRQ_LEVEL; + desc->status |= IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); } else { - irq_desc[irq].status &= ~IRQ_LEVEL; + desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); } @@ -2027,6 +2039,7 @@ static struct irq_chip ioapic_chip __read_mostly = { static inline void init_IO_APIC_traps(void) { int irq; + struct irq_desc *desc; /* * NOTE! The local APIC isn't very good at handling @@ -2048,9 +2061,11 @@ static inline void init_IO_APIC_traps(void) */ if (irq < 16) make_8259A_irq(irq); - else + else { + desc = irq_to_desc(irq); /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_chip; + desc->chip = &no_irq_chip; + } } } } @@ -2089,7 +2104,10 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq, int vector) { - irq_desc[irq].status &= ~IRQ_LEVEL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); set_intr_gate(vector, interrupt[irq]); @@ -2556,6 +2574,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp; int vector; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2575,7 +2594,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -2649,6 +2669,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2659,7 +2680,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(mask); target_ht_irq(irq, dest); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 93a3ffabfe6..cab5a25d81b 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) unsigned long flags; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -361,9 +362,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ dest = SET_APIC_LOGICAL_ID(dest); + desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); - irq_desc[irq].affinity = mask; + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } #endif @@ -933,14 +935,17 @@ static struct irq_chip ir_ioapic_chip; static void ioapic_register_intr(int irq, unsigned long trigger) { + struct irq_desc *desc; + + desc = irq_to_desc(irq); if (trigger) - irq_desc[irq].status |= IRQ_LEVEL; + desc->status |= IRQ_LEVEL; else - irq_desc[irq].status &= ~IRQ_LEVEL; + desc->status &= ~IRQ_LEVEL; #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { - irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + desc->status |= IRQ_MOVE_PCNTXT; if (trigger) set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, handle_fasteoi_irq, @@ -1596,10 +1601,10 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); static void migrate_ioapic_irq(int irq, cpumask_t mask) { struct irq_cfg *cfg = irq_cfg + irq; - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; - int modify_ioapic_rte = desc->status & IRQ_LEVEL; + int modify_ioapic_rte; unsigned int dest; unsigned long flags; @@ -1616,6 +1621,8 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); + desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); @@ -1637,12 +1644,13 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) cfg->move_in_progress = 0; } - irq_desc[irq].affinity = mask; + desc->affinity = mask; } static int migrate_irq_remapped_level(int irq) { int ret = -1; + struct irq_desc *desc = irq_to_desc(irq); mask_IO_APIC_irq(irq); @@ -1658,11 +1666,11 @@ static int migrate_irq_remapped_level(int irq) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + migrate_ioapic_irq(irq, desc->pending_mask); ret = 0; - irq_desc[irq].status &= ~IRQ_MOVE_PENDING; - cpus_clear(irq_desc[irq].pending_mask); + desc->status &= ~IRQ_MOVE_PENDING; + cpus_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq(irq); @@ -1674,7 +1682,7 @@ static void ir_irq_migration(struct work_struct *work) int irq; for (irq = 0; irq < nr_irqs; irq++) { - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -1686,8 +1694,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, - irq_desc[irq].pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -1698,9 +1705,11 @@ static void ir_irq_migration(struct work_struct *work) */ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - if (irq_desc[irq].status & IRQ_LEVEL) { - irq_desc[irq].status |= IRQ_MOVE_PENDING; - irq_desc[irq].pending_mask = mask; + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = mask; migrate_irq_remapped_level(irq); return; } @@ -1725,7 +1734,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (irq >= nr_irqs) continue; - desc = irq_desc + irq; + desc = irq_to_desc(irq); cfg = irq_cfg + irq; spin_lock(&desc->lock); if (!cfg->move_cleanup_count) @@ -1791,7 +1800,7 @@ static void ack_apic_level(unsigned int irq) irq_complete_move(irq); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; mask_IO_APIC_irq(irq); } @@ -1868,6 +1877,7 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { static inline void init_IO_APIC_traps(void) { int irq; + struct irq_desc *desc; /* * NOTE! The local APIC isn't very good at handling @@ -1889,9 +1899,11 @@ static inline void init_IO_APIC_traps(void) */ if (irq < 16) make_8259A_irq(irq); - else + else { + desc = irq_to_desc(irq); /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_chip; + desc->chip = &no_irq_chip; + } } } } @@ -1926,7 +1938,10 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq) { - irq_desc[irq].status &= ~IRQ_LEVEL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); } @@ -2402,6 +2417,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2421,7 +2437,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #ifdef CONFIG_INTR_REMAP @@ -2435,6 +2452,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2469,7 +2487,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) cfg->move_in_progress = 0; } - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif #endif /* CONFIG_SMP */ @@ -2543,7 +2562,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); /* * irq migration in process context */ @@ -2655,6 +2674,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2674,7 +2694,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -2731,6 +2752,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) struct irq_cfg *cfg = irq_cfg + irq; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2743,7 +2765,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4c7ffb32854..ede513be517 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -224,7 +224,7 @@ unsigned int do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs; /* high bit used in ret_from_ code */ int overflow, irq = ~regs->orig_ax; - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); if (unlikely((unsigned)irq >= nr_irqs)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", @@ -273,15 +273,16 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; + struct irq_desc *desc = irq_to_desc(i); - spin_lock_irqsave(&irq_desc[i].lock, flags); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); #else for_each_online_cpu(j) any_count |= kstat_cpu(j).irqs[i]; #endif - action = irq_desc[i].action; + action = desc->action; if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); @@ -291,8 +292,8 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); if (action) { seq_printf(p, " %s", action->name); @@ -302,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -398,17 +399,20 @@ void fixup_irqs(cpumask_t map) for (irq = 0; irq < nr_irqs; irq++) { cpumask_t mask; + struct irq_desc *desc; + if (irq == 2) continue; - cpus_and(mask, irq_desc[irq].affinity, map); + desc = irq_to_desc(irq); + cpus_and(mask, desc->affinity, map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; } - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); - else if (irq_desc[irq].action && !(warned++)) + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, mask); + else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index e1f0839430d..738eb65a924 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -83,15 +83,16 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; + struct irq_desc *desc = irq_to_desc(i); - spin_lock_irqsave(&irq_desc[i].lock, flags); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); #else for_each_online_cpu(j) any_count |= kstat_cpu(j).irqs[i]; #endif - action = irq_desc[i].action; + action = desc->action; if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); @@ -101,8 +102,8 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); if (action) { seq_printf(p, " %s", action->name); @@ -111,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) } seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -228,37 +229,39 @@ void fixup_irqs(cpumask_t map) cpumask_t mask; int break_affinity = 0; int set_affinity = 1; + struct irq_desc *desc; if (irq == 2) continue; + desc = irq_to_desc(irq); /* interrupt's are disabled at this point */ - spin_lock(&irq_desc[irq].lock); + spin_lock(&desc->lock); if (!irq_has_action(irq) || - cpus_equal(irq_desc[irq].affinity, map)) { - spin_unlock(&irq_desc[irq].lock); + cpus_equal(desc->affinity, map)) { + spin_unlock(&desc->lock); continue; } - cpus_and(mask, irq_desc[irq].affinity, map); + cpus_and(mask, desc->affinity, map); if (cpus_empty(mask)) { break_affinity = 1; mask = map; } - if (irq_desc[irq].chip->mask) - irq_desc[irq].chip->mask(irq); + if (desc->chip->mask) + desc->chip->mask(irq); - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, mask); else if (!(warned++)) set_affinity = 0; - if (irq_desc[irq].chip->unmask) - irq_desc[irq].chip->unmask(irq); + if (desc->chip->unmask) + desc->chip->unmask(irq); - spin_unlock(&irq_desc[irq].lock); + spin_unlock(&desc->lock); if (break_affinity && set_affinity) printk("Broke affinity for irq %i\n", irq); diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 165c5d9b0d1..0744b49b4d1 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -143,9 +143,11 @@ void __init init_ISA_irqs(void) init_8259A(0); for (i = 0; i < nr_irqs; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; if (i < 16) { /* @@ -157,7 +159,7 @@ void __init init_ISA_irqs(void) /* * 'high' PCI IRQs filled in on demand */ - irq_desc[i].chip = &no_irq_chip; + desc->chip = &no_irq_chip; } } } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 61a97e616f7..9d85ab38443 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq) static unsigned int startup_cobalt_irq(unsigned int irq) { unsigned long flags; + struct irq_desc *desc = irq_to_desc(irq); spin_lock_irqsave(&cobalt_lock, flags); - if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) - irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); + if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) + desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); enable_cobalt_irq(irq); spin_unlock_irqrestore(&cobalt_lock, flags); return 0; @@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq) static void end_cobalt_irq(unsigned int irq) { unsigned long flags; + struct irq_desc *desc = irq_to_desc(irq); spin_lock_irqsave(&cobalt_lock, flags); - if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) + if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS))) enable_cobalt_irq(irq); spin_unlock_irqrestore(&cobalt_lock, flags); } @@ -626,7 +628,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) spin_unlock_irqrestore(&i8259A_lock, flags); - desc = irq_desc + realirq; + desc = irq_to_desc(realirq); /* * handle this 'virtual interrupt' as a Cobalt one now. @@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void) int i; for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = 0; - irq_desc[i].depth = 1; + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = 0; + desc->depth = 1; if (i == 0) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_IDE0) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_IDE1) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_8259) { - irq_desc[i].chip = &piix4_master_irq_type; + desc->chip = &piix4_master_irq_type; } else if (i < CO_IRQ_APIC0) { - irq_desc[i].chip = &piix4_virtual_irq_type; + desc->chip = &piix4_virtual_irq_type; } else if (IS_CO_APIC(i)) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 199a5f4a873..0f6e8a6523a 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq) * the interrupt off to another CPU */ static void before_handle_vic_irq(unsigned int irq) { - irq_desc_t *desc = irq_desc + irq; + irq_desc_t *desc = irq_to_desc(irq); __u8 cpu = smp_processor_id(); _raw_spin_lock(&vic_irq_lock); @@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq) /* Finish the VIC interrupt: basically mask */ static void after_handle_vic_irq(unsigned int irq) { - irq_desc_t *desc = irq_desc + irq; + irq_desc_t *desc = irq_to_desc(irq); _raw_spin_lock(&vic_irq_lock); { -- cgit v1.2.3 From 3ac2de48ed3c998df7f366e039c97eedb27e7c3d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:06 -0700 Subject: x86: add irq_cfg in io_apic_64.c preallocate size is 32, and if it is not enough, irq_cfg will more via alloc_bootmem() or kzalloc(). (depending on how early we are in system setup) v2: fix typo about size of init_one_irq_cfg ... should use sizeof(struct irq_cfg) v3: according to Eric, change get_irq_cfg() to irq_cfg() v4: squash add irq_cfg_alloc in Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 209 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 169 insertions(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index cab5a25d81b..858c37a31a2 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -57,7 +57,11 @@ #define __apicdebuginit(type) static type __init +struct irq_cfg; + struct irq_cfg { + unsigned int irq; + struct irq_cfg *next; cpumask_t domain; cpumask_t old_domain; unsigned move_cleanup_count; @@ -67,34 +71,132 @@ struct irq_cfg { /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -static struct irq_cfg *irq_cfg; +static struct irq_cfg irq_cfg_init = { .irq = -1U, }; +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; +} + +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +static void init_one_irq_cfg(struct irq_cfg *cfg) +{ + memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); +} static void __init init_work(void *data) { struct dyn_array *da = data; + struct irq_cfg *cfg; + int i; - memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + cfg = *da->name; + + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + + i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (; i < *da->nr; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < *da->nr; i++) + cfg[i-1].next = &cfg[i]; } -DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); +static struct irq_cfg *irq_cfgx; +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); + +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + struct irq_cfg *cfg; + + BUG_ON(irq == -1U); + + cfg = &irq_cfgx[0]; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + if (cfg->irq == -1U) + return NULL; + + cfg = cfg->next; + } + + return NULL; +} + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + struct irq_cfg *cfg, *cfg_pri; + int i; + int count = 0; + + BUG_ON(irq == -1U); + + cfg_pri = cfg = &irq_cfgx[0]; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + if (cfg->irq == -1U) { + cfg->irq = irq; + return cfg; + } + cfg_pri = cfg; + cfg = cfg->next; + count++; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + + if (after_bootmem) + cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0); + + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); + + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + cfg->irq = irq; + cfg_pri->next = cfg; + + return cfg; +} static int assign_irq_vector(int irq, cpumask_t mask); @@ -341,7 +443,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; unsigned int dest; cpumask_t tmp; @@ -381,6 +483,8 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) struct irq_pin_list *entry = irq_2_pin + irq; BUG_ON(irq >= nr_irqs); + irq_cfg_alloc(irq); + while (entry->next) entry = irq_2_pin + entry->next; @@ -819,7 +923,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) struct irq_cfg *cfg; BUG_ON((unsigned)irq >= nr_irqs); - cfg = &irq_cfg[irq]; + cfg = irq_cfg(irq); /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); @@ -893,7 +997,7 @@ static void __clear_irq_vector(int irq) int cpu, vector; BUG_ON((unsigned)irq >= nr_irqs); - cfg = &irq_cfg[irq]; + cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@ -913,17 +1017,23 @@ void __setup_vector_irq(int cpu) /* Mark the inuse vectors */ for (irq = 0; irq < nr_irqs; ++irq) { - if (!cpu_isset(cpu, irq_cfg[irq].domain)) + struct irq_cfg *cfg = irq_cfg(irq); + + if (!cpu_isset(cpu, cfg->domain)) continue; - vector = irq_cfg[irq].vector; + vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ for (vector = 0; vector < NR_VECTORS; ++vector) { + struct irq_cfg *cfg; + irq = per_cpu(vector_irq, cpu)[vector]; if (irq < 0) continue; - if (!cpu_isset(cpu, irq_cfg[irq].domain)) + + cfg = irq_cfg(irq); + if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1029,13 +1139,15 @@ static int setup_ioapic_entry(int apic, int irq, static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, int trigger, int polarity) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct IO_APIC_route_entry entry; cpumask_t mask; if (!IO_APIC_IRQ(irq)) return; + cfg = irq_cfg(irq); + mask = TARGET_CPUS; if (assign_irq_vector(irq, mask)) return; @@ -1553,7 +1665,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) static int ioapic_retrigger_irq(unsigned int irq) { - struct irq_cfg *cfg = &irq_cfg[irq]; + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; spin_lock_irqsave(&vector_lock, flags); @@ -1600,7 +1712,7 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); */ static void migrate_ioapic_irq(int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; @@ -1618,6 +1730,7 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -1735,7 +1848,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; desc = irq_to_desc(irq); - cfg = irq_cfg + irq; + cfg = irq_cfg(irq); spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -1754,7 +1867,7 @@ unlock: static void irq_complete_move(unsigned int irq) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg = irq_cfg(irq); unsigned vector, me; if (likely(!cfg->move_in_progress)) @@ -1891,7 +2004,10 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for (irq = 0; irq < nr_irqs ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { + struct irq_cfg *cfg; + + cfg = irq_cfg(irq); + if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2028,7 +2144,7 @@ static inline void __init unlock_ExtINT_logic(void) */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg + 0; + struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; unsigned long flags; int no_pin1 = 0; @@ -2306,14 +2422,19 @@ int create_irq(void) int irq; int new; unsigned long flags; + struct irq_cfg *cfg_new; irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_cfg[new].vector != 0) + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) continue; + /* check if need to create one */ + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; @@ -2346,7 +2467,7 @@ void destroy_irq(unsigned int irq) #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; int err; unsigned dest; cpumask_t tmp; @@ -2356,6 +2477,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms if (err) return err; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -2413,7 +2535,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; @@ -2426,6 +2548,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2448,7 +2571,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) */ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; @@ -2464,6 +2587,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2670,7 +2794,7 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; @@ -2683,6 +2807,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2749,7 +2874,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; struct irq_desc *desc; @@ -2761,6 +2886,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2783,7 +2909,7 @@ static struct irq_chip ht_irq_chip = { int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; int err; cpumask_t tmp; @@ -2793,6 +2919,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct ht_irq_msg msg; unsigned dest; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -2891,6 +3018,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_cfg *cfg; if (skip_ioapic_setup == 1) return; @@ -2906,7 +3034,8 @@ void __init setup_ioapic_dest(void) * when you have too many devices, because at that time only boot * cpu is online. */ - if (!irq_cfg[irq].vector) + cfg = irq_cfg(irq); + if (!cfg->vector) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); -- cgit v1.2.3 From e5a53714acfc7b5f868d07d27c5f02cb00b118db Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:07 -0700 Subject: x86: put irq_2_pin pointer into irq_cfg preallocate 32 irq_2_pin entries, and use get_one_free_irq_2_pin() to get one more and link to irq_cfg if needed. so don't waste one where no irq is enabled. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 161 ++++++++++++++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 858c37a31a2..51ef7eb75f2 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -58,10 +58,11 @@ #define __apicdebuginit(type) static type __init struct irq_cfg; - +struct irq_pin_list; struct irq_cfg { unsigned int irq; struct irq_cfg *next; + struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; unsigned move_cleanup_count; @@ -252,13 +253,66 @@ int pin_map_size; * between pins and IRQs. */ -static struct irq_pin_list { - short apic, pin; - int next; -} *irq_2_pin; +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *irq_2_pin_head; +/* fill one page ? */ +static int nr_irq_2_pin = 0x100; +static struct irq_pin_list *irq_2_pin_ptr; +static void __init irq_2_pin_init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_pin_list *pin; + int i; + + pin = *da->name; + + for (i = 1; i < *da->nr; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = &pin[0]; +} +DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); + +static struct irq_pin_list *get_one_free_irq_2_pin(void) +{ + struct irq_pin_list *pin; + int i; + + pin = irq_2_pin_ptr; + + if (pin) { + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); + + if (after_bootmem) + pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, + GFP_ATOMIC); + else + pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * + nr_irq_2_pin, PAGE_SIZE, 0); + + if (!pin) + panic("can not get more irq_2_pin\n"); -DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL); + for (i = 1; i < nr_irq_2_pin; i++) + pin[i-1].next = &pin[i]; + irq_2_pin_ptr = pin->next; + pin->next = NULL; + + return pin; +} struct io_apic { unsigned int index; @@ -300,16 +354,17 @@ static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); - entry = irq_2_pin + irq; + entry = cfg->irq_2_pin; for (;;) { unsigned int reg; int pin; - pin = entry->pin; - if (pin == -1) + if (!entry) break; + pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (reg & IO_APIC_REDIR_REMOTE_IRR) { @@ -318,7 +373,7 @@ static bool io_apic_level_ack_pending(unsigned int irq) } if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -339,21 +394,24 @@ static inline void io_apic_sync(unsigned int apic) \ { \ int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ \ BUG_ON(irq >= nr_irqs); \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ for (;;) { \ unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ + if (!entry) \ break; \ + pin = entry->pin; \ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ FINAL; \ if (!entry->next) \ break; \ - entry = irq_2_pin + entry->next; \ + entry = entry->next; \ } \ } @@ -416,15 +474,20 @@ static void ioapic_mask_entry(int apic, int pin) static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) { int apic, pin; - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; BUG_ON(irq >= nr_irqs); + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; for (;;) { unsigned int reg; + + if (!entry) + break; + apic = entry->apic; pin = entry->pin; - if (pin == -1) - break; /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. @@ -437,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) io_apic_modify(apic, reg); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } } @@ -480,22 +543,35 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) int first_free_entry; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; BUG_ON(irq >= nr_irqs); - irq_cfg_alloc(irq); + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { + entry = get_one_free_irq_2_pin(); + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); + return; + } - while (entry->next) - entry = irq_2_pin + entry->next; + while (entry->next) { + /* not again, please */ + if (entry->apic == apic && entry->pin == pin) + return; - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= pin_map_size) - panic("io_apic.c: ran out of irq_2_pin entries!"); + entry = entry->next; } + + entry->next = get_one_free_irq_2_pin(); + entry = entry->next; entry->apic = apic; entry->pin = pin; + printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); } /* @@ -505,17 +581,24 @@ static void __init replace_pin_at_irq(unsigned int irq, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; - while (1) { + while (entry) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - } - if (!entry->next) + replaced = 1; + /* every one is different, right? */ break; - entry = irq_2_pin + entry->next; + } + entry = entry->next; } + + /* why? call replace before add? */ + if (!replaced) + add_pin_to_irq(irq, newapic, newpin); } @@ -1326,15 +1409,16 @@ __apicdebuginit(void) print_IO_APIC(void) } printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < nr_irqs; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) + struct irq_cfg *cfg = irq_cfg(i); + struct irq_pin_list *entry = cfg->irq_2_pin; + if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", i); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } printk("\n"); } @@ -1495,14 +1579,9 @@ void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; - int i, apic; + int apic; unsigned long flags; - for (i = 0; i < pin_map_size; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } - /* * The number of IO-APIC IRQ registers (== #pins): */ -- cgit v1.2.3 From 7f95ec9e4c12fd067febfd57532da1166d75d858 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:09 -0700 Subject: x86: move kstat_irqs from kstat to irq_desc based on Eric's patch ... together mold it with dyn_array for irq_desc, will allcate kstat_irqs for nr_irq_desc alltogether if needed. -- at that point nr_cpus is known already. v2: make sure system without generic_hardirqs works they don't have irq_desc v3: fix merging v4: [mingo@elte.hu] fix typo [ mingo@elte.hu ] irq: build fix fix: arch/x86/xen/spinlock.c: In function 'xen_spin_lock_slow': arch/x86/xen/spinlock.c:90: error: 'struct kernel_stat' has no member named 'irqs' Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 2 +- arch/x86/kernel/irq_32.c | 4 ++-- arch/x86/kernel/irq_64.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/xen/spinlock.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index c2160cfdec9..204884b1415 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -526,7 +526,7 @@ static void do_irq_balance(void) if (package_index == i) IRQ_DELTA(package_index, j) = 0; /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_cpu(i).irqs[j]; + value_now = (unsigned long) kstat_irqs_cpu(j, i); /* Determine the activity per processor per IRQ */ delta = value_now - LAST_CPU_IRQ(i, j); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index ede513be517..576c5df6cad 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -280,7 +280,7 @@ int show_interrupts(struct seq_file *p, void *v) any_count = kstat_irqs(i); #else for_each_online_cpu(j) - any_count |= kstat_cpu(j).irqs[i]; + any_count |= kstat_irqs_cpu(i, j); #endif action = desc->action; if (!action && !any_count) @@ -290,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %8s", desc->chip->name); seq_printf(p, "-%-8s", desc->name); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 738eb65a924..4a0a4eb44dc 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v) any_count = kstat_irqs(i); #else for_each_online_cpu(j) - any_count |= kstat_cpu(j).irqs[i]; + any_count |= kstat_irqs_cpu(i, j); #endif action = desc->action; if (!action && !any_count) @@ -100,7 +100,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %8s", desc->chip->name); seq_printf(p, "-%-8s", desc->name); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 9d85ab38443..817aa55a120 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) /* * handle this 'virtual interrupt' as a Cobalt one now. */ - kstat_cpu(smp_processor_id()).irqs[realirq]++; + kstat_irqs_this_cpu(desc)++; if (likely(desc->action != NULL)) handle_IRQ_event(realirq, desc->action); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index dd71e3a021c..bb6bc721b13 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ - kstat_this_cpu.irqs[irq]++; + kstat_irqs_this_cpu(irq_to_desc(irq))++; out: raw_local_irq_restore(flags); -- cgit v1.2.3 From 2c6927a38f65b53b62f86158fba29a068c4e8b6a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:11 -0700 Subject: irq: replace loop with nr_irqs with for_each_irq_desc There are a handful of loops that go from 0 to nr_irqs and use get_irq_desc() on them. These would allocate all the irq_desc entries, regardless of the need for them. Use the smarter for_each_irq_desc() iterator that will only iterate over the present ones. v2: make sure arch without GENERIC_HARDIRQS work too Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 6 +++--- arch/x86/kernel/irq_64.c | 5 ++--- arch/x86/kernel/irqinit_64.c | 17 +++++------------ 3 files changed, 10 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 51ef7eb75f2..708be9724da 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1871,10 +1871,10 @@ unmask: static void ir_irq_migration(struct work_struct *work) { - int irq; + unsigned int irq; + struct irq_desc *desc; - for (irq = 0; irq < nr_irqs; irq++) { - struct irq_desc *desc = irq_to_desc(irq); + for_each_irq_desc(irq, desc) { if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4a0a4eb44dc..b3cf55e325f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -224,17 +224,16 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; + struct irq_desc *desc; - for (irq = 0; irq < nr_irqs; irq++) { + for_each_irq_desc(irq, desc) { cpumask_t mask; int break_affinity = 0; int set_affinity = 1; - struct irq_desc *desc; if (irq == 2) continue; - desc = irq_to_desc(irq); /* interrupt's are disabled at this point */ spin_lock(&desc->lock); diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 0744b49b4d1..cd9f42d028d 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -142,25 +142,18 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < nr_irqs; i++) { + for (i = 0; i < 16; i++) { struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; desc->action = NULL; desc->depth = 1; - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, + /* + * 16 old-style INTA-cycle interrupts: + */ + set_irq_chip_and_handler_name(i, &i8259A_chip, handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - desc->chip = &no_irq_chip; - } } } -- cgit v1.2.3 From 46b8214d12c274bd4265aae482ab7ffe69d94818 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:13 -0700 Subject: x86, ioapic: replace loop with nr_irqs with for_each_irq_icfg Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 708be9724da..60d60061659 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -129,6 +129,9 @@ static void __init init_work(void *data) cfg[i-1].next = &cfg[i]; } +#define for_each_irq_cfg(cfg) \ + for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next) + static struct irq_cfg *irq_cfgx; DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); @@ -1097,20 +1100,18 @@ void __setup_vector_irq(int cpu) /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ int irq, vector; + struct irq_cfg *cfg; /* Mark the inuse vectors */ - for (irq = 0; irq < nr_irqs; ++irq) { - struct irq_cfg *cfg = irq_cfg(irq); - + for_each_irq_cfg(cfg) { if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; + irq = cfg->irq; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ for (vector = 0; vector < NR_VECTORS; ++vector) { - struct irq_cfg *cfg; - irq = per_cpu(vector_irq, cpu)[vector]; if (irq < 0) continue; @@ -1340,6 +1341,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; unsigned long flags; + struct irq_cfg *cfg; if (apic_verbosity == APIC_QUIET) return; @@ -1408,12 +1410,11 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < nr_irqs; i++) { - struct irq_cfg *cfg = irq_cfg(i); + for_each_irq_cfg(cfg) { struct irq_pin_list *entry = cfg->irq_2_pin; if (!entry) continue; - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", cfg->irq); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -2070,6 +2071,7 @@ static inline void init_IO_APIC_traps(void) { int irq; struct irq_desc *desc; + struct irq_cfg *cfg; /* * NOTE! The local APIC isn't very good at handling @@ -2082,10 +2084,8 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < nr_irqs ; irq++) { - struct irq_cfg *cfg; - - cfg = irq_cfg(irq); + for_each_irq_cfg(cfg) { + irq = cfg->irq; if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, -- cgit v1.2.3 From 7d94f7ca401dd7f445fda9a971a48aa5427b3e55 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:14 -0700 Subject: irq: remove >= nr_irqs checking with config_have_sparse_irq remove irq limit checks - nr_irqs is dynamic and we expand anytime. v2: fix checking about result irq_cfg_without_new, so could use msi again v3: use irq_desc_without_new to check irq is valid Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 9 --------- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 60d60061659..1b8cccb5ba2 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -400,7 +400,6 @@ static inline void io_apic_sync(unsigned int apic) struct irq_cfg *cfg; \ struct irq_pin_list *entry; \ \ - BUG_ON(irq >= nr_irqs); \ cfg = irq_cfg(irq); \ entry = cfg->irq_2_pin; \ for (;;) { \ @@ -480,7 +479,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) struct irq_cfg *cfg; struct irq_pin_list *entry; - BUG_ON(irq >= nr_irqs); cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { @@ -549,7 +547,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) struct irq_cfg *cfg; struct irq_pin_list *entry; - BUG_ON(irq >= nr_irqs); /* first time to refer irq_cfg, so with new */ cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; @@ -841,7 +838,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) best_guess = irq; } } - BUG_ON(best_guess >= nr_irqs); return best_guess; } @@ -973,7 +969,6 @@ static int pin_2_irq(int idx, int apic, int pin) irq += nr_ioapic_registers[i++]; irq += pin; } - BUG_ON(irq >= nr_irqs); return irq; } @@ -1008,7 +1003,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask) int cpu; struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= nr_irqs); cfg = irq_cfg(irq); /* Only try and allocate irqs on cpus that are present */ @@ -1082,7 +1076,6 @@ static void __clear_irq_vector(int irq) cpumask_t mask; int cpu, vector; - BUG_ON((unsigned)irq >= nr_irqs); cfg = irq_cfg(irq); BUG_ON(!cfg->vector); @@ -1924,8 +1917,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_desc *desc; struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; - if (irq >= nr_irqs) - continue; desc = irq_to_desc(irq); cfg = irq_cfg(irq); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index b3cf55e325f..a3e36336d91 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -202,7 +202,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - if (likely(irq < nr_irqs)) + if (likely(__irq_to_desc(irq))) generic_handle_irq(irq); else { if (!disable_apic) -- cgit v1.2.3 From 46926b67fc663d357a1a8174328998a9e49da0b8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:15 -0700 Subject: generic: add irq_desc in function in parameter So we could remove some duplicated calling to irq_desc v2: make sure irq_desc in init/main.c is not used without generic_hardirqs Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index a3e36336d91..f58b995b30e 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -189,6 +189,7 @@ u64 arch_irq_stat(void) asmlinkage unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; @@ -202,8 +203,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - if (likely(__irq_to_desc(irq))) - generic_handle_irq(irq); + desc = __irq_to_desc(irq); + if (likely(desc)) + generic_handle_irq_desc(irq, desc); else { if (!disable_apic) ack_APIC_irq(); -- cgit v1.2.3 From 1d5f6b36c4736af1dac396d6267eb53dcc8c0021 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:16 -0700 Subject: x86: check with without_new in show_interrupts so we don't get new one that we don't need it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f58b995b30e..f337f87c1e1 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -83,7 +83,10 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; - struct irq_desc *desc = irq_to_desc(i); + struct irq_desc *desc = __irq_to_desc(i); + + if (!desc) + return 0; spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP -- cgit v1.2.3 From cb5bc83225a86ca53bbb889ed8439e4fd6cf44ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:17 -0700 Subject: x86_64: rename irq_desc/irq_desc_alloc change names: irq_desc() ==> irq_desc_alloc __irq_desc() ==> irq_desc Also split a few of the uses in lowlevel x86 code. v2: need to check if desc is null in smp_irq_move_cleanup Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 10 +++++++++- arch/x86/kernel/irq_64.c | 4 ++-- arch/x86/kernel/irqinit_64.c | 3 ++- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 1b8cccb5ba2..a054db9ef19 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1124,7 +1124,12 @@ static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; - desc = irq_to_desc(irq); + /* first time to use this irq_desc */ + if (irq < 16) + desc = irq_to_desc(irq); + else + desc = irq_to_desc_alloc(irq); + if (trigger) desc->status |= IRQ_LEVEL; else @@ -1919,6 +1924,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) irq = __get_cpu_var(vector_irq)[vector]; desc = irq_to_desc(irq); + if (!desc) + continue; + cfg = irq_cfg(irq); spin_lock(&desc->lock); if (!cfg->move_cleanup_count) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f337f87c1e1..5d5976e0311 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -83,7 +83,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; - struct irq_desc *desc = __irq_to_desc(i); + struct irq_desc *desc = irq_to_desc(i); if (!desc) return 0; @@ -206,7 +206,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - desc = __irq_to_desc(irq); + desc = irq_to_desc(irq); if (likely(desc)) generic_handle_irq_desc(irq, desc); else { diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index cd9f42d028d..d17fbc26d96 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -143,7 +143,8 @@ void __init init_ISA_irqs(void) init_8259A(0); for (i = 0; i < 16; i++) { - struct irq_desc *desc = irq_to_desc(i); + /* first time call this irq_desc */ + struct irq_desc *desc = irq_to_desc_alloc(i); desc->status = IRQ_DISABLED; desc->action = NULL; -- cgit v1.2.3 From a2f9f43858db64cb8b45c4f6746d7a52b80d4dcb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:19 -0700 Subject: x86_64: separate irq_cfgx from irq_cfgx_free so later don't need to compare with -1U Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 92 ++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 33 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index a054db9ef19..8ab7ae01773 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -111,44 +111,44 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); } +static struct irq_cfg *irq_cfgx; +static struct irq_cfg *irq_cfgx_free; static void __init init_work(void *data) { struct dyn_array *da = data; struct irq_cfg *cfg; + int legacy_count; int i; cfg = *da->name; memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); - for (; i < *da->nr; i++) + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); for (i = 1; i < *da->nr; i++) cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = &irq_cfgx[legacy_count]; + irq_cfgx[legacy_count - 1].next = NULL; } #define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next) + for (cfg = irq_cfgx; cfg; cfg = cfg->next) -static struct irq_cfg *irq_cfgx; DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); static struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg; - BUG_ON(irq == -1U); - - cfg = &irq_cfgx[0]; + cfg = irq_cfgx; while (cfg) { if (cfg->irq == irq) return cfg; - if (cfg->irq == -1U) - return NULL; - cfg = cfg->next; } @@ -161,44 +161,70 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) int i; int count = 0; - BUG_ON(irq == -1U); - - cfg_pri = cfg = &irq_cfgx[0]; + cfg_pri = cfg = irq_cfgx; while (cfg) { if (cfg->irq == irq) return cfg; - if (cfg->irq == -1U) { - cfg->irq = irq; - return cfg; - } cfg_pri = cfg; cfg = cfg->next; count++; } - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + if (!irq_cfgx_free) { + unsigned long phys; + unsigned long total_bytes; + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - if (after_bootmem) - cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0); + total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; + if (after_bootmem) + cfg = kzalloc(total_bytes, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); + phys = __pa(cfg); + printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); - cfg->irq = irq; - cfg_pri->next = cfg; + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = cfg; + } + cfg = irq_cfgx_free; + irq_cfgx_free = irq_cfgx_free->next; + cfg->next = NULL; + if (cfg_pri) + cfg_pri->next = cfg; + else + irq_cfgx = cfg; + cfg->irq = irq; + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); +#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG + { + /* dump the results */ + struct irq_cfg *cfg; + unsigned long phys; + unsigned long bytes = sizeof(struct irq_cfg); + + printk(KERN_DEBUG "=========================== %d\n", irq); + printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); + for_each_irq_cfg(cfg) { + phys = __pa(cfg); + printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); + } + printk(KERN_DEBUG "===========================\n"); + } +#endif return cfg; } -- cgit v1.2.3 From 52b17329d6d0a4824b89206803a430915031ff23 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:20 -0700 Subject: x86_64: make /proc/interrupts work with dyn irq_desc loop with irq_desc list Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 5d5976e0311..7bd841a9c64 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -70,9 +70,27 @@ static inline void stack_overflow_check(struct pt_regs *regs) int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *) v, j; + int i, j; struct irqaction * action; unsigned long flags; + unsigned int entries; + struct irq_desc *desc; + int tail = 0; + +#ifdef CONFIG_HAVE_SPARSE_IRQ + desc = (struct irq_desc *)v; + entries = -1U; + i = desc->irq; + if (!desc->next) + tail = 1; +#else + entries = nr_irqs - 1; + i = *(loff_t *) v; + if (i == nr_irqs) + tail = 1; + else + desc = irq_to_desc(i); +#endif if (i == 0) { seq_printf(p, " "); @@ -81,12 +99,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < nr_irqs) { + if (i <= entries) { unsigned any_count = 0; - struct irq_desc *desc = irq_to_desc(i); - - if (!desc) - return 0; spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP @@ -116,7 +130,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == nr_irqs) { + } + + if (tail) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); @@ -155,6 +171,7 @@ skip: seq_printf(p, " Spurious interrupts\n"); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); } + return 0; } -- cgit v1.2.3 From 6d50bc26836e16a9589e0b128d527c29e30d722a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:22 -0700 Subject: x86: use 28 bits irq NR for pci msi/msix and ht also print out irq no in /proc/interrups and /proc/stat in hex, so could tell bus/dev/func. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 64 ++++++++++++++++++++++++++++++++++---------- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 51 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8ab7ae01773..b0d4abc55a1 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2520,17 +2520,21 @@ device_initcall(ioapic_init_sysfs); /* * Dynamic irq allocate and deallocation */ -int create_irq(void) +unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - int irq; - int new; + unsigned int irq; + unsigned int new; unsigned long flags; struct irq_cfg *cfg_new; - irq = -ENOSPC; +#ifndef CONFIG_HAVE_SPARSE_IRQ + irq_want = nr_irqs - 1; +#endif + + irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = (nr_irqs - 1); new >= 0; new--) { + for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; cfg_new = irq_cfg(new); @@ -2545,12 +2549,24 @@ int create_irq(void) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq >= 0) { + if (irq > 0) { dynamic_irq_init(irq); } return irq; } +int create_irq(void) +{ + int irq; + + irq = create_irq_nr(nr_irqs - 1); + + if (irq == 0) + irq = -1; + + return irq; +} + void destroy_irq(unsigned int irq) { unsigned long flags; @@ -2803,13 +2819,29 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) return 0; } +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { - int irq, ret; + unsigned int irq; + int ret; + unsigned int irq_want; - irq = create_irq(); - if (irq < 0) - return irq; + irq_want = build_irq_for_pci_dev(dev) + 0x100; + + irq = create_irq_nr(irq_want); + if (irq == 0) + return -1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) @@ -2836,18 +2868,22 @@ error: int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, ret, sub_handle; + unsigned int irq; + int ret, sub_handle; struct msi_desc *desc; + unsigned int irq_want; + #ifdef CONFIG_INTR_REMAP struct intel_iommu *iommu = 0; int index = 0; #endif + irq_want = build_irq_for_pci_dev(dev) + 0x100; sub_handle = 0; list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq(); - if (irq < 0) - return irq; + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 7bd841a9c64..348a11168c2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%3d: ",i); + seq_printf(p, "%#x: ",i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else -- cgit v1.2.3 From 8b8e8c1bf7275eca859fe551dfa484134eaf013b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:23 -0700 Subject: x86: remove irqbalance in kernel for 32 bit This has been deprecated for years, the user space irqbalanced utility works better with numa, has configurable policies, etc... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 8 - arch/x86/configs/i386_defconfig | 1 - arch/x86/kernel/io_apic_32.c | 402 ---------------------------------------- arch/x86/kernel/quirks.c | 3 - 4 files changed, 414 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1004888e9b1..3e0eaaa1a33 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,14 +1254,6 @@ config EFI resultant kernel should continue to boot on existing non-EFI platforms. -config IRQBALANCE - def_bool y - prompt "Enable kernel irq balancing" - depends on X86_32 && SMP && X86_IO_APIC - help - The default yes will allow the kernel to do irq load balancing. - Saying no will keep the kernel from doing irq load balancing. - config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 52d0359719d..13b8c86ae98 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -287,7 +287,6 @@ CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set CONFIG_X86_PAT=y CONFIG_EFI=y -# CONFIG_IRQBALANCE is not set CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set # CONFIG_HZ_250 is not set diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 204884b1415..668edf22606 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) spin_unlock_irqrestore(&ioapic_lock, flags); } -#if defined(CONFIG_IRQBALANCE) -# include /* kernel_thread() */ -# include /* kstat */ -# include /* kmalloc() */ -# include - -#define IRQBALANCE_CHECK_ARCH -999 -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) - -static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; -static int physical_balance __read_mostly; -static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; - -static struct irq_cpu_info { - unsigned long *last_irq; - unsigned long *irq_delta; - unsigned long irq; -} irq_cpu_data[NR_CPUS]; - -#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) - -#define IDLE_ENOUGH(cpu,now) \ - (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) - -#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) - -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) - -static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL; - -static cpumask_t *balance_irq_affinity; - - -static void __init irq_affinity_init_work(void *data) -{ - struct dyn_array *da = data; - - int i; - struct balance_irq_affinity *affinity; - - affinity = *da->name; - - for (i = 0; i < *da->nr; i++) - memcpy(&affinity[i], &balance_irq_affinity_init, - sizeof(struct balance_irq_affinity)); - -} - -DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work); - - -void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) -{ - balance_irq_affinity[irq] = mask; -} - -static unsigned long move(int curr_cpu, cpumask_t allowed_mask, - unsigned long now, int direction) -{ - int search_idle = 1; - int cpu = curr_cpu; - - goto inside; - - do { - if (unlikely(cpu == curr_cpu)) - search_idle = 0; -inside: - if (direction == 1) { - cpu++; - if (cpu >= NR_CPUS) - cpu = 0; - } else { - cpu--; - if (cpu == -1) - cpu = NR_CPUS-1; - } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu, now))); - - return cpu; -} - -static inline void balance_irq(int cpu, int irq) -{ - unsigned long now = jiffies; - cpumask_t allowed_mask; - unsigned int new_cpu; - - if (irqbalance_disabled) - return; - - cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); - new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) - set_pending_irq(irq, cpumask_of_cpu(new_cpu)); -} - -static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) -{ - int i, j; - struct irq_desc *desc; - - for_each_online_cpu(i) { - for (j = 0; j < nr_irqs; j++) { - desc = irq_to_desc(j); - if (!desc->action) - continue; - /* Is it a significant load ? */ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < - useful_load_threshold) - continue; - balance_irq(i, j); - } - } - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; -} - -static void do_irq_balance(void) -{ - int i, j; - unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); - unsigned long move_this_load = 0; - int max_loaded = 0, min_loaded = 0; - int load; - unsigned long useful_load_threshold = balanced_irq_interval + 10; - int selected_irq; - int tmp_loaded, first_attempt = 1; - unsigned long tmp_cpu_irq; - unsigned long imbalance = 0; - cpumask_t allowed_mask, target_cpu_mask, tmp; - struct irq_desc *desc; - - for_each_possible_cpu(i) { - int package_index; - CPU_IRQ(i) = 0; - if (!cpu_online(i)) - continue; - package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < nr_irqs; j++) { - unsigned long value_now, delta; - /* Is this an active IRQ or balancing disabled ? */ - desc = irq_to_desc(j); - if (!desc->action || irq_balancing_disabled(j)) - continue; - if (package_index == i) - IRQ_DELTA(package_index, j) = 0; - /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_irqs_cpu(j, i); - - /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i, j); - - /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i, j) = value_now; - - /* Ignore IRQs whose rate is less than the clock */ - if (delta < useful_load_threshold) - continue; - /* update the load for the processor or package total */ - IRQ_DELTA(package_index, j) += delta; - - /* Keep track of the higher numbered sibling as well */ - if (i != package_index) - CPU_IRQ(i) += delta; - /* - * We have sibling A and sibling B in the package - * - * cpu_irq[A] = load for cpu A + load for cpu B - * cpu_irq[B] = load for cpu B - */ - CPU_IRQ(package_index) += delta; - } - } - /* Find the least loaded processor package */ - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (min_cpu_irq > CPU_IRQ(i)) { - min_cpu_irq = CPU_IRQ(i); - min_loaded = i; - } - } - max_cpu_irq = ULONG_MAX; - -tryanothercpu: - /* - * Look for heaviest loaded processor. - * We may come back to get the next heaviest loaded processor. - * Skip processors with trivial loads. - */ - tmp_cpu_irq = 0; - tmp_loaded = -1; - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (max_cpu_irq <= CPU_IRQ(i)) - continue; - if (tmp_cpu_irq < CPU_IRQ(i)) { - tmp_cpu_irq = CPU_IRQ(i); - tmp_loaded = i; - } - } - - if (tmp_loaded == -1) { - /* - * In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original - * approach to rotate them around. - */ - if (!first_attempt && imbalance >= useful_load_threshold) { - rotate_irqs_among_cpus(useful_load_threshold); - return; - } - goto not_worth_the_effort; - } - - first_attempt = 0; /* heaviest search */ - max_cpu_irq = tmp_cpu_irq; /* load */ - max_loaded = tmp_loaded; /* processor */ - imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - /* - * if imbalance is less than approx 10% of max load, then - * observe diminishing returns action. - quit - */ - if (imbalance < (max_cpu_irq >> 3)) - goto not_worth_the_effort; - -tryanotherirq: - /* if we select an IRQ to move that can't go where we want, then - * see if there is another one to try. - */ - move_this_load = 0; - selected_irq = -1; - for (j = 0; j < nr_irqs; j++) { - /* Is this an active IRQ? */ - desc = irq_to_desc(j); - if (!desc->action) - continue; - if (imbalance <= IRQ_DELTA(max_loaded, j)) - continue; - /* Try to find the IRQ that is closest to the imbalance - * without going over. - */ - if (move_this_load < IRQ_DELTA(max_loaded, j)) { - move_this_load = IRQ_DELTA(max_loaded, j); - selected_irq = j; - } - } - if (selected_irq == -1) - goto tryanothercpu; - - imbalance = move_this_load; - - /* For physical_balance case, we accumulated both load - * values in the one of the siblings cpu_irq[], - * to use the same code for physical and logical processors - * as much as possible. - * - * NOTE: the cpu_irq[] array holds the sum of the load for - * sibling A and sibling B in the slot for the lowest numbered - * sibling (A), _AND_ the load for sibling B in the slot for - * the higher numbered sibling. - * - * We seek the least loaded sibling by making the comparison - * (A+B)/2 vs B - */ - load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { - if (load > CPU_IRQ(j)) { - /* This won't change cpu_sibling_map[min_loaded] */ - load = CPU_IRQ(j); - min_loaded = j; - } - } - - cpus_and(allowed_mask, - cpu_online_map, - balance_irq_affinity[selected_irq]); - target_cpu_mask = cpumask_of_cpu(min_loaded); - cpus_and(tmp, target_cpu_mask, allowed_mask); - - if (!cpus_empty(tmp)) { - /* mark for change destination */ - set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); - - /* Since we made a change, come back sooner to - * check for more variation. - */ - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; - } - goto tryanotherirq; - -not_worth_the_effort: - /* - * if we did not find an IRQ to move, then adjust the time interval - * upward - */ - balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - return; -} - -static int balanced_irq(void *unused) -{ - int i; - unsigned long prev_balance_time = jiffies; - long time_remaining = balanced_irq_interval; - struct irq_desc *desc; - - /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < nr_irqs ; i++) { - desc = irq_to_desc(i); - desc->pending_mask = cpumask_of_cpu(0); - set_pending_irq(i, cpumask_of_cpu(0)); - } - - set_freezable(); - for ( ; ; ) { - time_remaining = schedule_timeout_interruptible(time_remaining); - try_to_freeze(); - if (time_after(jiffies, - prev_balance_time+balanced_irq_interval)) { - preempt_disable(); - do_irq_balance(); - prev_balance_time = jiffies; - time_remaining = balanced_irq_interval; - preempt_enable(); - } - } - return 0; -} - -static int __init balanced_irq_init(void) -{ - int i; - struct cpuinfo_x86 *c; - cpumask_t tmp; - - cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; - /* When not overwritten by the command line ask subarchitecture. */ - if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) - irqbalance_disabled = NO_BALANCE_IRQ; - if (irqbalance_disabled) - return 0; - - /* disable irqbalance completely if there is only one processor online */ - if (num_online_cpus() < 2) { - irqbalance_disabled = 1; - return 0; - } - /* - * Enable physical balance only if more than 1 physical processor - * is present - */ - if (smp_num_siblings > 1 && !cpus_empty(tmp)) - physical_balance = 1; - - for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); - irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); - if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { - printk(KERN_ERR "balanced_irq_init: out of memory"); - goto failed; - } - } - - printk(KERN_INFO "Starting balanced_irq\n"); - if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) - return 0; - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); -failed: - for_each_possible_cpu(i) { - kfree(irq_cpu_data[i].irq_delta); - irq_cpu_data[i].irq_delta = NULL; - kfree(irq_cpu_data[i].last_irq); - irq_cpu_data[i].last_irq = NULL; - } - return 0; -} - -int __devinit irqbalance_disable(char *str) -{ - irqbalance_disabled = 1; - return 1; -} - -__setup("noirqbalance", irqbalance_disable); - -late_initcall(balanced_irq_init); -#endif /* CONFIG_IRQBALANCE */ #endif /* CONFIG_SMP */ #ifndef CONFIG_SMP diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index f6a11b9b1f9..67465ed8931 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) if (!(word & (1 << 13))) { dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " "disabling irq balancing and affinity\n"); -#ifdef CONFIG_IRQBALANCE - irqbalance_disable(""); -#endif noirqdebug_setup(""); #ifdef CONFIG_PROC_FS no_irq_affinity = 1; -- cgit v1.2.3 From a1420f395d7721895c05ba3dedf150294d2c0e4d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:24 -0700 Subject: x86: add irq_cfg for 32bit it only contains vector ... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 71 ++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 668edf22606..033ad953bee 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -94,6 +94,43 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); static int disable_timer_pin_1 __initdata; +struct irq_cfg { + u8 vector; +}; + + +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +static struct irq_cfg irq_cfg_legacy[] __initdata = { + [0] = { .vector = FIRST_DEVICE_VECTOR, }, +}; + +static void __init init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_cfg *cfg; + int legacy_count; + int i; + + cfg = *da->name; + + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + + BUG_ON(legacy_count > nr_irqs); + + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); +} + +static struct irq_cfg *irq_cfgx; +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); + +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + if (irq >= nr_irqs) + return NULL; + + return &irq_cfgx[irq]; +} + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -794,22 +831,6 @@ static inline int IO_APIC_irq_trigger(int irq) return 0; } -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR; -static u8 *irq_vector; - -static void __init irq_vector_init_work(void *data) -{ - struct dyn_array *da = data; - - u8 *irq_vec; - - irq_vec = *da->name; - - irq_vec[0] = irq_vector_init_first; -} - -DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work); static int __assign_irq_vector(int irq) { @@ -818,8 +839,8 @@ static int __assign_irq_vector(int irq) BUG_ON((unsigned)irq >= nr_irqs); - if (irq_vector[irq] > 0) - return irq_vector[irq]; + if (irq_cfg(irq)->vector > 0) + return irq_cfg(irq)->vector; vector = current_vector; offset = current_offset; @@ -836,7 +857,7 @@ next: current_vector = vector; current_offset = offset; - irq_vector[irq] = vector; + irq_cfg(irq)->vector = vector; return vector; } @@ -1598,7 +1619,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro */ - i = irq_vector[irq]; + i = irq_cfg(irq)->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); @@ -1615,7 +1636,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_vector[irq]); + send_IPI_self(irq_cfg(irq)->vector); return 1; } @@ -1651,7 +1672,7 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for (irq = 0; irq < nr_irqs ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { + if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2102,7 +2123,7 @@ int create_irq(void) for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_vector[new] != 0) + if (irq_cfg(new)->vector != 0) continue; vector = __assign_irq_vector(new); if (likely(vector > 0)) @@ -2125,8 +2146,8 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - clear_bit(irq_vector[irq], used_vectors); - irq_vector[irq] = 0; + clear_bit(irq_cfg(irq)->vector, used_vectors); + irq_cfg(irq)->vector = 0; spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3 From da51a821314dd0ec7126f2bf9a62117146fbb6b9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:25 -0700 Subject: x86: make 32bit use irq_cfg_alloc, etc Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 180 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 160 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 033ad953bee..5a83d7f5b14 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -94,41 +94,172 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); static int disable_timer_pin_1 __initdata; +struct irq_cfg; + struct irq_cfg { + unsigned int irq; + struct irq_cfg *next; u8 vector; }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .vector = FIRST_DEVICE_VECTOR, }, + [0] = { .irq = 0, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .vector = IRQ15_VECTOR, }, }; +static struct irq_cfg irq_cfg_init = { .irq = -1U, }; +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; +} + +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +static void init_one_irq_cfg(struct irq_cfg *cfg) +{ + memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); +} + +static struct irq_cfg *irq_cfgx; +static struct irq_cfg *irq_cfgx_free; static void __init init_work(void *data) { - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; + struct dyn_array *da = data; + struct irq_cfg *cfg; + int legacy_count; + int i; + + cfg = *da->name; - cfg = *da->name; + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (i = legacy_count; i < *da->nr; i++) + init_one_irq_cfg(&cfg[i]); - BUG_ON(legacy_count > nr_irqs); + for (i = 1; i < *da->nr; i++) + cfg[i-1].next = &cfg[i]; - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + irq_cfgx_free = &irq_cfgx[legacy_count]; + irq_cfgx[legacy_count - 1].next = NULL; } -static struct irq_cfg *irq_cfgx; -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); +#define for_each_irq_cfg(cfg) \ + for (cfg = irq_cfgx; cfg; cfg = cfg->next) + +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); static struct irq_cfg *irq_cfg(unsigned int irq) { - if (irq >= nr_irqs) - return NULL; + struct irq_cfg *cfg; + + cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; - return &irq_cfgx[irq]; + cfg = cfg->next; + } + + return NULL; +} + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + struct irq_cfg *cfg, *cfg_pri; + int i; + int count = 0; + + cfg_pri = cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + cfg_pri = cfg; + cfg = cfg->next; + count++; + } + + if (!irq_cfgx_free) { + unsigned long phys; + unsigned long total_bytes; + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + + total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; + if (after_bootmem) + cfg = kzalloc(total_bytes, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); + + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); + + phys = __pa(cfg); + printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); + + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = cfg; + } + + cfg = irq_cfgx_free; + irq_cfgx_free = irq_cfgx_free->next; + cfg->next = NULL; + if (cfg_pri) + cfg_pri->next = cfg; + else + irq_cfgx = cfg; + cfg->irq = irq; + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); + +#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG + { + /* dump the results */ + struct irq_cfg *cfg; + unsigned long phys; + unsigned long bytes = sizeof(struct irq_cfg); + + printk(KERN_DEBUG "=========================== %d\n", irq); + printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); + for_each_irq_cfg(cfg) { + phys = __pa(cfg); + printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); + } + printk(KERN_DEBUG "===========================\n"); + } +#endif + return cfg; } /* @@ -254,6 +385,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { struct irq_pin_list *entry = irq_2_pin + irq; + irq_cfg_alloc(irq); while (entry->next) entry = irq_2_pin + entry->next; @@ -836,11 +968,13 @@ static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, current_offset; int vector, offset; + struct irq_cfg *cfg; BUG_ON((unsigned)irq >= nr_irqs); - if (irq_cfg(irq)->vector > 0) - return irq_cfg(irq)->vector; + cfg = irq_cfg(irq); + if (cfg->vector > 0) + return cfg->vector; vector = current_vector; offset = current_offset; @@ -857,7 +991,7 @@ next: current_vector = vector; current_offset = offset; - irq_cfg(irq)->vector = vector; + cfg->vector = vector; return vector; } @@ -1659,6 +1793,7 @@ static inline void init_IO_APIC_traps(void) { int irq; struct irq_desc *desc; + struct irq_cfg *cfg; /* * NOTE! The local APIC isn't very good at handling @@ -1671,8 +1806,9 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < nr_irqs ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) { + for_each_irq_cfg(cfg) { + irq = cfg->irq; + if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2117,14 +2253,18 @@ int create_irq(void) /* Allocate an unused irq */ int irq, new, vector = 0; unsigned long flags; + struct irq_cfg *cfg_new; irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_cfg(new)->vector != 0) + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) continue; + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); vector = __assign_irq_vector(new); if (likely(vector > 0)) irq = new; -- cgit v1.2.3 From 0f978f4505e96227a89b3c9447552aca983c6b57 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:26 -0700 Subject: x86: make 32bit to use irq_2_pin in irq_cfg so it is more like 64 bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 152 +++++++++++++++++++++++++++++++++---------- 1 file changed, 117 insertions(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 5a83d7f5b14..59d2e8a273e 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -95,10 +95,11 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); static int disable_timer_pin_1 __initdata; struct irq_cfg; - +struct irq_pin_list; struct irq_cfg { unsigned int irq; struct irq_cfg *next; + struct irq_pin_list *irq_2_pin; u8 vector; }; @@ -275,11 +276,66 @@ int pin_map_size; * between pins and IRQs. */ -static struct irq_pin_list { - int apic, pin, next; -} *irq_2_pin; +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *irq_2_pin_head; +/* fill one page ? */ +static int nr_irq_2_pin = 0x100; +static struct irq_pin_list *irq_2_pin_ptr; +static void __init irq_2_pin_init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_pin_list *pin; + int i; + + pin = *da->name; + + for (i = 1; i < *da->nr; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = &pin[0]; +} +DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); + +static struct irq_pin_list *get_one_free_irq_2_pin(void) +{ + struct irq_pin_list *pin; + int i; + + pin = irq_2_pin_ptr; + + if (pin) { + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); + + if (after_bootmem) + pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, + GFP_ATOMIC); + else + pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * + nr_irq_2_pin, PAGE_SIZE, 0); + + if (!pin) + panic("can not get more irq_2_pin\n"); -DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL); + for (i = 1; i < nr_irq_2_pin; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = pin->next; + pin->next = NULL; + + return pin; +} struct io_apic { unsigned int index; @@ -383,20 +439,34 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { + entry = get_one_free_irq_2_pin(); + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); + return; + } - irq_cfg_alloc(irq); - while (entry->next) - entry = irq_2_pin + entry->next; + while (entry->next) { + /* not again, please */ + if (entry->apic == apic && entry->pin == pin) + return; - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= pin_map_size) - panic("io_apic.c: whoops"); + entry = entry->next; } + + entry->next = get_one_free_irq_2_pin(); + entry = entry->next; entry->apic = apic; entry->pin = pin; + printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); } /* @@ -406,35 +476,45 @@ static void __init replace_pin_at_irq(unsigned int irq, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; - while (1) { + while (entry) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - } - if (!entry->next) + replaced = 1; + /* every one is different, right? */ break; - entry = irq_2_pin + entry->next; + } + entry = entry->next; } + + /* why? call replace before add? */ + if (!replaced) + add_pin_to_irq(irq, newapic, newpin); } static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; unsigned int pin, reg; + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; for (;;) { - pin = entry->pin; - if (pin == -1) + if (!entry) break; + pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); reg &= ~disable; reg |= enable; io_apic_modify(entry->apic, 0x10 + pin*2, reg); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } } @@ -509,13 +589,18 @@ static void clear_IO_APIC(void) #ifdef CONFIG_SMP static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { + struct irq_cfg *cfg; unsigned long flags; int pin; - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_pin_list *entry; unsigned int apicid_value; cpumask_t tmp; struct irq_desc *desc; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) tmp = TARGET_CPUS; @@ -527,13 +612,13 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) apicid_value = apicid_value << 24; spin_lock_irqsave(&ioapic_lock, flags); for (;;) { - pin = entry->pin; - if (pin == -1) + if (!entry) break; + pin = entry->pin; io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } desc = irq_to_desc(irq); desc->affinity = cpumask; @@ -1152,6 +1237,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; unsigned long flags; + struct irq_cfg *cfg; if (apic_verbosity == APIC_QUIET) return; @@ -1240,16 +1326,16 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < nr_irqs; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) + for_each_irq_cfg(cfg) { + struct irq_pin_list *entry = cfg->irq_2_pin; + if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", i); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } printk("\n"); } @@ -1420,10 +1506,6 @@ static void __init enable_IO_APIC(void) int i, apic; unsigned long flags; - for (i = 0; i < pin_map_size; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } if (!pirqs_enabled) for (i = 0; i < MAX_PIRQS; i++) pirq_entries[i] = -1; -- cgit v1.2.3 From 199751d715bba5b469ea22adadc68a4166bfa4f5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:27 -0700 Subject: x86: make 32 bit to use sparse_irq but actually irq still needs to be less than NR_IRQS, because interrupt[NR_IRQS] in entry.S. need to enable per_cpu vector... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/kernel/io_apic_32.c | 63 ++++++++++++++++++++++++++++++-------------- arch/x86/kernel/irq_32.c | 37 +++++++++++++++++++------- arch/x86/kernel/irqinit_32.c | 7 +++++ 4 files changed, 79 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3e0eaaa1a33..b157e94637b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,7 +34,7 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_DYN_ARRAY - select HAVE_SPARSE_IRQ if X86_64 + select HAVE_SPARSE_IRQ config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 59d2e8a273e..66c0a91362a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -595,7 +595,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) struct irq_pin_list *entry; unsigned int apicid_value; cpumask_t tmp; - struct irq_desc *desc; cfg = irq_cfg(irq); @@ -620,8 +619,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = entry->next; } - desc = irq_to_desc(irq); - desc->affinity = cpumask; + irq_to_desc(irq)->affinity = cpumask; spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1055,8 +1053,6 @@ static int __assign_irq_vector(int irq) int vector, offset; struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= nr_irqs); - cfg = irq_cfg(irq); if (cfg->vector > 0) return cfg->vector; @@ -1103,7 +1099,12 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { struct irq_desc *desc; - desc = irq_to_desc(irq); + /* first time to use this irq_desc */ + if (irq < 16) + desc = irq_to_desc(irq); + else + desc = irq_to_desc_alloc(irq); + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) { desc->status |= IRQ_LEVEL; @@ -2330,16 +2331,19 @@ device_initcall(ioapic_init_sysfs); /* * Dynamic irq allocate and deallocation */ -int create_irq(void) +unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - int irq, new, vector = 0; + unsigned int irq, new, vector = 0; unsigned long flags; struct irq_cfg *cfg_new; - irq = -ENOSPC; + /* only can use bus/dev/fn.. when per_cpu vector is used */ + irq_want = nr_irqs - 1; + + irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = (nr_irqs - 1); new >= 0; new--) { + for (new = (nr_irqs - 1); new > 0; new--) { if (platform_legacy_irq(new)) continue; cfg_new = irq_cfg(new); @@ -2354,13 +2358,18 @@ int create_irq(void) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq >= 0) { + if (irq > 0) { set_intr_gate(vector, interrupt[irq]); dynamic_irq_init(irq); } return irq; } +int create_irq(void) +{ + return create_irq_nr(nr_irqs - 1); +} + void destroy_irq(unsigned int irq) { unsigned long flags; @@ -2415,7 +2424,6 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp; int vector; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2435,8 +2443,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; + irq_to_desc(irq)->affinity = mask; } #endif /* CONFIG_SMP */ @@ -2455,13 +2462,31 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; int irq, ret; - irq = create_irq(); - if (irq < 0) - return irq; + + unsigned int irq_want; + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + + irq = create_irq_nr(irq_want); + + if (irq == 0) + return -1; ret = msi_compose_msg(dev, irq, &msg); if (ret < 0) { @@ -2510,7 +2535,6 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2521,8 +2545,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(mask); target_ht_irq(irq, dest); - desc = irq_to_desc(irq); - desc->affinity = mask; + irq_to_desc(irq)->affinity = mask; } #endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 576c5df6cad..0a57e39159a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -224,9 +224,10 @@ unsigned int do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs; /* high bit used in ret_from_ code */ int overflow, irq = ~regs->orig_ax; - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; - if (unlikely((unsigned)irq >= nr_irqs)) { + desc = irq_to_desc(irq); + if (unlikely(!desc)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", __func__, irq); BUG(); @@ -263,6 +264,24 @@ int show_interrupts(struct seq_file *p, void *v) int i = *(loff_t *) v, j; struct irqaction * action; unsigned long flags; + unsigned int entries; + struct irq_desc *desc; + int tail = 0; + +#ifdef CONFIG_HAVE_SPARSE_IRQ + desc = (struct irq_desc *)v; + entries = -1U; + i = desc->irq; + if (!desc->next) + tail = 1; +#else + entries = nr_irqs - 1; + i = *(loff_t *) v; + if (i == nr_irqs) + tail = 1; + else + desc = irq_to_desc(i); +#endif if (i == 0) { seq_printf(p, " "); @@ -271,9 +290,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < nr_irqs) { + if (i <= entries) { unsigned any_count = 0; - struct irq_desc *desc = irq_to_desc(i); spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP @@ -285,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%3d: ",i); + seq_printf(p, "%#x: ",i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else @@ -304,7 +322,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == nr_irqs) { + } + + if (tail) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); @@ -396,15 +416,14 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; + struct irq_desc *desc; - for (irq = 0; irq < nr_irqs; irq++) { + for_each_irq_desc(irq, desc) { cpumask_t mask; - struct irq_desc *desc; if (irq == 2) continue; - desc = irq_to_desc(irq); cpus_and(mask, desc->affinity, map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 65c1c950770..ded09ac2642 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -69,6 +69,13 @@ void __init init_ISA_irqs (void) * 16 old-style INTA-cycle interrupts: */ for (i = 0; i < 16; i++) { + /* first time call this irq_desc */ + struct irq_desc *desc = irq_to_desc_alloc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + set_irq_chip_and_handler_name(i, &i8259A_chip, handle_level_irq, "XT"); } -- cgit v1.2.3 From 497c9a195db918d3f035e8cb3021e5d4d035516e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:28 -0700 Subject: x86: make 32bit support per_cpu vector so we can merge io_apic_32.c and io_apic_64.c v2: Use cpu_online_map as target cpus for bigsmp, just like 64-bit is doing. Also remove some unused TARGET_CPUS macro. v3: need to check if desc is null in smp_irq_move_cleanup also migration needs to reset vector too, so copy __target_IO_APIC_irq from 64bit. (the duplication will go away once the two files are unified.) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/io_apic_32.c | 719 ++++++++++++++++++++++++++--------------- arch/x86/kernel/irq_32.c | 18 +- arch/x86/kernel/irqinit_32.c | 41 ++- arch/x86/lguest/boot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 4 + arch/x86/mach-generic/es7000.c | 14 + arch/x86/mach-generic/numaq.c | 14 + arch/x86/mach-generic/summit.c | 14 + 9 files changed, 547 insertions(+), 281 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index b21fbfaffe3..4d82171d0f9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -629,7 +629,7 @@ ENTRY(interrupt) ENTRY(irq_entries_start) RING0_INT_FRAME vector=0 -.rept NR_IRQS +.rept NR_VECTORS ALIGN .if vector CFI_ADJUST_CFA_OFFSET -4 diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 66c0a91362a..ea33d3c7497 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -48,6 +48,7 @@ #include #include +#include #include #include @@ -60,7 +61,7 @@ atomic_t irq_mis_count; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); +static DEFINE_SPINLOCK(vector_lock); int timer_through_8259 __initdata; @@ -100,28 +101,32 @@ struct irq_cfg { unsigned int irq; struct irq_cfg *next; struct irq_pin_list *irq_2_pin; + cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; u8 vector; + u8 move_in_progress : 1; }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .irq = 0, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .vector = IRQ15_VECTOR, }, + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; static struct irq_cfg irq_cfg_init = { .irq = -1U, }; @@ -263,6 +268,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) return cfg; } +static int assign_irq_vector(int irq, cpumask_t mask); /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -432,6 +438,65 @@ static void ioapic_mask_entry(int apic, int pin) spin_unlock_irqrestore(&ioapic_lock, flags); } +#ifdef CONFIG_SMP +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + int apic, pin; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin *2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + + cfg = irq_cfg(irq); + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + irq_to_desc(irq)->affinity = mask; + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#endif /* CONFIG_SMP */ + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -586,45 +651,6 @@ static void clear_IO_APIC(void) clear_IO_APIC_pin(apic, pin); } -#ifdef CONFIG_SMP -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) -{ - struct irq_cfg *cfg; - unsigned long flags; - int pin; - struct irq_pin_list *entry; - unsigned int apicid_value; - cpumask_t tmp; - - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - - cpus_and(tmp, cpumask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(cpumask, tmp, CPU_MASK_ALL); - - apicid_value = cpu_mask_to_apicid(cpumask); - /* Prepare to do the io_apic_write */ - apicid_value = apicid_value << 24; - spin_lock_irqsave(&ioapic_lock, flags); - for (;;) { - if (!entry) - break; - pin = entry->pin; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); - if (!entry->next) - break; - entry = entry->next; - } - irq_to_desc(irq)->affinity = cpumask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#endif /* CONFIG_SMP */ - #ifndef CONFIG_SMP void send_IPI_self(int vector) { @@ -789,32 +815,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - #if defined(CONFIG_EISA) || defined(CONFIG_MCA) /* * EISA Edge/Level control register, ELCR @@ -1046,47 +1046,138 @@ static inline int IO_APIC_irq_trigger(int irq) return 0; } +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} -static int __assign_irq_vector(int irq) +void unlock_vector_lock(void) { - static int current_vector = FIRST_DEVICE_VECTOR, current_offset; - int vector, offset; - struct irq_cfg *cfg; + spin_unlock(&vector_lock); +} - cfg = irq_cfg(irq); - if (cfg->vector > 0) - return cfg->vector; +static int __assign_irq_vector(int irq, cpumask_t mask) +{ + static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + unsigned int old_vector; + int cpu; + struct irq_cfg *cfg; - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (vector == current_vector) - return -ENOSPC; - if (test_and_set_bit(vector, used_vectors)) - goto next; + cfg = irq_cfg(irq); - current_vector = vector; - current_offset = offset; - cfg->vector = vector; + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); - return vector; -} + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; -static int assign_irq_vector(int irq) -{ + old_vector = cfg->vector; + if (old_vector) { + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) + return 0; + } + + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; + int new_cpu; + int vector, offset; + + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); + + vector = current_vector; + offset = current_offset; +next: + vector += 8; + if (vector >= first_system_vector) { + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (unlikely(current_vector == vector)) + continue; + if (vector == SYSCALL_VECTOR) + goto next; + + for_each_cpu_mask_nr(new_cpu, new_mask) + if (per_cpu(vector_irq, new_cpu)[vector] != -1) + goto next; + /* Found one! */ + current_vector = vector; + current_offset = offset; + if (old_vector) { + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; + } + for_each_cpu_mask_nr(new_cpu, new_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + return 0; + } + return -ENOSPC; +} + +static int assign_irq_vector(int irq, cpumask_t mask) +{ + int err; unsigned long flags; - int vector; spin_lock_irqsave(&vector_lock, flags); - vector = __assign_irq_vector(irq); + err = __assign_irq_vector(irq, mask); spin_unlock_irqrestore(&vector_lock, flags); - return vector; + return err; +} + +static void __clear_irq_vector(int irq) +{ + struct irq_cfg *cfg; + cpumask_t mask; + int cpu, vector; + + cfg = irq_cfg(irq); + BUG_ON(!cfg->vector); + + vector = cfg->vector; + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + cfg->vector = 0; + cpus_clear(cfg->domain); +} + +void __setup_vector_irq(int cpu) +{ + /* Initialize vector_irq on a new cpu */ + /* This function must be called with vector_lock held */ + int irq, vector; + struct irq_cfg *cfg; + + /* Mark the inuse vectors */ + for_each_irq_cfg(cfg) { + if (!cpu_isset(cpu, cfg->domain)) + continue; + vector = cfg->vector; + irq = cfg->irq; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq < 0) + continue; + + cfg = irq_cfg(irq); + if (!cpu_isset(cpu, cfg->domain)) + per_cpu(vector_irq, cpu)[vector] = -1; + } } static struct irq_chip ioapic_chip; @@ -1095,7 +1186,7 @@ static struct irq_chip ioapic_chip; #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 -static void ioapic_register_intr(int irq, int vector, unsigned long trigger) +static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; @@ -1115,79 +1206,109 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); } - set_intr_gate(vector, interrupt[irq]); } -static void __init setup_IO_APIC_irqs(void) +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) { + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest.logical.logical_dest = destination; + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + + return 0; +} + +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, + int trigger, int polarity) +{ + struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; + cpumask_t mask; + + if (!IO_APIC_IRQ(irq)) + return; + + cfg = irq_cfg(irq); + + mask = TARGET_CPUS; + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(mask, cfg->domain, mask); + + apic_printk(APIC_VERBOSE,KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + irq, trigger, polarity); + + + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } + + ioapic_register_intr(irq, trigger); + if (irq < 16) + disable_8259A_irq(irq); + + ioapic_write_entry(apic, pin, entry); +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry, 0, sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = - cpu_mask_to_apicid(TARGET_CPUS); - - idx = find_irq_entry(apic, pin, mp_INT); + idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else - apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mp_apicid, pin); + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); continue; } - if (!first_notcon) { apic_printk(APIC_VERBOSE, " not connected.\n"); first_notcon = 1; } - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - } - irq = pin_2_irq(idx, apic, pin); - /* - * skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum - */ - if (multi_timer_check(apic, irq)) - continue; - else - add_pin_to_irq(irq, apic, pin); - if (!apic && !IO_APIC_IRQ(irq)) - continue; + if (multi_timer_check(apic, irq)) + continue; - if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); - entry.vector = vector; - ioapic_register_intr(irq, vector, IOAPIC_AUTO); + add_pin_to_irq(irq, apic, pin); - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - ioapic_write_entry(apic, pin, entry); + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); } } @@ -1221,7 +1342,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, * The timer IRQ doesn't have to know that behind the * scene we may have a 8259A-master in AEOI mode ... */ - ioapic_register_intr(0, vector, IOAPIC_EDGE); + ioapic_register_intr(0, IOAPIC_EDGE); /* * Add it to the IO-APIC irq-routing table: @@ -1805,8 +1926,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } +static void irq_complete_move(unsigned int irq); static void ack_ioapic_irq(unsigned int irq) { + irq_complete_move(irq); move_native_irq(irq); ack_APIC_irq(); } @@ -1816,6 +1939,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) unsigned long v; int i; + irq_complete_move(irq); move_native_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 @@ -1858,6 +1982,64 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } +#ifdef CONFIG_SMP +asmlinkage void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + ack_APIC_irq(); + irq_enter(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; + cfg->move_cleanup_count--; +unlock: + spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void irq_complete_move(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + unsigned vector, me; + + if (likely(!cfg->move_in_progress)) + return; + + vector = ~get_irq_regs()->orig_ax; + me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } +} +#else +static inline void irq_complete_move(unsigned int irq) {} +#endif + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, @@ -1940,7 +2122,7 @@ static struct irq_chip lapic_chip __read_mostly = { .ack = ack_lapic_irq, }; -static void lapic_register_intr(int irq, int vector) +static void lapic_register_intr(int irq) { struct irq_desc *desc; @@ -1948,7 +2130,6 @@ static void lapic_register_intr(int irq, int vector) desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); - set_intr_gate(vector, interrupt[irq]); } static void __init setup_nmi(void) @@ -2036,9 +2217,9 @@ static inline void __init unlock_ExtINT_logic(void) */ static inline void __init check_timer(void) { + struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; int no_pin1 = 0; - int vector; unsigned int ver; unsigned long flags; @@ -2051,8 +2232,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - vector = assign_irq_vector(0); - set_intr_gate(vector, interrupt[0]); + assign_irq_vector(0, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2074,7 +2254,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " "apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + cfg->vector, apic1, pin1, apic2, pin2); /* * Some BIOS writers are clueless and report the ExtINTA @@ -2098,7 +2278,7 @@ static inline void __init check_timer(void) */ if (no_pin1) { add_pin_to_irq(0, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, vector); + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } unmask_IO_APIC_irq(0); if (timer_irq_works()) { @@ -2123,7 +2303,7 @@ static inline void __init check_timer(void) * legacy devices should be connected to IO APIC #0 */ replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, vector); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); unmask_IO_APIC_irq(0); enable_8259A_irq(0); if (timer_irq_works()) { @@ -2154,8 +2334,8 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0, vector); - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + lapic_register_intr(0); + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); if (timer_irq_works()) { @@ -2163,7 +2343,7 @@ static inline void __init check_timer(void) goto out; } disable_8259A_irq(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); apic_printk(APIC_QUIET, KERN_INFO @@ -2207,12 +2387,6 @@ out: void __init setup_IO_APIC(void) { - int i; - - /* Reserve all the system vectors. */ - for (i = first_system_vector; i < NR_VECTORS; i++) - set_bit(i, used_vectors); - enable_IO_APIC(); io_apic_irqs = ~PIC_IRQS; @@ -2334,12 +2508,14 @@ device_initcall(ioapic_init_sysfs); unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - unsigned int irq, new, vector = 0; + unsigned int irq, new; unsigned long flags; struct irq_cfg *cfg_new; +#ifndef CONFIG_HAVE_SPARSE_IRQ /* only can use bus/dev/fn.. when per_cpu vector is used */ irq_want = nr_irqs - 1; +#endif irq = 0; spin_lock_irqsave(&vector_lock, flags); @@ -2351,15 +2527,13 @@ unsigned int create_irq_nr(unsigned int irq_want) continue; if (!cfg_new) cfg_new = irq_cfg_alloc(new); - vector = __assign_irq_vector(new); - if (likely(vector > 0)) + if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; } spin_unlock_irqrestore(&vector_lock, flags); if (irq > 0) { - set_intr_gate(vector, interrupt[irq]); dynamic_irq_init(irq); } return irq; @@ -2377,8 +2551,7 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - clear_bit(irq_cfg(irq)->vector, used_vectors); - irq_cfg(irq)->vector = 0; + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } @@ -2388,57 +2561,65 @@ void destroy_irq(unsigned int irq) #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { - int vector; + struct irq_cfg *cfg; + int err; unsigned dest; + cpumask_t tmp; - vector = assign_irq_vector(irq); - if (vector >= 0) { - dest = cpu_mask_to_apicid(TARGET_CPUS); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? -MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (err) + return err; - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? -MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(vector); - } - return vector; + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); + + return err; } #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - int vector; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) - tmp = TARGET_CPUS; + return; - vector = assign_irq_vector(irq); - if (vector < 0) + if (assign_irq_vector(irq, mask)) return; - dest = cpu_mask_to_apicid(mask); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); read_msi_msg(irq, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(vector); + msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); @@ -2517,15 +2698,15 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP -static void target_ht_irq(unsigned int irq, unsigned int dest) +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) { struct ht_irq_msg msg; fetch_ht_irq_msg(irq, &msg); - msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); write_ht_irq_msg(irq, &msg); @@ -2533,18 +2714,22 @@ static void target_ht_irq(unsigned int irq, unsigned int dest) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) - tmp = TARGET_CPUS; + return; - cpus_and(mask, tmp, CPU_MASK_ALL); + if (assign_irq_vector(irq, mask)) + return; - dest = cpu_mask_to_apicid(mask); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); - target_ht_irq(irq, dest); + target_ht_irq(irq, dest, cfg->vector); irq_to_desc(irq)->affinity = mask; } #endif @@ -2562,16 +2747,18 @@ static struct irq_chip ht_irq_chip = { int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { - int vector; + struct irq_cfg *cfg; + int err; + cpumask_t tmp; - vector = assign_irq_vector(irq); - if (vector >= 0) { + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if ( !err) { struct ht_irq_msg msg; unsigned dest; - cpumask_t tmp; - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -2579,7 +2766,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) msg.address_lo = HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(vector) | + HT_IRQ_LOW_VECTOR(cfg->vector) | ((INT_DEST_MODE == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | @@ -2594,7 +2781,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); } - return vector; + return err; } #endif /* CONFIG_HT_IRQ */ @@ -2705,50 +2892,21 @@ int __init io_apic_get_redir_entries(int ioapic) } -int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity) { - struct IO_APIC_route_entry entry; - if (!IO_APIC_IRQ(irq)) { printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } - /* - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. - * Note that we mask (disable) IRQs now -- these get enabled when the - * corresponding device driver registers for this IRQ. - */ - - memset(&entry, 0, sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.trigger = edge_level; - entry.polarity = active_high_low; - entry.mask = 1; - /* * IRQs < 16 are already in the irq_2_pin[] map */ if (irq >= 16) add_pin_to_irq(irq, ioapic, pin); - entry.vector = assign_irq_vector(irq); - - apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, - edge_level, active_high_low); - - ioapic_register_intr(irq, entry.vector, edge_level); - - if (!ioapic && (irq < 16)) - disable_8259A_irq(irq); - - ioapic_write_entry(ioapic, pin, entry); + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); return 0; } @@ -2774,6 +2932,47 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) #endif /* CONFIG_ACPI */ +/* + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online + * so mask in all cases should simply be TARGET_CPUS + */ +#ifdef CONFIG_SMP +void __init setup_ioapic_dest(void) +{ + int pin, ioapic, irq, irq_entry; + struct irq_cfg *cfg; + struct irq_desc *desc; + + if (skip_ioapic_setup == 1) + return; + + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + cfg = irq_cfg(irq); + if (!cfg->vector) + setup_IO_APIC_irq(ioapic, pin, irq, + irq_trigger(irq_entry), + irq_polarity(irq_entry)); + else { + desc = irq_to_desc(irq); + set_ioapic_affinity_irq(irq, TARGET_CPUS); + } + } + + } +} +#endif + static int __init parse_disable_timer_pin_1(char *arg) { disable_timer_pin_1 = 1; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 0a57e39159a..b51ffdcfa31 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -223,21 +223,25 @@ unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int overflow, irq = ~regs->orig_ax; + int overflow; + unsigned vector = ~regs->orig_ax; struct irq_desc *desc; + unsigned irq; - desc = irq_to_desc(irq); - if (unlikely(!desc)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __func__, irq); - BUG(); - } old_regs = set_irq_regs(regs); irq_enter(); + irq = __get_cpu_var(vector_irq)[vector]; overflow = check_stack_overflow(); + desc = irq_to_desc(irq); + if (unlikely(!desc)) { + printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n", + __func__, irq, vector); + BUG(); + } + if (!execute_on_irq_stack(overflow, desc, irq)) { if (unlikely(overflow)) print_stack_overflow(); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index ded09ac2642..9092103a18e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -90,6 +90,27 @@ static struct irqaction irq2 = { .name = "cascade", }; +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, + [IRQ1_VECTOR] = 1, + [IRQ2_VECTOR] = 2, + [IRQ3_VECTOR] = 3, + [IRQ4_VECTOR] = 4, + [IRQ5_VECTOR] = 5, + [IRQ6_VECTOR] = 6, + [IRQ7_VECTOR] = 7, + [IRQ8_VECTOR] = 8, + [IRQ9_VECTOR] = 9, + [IRQ10_VECTOR] = 10, + [IRQ11_VECTOR] = 11, + [IRQ12_VECTOR] = 12, + [IRQ13_VECTOR] = 13, + [IRQ14_VECTOR] = 14, + [IRQ15_VECTOR] = 15, + [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -105,22 +126,14 @@ void __init native_init_IRQ(void) * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= nr_irqs) - break; + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { /* SYSCALL_VECTOR was reserved in trap_init. */ - if (!test_bit(vector, used_vectors)) - set_intr_gate(vector, interrupt[i]); + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i]); } -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -135,6 +148,9 @@ void __init native_init_IRQ(void) /* IPI for single call function */ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -168,3 +184,4 @@ void __init native_init_IRQ(void) irq_ctx_init(smp_processor_id()); } + diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 65f0b8a47be..48ee4f9435f 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void) for (i = 0; i < LGUEST_IRQS; i++) { int vector = FIRST_EXTERNAL_VECTOR + i; if (vector != SYSCALL_VECTOR) { - set_intr_gate(vector, interrupt[i]); + set_intr_gate(vector, interrupt[vector]); set_irq_chip_and_handler_name(i, &lguest_irq_controller, handle_level_irq, "level"); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index df37fc9d6a2..3c3b471ea49 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; +static cpumask_t vector_allocation_domain(int cpu) +{ + return cpumask_of_cpu(cpu); +} static int probe_bigsmp(void) { diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 6513d41ea21..28459cab3dd 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8cf58394975..71a309b122e 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6ad6b67a723..6272b5e69da 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -23,4 +23,18 @@ static int probe_summit(void) return 0; } +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic apic_summit = APIC_INIT("summit", probe_summit); -- cgit v1.2.3 From 7a959cff725872ce9c3a534f10724d7bb2cb3c4a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:32 -0700 Subject: x86: add debug info for 32bit sparse_irq so could figure out bugs where we get an interrupt, but vector_irq is not initialized yet. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 8 ++++++-- arch/x86/kernel/io_apic_64.c | 2 ++ arch/x86/kernel/irq_32.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index ea33d3c7497..3001924bdd3 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1114,8 +1114,12 @@ next: cfg->move_in_progress = 1; cfg->old_domain = cfg->domain; } - for_each_cpu_mask_nr(new_cpu, new_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; + printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector); + for_each_cpu_mask_nr(new_cpu, new_mask) { + per_cpu(vector_irq, new_cpu)[vector] = irq; + printk(KERN_CONT " %d ", new_cpu); + } + printk(KERN_CONT "\n"); cfg->vector = vector; cfg->domain = domain; return 0; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b0d4abc55a1..30d2e381131 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1394,6 +1394,8 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); + printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index b51ffdcfa31..cc929f2f84f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -237,8 +237,8 @@ unsigned int do_IRQ(struct pt_regs *regs) desc = irq_to_desc(irq); if (unlikely(!desc)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n", - __func__, irq, vector); + printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", + __func__, irq, vector, smp_processor_id()); BUG(); } -- cgit v1.2.3 From d83e94acd95789829804fd9e442bd18975f4dc89 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:33 -0700 Subject: x86, io-apic: remove union about dest for log/phy let user decide the meaning of the bits. This unifies the 32-bit and 64-bit io-apic code a bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 3001924bdd3..353e586822a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1224,7 +1224,7 @@ static int setup_ioapic_entry(int apic, int irq, entry->delivery_mode = INT_DELIVERY_MODE; entry->dest_mode = INT_DEST_MODE; - entry->dest.logical.logical_dest = destination; + entry->dest = destination; entry->mask = 0; /* enable IRQ */ entry->trigger = trigger; @@ -1336,7 +1336,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, */ entry.dest_mode = INT_DEST_MODE; entry.mask = 1; /* mask IRQ now */ - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.dest = cpu_mask_to_apicid(TARGET_CPUS); entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; @@ -1425,19 +1425,15 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" - " Stat Dest Deli Vect: \n"); + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" + " Stat Dmod Deli Vect: \n"); for (i = 0; i <= reg_01.bits.entries; i++) { struct IO_APIC_route_entry entry; entry = ioapic_read_entry(apic, i); - printk(KERN_DEBUG " %02x %03X %02X ", - i, - entry.dest.logical.logical_dest, - entry.dest.physical.physical_dest - ); + printk(KERN_DEBUG " %02x %02X ", i, entry.dest); printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", entry.mask, @@ -1717,7 +1713,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = read_apic_id(); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: @@ -2185,7 +2181,7 @@ static inline void __init unlock_ExtINT_logic(void) entry1.dest_mode = 0; /* physical delivery */ entry1.mask = 0; /* unmask IRQ now */ - entry1.dest.physical.physical_dest = hard_smp_processor_id(); + entry1.dest = hard_smp_processor_id(); entry1.delivery_mode = dest_ExtINT; entry1.polarity = entry0.polarity; entry1.trigger = 0; -- cgit v1.2.3 From 1d02519242c23450b043e5e8a9e3cb84a8666fe3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:34 -0700 Subject: x86: ordering functions in io_apic_32.c prepare for unification: try to make functions be of the same order to io_apic_64.c. v2: add calling setup_msi_irq back to arch_setup_msi_irq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 178 +++++++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 84 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 353e586822a..9531ef33362 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1029,23 +1029,6 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} - void lock_vector_lock(void) { /* Used to the online set of cpus does not change @@ -1190,6 +1173,23 @@ static struct irq_chip ioapic_chip; #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} + static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; @@ -1926,55 +1926,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } -static void irq_complete_move(unsigned int irq); -static void ack_ioapic_irq(unsigned int irq) -{ - irq_complete_move(irq); - move_native_irq(irq); - ack_APIC_irq(); -} - -static void ack_ioapic_quirk_irq(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} - static int ioapic_retrigger_irq(unsigned int irq) { send_IPI_self(irq_cfg(irq)->vector); @@ -2040,13 +1991,61 @@ static void irq_complete_move(unsigned int irq) static inline void irq_complete_move(unsigned int irq) {} #endif +static void ack_apic_edge(unsigned int irq) +{ + irq_complete_move(irq); + move_native_irq(irq); + ack_APIC_irq(); +} + +static void ack_apic_level(unsigned int irq) +{ + unsigned long v; + int i; + + irq_complete_move(irq); + move_native_irq(irq); +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, - .ack = ack_ioapic_irq, - .eoi = ack_ioapic_quirk_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, #ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity_irq, #endif @@ -2094,11 +2093,6 @@ static inline void init_IO_APIC_traps(void) * The local APIC irq-chip implementation: */ -static void ack_lapic_irq(unsigned int irq) -{ - ack_APIC_irq(); -} - static void mask_lapic_irq(unsigned int irq) { unsigned long v; @@ -2115,6 +2109,11 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } +static void ack_lapic_irq(unsigned int irq) +{ + ack_APIC_irq(); +} + static struct irq_chip lapic_chip __read_mostly = { .name = "local-APIC", .mask = mask_lapic_irq, @@ -2636,13 +2635,31 @@ static struct irq_chip msi_chip = { .name = "PCI-MSI", .unmask = unmask_msi_irq, .mask = mask_msi_irq, - .ack = ack_ioapic_irq, + .ack = ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = set_msi_irq_affinity, #endif .retrigger = ioapic_retrigger_irq, }; + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) { unsigned int irq; @@ -2657,7 +2674,6 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { - struct msi_msg msg; int irq, ret; unsigned int irq_want; @@ -2669,17 +2685,11 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) if (irq == 0) return -1; - ret = msi_compose_msg(dev, irq, &msg); + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; - } - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, - "edge"); + } return 0; } @@ -2738,7 +2748,7 @@ static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .mask = mask_ht_irq, .unmask = unmask_ht_irq, - .ack = ack_ioapic_irq, + .ack = ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = set_ht_irq_affinity, #endif -- cgit v1.2.3 From 8ea5371baa82db452a8d93e9977b418d30944e32 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:35 -0700 Subject: x86: ordering functions in io_apic_64.c try to make functions have the same order between 32-bit and 64-bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 67 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 30d2e381131..07e1e45ee02 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -409,40 +409,6 @@ static bool io_apic_level_ack_pending(unsigned int irq) return false; } -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; @@ -627,6 +593,39 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ + \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ + for (;;) { \ + unsigned int reg; \ + if (!entry) \ + break; \ + pin = entry->pin; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + FINAL; \ + if (!entry->next) \ + break; \ + entry = entry->next; \ + } \ +} #define DO_ACTION(name,R,ACTION, FINAL) \ \ -- cgit v1.2.3 From efa2559f65167989f1893cb065e3126d4f13ba60 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:36 -0700 Subject: x86: order variables in io_apic_xx.c move first_system_vector to apic_64.c. also add #ifdef CONFIG_INTR_REMAP to prepare 32 bit to use same file. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 5 ++ arch/x86/kernel/io_apic_32.c | 79 ++++++++++----------- arch/x86/kernel/io_apic_64.c | 160 +++++++++++++++++++++++-------------------- 3 files changed, 127 insertions(+), 117 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 94ddb69ae15..4d7a188025b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,10 @@ int x2apic_preenabled; int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 9531ef33362..3010bdd3352 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -54,24 +54,22 @@ #define __apicdebuginit(type) static type __init -int (*ioapic_renumber_irq)(int ioapic, int irq); -atomic_t irq_mis_count; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int timer_through_8259 __initdata; - /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes */ int sis_apic_bug = -1; +static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_SPINLOCK(vector_lock); + int first_free_entry; +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +int pin_map_size; + /* * # of IRQ routing registers */ @@ -93,7 +91,15 @@ int mp_bus_id_to_type[MAX_MP_BUSSES]; DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -static int disable_timer_pin_1 __initdata; +int skip_ioapic_setup; + +static int __init parse_noapic(char *arg) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); struct irq_cfg; struct irq_pin_list; @@ -268,13 +274,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) return cfg; } -static int assign_irq_vector(int irq, cpumask_t mask); -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - /* * This is performance-critical, we want to do it O(1) * @@ -465,6 +464,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) entry = entry->next; } } + +static int assign_irq_vector(int irq, cpumask_t mask); + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_cfg *cfg; @@ -677,7 +679,6 @@ void send_IPI_self(int vector) #define MAX_PIRQS 8 static int pirq_entries [MAX_PIRQS]; static int pirqs_enabled; -int skip_ioapic_setup; static int __init ioapic_pirq_setup(char *str) { @@ -981,6 +982,7 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } +int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; @@ -1621,6 +1623,9 @@ __apicdebuginit(int) print_all_ICs(void) fs_initcall(print_all_ICs); +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + static void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; @@ -1998,6 +2003,7 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } +atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { unsigned long v; @@ -2208,6 +2214,17 @@ static inline void __init unlock_ExtINT_logic(void) ioapic_write_entry(apic, pin, entry0); } +static int disable_timer_pin_1 __initdata; + +static int __init parse_disable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", parse_disable_timer_pin_1); + +int timer_through_8259 __initdata; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2983,28 +3000,6 @@ void __init setup_ioapic_dest(void) } #endif -static int __init parse_disable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", parse_disable_timer_pin_1); - -static int __init parse_enable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = -1; - return 0; -} -early_param("enable_timer_pin_1", parse_enable_timer_pin_1); - -static int __init parse_noapic(char *arg) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 07e1e45ee02..847aa798764 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -57,6 +57,47 @@ #define __apicdebuginit(type) static type __init +int ioapic_force; + +int sis_apic_bug; /* not actually supported, dummy for compile */ + +static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_SPINLOCK(vector_lock); + +int first_free_entry; +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +int pin_map_size; + +/* + * # of IRQ routing registers + */ +int nr_ioapic_registers[MAX_IO_APICS]; + +/* I/O APIC entries */ +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +int nr_ioapics; + +/* MP IRQ source entries */ +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* # of MP IRQ source entries */ +int mp_irq_entries; + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + +int skip_ioapic_setup; + +static int __init parse_noapic(char *str) +{ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); + + struct irq_cfg; struct irq_pin_list; struct irq_cfg { @@ -228,53 +269,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) return cfg; } -static int assign_irq_vector(int irq, cpumask_t mask); - -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - -int sis_apic_bug; /* not actually supported, dummy for compile */ - -static int no_timer_check; - -static int disable_timer_pin_1 __initdata; - -int timer_through_8259 __initdata; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* I/O APIC RTE contents at the OS boot up */ -struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - -/* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; - -/* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ - -int pin_map_size; - /* * This is performance-critical, we want to do it O(1) * @@ -481,12 +475,16 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) apic = entry->apic; pin = entry->pin; +#ifdef CONFIG_INTR_REMAP /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. */ if (!irq_remapped(irq)) io_apic_write(apic, 0x11 + pin*2, dest); +#else + io_apic_write(apic, 0x11 + pin*2, dest); +#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -497,6 +495,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) } } +static int assign_irq_vector(int irq, cpumask_t mask); + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_cfg *cfg = irq_cfg(irq); @@ -533,7 +533,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -int first_free_entry; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { struct irq_cfg *cfg; @@ -679,6 +678,10 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#ifdef CONFIG_INTR_REMAP +/* I/O APIC RTE contents at the OS boot up */ +static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* * Saves and masks all the unmasked IO-APIC RTE's */ @@ -741,25 +744,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping) */ restore_IO_APIC_setup(); } - -int skip_ioapic_setup; -int ioapic_force; - -static int __init parse_noapic(char *str) -{ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 1; -} -__setup("disable_timer_pin_1", disable_timer_pin_setup); - +#endif /* * Find the IRQ entry number of a certain pin. @@ -1327,8 +1312,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) return; +#endif memset(&entry, 0, sizeof(entry)); @@ -1601,6 +1588,9 @@ __apicdebuginit(int) print_all_ICs(void) fs_initcall(print_all_ICs); +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; @@ -1695,6 +1685,15 @@ void disable_IO_APIC(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } +static int no_timer_check; + +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1708,6 +1707,9 @@ static int __init timer_irq_works(void) unsigned long t1 = jiffies; unsigned long flags; + if (no_timer_check) + return 1; + local_save_flags(flags); local_irq_enable(); /* Let ten ticks pass... */ @@ -2239,6 +2241,17 @@ static inline void __init unlock_ExtINT_logic(void) ioapic_write_entry(apic, pin, entry0); } +static int disable_timer_pin_1 __initdata; +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ +static int __init disable_timer_pin_setup(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", disable_timer_pin_setup); + +int timer_through_8259 __initdata; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2286,8 +2299,10 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); +#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2305,7 +2320,7 @@ static inline void __init check_timer(void) setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } unmask_IO_APIC_irq(0); - if (!no_timer_check && timer_irq_works()) { + if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); enable_8259A_irq(0); @@ -2314,8 +2329,10 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); +#endif clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2391,13 +2408,6 @@ out: local_irq_restore(flags); } -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - /* * Traditionally ISA IRQ2 is the cascade IRQ, and is not available * to devices. However there may be an I/O APIC pin available for -- cgit v1.2.3 From d4057bdb6a3bb85dd44f9f39f41eac53696fd637 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:38 -0700 Subject: x86: make headers files the same in io_apic_xx.c also make no_timer_check to be global on 64 bit, because vmi_32 is using that. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 15 ++++++++++++--- arch/x86/kernel/io_apic_64.c | 12 +++++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 3010bdd3352..48184e126f2 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -25,28 +25,37 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include #include #include -#include /* time_after() */ +#include /* time_after() */ +#ifdef CONFIG_ACPI +#include +#endif +#include +#include +#include #include #include #include +#include +#include +#include #include #include #include #include #include #include +#include #include #include diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 847aa798764..3c66e5a8e89 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -27,12 +27,15 @@ #include #include #include +#include #include +#include #include #include #include -#include -#include +#include +#include +#include /* time_after() */ #ifdef CONFIG_ACPI #include #endif @@ -46,14 +49,17 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include +#include #define __apicdebuginit(type) static type __init @@ -1685,7 +1691,7 @@ void disable_IO_APIC(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } -static int no_timer_check; +int no_timer_check __initdata; static int __init notimercheck(char *s) { -- cgit v1.2.3 From f876d213a59c363d2492e399cc6c24edd6f3c368 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:39 -0700 Subject: x86: make 64 handle sis_apic_bug like the 32 bit do we have 64bit system with sis chipset? [ mingo@elte.hu: nope, the problem chipset was 32-bit only. The code symmetry is good nevertheless. ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 3c66e5a8e89..915af9b87aa 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -65,7 +65,11 @@ int ioapic_force; -int sis_apic_bug; /* not actually supported, dummy for compile */ +/* + * Is the SiS APIC rmw bug present ? + * -1 = don't know, 0 = no, 1 = yes + */ +int sis_apic_bug = -1; static DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); @@ -373,9 +377,11 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i * Re-write a value: to be used for read-modify-write * cycles where the read already set up the index register. */ -static inline void io_apic_modify(unsigned int apic, unsigned int value) +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -494,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; - io_apic_modify(apic, reg); + io_apic_modify(apic, 0x10 + pin*2, reg); if (!entry->next) break; entry = entry->next; @@ -624,7 +630,7 @@ static inline void io_apic_sync(unsigned int apic) pin = entry->pin; \ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ - io_apic_modify(entry->apic, reg); \ + io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ FINAL; \ if (!entry->next) \ break; \ @@ -2450,6 +2456,20 @@ void __init setup_IO_APIC(void) check_timer(); } +/* + * Called after all the initialization is done. If we didnt find any + * APIC bugs then we can allow the modify fast path + */ + +static int __init io_apic_bug_finalize(void) +{ + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; +} + +late_initcall(io_apic_bug_finalize); + struct sysfs_ioapic_data { struct sys_device dev; struct IO_APIC_route_entry entry[0]; -- cgit v1.2.3 From aa45f97b1bb40adae1288669e73350907ffae85e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:40 -0700 Subject: x86: remove ioapic_force no user left. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 - arch/x86/kernel/io_apic_64.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4d7a188025b..cd860856a0b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1813,7 +1813,6 @@ static int __init apic_set_verbosity(char *arg) if (!arg) { #ifdef CONFIG_X86_64 skip_ioapic_setup = 0; - ioapic_force = 1; return 0; #endif return -EINVAL; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 915af9b87aa..b70fd823218 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -63,8 +63,6 @@ #define __apicdebuginit(type) static type __init -int ioapic_force; - /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes -- cgit v1.2.3 From 047c8fdb8718890e3340073b178d0859d0c7f91f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:41 -0700 Subject: x86: make io_apic_64.c and io_apic_32.c the same all the same except INTR_REMAPPING related and ioapic io resource. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 213 +++++++++++++- arch/x86/kernel/io_apic_64.c | 663 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 829 insertions(+), 47 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 48184e126f2..3ed36041c81 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -123,7 +123,6 @@ struct irq_cfg { u8 move_in_progress : 1; }; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, @@ -391,6 +390,38 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } +#ifdef CONFIG_X86_64 +static bool io_apic_level_ack_pending(unsigned int irq) +{ + struct irq_pin_list *entry; + unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); + + spin_lock_irqsave(&ioapic_lock, flags); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + int pin; + + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + /* Is the remote IRR bit set? */ + if (reg & IO_APIC_REDIR_REMOTE_IRR) { + spin_unlock_irqrestore(&ioapic_lock, flags); + return true; + } + if (!entry->next) + break; + entry = entry->next; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return false; +} +#endif + union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; @@ -483,17 +514,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp; - cfg = irq_cfg(irq); - cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + cfg = irq_cfg(irq); if (assign_irq_vector(irq, mask)) return; cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); /* * Only the high 8 bits are valid. @@ -572,6 +601,54 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } +#ifdef CONFIG_X86_64 +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ + \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ + for (;;) { \ + unsigned int reg; \ + if (!entry) \ + break; \ + pin = entry->pin; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ + FINAL; \ + if (!entry->next) \ + break; \ + entry = entry->next; \ + } \ +} + +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ + static void name##_IO_APIC_irq (unsigned int irq) \ + __DO_ACTION(R, ACTION, FINAL) + +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + +/* mask = 0 */ +DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) + +#else + static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_cfg *cfg; @@ -620,6 +697,8 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq) IO_APIC_REDIR_MASKED); } +#endif + static void mask_IO_APIC_irq(unsigned int irq) { unsigned long flags; @@ -1055,6 +1134,17 @@ void unlock_vector_lock(void) static int __assign_irq_vector(int irq, cpumask_t mask) { + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; @@ -1095,9 +1185,13 @@ next: } if (unlikely(current_vector == vector)) continue; - if (vector == SYSCALL_VECTOR) +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) goto next; - +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; @@ -1184,6 +1278,7 @@ static struct irq_chip ioapic_chip; #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 +#ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) { int apic, idx, pin; @@ -1200,6 +1295,12 @@ static inline int IO_APIC_irq_trigger(int irq) */ return 0; } +#else +static inline int IO_APIC_irq_trigger(int irq) +{ + return 1; +} +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { @@ -1212,15 +1313,18 @@ static void ioapic_register_intr(int irq, unsigned long trigger) desc = irq_to_desc_alloc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { + trigger == IOAPIC_LEVEL) desc->status |= IRQ_LEVEL; + else + desc->status &= ~IRQ_LEVEL; + + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); - } else { - desc->status &= ~IRQ_LEVEL; + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); - } } static int setup_ioapic_entry(int apic, int irq, @@ -1662,7 +1766,6 @@ static void __init enable_IO_APIC(void) struct IO_APIC_route_entry entry; entry = ioapic_read_entry(apic, pin); - /* If the interrupt line is enabled and in ExtInt mode * I have found the pin where the i8259 is connected. */ @@ -2012,6 +2115,60 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } +#ifdef CONFIG_X86_64 +static void ack_apic_level(unsigned int irq) +{ + int do_unmask_irq = 0; + + irq_complete_move(irq); +#ifdef CONFIG_GENERIC_PENDING_IRQ + /* If we are moving the irq we need to mask it */ + if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_IO_APIC_irq(irq); + } +#endif + + /* + * We must acknowledge the irq before we move it or the acknowledge will + * not propagate properly. + */ + ack_APIC_irq(); + + /* Now we can move and renable the irq */ + if (unlikely(do_unmask_irq)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(irq)) + move_masked_irq(irq, desc); + unmask_IO_APIC_irq(irq); + } +} +#else atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { @@ -2053,6 +2210,7 @@ static void ack_apic_level(unsigned int irq) spin_unlock(&ioapic_lock); } } +#endif static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", @@ -2224,7 +2382,7 @@ static inline void __init unlock_ExtINT_logic(void) } static int disable_timer_pin_1 __initdata; - +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ static int __init parse_disable_timer_pin_1(char *arg) { disable_timer_pin_1 = 1; @@ -2244,9 +2402,9 @@ static inline void __init check_timer(void) { struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; - int no_pin1 = 0; - unsigned int ver; unsigned long flags; + unsigned int ver; + int no_pin1 = 0; local_irq_save(flags); @@ -2550,6 +2708,7 @@ unsigned int create_irq_nr(unsigned int irq_want) cfg_new = irq_cfg(new); if (cfg_new && cfg_new->vector != 0) continue; + /* check if need to create one */ if (!cfg_new) cfg_new = irq_cfg_alloc(new); if (__assign_irq_vector(new, TARGET_CPUS) == 0) @@ -2720,6 +2879,32 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) return 0; } +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int irq; + int ret, sub_handle; + struct msi_desc *desc; + unsigned int irq_want; + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } + return 0; + +error: + destroy_irq(irq); + return ret; +} + + void arch_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b70fd823218..940c4167b32 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -94,18 +94,22 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; static int __init parse_noapic(char *str) { + /* disable IO-APIC */ disable_ioapic_setup(); return 0; } early_param("noapic", parse_noapic); - struct irq_cfg; struct irq_pin_list; struct irq_cfg { @@ -374,6 +378,8 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i /* * Re-write a value: to be used for read-modify-write * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index register */ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { @@ -383,6 +389,7 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } +#ifdef CONFIG_X86_64 static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; @@ -412,6 +419,7 @@ static bool io_apic_level_ack_pending(unsigned int irq) return false; } +#endif union entry_union { struct { u32 w1, w2; }; @@ -509,7 +517,7 @@ static int assign_irq_vector(int irq, cpumask_t mask); static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; @@ -519,12 +527,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) if (cpus_empty(tmp)) return; + cfg = irq_cfg(irq); if (assign_irq_vector(irq, mask)) return; cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - /* * Only the high 8 bits are valid. */ @@ -536,7 +544,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } -#endif +#endif /* CONFIG_SMP */ /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are @@ -602,6 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } +#ifdef CONFIG_X86_64 /* * Synchronize the IO-APIC and the CPU by doing * a dummy read from the IO-APIC @@ -647,6 +656,58 @@ DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) /* mask = 0 */ DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) +#else + +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) +{ + struct irq_cfg *cfg; + struct irq_pin_list *entry; + unsigned int pin, reg; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + reg &= ~disable; + reg |= enable; + io_apic_modify(entry->apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +/* mask = 1 */ +static void __mask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); +} + +/* mask = 0 */ +static void __unmask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); +} + +/* mask = 1, trigger = 0 */ +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER); +} + +/* mask = 0, trigger = 1 */ +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED); +} + +#endif + static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -688,6 +749,64 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) +void send_IPI_self(int vector) +{ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write(APIC_ICR, cfg); +} +#endif /* !CONFIG_SMP && CONFIG_X86_32*/ + +#ifdef CONFIG_X86_32 +/* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. + */ + +#define MAX_PIRQS 8 +static int pirq_entries [MAX_PIRQS]; +static int pirqs_enabled; + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + apic_printk(APIC_VERBOSE, KERN_INFO + "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); +#endif /* CONFIG_X86_32 */ + #ifdef CONFIG_INTR_REMAP /* I/O APIC RTE contents at the OS boot up */ static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; @@ -861,18 +980,51 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return best_guess; } +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +/* + * EISA Edge/Level control register, ELCR + */ +static int EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + apic_printk(APIC_VERBOSE, KERN_INFO + "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +#endif + /* ISA interrupts are always polarity zero edge triggered, * when listed as conforming in the MP table. */ #define default_ISA_trigger(idx) (0) #define default_ISA_polarity(idx) (0) +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_polarity(idx) default_ISA_polarity(idx) + /* PCI interrupts are always polarity one level triggered, * when listed as conforming in the MP table. */ #define default_PCI_trigger(idx) (1) #define default_PCI_polarity(idx) (1) +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) default_ISA_polarity(idx) + static int MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mp_srcbus; @@ -930,6 +1082,36 @@ static int MPBIOS_trigger(int idx) trigger = default_ISA_trigger(idx); else trigger = default_PCI_trigger(idx); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } +#endif break; case 1: /* edge */ { @@ -967,6 +1149,7 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } +int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; @@ -988,7 +1171,32 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; + /* + * For MPS mode, so far only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } + +#ifdef CONFIG_X86_32 + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. + */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + apic_printk(APIC_VERBOSE, KERN_DEBUG + "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } +#endif + return irq; } @@ -1058,8 +1266,13 @@ next: } if (unlikely(current_vector == vector)) continue; +#ifdef CONFIG_X86_64 if (vector == IA32_SYSCALL_VECTOR) goto next; +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; @@ -1140,6 +1353,34 @@ static struct irq_chip ioapic_chip; static struct irq_chip ir_ioapic_chip; #endif +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#ifdef CONFIG_X86_32 +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} +#else +static inline int IO_APIC_irq_trigger(int irq) +{ + return 1; +} +#endif + static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; @@ -1150,7 +1391,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger) else desc = irq_to_desc_alloc(irq); - if (trigger) + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) desc->status |= IRQ_LEVEL; else desc->status &= ~IRQ_LEVEL; @@ -1168,7 +1410,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger) return; } #endif - if (trigger) + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); @@ -1303,6 +1546,10 @@ static void __init setup_IO_APIC_irqs(void) } irq = pin_2_irq(idx, apic, pin); +#ifdef CONFIG_X86_32 + if (multi_timer_check(apic, irq)) + continue; +#endif add_pin_to_irq(irq, apic, pin); setup_IO_APIC_irq(apic, pin, irq, @@ -1360,6 +1607,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; + union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; @@ -1384,6 +1632,8 @@ __apicdebuginit(void) print_IO_APIC(void) reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); @@ -1399,11 +1649,27 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - if (reg_01.bits.version >= 0x10) { + /* + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, + * but the value of reg_02 is read as the previous read register + * value, so ignore it if reg_02 == reg_01. + */ + if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); } + /* + * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 + * or reg_03, but the value of reg_0[23] is read as the previous read + * register value, so ignore it if reg_03 == reg_0[12]. + */ + if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && + reg_03.raw != reg_01.raw) { + printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); + } + printk(KERN_DEBUG ".... IRQ redirection table:\n"); printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" @@ -1475,7 +1741,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base) __apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; - unsigned long icr; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1492,11 +1758,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy) v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + v = apic_read(APIC_PROCPRI); + printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); + } v = apic_read(APIC_EOI); printk(KERN_DEBUG "... APIC EOI: %08x\n", v); @@ -1516,8 +1784,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC IRR field:\n"); print_APIC_bitfield(APIC_IRR); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } icr = apic_icr_read(); printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); @@ -1608,6 +1881,13 @@ void __init enable_IO_APIC(void) int apic; unsigned long flags; +#ifdef CONFIG_X86_32 + int i; + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; +#endif + /* * The number of IO-APIC IRQ registers (== #pins): */ @@ -1636,6 +1916,10 @@ void __init enable_IO_APIC(void) } found_i8259: /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); i8259_apic = find_isa_irq_apic(0, mp_ExtINT); /* Trust the MP table if nothing is setup in the hardware */ @@ -1695,6 +1979,122 @@ void disable_IO_APIC(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } +#ifdef CONFIG_X86_32 +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc(void) +{ + union IO_APIC_reg_00 reg_00; + physid_mask_t phys_id_present_map; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) + return; + + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + /* + * This is broken; anything with a real cpu count has to + * circumvent this idiocy regardless. + */ + phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mp_apicid; + + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", + apic, mp_ioapics[apic].mp_apicid); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + reg_00.bits.ID); + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + } + + /* + * Sanity check, is the ID really free? Every APIC in a + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(phys_id_present_map, + mp_ioapics[apic].mp_apicid)) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", + apic, mp_ioapics[apic].mp_apicid); + for (i = 0; i < get_physical_broadcast(); i++) + if (!physid_isset(i, phys_id_present_map)) + break; + if (i >= get_physical_broadcast()) + panic("Max APIC ID exceeded!\n"); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + physid_set(i, phys_id_present_map); + mp_ioapics[apic].mp_apicid = i; + } else { + physid_mask_t tmp; + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + apic_printk(APIC_VERBOSE, "Setting %d in the " + "phys_id_present_map\n", + mp_ioapics[apic].mp_apicid); + physids_or(phys_id_present_map, phys_id_present_map, tmp); + } + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mp_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic + = mp_ioapics[apic].mp_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. + */ + apic_printk(APIC_VERBOSE, KERN_INFO + "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mp_apicid); + + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + printk("could not set ID!\n"); + else + apic_printk(APIC_VERBOSE, " ok.\n"); + } +} +#endif + int no_timer_check __initdata; static int __init notimercheck(char *s) @@ -1780,8 +2180,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } +#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; @@ -1791,6 +2193,14 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } +#else +static int ioapic_retrigger_irq(unsigned int irq) +{ + send_IPI_self(irq_cfg(irq)->vector); + + return 1; +} +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1952,7 +2362,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; ack_APIC_irq(); +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); me = smp_processor_id(); @@ -2024,6 +2436,7 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } +#ifdef CONFIG_X86_64 static void ack_apic_level(unsigned int irq) { int do_unmask_irq = 0; @@ -2076,6 +2489,49 @@ static void ack_apic_level(unsigned int irq) unmask_IO_APIC_irq(irq); } } +#else +atomic_t irq_mis_count; +static void ack_apic_level(unsigned int irq) +{ + unsigned long v; + int i; + + irq_complete_move(irq); + move_native_irq(irq); + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} +#endif static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", @@ -2141,20 +2597,24 @@ static inline void init_IO_APIC_traps(void) } } -static void unmask_lapic_irq(unsigned int irq) +/* + * The local APIC irq-chip implementation: + */ + +static void mask_lapic_irq(unsigned int irq) { unsigned long v; v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); } -static void mask_lapic_irq(unsigned int irq) +static void unmask_lapic_irq(unsigned int irq) { unsigned long v; v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } static void ack_lapic_irq (unsigned int irq) @@ -2182,19 +2642,19 @@ static void lapic_register_intr(int irq) static void __init setup_nmi(void) { /* - * Dirty trick to enable the NMI watchdog ... + * Dirty trick to enable the NMI watchdog ... * We put the 8259A master into AEOI mode and * unmask on all local APICs LVT0 as NMI. * * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') * is from Maciej W. Rozycki - so we do not have to EOI from * the NMI handler or the timer interrupt. - */ - printk(KERN_INFO "activating NMI Watchdog ..."); + */ + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); enable_NMI_through_LVT0(); - printk(" done.\n"); + apic_printk(APIC_VERBOSE, " done.\n"); } /* @@ -2211,12 +2671,17 @@ static inline void __init unlock_ExtINT_logic(void) unsigned char save_control, save_freq_select; pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) { + WARN_ON_ONCE(1); + return; + } apic = find_isa_irq_apic(8, mp_INT); - if (pin == -1) + if (apic == -1) { + WARN_ON_ONCE(1); return; + } entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2268,17 +2733,21 @@ int timer_through_8259 __initdata; * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. * - * FIXME: really need to revamp this for modern platforms only. + * FIXME: really need to revamp this for all platforms. */ static inline void __init check_timer(void) { struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; unsigned long flags; + unsigned int ver; int no_pin1 = 0; local_irq_save(flags); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + /* * get/set the timer IRQ vector: */ @@ -2287,10 +2756,18 @@ static inline void __init check_timer(void) /* * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. + * wire has to be disabled in the local APIC. Also + * timer interrupts need to be acknowledged manually in + * the 8259A for the i82489DX when using the NMI + * watchdog as that APIC treats NMIs as level-triggered. + * The AEOI mode will finish them in the 8259A + * automatically. */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); +#ifdef CONFIG_X86_32 + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2382,6 +2859,9 @@ static inline void __init check_timer(void) "through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = NMI_NONE; } +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -2435,19 +2915,29 @@ out: * the I/O APIC in all cases now. No actual device should request * it anyway. --macro */ -#define PIC_IRQS (1<<2) +#define PIC_IRQS (1 << PIC_CASCADE_IR) void __init setup_IO_APIC(void) { +#ifdef CONFIG_X86_32 + enable_IO_APIC(); +#else /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ +#endif io_apic_irqs = ~PIC_IRQS; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - + /* + * Set up IO-APIC IRQ routing. + */ +#ifdef CONFIG_X86_32 + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#endif sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); @@ -3128,7 +3618,93 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) #ifdef CONFIG_ACPI -#define IO_APIC_MAX_ID 0xFE +#ifdef CONFIG_X86_32 +int __init io_apic_get_unique_id(int ioapic, int apic_id) +{ + union IO_APIC_reg_00 reg_00; + static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + physid_mask_t tmp; + unsigned long flags; + int i = 0; + + /* + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only + * supports up to 16 on one shared APIC bus. + * + * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full + * advantage of new APIC bus architecture. + */ + + if (physids_empty(apic_id_map)) + apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + if (apic_id >= get_physical_broadcast()) { + printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " + "%d\n", ioapic, apic_id, reg_00.bits.ID); + apic_id = reg_00.bits.ID; + } + + /* + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(apic_id_map, apic_id)) { + + for (i = 0; i < get_physical_broadcast(); i++) { + if (!check_apicid_used(apic_id_map, i)) + break; + } + + if (i == get_physical_broadcast()) + panic("Max apic_id exceeded!\n"); + + printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " + "trying %d\n", ioapic, apic_id, i); + + apic_id = i; + } + + tmp = apicid_to_cpu_present(apic_id); + physids_or(apic_id_map, apic_id_map, tmp); + + if (reg_00.bits.ID != apic_id) { + reg_00.bits.ID = apic_id; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* Sanity check */ + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } + } + + apic_printk(APIC_VERBOSE, KERN_INFO + "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + + return apic_id; +} + +int __init io_apic_get_version(int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.version; +} +#endif int __init io_apic_get_redir_entries (int ioapic) { @@ -3226,6 +3802,7 @@ void __init setup_ioapic_dest(void) } #endif +#ifdef CONFIG_X86_64 #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; @@ -3261,36 +3838,56 @@ static struct resource * __init ioapic_setup_resources(void) return res; } +#endif void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - struct resource *ioapic_res; int i; +#ifdef CONFIG_X86_64 + struct resource *ioapic_res; ioapic_res = ioapic_setup_resources(); +#endif for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; +#ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } +#endif } else { +#ifdef CONFIG_X86_32 +fake_ioapic_page: +#endif ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %016lx (%016lx)\n", + "mapped IOAPIC to %08lx (%08lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; +#ifdef CONFIG_X86_64 if (ioapic_res != NULL) { ioapic_res->start = ioapic_phys; ioapic_res->end = ioapic_phys + (4 * 1024) - 1; ioapic_res++; } +#endif } } +#ifdef CONFIG_X86_64 static int __init ioapic_insert_resources(void) { int i; @@ -3313,4 +3910,4 @@ static int __init ioapic_insert_resources(void) /* Insert the IO APIC resources after PCI initialization has occured to handle * IO APICS that are mapped in on a BAR in PCI space. */ late_initcall(ioapic_insert_resources); - +#endif -- cgit v1.2.3 From 54168ed7f2a4f3fc2780e645124ae952598da601 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Aug 2008 09:07:45 +0200 Subject: x86: make io_apic_32.c the same as io_apic_64.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 1521 ++++++++++++++++++++++++++++++------------ 1 file changed, 1104 insertions(+), 417 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 3ed36041c81..fba6d6ee348 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -35,7 +35,7 @@ #include #include #include -#include /* time_after() */ +#include /* time_after() */ #ifdef CONFIG_ACPI #include #endif @@ -64,8 +64,8 @@ #define __apicdebuginit(type) static type __init /* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes + * Is the SiS APIC rmw bug present ? + * -1 = don't know, 0 = no, 1 = yes */ int sis_apic_bug = -1; @@ -102,7 +102,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; -static int __init parse_noapic(char *arg) +static int __init parse_noapic(char *str) { /* disable IO-APIC */ disable_ioapic_setup(); @@ -188,7 +188,7 @@ static void __init init_work(void *data) irq_cfgx[legacy_count - 1].next = NULL; } -#define for_each_irq_cfg(cfg) \ +#define for_each_irq_cfg(cfg) \ for (cfg = irq_cfgx; cfg; cfg = cfg->next) DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); @@ -262,7 +262,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) irq_cfgx = cfg; cfg->irq = irq; printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); - #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG { /* dump the results */ @@ -384,9 +383,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i */ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { - volatile struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); + struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -494,11 +493,20 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) apic = entry->apic; pin = entry->pin; +#ifdef CONFIG_INTR_REMAP + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); +#else io_apic_write(apic, 0x11 + pin*2, dest); +#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; - io_apic_modify(apic, 0x10 + pin *2, reg); + io_apic_modify(apic, 0x10 + pin*2, reg); if (!entry->next) break; entry = entry->next; @@ -513,6 +521,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) unsigned long flags; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -529,12 +538,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ dest = SET_APIC_LOGICAL_ID(dest); + desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); - irq_to_desc(irq)->affinity = mask; + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } - #endif /* CONFIG_SMP */ /* @@ -699,7 +708,7 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq) #endif -static void mask_IO_APIC_irq(unsigned int irq) +static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -708,7 +717,7 @@ static void mask_IO_APIC_irq(unsigned int irq) spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq(unsigned int irq) +static void unmask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -725,14 +734,13 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) return; - /* * Disable it in the IO-APIC irq-routing table: */ ioapic_mask_entry(apic, pin); } -static void clear_IO_APIC(void) +static void clear_IO_APIC (void) { int apic, pin; @@ -741,7 +749,7 @@ static void clear_IO_APIC(void) clear_IO_APIC_pin(apic, pin); } -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) void send_IPI_self(int vector) { unsigned int cfg; @@ -756,9 +764,9 @@ void send_IPI_self(int vector) */ apic_write(APIC_ICR, cfg); } -#endif /* !CONFIG_SMP */ - +#endif /* !CONFIG_SMP && CONFIG_X86_32*/ +#ifdef CONFIG_X86_32 /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to * specific CPU-side IRQs. @@ -797,6 +805,75 @@ static int __init ioapic_pirq_setup(char *str) } __setup("pirq=", ioapic_pirq_setup); +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_INTR_REMAP +/* I/O APIC RTE contents at the OS boot up */ +static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} +#endif /* * Find the IRQ entry number of a certain pin. @@ -848,7 +925,7 @@ static int __init find_isa_irq_apic(int irq, int type) } if (i < mp_irq_entries) { int apic; - for (apic = 0; apic < nr_ioapics; apic++) { + for(apic = 0; apic < nr_ioapics; apic++) { if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } @@ -867,10 +944,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) { int apic, i, best_guess = -1; - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " - "slot:%d, pin:%d.\n", bus, slot, pin); + apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); if (test_bit(bus, mp_bus_not_pci)) { - printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } for (i = 0; i < mp_irq_entries; i++) { @@ -885,7 +962,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) !mp_irqs[i].mp_irqtype && (bus == lbus) && (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; @@ -902,6 +979,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } return best_guess; } + EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) @@ -918,6 +996,7 @@ static int EISA_ELCR(unsigned int irq) "Broken MPtable reports ISA irq %d\n", irq); return 0; } + #endif /* ISA interrupts are always polarity zero edge triggered, @@ -954,36 +1033,36 @@ static int MPBIOS_polarity(int idx) /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mp_irqflag & 3) { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - polarity = test_bit(bus, mp_bus_not_pci)? - default_ISA_polarity(idx): - default_PCI_polarity(idx); - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ + switch (mp_irqs[idx].mp_irqflag & 3) { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } + case 0: /* conforms, ie. bus-type dependent polarity */ + if (test_bit(bus, mp_bus_not_pci)) + polarity = default_ISA_polarity(idx); + else + polarity = default_PCI_polarity(idx); + break; + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } } return polarity; } @@ -996,67 +1075,67 @@ static int MPBIOS_trigger(int idx) /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { - case 0: /* conforms, ie. bus-type dependent */ + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { - trigger = test_bit(bus, mp_bus_not_pci)? - default_ISA_trigger(idx): - default_PCI_trigger(idx); + case 0: /* conforms, ie. bus-type dependent */ + if (test_bit(bus, mp_bus_not_pci)) + trigger = default_ISA_trigger(idx); + else + trigger = default_PCI_trigger(idx); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } +#endif break; - } - case MP_BUS_EISA: /* EISA pin */ + case 1: /* edge */ { - trigger = default_EISA_trigger(idx); + trigger = 0; break; } - case MP_BUS_PCI: /* PCI pin */ + case 2: /* reserved */ { - /* set before the switch */ + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; break; } - case MP_BUS_MCA: /* MCA pin */ + case 3: /* level */ { - trigger = default_MCA_trigger(idx); + trigger = 1; break; } - default: + default: /* invalid */ { printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; + trigger = 0; break; } } -#endif - break; - } - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } return trigger; } @@ -1082,9 +1161,9 @@ static int pin_2_irq(int idx, int apic, int pin) if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - if (test_bit(bus, mp_bus_not_pci)) + if (test_bit(bus, mp_bus_not_pci)) { irq = mp_irqs[idx].mp_srcbusirq; - else { + } else { /* * PCI IRQs are mapped in order */ @@ -1092,14 +1171,14 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; - - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); + /* + * For MPS mode, so far only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } +#ifdef CONFIG_X86_32 /* * PCI IRQ command line redirection. Yes, limits are hardcoded. */ @@ -1116,6 +1195,8 @@ static int pin_2_irq(int idx, int apic, int pin) } } } +#endif + return irq; } @@ -1145,74 +1226,70 @@ static int __assign_irq_vector(int irq, cpumask_t mask) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - unsigned int old_vector; - int cpu; - struct irq_cfg *cfg; + static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + unsigned int old_vector; + int cpu; + struct irq_cfg *cfg; - cfg = irq_cfg(irq); + cfg = irq_cfg(irq); - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; - old_vector = cfg->vector; - if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) - return 0; - } + old_vector = cfg->vector; + if (old_vector) { + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) + return 0; + } - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; - int new_cpu; - int vector, offset; + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; + int new_cpu; + int vector, offset; - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); - vector = current_vector; - offset = current_offset; + vector = current_vector; + offset = current_offset; next: - vector += 8; - if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (unlikely(current_vector == vector)) - continue; + vector += 8; + if (vector >= first_system_vector) { + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (unlikely(current_vector == vector)) + continue; #ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; + if (vector == IA32_SYSCALL_VECTOR) + goto next; #else - if (vector == SYSCALL_VECTOR) - goto next; + if (vector == SYSCALL_VECTOR) + goto next; #endif - for_each_cpu_mask_nr(new_cpu, new_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) - goto next; - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; - } - printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector); - for_each_cpu_mask_nr(new_cpu, new_mask) { - per_cpu(vector_irq, new_cpu)[vector] = irq; - printk(KERN_CONT " %d ", new_cpu); + for_each_cpu_mask_nr(new_cpu, new_mask) + if (per_cpu(vector_irq, new_cpu)[vector] != -1) + goto next; + /* Found one! */ + current_vector = vector; + current_offset = offset; + if (old_vector) { + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; } - printk(KERN_CONT "\n"); - cfg->vector = vector; - cfg->domain = domain; - return 0; - } - return -ENOSPC; + for_each_cpu_mask_nr(new_cpu, new_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + return 0; + } + return -ENOSPC; } static int assign_irq_vector(int irq, cpumask_t mask) @@ -1223,7 +1300,6 @@ static int assign_irq_vector(int irq, cpumask_t mask) spin_lock_irqsave(&vector_lock, flags); err = __assign_irq_vector(irq, mask); spin_unlock_irqrestore(&vector_lock, flags); - return err; } @@ -1269,36 +1345,39 @@ void __setup_vector_irq(int cpu) cfg = irq_cfg(irq); if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; - } + } } static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) { - int apic, idx, pin; + int apic, idx, pin; - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; } #else static inline int IO_APIC_irq_trigger(int irq) { - return 1; + return 1; } #endif @@ -1318,13 +1397,27 @@ static void ioapic_register_intr(int irq, unsigned long trigger) else desc->status &= ~IRQ_LEVEL; +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + desc->status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); + handle_fasteoi_irq, + "fasteoi"); else set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); + handle_edge_irq, "edge"); } static int setup_ioapic_entry(int apic, int irq, @@ -1337,11 +1430,45 @@ static int setup_ioapic_entry(int apic, int irq, */ memset(entry,0,sizeof(*entry)); - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; - entry->dest = destination; +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; + } - entry->mask = 0; /* enable IRQ */ + entry->mask = 0; /* enable IRQ */ entry->trigger = trigger; entry->polarity = polarity; entry->vector = vector; @@ -1351,12 +1478,11 @@ static int setup_ioapic_entry(int apic, int irq, */ if (trigger) entry->mask = 1; - return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, - int trigger, int polarity) + int trigger, int polarity) { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; @@ -1420,10 +1546,10 @@ static void __init setup_IO_APIC_irqs(void) } irq = pin_2_irq(idx, apic, pin); - +#ifdef CONFIG_X86_32 if (multi_timer_check(apic, irq)) continue; - +#endif add_pin_to_irq(irq, apic, pin); setup_IO_APIC_irq(apic, pin, irq, @@ -1443,6 +1569,11 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + return; +#endif + memset(&entry, 0, sizeof(entry)); /* @@ -1461,7 +1592,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, * The timer IRQ doesn't have to know that behind the * scene we may have a 8259A-master in AEOI mode ... */ - ioapic_register_intr(0, IOAPIC_EDGE); + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); /* * Add it to the IO-APIC irq-routing table: @@ -1501,17 +1632,18 @@ __apicdebuginit(void) print_IO_APIC(void) reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); + printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); @@ -1548,7 +1680,10 @@ __apicdebuginit(void) print_IO_APIC(void) entry = ioapic_read_entry(apic, i); - printk(KERN_DEBUG " %02x %02X ", i, entry.dest); + printk(KERN_DEBUG " %02x %03X ", + i, + entry.dest + ); printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", entry.mask, @@ -1567,7 +1702,7 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_pin_list *entry = cfg->irq_2_pin; if (!entry) continue; - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", cfg->irq); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -1614,8 +1749,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(v)); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1624,7 +1758,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (APIC_INTEGRATED(ver)) { /* !82489DX */ v = apic_read(APIC_ARBPRI); printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, v & APIC_ARBPRI_MASK); @@ -1650,9 +1784,10 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC IRR field:\n"); print_APIC_bitfield(APIC_IRR); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); + v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } @@ -1710,11 +1845,11 @@ __apicdebuginit(void) print_PIC(void) v = inb(0xa0) << 8 | inb(0x20); printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - outb(0x0b, 0xa0); - outb(0x0b, 0x20); + outb(0x0b,0xa0); + outb(0x0b,0x20); v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a, 0xa0); - outb(0x0a, 0x20); + outb(0x0a,0xa0); + outb(0x0a,0x20); spin_unlock_irqrestore(&i8259A_lock, flags); @@ -1739,16 +1874,19 @@ fs_initcall(print_all_ICs); /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static void __init enable_IO_APIC(void) +void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; - int i, apic; + int apic; unsigned long flags; +#ifdef CONFIG_X86_32 + int i; if (!pirqs_enabled) for (i = 0; i < MAX_PIRQS; i++) pirq_entries[i] = -1; +#endif /* * The number of IO-APIC IRQ registers (== #pins): @@ -1759,7 +1897,7 @@ static void __init enable_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } - for (apic = 0; apic < nr_ioapics; apic++) { + for(apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { @@ -1830,16 +1968,18 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = read_apic_id(); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: */ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } + disconnect_bsp_APIC(ioapic_i8259.pin != -1); } +#ifdef CONFIG_X86_32 /* * function to set the IO-APIC physical IDs based on the * values stored in the MPC table. @@ -1940,8 +2080,6 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check @@ -1955,6 +2093,7 @@ static void __init setup_ioapic_ids_from_mpc(void) apic_printk(APIC_VERBOSE, " ok.\n"); } } +#endif int no_timer_check __initdata; @@ -1994,9 +2133,10 @@ static int __init timer_irq_works(void) * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays. */ + + /* jiffies wrap? */ if (time_after(jiffies, t1 + 4)) return 1; - return 0; } @@ -2014,8 +2154,6 @@ static int __init timer_irq_works(void) */ /* - * Startup quirk: - * * Starting up a edge-triggered IO-APIC interrupt is * nasty - we need to make sure that we get the edge. * If it is already asserted for some reason, we need @@ -2023,9 +2161,8 @@ static int __init timer_irq_works(void) * * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... - * - * (We do this for level-triggered IRQs too - it cannot hurt.) */ + static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; @@ -2043,70 +2180,254 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } +#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_cfg(irq)->vector); + + struct irq_cfg *cfg = irq_cfg(irq); + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); return 1; } - -#ifdef CONFIG_SMP -asmlinkage void smp_irq_move_cleanup_interrupt(void) +#else +static int ioapic_retrigger_irq(unsigned int irq) { - unsigned vector, me; - ack_APIC_irq(); - irq_enter(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; + send_IPI_self(irq_cfg(irq)->vector); - desc = irq_to_desc(irq); - if (!desc) - continue; + return 1; +} +#endif - cfg = irq_cfg(irq); - spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. + */ - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) - goto unlock; +#ifdef CONFIG_SMP - __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; -unlock: - spin_unlock(&desc->lock); - } +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); - irq_exit(); -} +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); -static void irq_complete_move(unsigned int irq) +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg(irq); - unsigned vector, me; + struct irq_cfg *cfg; + struct irq_desc *desc; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte; + unsigned int dest; + unsigned long flags; - if (likely(!cfg->move_in_progress)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; - vector = ~get_irq_regs()->orig_ax; - me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; + if (get_irte(irq, &irte)) + return; - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc->affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, desc->pending_mask); + + ret = 0; + desc->status &= ~IRQ_MOVE_PENDING; + cpus_clear(desc->pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + unsigned int irq; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, desc->pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + +asmlinkage void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + ack_APIC_irq(); +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; + cfg->move_cleanup_count--; +unlock: + spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void irq_complete_move(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + unsigned vector, me; + + if (likely(!cfg->move_in_progress)) + return; + + vector = ~get_irq_regs()->orig_ax; + me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); cfg->move_in_progress = 0; } } #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -2118,55 +2439,55 @@ static void ack_apic_edge(unsigned int irq) #ifdef CONFIG_X86_64 static void ack_apic_level(unsigned int irq) { - int do_unmask_irq = 0; + int do_unmask_irq = 0; - irq_complete_move(irq); + irq_complete_move(irq); #ifdef CONFIG_GENERIC_PENDING_IRQ - /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { - do_unmask_irq = 1; - mask_IO_APIC_irq(irq); - } + /* If we are moving the irq we need to mask it */ + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_IO_APIC_irq(irq); + } #endif - /* - * We must acknowledge the irq before we move it or the acknowledge will - * not propagate properly. - */ - ack_APIC_irq(); - - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(irq)) - move_masked_irq(irq, desc); - unmask_IO_APIC_irq(irq); - } + /* + * We must acknowledge the irq before we move it or the acknowledge will + * not propagate properly. + */ + ack_APIC_irq(); + + /* Now we can move and renable the irq */ + if (unlikely(do_unmask_irq)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(irq)) + move_masked_irq(irq); + unmask_IO_APIC_irq(irq); + } } #else atomic_t irq_mis_count; @@ -2177,25 +2498,25 @@ static void ack_apic_level(unsigned int irq) irq_complete_move(irq); move_native_irq(irq); -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ i = irq_cfg(irq)->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); @@ -2225,6 +2546,20 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif static inline void init_IO_APIC_traps(void) { @@ -2282,7 +2617,7 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } -static void ack_lapic_irq(unsigned int irq) +static void ack_lapic_irq (unsigned int irq) { ack_APIC_irq(); } @@ -2383,12 +2718,12 @@ static inline void __init unlock_ExtINT_logic(void) static int disable_timer_pin_1 __initdata; /* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init parse_disable_timer_pin_1(char *arg) +static int __init disable_timer_pin_setup(char *arg) { disable_timer_pin_1 = 1; return 0; } -early_param("disable_timer_pin_1", parse_disable_timer_pin_1); +early_param("disable_timer_pin_1", disable_timer_pin_setup); int timer_through_8259 __initdata; @@ -2397,6 +2732,8 @@ int timer_through_8259 __initdata; * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. + * + * FIXME: really need to revamp this for all platforms. */ static inline void __init check_timer(void) { @@ -2408,8 +2745,8 @@ static inline void __init check_timer(void) local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2428,7 +2765,9 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); +#ifdef CONFIG_X86_32 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2447,6 +2786,10 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); +#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2473,6 +2816,10 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); +#endif clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2512,7 +2859,9 @@ static inline void __init check_timer(void) "through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = NMI_NONE; } +#ifdef CONFIG_X86_32 timer_ack = 0; +#endif apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -2570,17 +2919,25 @@ out: void __init setup_IO_APIC(void) { + +#ifdef CONFIG_X86_32 enable_IO_APIC(); +#else + /* + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ +#endif io_apic_irqs = ~PIC_IRQS; - printk("ENABLING IO-APIC IRQs\n"); - - /* - * Set up IO-APIC IRQ routing. - */ - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); + apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + /* + * Set up IO-APIC IRQ routing. + */ +#ifdef CONFIG_X86_32 + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#endif sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); @@ -2588,15 +2945,15 @@ void __init setup_IO_APIC(void) } /* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path + * Called after all the initialization is done. If we didnt find any + * APIC bugs then we can allow the modify fast path */ static int __init io_apic_bug_finalize(void) { - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; } late_initcall(io_apic_bug_finalize); @@ -2605,7 +2962,7 @@ struct sysfs_ioapic_data { struct sys_device dev; struct IO_APIC_route_entry entry[0]; }; -static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { @@ -2615,8 +2972,8 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state) data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - entry[i] = ioapic_read_entry(dev->id, i); + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) + *entry = ioapic_read_entry(dev->id, i); return 0; } @@ -2653,14 +3010,14 @@ static struct sysdev_class ioapic_sysdev_class = { static int __init ioapic_init_sysfs(void) { - struct sys_device *dev; - int i, size, error = 0; + struct sys_device * dev; + int i, size, error; error = sysdev_class_register(&ioapic_sysdev_class); if (error) return error; - for (i = 0; i < nr_ioapics; i++) { + for (i = 0; i < nr_ioapics; i++ ) { size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); @@ -2691,18 +3048,18 @@ device_initcall(ioapic_init_sysfs); unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - unsigned int irq, new; + unsigned int irq; + unsigned int new; unsigned long flags; struct irq_cfg *cfg_new; #ifndef CONFIG_HAVE_SPARSE_IRQ - /* only can use bus/dev/fn.. when per_cpu vector is used */ irq_want = nr_irqs - 1; #endif irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = (nr_irqs - 1); new > 0; new--) { + for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; cfg_new = irq_cfg(new); @@ -2725,7 +3082,14 @@ unsigned int create_irq_nr(unsigned int irq_want) int create_irq(void) { - return create_irq_nr(nr_irqs - 1); + int irq; + + irq = create_irq_nr(nr_irqs - 1); + + if (irq == 0) + irq = -1; + + return irq; } void destroy_irq(unsigned int irq) @@ -2734,6 +3098,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2759,25 +3126,54 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); + } return err; } @@ -2788,6 +3184,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2808,8 +3205,61 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - irq_to_desc(irq)->affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2827,6 +3277,45 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { @@ -2840,7 +3329,17 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) set_irq_msi(irq, desc); write_msi_msg(irq, &msg); - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_to_desc(irq); + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); return 0; } @@ -2859,59 +3358,164 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { - int irq, ret; - + unsigned int irq; + int ret; unsigned int irq_want; irq_want = build_irq_for_pci_dev(dev) + 0x100; irq = create_irq_nr(irq_want); - if (irq == 0) return -1; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; - } - + } return 0; + +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif } int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - unsigned int irq; - int ret, sub_handle; - struct msi_desc *desc; - unsigned int irq_want; - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); - if (irq == 0) - return -1; - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) - goto error; - sub_handle++; - } - return 0; + unsigned int irq; + int ret, sub_handle; + struct msi_desc *desc; + unsigned int irq_want; + +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } + return 0; error: - destroy_irq(irq); - return ret; + destroy_irq(irq); + return ret; } - void arch_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); } -#endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + +#endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support */ @@ -2938,6 +3542,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2951,7 +3556,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - irq_to_desc(irq)->affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif @@ -2974,7 +3580,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if ( !err) { + if (!err) { struct ht_irq_msg msg; unsigned dest; @@ -3007,11 +3613,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) #endif /* CONFIG_HT_IRQ */ /* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration + ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI +#ifdef CONFIG_X86_32 int __init io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; @@ -3086,7 +3693,6 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } - int __init io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3098,9 +3704,9 @@ int __init io_apic_get_version(int ioapic) return reg_01.bits.version; } +#endif - -int __init io_apic_get_redir_entries(int ioapic) +int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3113,10 +3719,10 @@ int __init io_apic_get_redir_entries(int ioapic) } -int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity) +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { if (!IO_APIC_IRQ(irq)) { - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } @@ -3132,6 +3738,7 @@ int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int po return 0; } + int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) { int i; @@ -3163,7 +3770,6 @@ void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; struct irq_cfg *cfg; - struct irq_desc *desc; if (skip_ioapic_setup == 1) return; @@ -3184,43 +3790,124 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); - else { - desc = irq_to_desc(irq); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + else set_ioapic_affinity_irq(irq, TARGET_CPUS); - } } } } #endif +#ifdef CONFIG_X86_64 +#define IOAPIC_RESOURCE_NAME_SIZE 11 + +static struct resource *ioapic_resources; + +static struct resource * __init ioapic_setup_resources(void) +{ + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} +#endif + void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; +#ifdef CONFIG_X86_64 + struct resource *ioapic_res; + ioapic_res = ioapic_setup_resources(); +#endif for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } +#ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } +#endif } else { +#ifdef CONFIG_X86_32 fake_ioapic_page: +#endif ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, + "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); idx++; + +#ifdef CONFIG_X86_64 + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } +#endif } } +#ifdef CONFIG_X86_64 +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk(KERN_ERR + "IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif -- cgit v1.2.3 From 26d347c2c035b1f4c5b3c5094f3046db9ec920f5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:42 -0700 Subject: rename io_apic_64.c and io_apic_32.c to io_apic.c The two files are now line by line equal. (sans a printk) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/io_apic.c | 3913 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/io_apic_32.c | 3913 ------------------------------------------ arch/x86/kernel/io_apic_64.c | 3913 ------------------------------------------ 4 files changed, 3914 insertions(+), 7827 deletions(-) create mode 100644 arch/x86/kernel/io_apic.c delete mode 100644 arch/x86/kernel/io_apic_32.c delete mode 100644 arch/x86/kernel/io_apic_64.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0d41f0343dc..7264f9c3af8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,7 +61,7 @@ obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o -obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o +obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c new file mode 100644 index 00000000000..fba6d6ee348 --- /dev/null +++ b/arch/x86/kernel/io_apic.c @@ -0,0 +1,3913 @@ +/* + * Intel IO-APIC support for multi-Pentium hosts. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * + * Many thanks to Stig Venaas for trying out countless experimental + * patches and reporting/debugging problems patiently! + * + * (c) 1999, Multiple IO-APIC support, developed by + * Ken-ichi Yaku and + * Hidemi Kishimoto , + * further tested and cleaned up by Zach Brown + * and Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively + * Paul Diefenbaugh : Added full ACPI support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* time_after() */ +#ifdef CONFIG_ACPI +#include +#endif +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define __apicdebuginit(type) static type __init + +/* + * Is the SiS APIC rmw bug present ? + * -1 = don't know, 0 = no, 1 = yes + */ +int sis_apic_bug = -1; + +static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_SPINLOCK(vector_lock); + +int first_free_entry; +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +int pin_map_size; + +/* + * # of IRQ routing registers + */ +int nr_ioapic_registers[MAX_IO_APICS]; + +/* I/O APIC entries */ +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +int nr_ioapics; + +/* MP IRQ source entries */ +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* # of MP IRQ source entries */ +int mp_irq_entries; + +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + +int skip_ioapic_setup; + +static int __init parse_noapic(char *str) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); + +struct irq_cfg; +struct irq_pin_list; +struct irq_cfg { + unsigned int irq; + struct irq_cfg *next; + struct irq_pin_list *irq_2_pin; + cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; + u8 vector; + u8 move_in_progress : 1; +}; + +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +static struct irq_cfg irq_cfg_legacy[] __initdata = { + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, +}; + +static struct irq_cfg irq_cfg_init = { .irq = -1U, }; +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; +} + +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +static void init_one_irq_cfg(struct irq_cfg *cfg) +{ + memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); +} + +static struct irq_cfg *irq_cfgx; +static struct irq_cfg *irq_cfgx_free; +static void __init init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_cfg *cfg; + int legacy_count; + int i; + + cfg = *da->name; + + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (i = legacy_count; i < *da->nr; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < *da->nr; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = &irq_cfgx[legacy_count]; + irq_cfgx[legacy_count - 1].next = NULL; +} + +#define for_each_irq_cfg(cfg) \ + for (cfg = irq_cfgx; cfg; cfg = cfg->next) + +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); + +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + struct irq_cfg *cfg; + + cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + cfg = cfg->next; + } + + return NULL; +} + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + struct irq_cfg *cfg, *cfg_pri; + int i; + int count = 0; + + cfg_pri = cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + cfg_pri = cfg; + cfg = cfg->next; + count++; + } + + if (!irq_cfgx_free) { + unsigned long phys; + unsigned long total_bytes; + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + + total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; + if (after_bootmem) + cfg = kzalloc(total_bytes, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); + + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); + + phys = __pa(cfg); + printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); + + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = cfg; + } + + cfg = irq_cfgx_free; + irq_cfgx_free = irq_cfgx_free->next; + cfg->next = NULL; + if (cfg_pri) + cfg_pri->next = cfg; + else + irq_cfgx = cfg; + cfg->irq = irq; + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); +#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG + { + /* dump the results */ + struct irq_cfg *cfg; + unsigned long phys; + unsigned long bytes = sizeof(struct irq_cfg); + + printk(KERN_DEBUG "=========================== %d\n", irq); + printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); + for_each_irq_cfg(cfg) { + phys = __pa(cfg); + printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); + } + printk(KERN_DEBUG "===========================\n"); + } +#endif + return cfg; +} + +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *irq_2_pin_head; +/* fill one page ? */ +static int nr_irq_2_pin = 0x100; +static struct irq_pin_list *irq_2_pin_ptr; +static void __init irq_2_pin_init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_pin_list *pin; + int i; + + pin = *da->name; + + for (i = 1; i < *da->nr; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = &pin[0]; +} +DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); + +static struct irq_pin_list *get_one_free_irq_2_pin(void) +{ + struct irq_pin_list *pin; + int i; + + pin = irq_2_pin_ptr; + + if (pin) { + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); + + if (after_bootmem) + pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, + GFP_ATOMIC); + else + pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * + nr_irq_2_pin, PAGE_SIZE, 0); + + if (!pin) + panic("can not get more irq_2_pin\n"); + + for (i = 1; i < nr_irq_2_pin; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = pin->next; + pin->next = NULL; + + return pin; +} + +struct io_apic { + unsigned int index; + unsigned int unused[3]; + unsigned int data; +}; + +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) +{ + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); +} + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + return readl(&io_apic->data); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +} + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index register + */ +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +} + +#ifdef CONFIG_X86_64 +static bool io_apic_level_ack_pending(unsigned int irq) +{ + struct irq_pin_list *entry; + unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); + + spin_lock_irqsave(&ioapic_lock, flags); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + int pin; + + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + /* Is the remote IRR bit set? */ + if (reg & IO_APIC_REDIR_REMOTE_IRR) { + spin_unlock_irqrestore(&ioapic_lock, flags); + return true; + } + if (!entry->next) + break; + entry = entry->next; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return false; +} +#endif + +union entry_union { + struct { u32 w1, w2; }; + struct IO_APIC_route_entry entry; +}; + +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +{ + union entry_union eu; + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + return eu.entry; +} + +/* + * When we write a new IO APIC routing entry, we need to write the high + * word first! If the mask bit in the low word is clear, we will enable + * the interrupt, and we need to make sure the entry is fully populated + * before that happens. + */ +static void +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + union entry_union eu; + eu.entry = e; + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, e); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +/* + * When we mask an IO APIC routing entry, we need to write the low + * word first, in order to set the mask bit before we change the + * high bits! + */ +static void ioapic_mask_entry(int apic, int pin) +{ + unsigned long flags; + union entry_union eu = { .entry.mask = 1 }; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#ifdef CONFIG_SMP +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + int apic, pin; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; +#ifdef CONFIG_INTR_REMAP + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); +#else + io_apic_write(apic, 0x11 + pin*2, dest); +#endif + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +static int assign_irq_vector(int irq, cpumask_t mask); + +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + cfg = irq_cfg(irq); + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + desc = irq_to_desc(irq); + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + desc->affinity = mask; + spin_unlock_irqrestore(&ioapic_lock, flags); +} +#endif /* CONFIG_SMP */ + +/* + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are + * shared ISA-space IRQs, so we have to support them. We are super + * fast in the common case, and fast for shared ISA-space IRQs. + */ +static void add_pin_to_irq(unsigned int irq, int apic, int pin) +{ + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { + entry = get_one_free_irq_2_pin(); + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); + return; + } + + while (entry->next) { + /* not again, please */ + if (entry->apic == apic && entry->pin == pin) + return; + + entry = entry->next; + } + + entry->next = get_one_free_irq_2_pin(); + entry = entry->next; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); +} + +/* + * Reroute an IRQ to a different pin. + */ +static void __init replace_pin_at_irq(unsigned int irq, + int oldapic, int oldpin, + int newapic, int newpin) +{ + struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; + + while (entry) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; + replaced = 1; + /* every one is different, right? */ + break; + } + entry = entry->next; + } + + /* why? call replace before add? */ + if (!replaced) + add_pin_to_irq(irq, newapic, newpin); +} + +#ifdef CONFIG_X86_64 +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ + \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ + for (;;) { \ + unsigned int reg; \ + if (!entry) \ + break; \ + pin = entry->pin; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ + FINAL; \ + if (!entry->next) \ + break; \ + entry = entry->next; \ + } \ +} + +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ + static void name##_IO_APIC_irq (unsigned int irq) \ + __DO_ACTION(R, ACTION, FINAL) + +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + +/* mask = 0 */ +DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) + +#else + +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) +{ + struct irq_cfg *cfg; + struct irq_pin_list *entry; + unsigned int pin, reg; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + reg &= ~disable; + reg |= enable; + io_apic_modify(entry->apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +/* mask = 1 */ +static void __mask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); +} + +/* mask = 0 */ +static void __unmask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); +} + +/* mask = 1, trigger = 0 */ +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER); +} + +/* mask = 0, trigger = 1 */ +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED); +} + +#endif + +static void mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __mask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void unmask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +{ + struct IO_APIC_route_entry entry; + + /* Check delivery_mode to be sure we're not clearing an SMI pin */ + entry = ioapic_read_entry(apic, pin); + if (entry.delivery_mode == dest_SMI) + return; + /* + * Disable it in the IO-APIC irq-routing table: + */ + ioapic_mask_entry(apic, pin); +} + +static void clear_IO_APIC (void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); +} + +#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) +void send_IPI_self(int vector) +{ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write(APIC_ICR, cfg); +} +#endif /* !CONFIG_SMP && CONFIG_X86_32*/ + +#ifdef CONFIG_X86_32 +/* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. + */ + +#define MAX_PIRQS 8 +static int pirq_entries [MAX_PIRQS]; +static int pirqs_enabled; + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + apic_printk(APIC_VERBOSE, KERN_INFO + "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_INTR_REMAP +/* I/O APIC RTE contents at the OS boot up */ +static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} +#endif + +/* + * Find the IRQ entry number of a certain pin. + */ +static int find_irq_entry(int apic, int pin, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) + return i; + + return -1; +} + +/* + * Find the pin to which IRQ[irq] (ISA) is connected + */ +static int __init find_isa_irq_pin(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mp_srcbus; + + if (test_bit(lbus, mp_bus_not_pci) && + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) + + return mp_irqs[i].mp_dstirq; + } + return -1; +} + +static int __init find_isa_irq_apic(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mp_srcbus; + + if (test_bit(lbus, mp_bus_not_pci) && + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) + break; + } + if (i < mp_irq_entries) { + int apic; + for(apic = 0; apic < nr_ioapics; apic++) { + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) + return apic; + } + } + + return -1; +} + +/* + * Find a specific PCI IRQ entry. + * Not an __init, possibly needed by modules + */ +static int pin_2_irq(int idx, int apic, int pin); + +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +{ + int apic, i, best_guess = -1; + + apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if (test_bit(bus, mp_bus_not_pci)) { + apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mp_srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) + break; + + if (!test_bit(lbus, mp_bus_not_pci) && + !mp_irqs[i].mp_irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) + return irq; + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) + best_guess = irq; + } + } + return best_guess; +} + +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +/* + * EISA Edge/Level control register, ELCR + */ +static int EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + apic_printk(APIC_VERBOSE, KERN_INFO + "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +#endif + +/* ISA interrupts are always polarity zero edge triggered, + * when listed as conforming in the MP table. */ + +#define default_ISA_trigger(idx) (0) +#define default_ISA_polarity(idx) (0) + +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_polarity(idx) default_ISA_polarity(idx) + +/* PCI interrupts are always polarity one level triggered, + * when listed as conforming in the MP table. */ + +#define default_PCI_trigger(idx) (1) +#define default_PCI_polarity(idx) (1) + +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) default_ISA_polarity(idx) + +static int MPBIOS_polarity(int idx) +{ + int bus = mp_irqs[idx].mp_srcbus; + int polarity; + + /* + * Determine IRQ line polarity (high active or low active): + */ + switch (mp_irqs[idx].mp_irqflag & 3) + { + case 0: /* conforms, ie. bus-type dependent polarity */ + if (test_bit(bus, mp_bus_not_pci)) + polarity = default_ISA_polarity(idx); + else + polarity = default_PCI_polarity(idx); + break; + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + return polarity; +} + +static int MPBIOS_trigger(int idx) +{ + int bus = mp_irqs[idx].mp_srcbus; + int trigger; + + /* + * Determine IRQ trigger mode (edge or level sensitive): + */ + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) + { + case 0: /* conforms, ie. bus-type dependent */ + if (test_bit(bus, mp_bus_not_pci)) + trigger = default_ISA_trigger(idx); + else + trigger = default_PCI_trigger(idx); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } +#endif + break; + case 1: /* edge */ + { + trigger = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + case 3: /* level */ + { + trigger = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 0; + break; + } + } + return trigger; +} + +static inline int irq_polarity(int idx) +{ + return MPBIOS_polarity(idx); +} + +static inline int irq_trigger(int idx) +{ + return MPBIOS_trigger(idx); +} + +int (*ioapic_renumber_irq)(int ioapic, int irq); +static int pin_2_irq(int idx, int apic, int pin) +{ + int irq, i; + int bus = mp_irqs[idx].mp_srcbus; + + /* + * Debugging check, we are in big trouble if this message pops up! + */ + if (mp_irqs[idx].mp_dstirq != pin) + printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + + if (test_bit(bus, mp_bus_not_pci)) { + irq = mp_irqs[idx].mp_srcbusirq; + } else { + /* + * PCI IRQs are mapped in order + */ + i = irq = 0; + while (i < apic) + irq += nr_ioapic_registers[i++]; + irq += pin; + /* + * For MPS mode, so far only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); + } + +#ifdef CONFIG_X86_32 + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. + */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + apic_printk(APIC_VERBOSE, KERN_DEBUG + "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } +#endif + + return irq; +} + +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + spin_unlock(&vector_lock); +} + +static int __assign_irq_vector(int irq, cpumask_t mask) +{ + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + unsigned int old_vector; + int cpu; + struct irq_cfg *cfg; + + cfg = irq_cfg(irq); + + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); + + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; + + old_vector = cfg->vector; + if (old_vector) { + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) + return 0; + } + + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; + int new_cpu; + int vector, offset; + + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); + + vector = current_vector; + offset = current_offset; +next: + vector += 8; + if (vector >= first_system_vector) { + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (unlikely(current_vector == vector)) + continue; +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) + goto next; +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif + for_each_cpu_mask_nr(new_cpu, new_mask) + if (per_cpu(vector_irq, new_cpu)[vector] != -1) + goto next; + /* Found one! */ + current_vector = vector; + current_offset = offset; + if (old_vector) { + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; + } + for_each_cpu_mask_nr(new_cpu, new_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + return 0; + } + return -ENOSPC; +} + +static int assign_irq_vector(int irq, cpumask_t mask) +{ + int err; + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + err = __assign_irq_vector(irq, mask); + spin_unlock_irqrestore(&vector_lock, flags); + return err; +} + +static void __clear_irq_vector(int irq) +{ + struct irq_cfg *cfg; + cpumask_t mask; + int cpu, vector; + + cfg = irq_cfg(irq); + BUG_ON(!cfg->vector); + + vector = cfg->vector; + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + cfg->vector = 0; + cpus_clear(cfg->domain); +} + +void __setup_vector_irq(int cpu) +{ + /* Initialize vector_irq on a new cpu */ + /* This function must be called with vector_lock held */ + int irq, vector; + struct irq_cfg *cfg; + + /* Mark the inuse vectors */ + for_each_irq_cfg(cfg) { + if (!cpu_isset(cpu, cfg->domain)) + continue; + vector = cfg->vector; + irq = cfg->irq; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq < 0) + continue; + + cfg = irq_cfg(irq); + if (!cpu_isset(cpu, cfg->domain)) + per_cpu(vector_irq, cpu)[vector] = -1; + } +} + +static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif + +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#ifdef CONFIG_X86_32 +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} +#else +static inline int IO_APIC_irq_trigger(int irq) +{ + return 1; +} +#endif + +static void ioapic_register_intr(int irq, unsigned long trigger) +{ + struct irq_desc *desc; + + /* first time to use this irq_desc */ + if (irq < 16) + desc = irq_to_desc(irq); + else + desc = irq_to_desc_alloc(irq); + + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + desc->status |= IRQ_LEVEL; + else + desc->status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + desc->status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; + } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; +} + +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, + int trigger, int polarity) +{ + struct irq_cfg *cfg; + struct IO_APIC_route_entry entry; + cpumask_t mask; + + if (!IO_APIC_IRQ(irq)) + return; + + cfg = irq_cfg(irq); + + mask = TARGET_CPUS; + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(mask, cfg->domain, mask); + + apic_printk(APIC_VERBOSE,KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + irq, trigger, polarity); + + + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } + + ioapic_register_intr(irq, trigger); + if (irq < 16) + disable_8259A_irq(irq); + + ioapic_write_entry(apic, pin, entry); +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + idx = find_irq_entry(apic,pin,mp_INT); + if (idx == -1) { + if (first_notcon) { + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); + first_notcon = 0; + } else + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); + continue; + } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } + + irq = pin_2_irq(idx, apic, pin); +#ifdef CONFIG_X86_32 + if (multi_timer_check(apic, irq)) + continue; +#endif + add_pin_to_irq(irq, apic, pin); + + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); + } + } + + if (!first_notcon) + apic_printk(APIC_VERBOSE, " not connected.\n"); +} + +/* + * Set up the timer pin, possibly with the 8259A-master behind. + */ +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, + int vector) +{ + struct IO_APIC_route_entry entry; + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + return; +#endif + + memset(&entry, 0, sizeof(entry)); + + /* + * We use logical delivery to get the timer IRQ + * to the first CPU. + */ + entry.dest_mode = INT_DEST_MODE; + entry.mask = 1; /* mask IRQ now */ + entry.dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.polarity = 0; + entry.trigger = 0; + entry.vector = vector; + + /* + * The timer IRQ doesn't have to know that behind the + * scene we may have a 8259A-master in AEOI mode ... + */ + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); + + /* + * Add it to the IO-APIC irq-routing table: + */ + ioapic_write_entry(apic, pin, entry); +} + + +__apicdebuginit(void) print_IO_APIC(void) +{ + int apic, i; + union IO_APIC_reg_00 reg_00; + union IO_APIC_reg_01 reg_01; + union IO_APIC_reg_02 reg_02; + union IO_APIC_reg_03 reg_03; + unsigned long flags; + struct irq_cfg *cfg; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + for (i = 0; i < nr_ioapics; i++) + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); + + /* + * We are a bit conservative about what we expect. We have to + * know about every hardware change ASAP. + */ + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + reg_01.raw = io_apic_read(apic, 1); + if (reg_01.bits.version >= 0x10) + reg_02.raw = io_apic_read(apic, 2); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk("\n"); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); + printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); + printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); + + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); + + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); + printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); + + /* + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, + * but the value of reg_02 is read as the previous read register + * value, so ignore it if reg_02 == reg_01. + */ + if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { + printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); + printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); + } + + /* + * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 + * or reg_03, but the value of reg_0[23] is read as the previous read + * register value, so ignore it if reg_03 == reg_0[12]. + */ + if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && + reg_03.raw != reg_01.raw) { + printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); + } + + printk(KERN_DEBUG ".... IRQ redirection table:\n"); + + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" + " Stat Dmod Deli Vect: \n"); + + for (i = 0; i <= reg_01.bits.entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + printk(KERN_DEBUG " %02x %03X ", + i, + entry.dest + ); + + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector + ); + } + } + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for_each_irq_cfg(cfg) { + struct irq_pin_list *entry = cfg->irq_2_pin; + if (!entry) + continue; + printk(KERN_DEBUG "IRQ%d ", cfg->irq); + for (;;) { + printk("-> %d:%d", entry->apic, entry->pin); + if (!entry->next) + break; + entry = entry->next; + } + printk("\n"); + } + + printk(KERN_INFO ".................................... done.\n"); + + return; +} + +__apicdebuginit(void) print_APIC_bitfield(int base) +{ + unsigned int v; + int i, j; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); + for (i = 0; i < 8; i++) { + v = apic_read(base + i*0x10); + for (j = 0; j < 32; j++) { + if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } + + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + + v = apic_read(APIC_LVTT); + printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + printk("\n"); +} + +__apicdebuginit(void) print_all_local_APICs(void) +{ + on_each_cpu(print_local_APIC, NULL, 1); +} + +__apicdebuginit(void) print_PIC(void) +{ + unsigned int v; + unsigned long flags; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "\nprinting PIC contents\n"); + + spin_lock_irqsave(&i8259A_lock, flags); + + v = inb(0xa1) << 8 | inb(0x21); + printk(KERN_DEBUG "... PIC IMR: %04x\n", v); + + v = inb(0xa0) << 8 | inb(0x20); + printk(KERN_DEBUG "... PIC IRR: %04x\n", v); + + outb(0x0b,0xa0); + outb(0x0b,0x20); + v = inb(0xa0) << 8 | inb(0x20); + outb(0x0a,0xa0); + outb(0x0a,0x20); + + spin_unlock_irqrestore(&i8259A_lock, flags); + + printk(KERN_DEBUG "... PIC ISR: %04x\n", v); + + v = inb(0x4d1) << 8 | inb(0x4d0); + printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); +} + +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + + +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + +void __init enable_IO_APIC(void) +{ + union IO_APIC_reg_01 reg_01; + int i8259_apic, i8259_pin; + int apic; + unsigned long flags; + +#ifdef CONFIG_X86_32 + int i; + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; +#endif + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + for(apic = 0; apic < nr_ioapics; apic++) { + int pin; + /* See if any of the pins is in ExtINT mode */ + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + entry = ioapic_read_entry(apic, pin); + + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ + if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + ioapic_i8259.apic = apic; + ioapic_i8259.pin = pin; + goto found_i8259; + } + } + } + found_i8259: + /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ + i8259_pin = find_isa_irq_pin(0, mp_ExtINT); + i8259_apic = find_isa_irq_apic(0, mp_ExtINT); + /* Trust the MP table if nothing is setup in the hardware */ + if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { + printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); + ioapic_i8259.pin = i8259_pin; + ioapic_i8259.apic = i8259_apic; + } + /* Complain if the MP table and the hardware disagree */ + if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && + (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) + { + printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); + } + + /* + * Do not trust the IO-APIC being empty at bootup + */ + clear_IO_APIC(); +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ + /* + * Clear the IO-APIC before rebooting: + */ + clear_IO_APIC(); + + /* + * If the i8259 is routed through an IOAPIC + * Put that IOAPIC in virtual wire mode + * so legacy interrupts can be delivered. + */ + if (ioapic_i8259.pin != -1) { + struct IO_APIC_route_entry entry; + + memset(&entry, 0, sizeof(entry)); + entry.mask = 0; /* Enabled */ + entry.trigger = 0; /* Edge */ + entry.irr = 0; + entry.polarity = 0; /* High */ + entry.delivery_status = 0; + entry.dest_mode = 0; /* Physical */ + entry.delivery_mode = dest_ExtINT; /* ExtInt */ + entry.vector = 0; + entry.dest = read_apic_id(); + + /* + * Add it to the IO-APIC irq-routing table: + */ + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); + } + + disconnect_bsp_APIC(ioapic_i8259.pin != -1); +} + +#ifdef CONFIG_X86_32 +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc(void) +{ + union IO_APIC_reg_00 reg_00; + physid_mask_t phys_id_present_map; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) + return; + + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + /* + * This is broken; anything with a real cpu count has to + * circumvent this idiocy regardless. + */ + phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mp_apicid; + + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", + apic, mp_ioapics[apic].mp_apicid); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + reg_00.bits.ID); + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + } + + /* + * Sanity check, is the ID really free? Every APIC in a + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(phys_id_present_map, + mp_ioapics[apic].mp_apicid)) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", + apic, mp_ioapics[apic].mp_apicid); + for (i = 0; i < get_physical_broadcast(); i++) + if (!physid_isset(i, phys_id_present_map)) + break; + if (i >= get_physical_broadcast()) + panic("Max APIC ID exceeded!\n"); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + physid_set(i, phys_id_present_map); + mp_ioapics[apic].mp_apicid = i; + } else { + physid_mask_t tmp; + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + apic_printk(APIC_VERBOSE, "Setting %d in the " + "phys_id_present_map\n", + mp_ioapics[apic].mp_apicid); + physids_or(phys_id_present_map, phys_id_present_map, tmp); + } + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mp_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic + = mp_ioapics[apic].mp_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. + */ + apic_printk(APIC_VERBOSE, KERN_INFO + "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mp_apicid); + + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + printk("could not set ID!\n"); + else + apic_printk(APIC_VERBOSE, " ok.\n"); + } +} +#endif + +int no_timer_check __initdata; + +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + +/* + * There is a nasty bug in some older SMP boards, their mptable lies + * about the timer IRQ. We do the following to work around the situation: + * + * - timer IRQ defaults to IO-APIC IRQ + * - if this function detects that timer IRQs are defunct, then we fall + * back to ISA timer IRQs + */ +static int __init timer_irq_works(void) +{ + unsigned long t1 = jiffies; + unsigned long flags; + + if (no_timer_check) + return 1; + + local_save_flags(flags); + local_irq_enable(); + /* Let ten ticks pass... */ + mdelay((10 * 1000) / HZ); + local_irq_restore(flags); + + /* + * Expect a few ticks at least, to be sure some possible + * glue logic does not lock up after one or two first + * ticks in a non-ExtINT mode. Also the local APIC + * might have cached one ExtINT interrupt. Finally, at + * least one tick may be lost due to delays. + */ + + /* jiffies wrap? */ + if (time_after(jiffies, t1 + 4)) + return 1; + return 0; +} + +/* + * In the SMP+IOAPIC case it might happen that there are an unspecified + * number of pending IRQ events unhandled. These cases are very rare, + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much + * better to do it this way as thus we do not have to be aware of + * 'pending' interrupts in the IRQ path, except at this point. + */ +/* + * Edge triggered needs to resend any interrupt + * that was delayed but this is now handled in the device + * independent code. + */ + +/* + * Starting up a edge-triggered IO-APIC interrupt is + * nasty - we need to make sure that we get the edge. + * If it is already asserted for some reason, we need + * return 1 to indicate that is was pending. + * + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... + */ + +static unsigned int startup_ioapic_irq(unsigned int irq) +{ + int was_pending = 0; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + if (irq < 16) { + disable_8259A_irq(irq); + if (i8259A_irq_pending(irq)) + was_pending = 1; + } + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +} + +#ifdef CONFIG_X86_64 +static int ioapic_retrigger_irq(unsigned int irq) +{ + + struct irq_cfg *cfg = irq_cfg(irq); + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +} +#else +static int ioapic_retrigger_irq(unsigned int irq) +{ + send_IPI_self(irq_cfg(irq)->vector); + + return 1; +} +#endif + +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. + */ + +#ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc->affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, desc->pending_mask); + + ret = 0; + desc->status &= ~IRQ_MOVE_PENDING; + cpus_clear(desc->pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + unsigned int irq; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, desc->pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + +asmlinkage void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + ack_APIC_irq(); +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; + cfg->move_cleanup_count--; +unlock: + spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void irq_complete_move(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + unsigned vector, me; + + if (likely(!cfg->move_in_progress)) + return; + + vector = ~get_irq_regs()->orig_ax; + me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } +} +#else +static inline void irq_complete_move(unsigned int irq) {} +#endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif + +static void ack_apic_edge(unsigned int irq) +{ + irq_complete_move(irq); + move_native_irq(irq); + ack_APIC_irq(); +} + +#ifdef CONFIG_X86_64 +static void ack_apic_level(unsigned int irq) +{ + int do_unmask_irq = 0; + + irq_complete_move(irq); +#ifdef CONFIG_GENERIC_PENDING_IRQ + /* If we are moving the irq we need to mask it */ + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_IO_APIC_irq(irq); + } +#endif + + /* + * We must acknowledge the irq before we move it or the acknowledge will + * not propagate properly. + */ + ack_APIC_irq(); + + /* Now we can move and renable the irq */ + if (unlikely(do_unmask_irq)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(irq)) + move_masked_irq(irq); + unmask_IO_APIC_irq(irq); + } +} +#else +atomic_t irq_mis_count; +static void ack_apic_level(unsigned int irq) +{ + unsigned long v; + int i; + + irq_complete_move(irq); + move_native_irq(irq); + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} +#endif + +static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + +static inline void init_IO_APIC_traps(void) +{ + int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + for_each_irq_cfg(cfg) { + irq = cfg->irq; + if (IO_APIC_IRQ(irq) && !cfg->vector) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 + * interrupt if we can.. + */ + if (irq < 16) + make_8259A_irq(irq); + else { + desc = irq_to_desc(irq); + /* Strange. Oh, well.. */ + desc->chip = &no_irq_chip; + } + } + } +} + +/* + * The local APIC irq-chip implementation: + */ + +static void mask_lapic_irq(unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); +} + +static void unmask_lapic_irq(unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); +} + +static void ack_lapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +static struct irq_chip lapic_chip __read_mostly = { + .name = "local-APIC", + .mask = mask_lapic_irq, + .unmask = unmask_lapic_irq, + .ack = ack_lapic_irq, +}; + +static void lapic_register_intr(int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; + set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, + "edge"); +} + +static void __init setup_nmi(void) +{ + /* + * Dirty trick to enable the NMI watchdog ... + * We put the 8259A master into AEOI mode and + * unmask on all local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. + */ + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); + + enable_NMI_through_LVT0(); + + apic_printk(APIC_VERBOSE, " done.\n"); +} + +/* + * This looks a bit hackish but it's about the only one way of sending + * a few INTA cycles to 8259As and any associated glue logic. ICR does + * not support the ExtINT mode, unfortunately. We need to send these + * cycles as some i82489DX-based boards have glue logic that keeps the + * 8259A interrupt line asserted until INTA. --macro + */ +static inline void __init unlock_ExtINT_logic(void) +{ + int apic, pin, i; + struct IO_APIC_route_entry entry0, entry1; + unsigned char save_control, save_freq_select; + + pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) { + WARN_ON_ONCE(1); + return; + } + apic = find_isa_irq_apic(8, mp_INT); + if (apic == -1) { + WARN_ON_ONCE(1); + return; + } + + entry0 = ioapic_read_entry(apic, pin); + clear_IO_APIC_pin(apic, pin); + + memset(&entry1, 0, sizeof(entry1)); + + entry1.dest_mode = 0; /* physical delivery */ + entry1.mask = 0; /* unmask IRQ now */ + entry1.dest = hard_smp_processor_id(); + entry1.delivery_mode = dest_ExtINT; + entry1.polarity = entry0.polarity; + entry1.trigger = 0; + entry1.vector = 0; + + ioapic_write_entry(apic, pin, entry1); + + save_control = CMOS_READ(RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, + RTC_FREQ_SELECT); + CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); + + i = 100; + while (i-- > 0) { + mdelay(10); + if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) + i -= 10; + } + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + clear_IO_APIC_pin(apic, pin); + + ioapic_write_entry(apic, pin, entry0); +} + +static int disable_timer_pin_1 __initdata; +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ +static int __init disable_timer_pin_setup(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", disable_timer_pin_setup); + +int timer_through_8259 __initdata; + +/* + * This code may look a bit paranoid, but it's supposed to cooperate with + * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ + * is so screwy. Thanks to Brian Perkins for testing/hacking this beast + * fanatically on his truly buggy board. + * + * FIXME: really need to revamp this for all platforms. + */ +static inline void __init check_timer(void) +{ + struct irq_cfg *cfg = irq_cfg(0); + int apic1, pin1, apic2, pin2; + unsigned long flags; + unsigned int ver; + int no_pin1 = 0; + + local_irq_save(flags); + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); + assign_irq_vector(0, TARGET_CPUS); + + /* + * As IRQ0 is to be enabled in the 8259A, the virtual + * wire has to be disabled in the local APIC. Also + * timer interrupts need to be acknowledged manually in + * the 8259A for the i82489DX when using the NMI + * watchdog as that APIC treats NMIs as level-triggered. + * The AEOI mode will finish them in the 8259A + * automatically. + */ + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + init_8259A(1); +#ifdef CONFIG_X86_32 + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +#endif + + pin1 = find_isa_irq_pin(0, mp_INT); + apic1 = find_isa_irq_apic(0, mp_INT); + pin2 = ioapic_i8259.pin; + apic2 = ioapic_i8259.apic; + + apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " + "apic1=%d pin1=%d apic2=%d pin2=%d\n", + cfg->vector, apic1, pin1, apic2, pin2); + + /* + * Some BIOS writers are clueless and report the ExtINTA + * I/O APIC input from the cascaded 8259A as the timer + * interrupt input. So just in case, if only one pin + * was found above, try it both directly and through the + * 8259A. + */ + if (pin1 == -1) { +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); +#endif + pin1 = pin2; + apic1 = apic2; + no_pin1 = 1; + } else if (pin2 == -1) { + pin2 = pin1; + apic2 = apic1; + } + + if (pin1 != -1) { + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + if (no_pin1) { + add_pin_to_irq(0, apic1, pin1); + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } + unmask_IO_APIC_irq(0); + if (timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); + enable_8259A_irq(0); + } + if (disable_timer_pin_1 > 0) + clear_IO_APIC_pin(0, pin1); + goto out; + } +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); +#endif + clear_IO_APIC_pin(apic1, pin1); + if (!no_pin1) + apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " + "8254 timer not connected to IO-APIC\n"); + + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " + "(IRQ0) through the 8259A ...\n"); + apic_printk(APIC_QUIET, KERN_INFO + "..... (found apic %d pin %d) ...\n", apic2, pin2); + /* + * legacy devices should be connected to IO APIC #0 + */ + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); + unmask_IO_APIC_irq(0); + enable_8259A_irq(0); + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); + timer_through_8259 = 1; + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + } + goto out; + } + /* + * Cleanup, just in case ... + */ + disable_8259A_irq(0); + clear_IO_APIC_pin(apic2, pin2); + apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); + } + + if (nmi_watchdog == NMI_IO_APIC) { + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " + "through the IO-APIC - disabling NMI Watchdog!\n"); + nmi_watchdog = NMI_NONE; + } +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif + + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as Virtual Wire IRQ...\n"); + + lapic_register_intr(0); + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + enable_8259A_irq(0); + + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + goto out; + } + disable_8259A_irq(0); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); + + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as ExtINT IRQ...\n"); + + init_8259A(0); + make_8259A_irq(0); + apic_write(APIC_LVT0, APIC_DM_EXTINT); + + unlock_ExtINT_logic(); + + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + goto out; + } + apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); + panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " + "report. Then try booting with the 'noapic' option.\n"); +out: + local_irq_restore(flags); +} + +/* + * Traditionally ISA IRQ2 is the cascade IRQ, and is not available + * to devices. However there may be an I/O APIC pin available for + * this interrupt regardless. The pin may be left unconnected, but + * typically it will be reused as an ExtINT cascade interrupt for + * the master 8259A. In the MPS case such a pin will normally be + * reported as an ExtINT interrupt in the MP table. With ACPI + * there is no provision for ExtINT interrupts, and in the absence + * of an override it would be treated as an ordinary ISA I/O APIC + * interrupt, that is edge-triggered and unmasked by default. We + * used to do this, but it caused problems on some systems because + * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using + * the same ExtINT cascade interrupt to drive the local APIC of the + * bootstrap processor. Therefore we refrain from routing IRQ2 to + * the I/O APIC in all cases now. No actual device should request + * it anyway. --macro + */ +#define PIC_IRQS (1 << PIC_CASCADE_IR) + +void __init setup_IO_APIC(void) +{ + +#ifdef CONFIG_X86_32 + enable_IO_APIC(); +#else + /* + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ +#endif + + io_apic_irqs = ~PIC_IRQS; + + apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + /* + * Set up IO-APIC IRQ routing. + */ +#ifdef CONFIG_X86_32 + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#endif + sync_Arb_IDs(); + setup_IO_APIC_irqs(); + init_IO_APIC_traps(); + check_timer(); +} + +/* + * Called after all the initialization is done. If we didnt find any + * APIC bugs then we can allow the modify fast path + */ + +static int __init io_apic_bug_finalize(void) +{ + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; +} + +late_initcall(io_apic_bug_finalize); + +struct sysfs_ioapic_data { + struct sys_device dev; + struct IO_APIC_route_entry entry[0]; +}; +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; + +static int ioapic_suspend(struct sys_device *dev, pm_message_t state) +{ + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) + *entry = ioapic_read_entry(dev->id, i); + + return 0; +} + +static int ioapic_resume(struct sys_device *dev) +{ + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; + unsigned long flags; + union IO_APIC_reg_00 reg_00; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(dev->id, 0); + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; + io_apic_write(dev->id, 0, reg_00.raw); + } + spin_unlock_irqrestore(&ioapic_lock, flags); + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) + ioapic_write_entry(dev->id, i, entry[i]); + + return 0; +} + +static struct sysdev_class ioapic_sysdev_class = { + .name = "ioapic", + .suspend = ioapic_suspend, + .resume = ioapic_resume, +}; + +static int __init ioapic_init_sysfs(void) +{ + struct sys_device * dev; + int i, size, error; + + error = sysdev_class_register(&ioapic_sysdev_class); + if (error) + return error; + + for (i = 0; i < nr_ioapics; i++ ) { + size = sizeof(struct sys_device) + nr_ioapic_registers[i] + * sizeof(struct IO_APIC_route_entry); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); + if (!mp_ioapic_data[i]) { + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); + continue; + } + dev = &mp_ioapic_data[i]->dev; + dev->id = i; + dev->cls = &ioapic_sysdev_class; + error = sysdev_register(dev); + if (error) { + kfree(mp_ioapic_data[i]); + mp_ioapic_data[i] = NULL; + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); + continue; + } + } + + return 0; +} + +device_initcall(ioapic_init_sysfs); + +/* + * Dynamic irq allocate and deallocation + */ +unsigned int create_irq_nr(unsigned int irq_want) +{ + /* Allocate an unused irq */ + unsigned int irq; + unsigned int new; + unsigned long flags; + struct irq_cfg *cfg_new; + +#ifndef CONFIG_HAVE_SPARSE_IRQ + irq_want = nr_irqs - 1; +#endif + + irq = 0; + spin_lock_irqsave(&vector_lock, flags); + for (new = irq_want; new > 0; new--) { + if (platform_legacy_irq(new)) + continue; + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) + continue; + /* check if need to create one */ + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); + if (__assign_irq_vector(new, TARGET_CPUS) == 0) + irq = new; + break; + } + spin_unlock_irqrestore(&vector_lock, flags); + + if (irq > 0) { + dynamic_irq_init(irq); + } + return irq; +} + +int create_irq(void) +{ + int irq; + + irq = create_irq_nr(nr_irqs - 1); + + if (irq == 0) + irq = -1; + + return irq; +} + +void destroy_irq(unsigned int irq) +{ + unsigned long flags; + + dynamic_irq_cleanup(irq); + +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); +} + +/* + * MSI message composition + */ +#ifdef CONFIG_PCI_MSI +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + struct irq_cfg *cfg; + int err; + unsigned dest; + cpumask_t tmp; + + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (err) + return err; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); + } + return err; +} + +#ifdef CONFIG_SMP +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + read_msi_msg(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + write_msi_msg(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif +#endif /* CONFIG_SMP */ + +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. + */ +static struct irq_chip msi_chip = { + .name = "PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_to_desc(irq); + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ + unsigned int irq; + int ret; + unsigned int irq_want; + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + + irq = create_irq_nr(irq_want); + if (irq == 0) + return -1; + +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) { + destroy_irq(irq); + return ret; + } + return 0; + +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} + +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int irq; + int ret, sub_handle; + struct msi_desc *desc; + unsigned int irq_want; + +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } + return 0; + +error: + destroy_irq(irq); + return ret; +} + +void arch_teardown_msi_irq(unsigned int irq) +{ + destroy_irq(irq); +} + +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + +#endif /* CONFIG_PCI_MSI */ +/* + * Hypertransport interrupt support + */ +#ifdef CONFIG_HT_IRQ + +#ifdef CONFIG_SMP + +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + struct ht_irq_msg msg; + fetch_ht_irq_msg(irq, &msg); + + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); + + write_ht_irq_msg(irq, &msg); +} + +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + target_ht_irq(irq, dest, cfg->vector); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif + +static struct irq_chip ht_irq_chip = { + .name = "PCI-HT", + .mask = mask_ht_irq, + .unmask = unmask_ht_irq, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = set_ht_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +{ + struct irq_cfg *cfg; + int err; + cpumask_t tmp; + + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (!err) { + struct ht_irq_msg msg; + unsigned dest; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | + ((INT_DEST_MODE == 0) ? + HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; + + write_ht_irq_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); + } + return err; +} +#endif /* CONFIG_HT_IRQ */ + +/* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI + +#ifdef CONFIG_X86_32 +int __init io_apic_get_unique_id(int ioapic, int apic_id) +{ + union IO_APIC_reg_00 reg_00; + static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + physid_mask_t tmp; + unsigned long flags; + int i = 0; + + /* + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only + * supports up to 16 on one shared APIC bus. + * + * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full + * advantage of new APIC bus architecture. + */ + + if (physids_empty(apic_id_map)) + apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + if (apic_id >= get_physical_broadcast()) { + printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " + "%d\n", ioapic, apic_id, reg_00.bits.ID); + apic_id = reg_00.bits.ID; + } + + /* + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(apic_id_map, apic_id)) { + + for (i = 0; i < get_physical_broadcast(); i++) { + if (!check_apicid_used(apic_id_map, i)) + break; + } + + if (i == get_physical_broadcast()) + panic("Max apic_id exceeded!\n"); + + printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " + "trying %d\n", ioapic, apic_id, i); + + apic_id = i; + } + + tmp = apicid_to_cpu_present(apic_id); + physids_or(apic_id_map, apic_id_map, tmp); + + if (reg_00.bits.ID != apic_id) { + reg_00.bits.ID = apic_id; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* Sanity check */ + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } + } + + apic_printk(APIC_VERBOSE, KERN_INFO + "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + + return apic_id; +} + +int __init io_apic_get_version(int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.version; +} +#endif + +int __init io_apic_get_redir_entries (int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.entries; +} + + +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) +{ + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); + return -EINVAL; + } + + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ + if (irq >= 16) + add_pin_to_irq(irq, ioapic, pin); + + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + + return 0; +} + + +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +{ + int i; + + if (skip_ioapic_setup) + return -1; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) + break; + if (i >= mp_irq_entries) + return -1; + + *trigger = irq_trigger(i); + *polarity = irq_polarity(i); + return 0; +} + +#endif /* CONFIG_ACPI */ + +/* + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online + * so mask in all cases should simply be TARGET_CPUS + */ +#ifdef CONFIG_SMP +void __init setup_ioapic_dest(void) +{ + int pin, ioapic, irq, irq_entry; + struct irq_cfg *cfg; + + if (skip_ioapic_setup == 1) + return; + + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + cfg = irq_cfg(irq); + if (!cfg->vector) + setup_IO_APIC_irq(ioapic, pin, irq, + irq_trigger(irq_entry), + irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); + } + + } +} +#endif + +#ifdef CONFIG_X86_64 +#define IOAPIC_RESOURCE_NAME_SIZE 11 + +static struct resource *ioapic_resources; + +static struct resource * __init ioapic_setup_resources(void) +{ + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} +#endif + +void __init ioapic_init_mappings(void) +{ + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; + int i; +#ifdef CONFIG_X86_64 + struct resource *ioapic_res; + + ioapic_res = ioapic_setup_resources(); +#endif + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].mp_apicaddr; +#ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } +#endif + } else { +#ifdef CONFIG_X86_32 +fake_ioapic_page: +#endif + ioapic_phys = (unsigned long) + alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = __pa(ioapic_phys); + } + set_fixmap_nocache(idx, ioapic_phys); + apic_printk(APIC_VERBOSE, + "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + +#ifdef CONFIG_X86_64 + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } +#endif + } +} + +#ifdef CONFIG_X86_64 +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk(KERN_ERR + "IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c deleted file mode 100644 index fba6d6ee348..00000000000 --- a/arch/x86/kernel/io_apic_32.c +++ /dev/null @@ -1,3913 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku and - * Hidemi Kishimoto , - * further tested and cleaned up by Zach Brown - * and Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* time_after() */ -#ifdef CONFIG_ACPI -#include -#endif -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define __apicdebuginit(type) static type __init - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int first_free_entry; -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; - -/* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -int skip_ioapic_setup; - -static int __init parse_noapic(char *str) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -struct irq_cfg; -struct irq_pin_list; -struct irq_cfg { - unsigned int irq; - struct irq_cfg *next; - struct irq_pin_list *irq_2_pin; - cpumask_t domain; - cpumask_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, -}; - -static struct irq_cfg irq_cfg_init = { .irq = -1U, }; -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); - -static void init_one_irq_cfg(struct irq_cfg *cfg) -{ - memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); -} - -static struct irq_cfg *irq_cfgx; -static struct irq_cfg *irq_cfgx_free; -static void __init init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; - - cfg = *da->name; - - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); - for (i = legacy_count; i < *da->nr; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < *da->nr; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = &irq_cfgx[legacy_count]; - irq_cfgx[legacy_count - 1].next = NULL; -} - -#define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg; cfg = cfg->next) - -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); - -static struct irq_cfg *irq_cfg(unsigned int irq) -{ - struct irq_cfg *cfg; - - cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg = cfg->next; - } - - return NULL; -} - -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - struct irq_cfg *cfg, *cfg_pri; - int i; - int count = 0; - - cfg_pri = cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg_pri = cfg; - cfg = cfg->next; - count++; - } - - if (!irq_cfgx_free) { - unsigned long phys; - unsigned long total_bytes; - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - - total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; - if (after_bootmem) - cfg = kzalloc(total_bytes, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); - - phys = __pa(cfg); - printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = cfg; - } - - cfg = irq_cfgx_free; - irq_cfgx_free = irq_cfgx_free->next; - cfg->next = NULL; - if (cfg_pri) - cfg_pri->next = cfg; - else - irq_cfgx = cfg; - cfg->irq = irq; - printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); -#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG - { - /* dump the results */ - struct irq_cfg *cfg; - unsigned long phys; - unsigned long bytes = sizeof(struct irq_cfg); - - printk(KERN_DEBUG "=========================== %d\n", irq); - printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); - for_each_irq_cfg(cfg) { - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); - } - printk(KERN_DEBUG "===========================\n"); - } -#endif - return cfg; -} - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; - -static struct irq_pin_list *irq_2_pin_head; -/* fill one page ? */ -static int nr_irq_2_pin = 0x100; -static struct irq_pin_list *irq_2_pin_ptr; -static void __init irq_2_pin_init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_pin_list *pin; - int i; - - pin = *da->name; - - for (i = 1; i < *da->nr; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = &pin[0]; -} -DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); - -static struct irq_pin_list *get_one_free_irq_2_pin(void) -{ - struct irq_pin_list *pin; - int i; - - pin = irq_2_pin_ptr; - - if (pin) { - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; - } - - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); - - if (after_bootmem) - pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, - GFP_ATOMIC); - else - pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * - nr_irq_2_pin, PAGE_SIZE, 0); - - if (!pin) - panic("can not get more irq_2_pin\n"); - - for (i = 1; i < nr_irq_2_pin; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = pin->next; - pin->next = NULL; - - return pin; -} - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -#ifdef CONFIG_X86_64 -static bool io_apic_level_ack_pending(unsigned int irq) -{ - struct irq_pin_list *entry; - unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); - - spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - int pin; - - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - /* Is the remote IRR bit set? */ - if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); - return true; - } - if (!entry->next) - break; - entry = entry->next; - } - spin_unlock_irqrestore(&ioapic_lock, flags); - - return false; -} -#endif - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. - */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu; - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#ifdef CONFIG_SMP -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - int apic, pin; - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; -#ifdef CONFIG_INTR_REMAP - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); -#else - io_apic_write(apic, 0x11 + pin*2, dest); -#endif - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int assign_irq_vector(int irq, cpumask_t mask); - -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); - - desc = irq_to_desc(irq); - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - desc->affinity = mask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} -#endif /* CONFIG_SMP */ - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(); - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); - return; - } - - while (entry->next) { - /* not again, please */ - if (entry->apic == apic && entry->pin == pin) - return; - - entry = entry->next; - } - - entry->next = get_one_free_irq_2_pin(); - entry = entry->next; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_cfg *cfg = irq_cfg(irq); - struct irq_pin_list *entry = cfg->irq_2_pin; - int replaced = 0; - - while (entry) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - replaced = 1; - /* every one is different, right? */ - break; - } - entry = entry->next; - } - - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq(irq, newapic, newpin); -} - -#ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - -#define DO_ACTION(name,R,ACTION, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) - -/* mask = 0 */ -DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) - -#else - -static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - unsigned int pin, reg; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -/* mask = 1 */ -static void __mask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); -} - -/* mask = 0 */ -static void __unmask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); -} - -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED); -} - -#endif - -static void mask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) - return; - /* - * Disable it in the IO-APIC irq-routing table: - */ - ioapic_mask_entry(apic, pin); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP && CONFIG_X86_32*/ - -#ifdef CONFIG_X86_32 -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_INTR_REMAP -/* I/O APIC RTE contents at the OS boot up */ -static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - -/* - * Saves and masks all the unmasked IO-APIC RTE's - */ -int save_mask_IO_APIC_setup(void) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - int apic, pin; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - - for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - return -ENOMEM; - } - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - - entry = early_ioapic_entries[apic][pin] = - ioapic_read_entry(apic, pin); - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } - return 0; -} - -void restore_IO_APIC_setup(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); -} - -void reinit_intr_remapped_IO_APIC(int intr_remapping) -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(); -} -#endif - -/* - * Find the IRQ entry number of a certain pin. - */ -static int find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - - return mp_irqs[i].mp_dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - break; - } - if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) - return apic; - } - } - - return -1; -} - -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -#endif - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) -#define default_EISA_polarity(idx) default_ISA_polarity(idx) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) default_ISA_polarity(idx) - -static int MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mp_irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - polarity = default_ISA_polarity(idx); - else - polarity = default_PCI_polarity(idx); - break; - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif - break; - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -int (*ioapic_renumber_irq)(int ioapic, int irq); -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mp_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mp_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; - } else { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); - } - -#ifdef CONFIG_X86_32 - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. - */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } -#endif - - return irq; -} - -void lock_vector_lock(void) -{ - /* Used to the online set of cpus does not change - * during assign_irq_vector. - */ - spin_lock(&vector_lock); -} - -void unlock_vector_lock(void) -{ - spin_unlock(&vector_lock); -} - -static int __assign_irq_vector(int irq, cpumask_t mask) -{ - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - unsigned int old_vector; - int cpu; - struct irq_cfg *cfg; - - cfg = irq_cfg(irq); - - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); - - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - - old_vector = cfg->vector; - if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) - return 0; - } - - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; - int new_cpu; - int vector, offset; - - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (unlikely(current_vector == vector)) - continue; -#ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; -#else - if (vector == SYSCALL_VECTOR) - goto next; -#endif - for_each_cpu_mask_nr(new_cpu, new_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) - goto next; - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; - } - for_each_cpu_mask_nr(new_cpu, new_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cfg->domain = domain; - return 0; - } - return -ENOSPC; -} - -static int assign_irq_vector(int irq, cpumask_t mask) -{ - int err; - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); - spin_unlock_irqrestore(&vector_lock, flags); - return err; -} - -static void __clear_irq_vector(int irq) -{ - struct irq_cfg *cfg; - cpumask_t mask; - int cpu, vector; - - cfg = irq_cfg(irq); - BUG_ON(!cfg->vector); - - vector = cfg->vector; - cpus_and(mask, cfg->domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) - per_cpu(vector_irq, cpu)[vector] = -1; - - cfg->vector = 0; - cpus_clear(cfg->domain); -} - -void __setup_vector_irq(int cpu) -{ - /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ - int irq, vector; - struct irq_cfg *cfg; - - /* Mark the inuse vectors */ - for_each_irq_cfg(cfg) { - if (!cpu_isset(cpu, cfg->domain)) - continue; - vector = cfg->vector; - irq = cfg->irq; - per_cpu(vector_irq, cpu)[vector] = irq; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq < 0) - continue; - - cfg = irq_cfg(irq); - if (!cpu_isset(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = -1; - } -} - -static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip; -#endif - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -#ifdef CONFIG_X86_32 -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} -#else -static inline int IO_APIC_irq_trigger(int irq) -{ - return 1; -} -#endif - -static void ioapic_register_intr(int irq, unsigned long trigger) -{ - struct irq_desc *desc; - - /* first time to use this irq_desc */ - if (irq < 16) - desc = irq_to_desc(irq); - else - desc = irq_to_desc_alloc(irq); - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - desc->status |= IRQ_LEVEL; - else - desc->status &= ~IRQ_LEVEL; - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - desc->status |= IRQ_MOVE_PCNTXT; - if (trigger) - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_edge_irq, "edge"); - return; - } -#endif - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); -} - -static int setup_ioapic_entry(int apic, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) -{ - /* - * add it to the IO-APIC irq-routing table: - */ - memset(entry,0,sizeof(*entry)); - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); - struct irte irte; - struct IR_IO_APIC_route_entry *ir_entry = - (struct IR_IO_APIC_route_entry *) entry; - int index; - - if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); - - memset(&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = vector; - irte.dest_id = IRTE_DEST(destination); - - modify_irte(irq, &irte); - - ir_entry->index2 = (index >> 15) & 0x1; - ir_entry->zero = 0; - ir_entry->format = 1; - ir_entry->index = (index & 0x7fff); - } else -#endif - { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; - entry->dest = destination; - } - - entry->mask = 0; /* enable IRQ */ - entry->trigger = trigger; - entry->polarity = polarity; - entry->vector = vector; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry->mask = 1; - return 0; -} - -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, - int trigger, int polarity) -{ - struct irq_cfg *cfg; - struct IO_APIC_route_entry entry; - cpumask_t mask; - - if (!IO_APIC_IRQ(irq)) - return; - - cfg = irq_cfg(irq); - - mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(mask, cfg->domain, mask); - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, - irq, trigger, polarity); - - - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, - cfg->vector)) { - printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); - return; - } - - ioapic_register_intr(irq, trigger); - if (irq < 16) - disable_8259A_irq(irq); - - ioapic_write_entry(apic, pin, entry); -} - -static void __init setup_IO_APIC_irqs(void) -{ - int apic, pin, idx, irq, first_notcon = 1; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); - continue; - } - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } - - irq = pin_2_irq(idx, apic, pin); -#ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) - continue; -#endif - add_pin_to_irq(irq, apic, pin); - - setup_IO_APIC_irq(apic, pin, irq, - irq_trigger(idx), irq_polarity(idx)); - } - } - - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); -} - -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, - int vector) -{ - struct IO_APIC_route_entry entry; - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - return; -#endif - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = INT_DEST_MODE; - entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(apic, pin, entry); -} - - -__apicdebuginit(void) print_IO_APIC(void) -{ - int apic, i; - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - union IO_APIC_reg_03 reg_03; - unsigned long flags; - struct irq_cfg *cfg; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect: \n"); - - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - printk(KERN_DEBUG " %02x %03X ", - i, - entry.dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; - if (!entry) - continue; - printk(KERN_DEBUG "IRQ%d ", cfg->irq); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); - - return; -} - -__apicdebuginit(void) print_APIC_bitfield(int base) -{ - unsigned int v; - int i, j; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -__apicdebuginit(void) print_all_local_APICs(void) -{ - on_each_cpu(print_local_APIC, NULL, 1); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -__apicdebuginit(int) print_all_ICs(void) -{ - print_PIC(); - print_all_local_APICs(); - print_IO_APIC(); - - return 0; -} - -fs_initcall(print_all_ICs); - - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -void __init enable_IO_APIC(void) -{ - union IO_APIC_reg_01 reg_01; - int i8259_apic, i8259_pin; - int apic; - unsigned long flags; - -#ifdef CONFIG_X86_32 - int i; - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; -#endif - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; - /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); - - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. - */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } - } - } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic - * the i8259 probably is not connected the ioapic but give the - * mptable a chance anyway. - */ - i8259_pin = find_isa_irq_pin(0, mp_ExtINT); - i8259_apic = find_isa_irq_apic(0, mp_ExtINT); - /* Trust the MP table if nothing is setup in the hardware */ - if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); - ioapic_i8259.pin = i8259_pin; - ioapic_i8259.apic = i8259_apic; - } - /* Complain if the MP table and the hardware disagree */ - if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. - */ - if (ioapic_i8259.pin != -1) { - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); - } - - disconnect_bsp_APIC(ioapic_i8259.pin != -1); -} - -#ifdef CONFIG_X86_32 -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 - */ - -static void __init setup_ioapic_ids_from_mpc(void) -{ - union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int apic; - int i; - unsigned char old_id; - unsigned long flags; - - if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) - return; - - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. - */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); - - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (apic = 0; apic < nr_ioapics; apic++) { - - /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mp_apicid; - - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= get_physical_broadcast()) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; - } else { - physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); - physids_or(phys_id_present_map, phys_id_present_map, tmp); - } - - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mp_ioapics[apic].mp_apicid) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; - - /* - * Read the right value from the MPC table and - * write it into the ID register. - */ - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); - - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; - spin_lock_irqsave(&ioapic_lock, flags); - - /* - * Sanity check - */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) - printk("could not set ID!\n"); - else - apic_printk(APIC_VERBOSE, " ok.\n"); - } -} -#endif - -int no_timer_check __initdata; - -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - unsigned long flags; - - if (no_timer_check) - return 1; - - local_save_flags(flags); - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - local_irq_restore(flags); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - - /* jiffies wrap? */ - if (time_after(jiffies, t1 + 4)) - return 1; - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - */ - -static unsigned int startup_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -#ifdef CONFIG_X86_64 -static int ioapic_retrigger_irq(unsigned int irq) -{ - - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - send_IPI_self(irq_cfg(irq)->vector); - - return 1; -} -#endif - -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - -#ifdef CONFIG_SMP - -#ifdef CONFIG_INTR_REMAP -static void ir_irq_migration(struct work_struct *work); - -static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For edge triggered, irq migration is a simple atomic update(of vector - * and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we need to modify the io-apic RTE aswell with the update - * vector information, along with modifying IRTE with vector and destination. - * So irq migration for level triggered is little bit more complex compared to - * edge triggered migration. But the good news is, we use the same algorithm - * for level triggered migration as we have today, only difference being, - * we now initiate the irq migration from process context instead of the - * interrupt context. - * - * In future, when we do a directed EOI (combined with cpu EOI broadcast - * suppression) to the IO-APIC, level triggered irq migration will also be - * as simple as edge triggered migration and we can do the irq migration - * with a simple atomic update to IO-APIC RTE. - */ -static void migrate_ioapic_irq(int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct irq_desc *desc; - cpumask_t tmp, cleanup_mask; - struct irte irte; - int modify_ioapic_rte; - unsigned int dest; - unsigned long flags; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - desc = irq_to_desc(irq); - modify_ioapic_rte = desc->status & IRQ_LEVEL; - if (modify_ioapic_rte) { - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Modified the IRTE and flushes the Interrupt entry cache. - */ - modify_irte(irq, &irte); - - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc->affinity = mask; -} - -static int migrate_irq_remapped_level(int irq) -{ - int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); - - mask_IO_APIC_irq(irq); - - if (io_apic_level_ack_pending(irq)) { - /* - * Interrupt in progress. Migrating irq now will change the - * vector information in the IO-APIC RTE and that will confuse - * the EOI broadcast performed by cpu. - * So, delay the irq migration to the next instance. - */ - schedule_delayed_work(&ir_migration_work, 1); - goto unmask; - } - - /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); - - ret = 0; - desc->status &= ~IRQ_MOVE_PENDING; - cpus_clear(desc->pending_mask); - -unmask: - unmask_IO_APIC_irq(irq); - return ret; -} - -static void ir_irq_migration(struct work_struct *work) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - if (desc->status & IRQ_MOVE_PENDING) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->chip->set_affinity || - !(desc->status & IRQ_MOVE_PENDING)) { - desc->status &= ~IRQ_MOVE_PENDING; - spin_unlock_irqrestore(&desc->lock, flags); - continue; - } - - desc->chip->set_affinity(irq, desc->pending_mask); - spin_unlock_irqrestore(&desc->lock, flags); - } - } -} - -/* - * Migrates the IRQ destination in the process context. - */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc->status & IRQ_LEVEL) { - desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = mask; - migrate_irq_remapped_level(irq); - return; - } - - migrate_ioapic_irq(irq, mask); -} -#endif - -asmlinkage void smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - ack_APIC_irq(); -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; - - desc = irq_to_desc(irq); - if (!desc) - continue; - - cfg = irq_cfg(irq); - spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; - - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) - goto unlock; - - __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; -unlock: - spin_unlock(&desc->lock); - } - - irq_exit(); -} - -static void irq_complete_move(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - unsigned vector, me; - - if (likely(!cfg->move_in_progress)) - return; - - vector = ~get_irq_regs()->orig_ax; - me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; - - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } -} -#else -static inline void irq_complete_move(unsigned int irq) {} -#endif -#ifdef CONFIG_INTR_REMAP -static void ack_x2apic_level(unsigned int irq) -{ - ack_x2APIC_irq(); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif - -static void ack_apic_edge(unsigned int irq) -{ - irq_complete_move(irq); - move_native_irq(irq); - ack_APIC_irq(); -} - -#ifdef CONFIG_X86_64 -static void ack_apic_level(unsigned int irq) -{ - int do_unmask_irq = 0; - - irq_complete_move(irq); -#ifdef CONFIG_GENERIC_PENDING_IRQ - /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { - do_unmask_irq = 1; - mask_IO_APIC_irq(irq); - } -#endif - - /* - * We must acknowledge the irq before we move it or the acknowledge will - * not propagate properly. - */ - ack_APIC_irq(); - - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(irq)) - move_masked_irq(irq); - unmask_IO_APIC_irq(irq); - } -} -#else -atomic_t irq_mis_count; -static void ack_apic_level(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); - /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} -#endif - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_apic_edge, - .eoi = ack_apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; -#endif - -static inline void init_IO_APIC_traps(void) -{ - int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for_each_irq_cfg(cfg) { - irq = cfg->irq; - if (IO_APIC_IRQ(irq) && !cfg->vector) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); - /* Strange. Oh, well.. */ - desc->chip = &no_irq_chip; - } - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void mask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static void ack_lapic_irq (unsigned int irq) -{ - ack_APIC_irq(); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC", - .mask = mask_lapic_irq, - .unmask = unmask_lapic_irq, - .ack = ack_lapic_irq, -}; - -static void lapic_register_intr(int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->status &= ~IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, - "edge"); -} - -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. --macro - */ -static inline void __init unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -static int disable_timer_pin_1 __initdata; -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", disable_timer_pin_setup); - -int timer_through_8259 __initdata; - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for all platforms. - */ -static inline void __init check_timer(void) -{ - struct irq_cfg *cfg = irq_cfg(0); - int apic1, pin1, apic2, pin2; - unsigned long flags; - unsigned int ver; - int no_pin1 = 0; - - local_irq_save(flags); - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); - - /* - * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. Also - * timer interrupts need to be acknowledged manually in - * the 8259A for the i82489DX when using the NMI - * watchdog as that APIC treats NMIs as level-triggered. - * The AEOI mode will finish them in the 8259A - * automatically. - */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); -#ifdef CONFIG_X86_32 - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); -#endif - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " - "apic1=%d pin1=%d apic2=%d pin2=%d\n", - cfg->vector, apic1, pin1, apic2, pin2); - - /* - * Some BIOS writers are clueless and report the ExtINTA - * I/O APIC input from the cascaded 8259A as the timer - * interrupt input. So just in case, if only one pin - * was found above, try it both directly and through the - * 8259A. - */ - if (pin1 == -1) { -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); -#endif - pin1 = pin2; - apic1 = apic2; - no_pin1 = 1; - } else if (pin2 == -1) { - pin2 = pin1; - apic2 = apic1; - } - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); - } - unmask_IO_APIC_irq(0); - if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - goto out; - } -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); -#endif - clear_IO_APIC_pin(apic1, pin1); - if (!no_pin1) - apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); - - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " - "(IRQ0) through the 8259A ...\n"); - apic_printk(APIC_QUIET, KERN_INFO - "..... (found apic %d pin %d) ...\n", apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); - enable_8259A_irq(0); - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); - timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - goto out; - } - /* - * Cleanup, just in case ... - */ - disable_8259A_irq(0); - clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); - } - - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as Virtual Wire IRQ...\n"); - - lapic_register_intr(0); - apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - disable_8259A_irq(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); - apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as ExtINT IRQ...\n"); - - init_8259A(0); - make_8259A_irq(0); - apic_write(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option.\n"); -out: - local_irq_restore(flags); -} - -/* - * Traditionally ISA IRQ2 is the cascade IRQ, and is not available - * to devices. However there may be an I/O APIC pin available for - * this interrupt regardless. The pin may be left unconnected, but - * typically it will be reused as an ExtINT cascade interrupt for - * the master 8259A. In the MPS case such a pin will normally be - * reported as an ExtINT interrupt in the MP table. With ACPI - * there is no provision for ExtINT interrupts, and in the absence - * of an override it would be treated as an ordinary ISA I/O APIC - * interrupt, that is edge-triggered and unmasked by default. We - * used to do this, but it caused problems on some systems because - * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using - * the same ExtINT cascade interrupt to drive the local APIC of the - * bootstrap processor. Therefore we refrain from routing IRQ2 to - * the I/O APIC in all cases now. No actual device should request - * it anyway. --macro - */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - -#ifdef CONFIG_X86_32 - enable_IO_APIC(); -#else - /* - * calling enable_IO_APIC() is moved to setup_local_APIC for BP - */ -#endif - - io_apic_irqs = ~PIC_IRQS; - - apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - /* - * Set up IO-APIC IRQ routing. - */ -#ifdef CONFIG_X86_32 - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); -#endif - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); -} - -/* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) - *entry = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - unsigned long flags; - union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; - io_apic_write(dev->id, 0, reg_00.raw); - } - spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - ioapic_write_entry(dev->id, i, entry[i]); - - return 0; -} - -static struct sysdev_class ioapic_sysdev_class = { - .name = "ioapic", - .suspend = ioapic_suspend, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_sysfs(void) -{ - struct sys_device * dev; - int i, size, error; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } - - return 0; -} - -device_initcall(ioapic_init_sysfs); - -/* - * Dynamic irq allocate and deallocation - */ -unsigned int create_irq_nr(unsigned int irq_want) -{ - /* Allocate an unused irq */ - unsigned int irq; - unsigned int new; - unsigned long flags; - struct irq_cfg *cfg_new; - -#ifndef CONFIG_HAVE_SPARSE_IRQ - irq_want = nr_irqs - 1; -#endif - - irq = 0; - spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { - if (platform_legacy_irq(new)) - continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) - continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); - if (__assign_irq_vector(new, TARGET_CPUS) == 0) - irq = new; - break; - } - spin_unlock_irqrestore(&vector_lock, flags); - - if (irq > 0) { - dynamic_irq_init(irq); - } - return irq; -} - -int create_irq(void) -{ - int irq; - - irq = create_irq_nr(nr_irqs - 1); - - if (irq == 0) - irq = -1; - - return irq; -} - -void destroy_irq(unsigned int irq) -{ - unsigned long flags; - - dynamic_irq_cleanup(irq); - -#ifdef CONFIG_INTR_REMAP - free_irte(irq); -#endif - spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * MSI message composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (err) - return err; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - memset (&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - modify_irte(irq, &irte); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } - return err; -} - -#ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - read_msi_msg(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} - -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp, cleanup_mask; - struct irte irte; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_x2apic_edge, -#ifdef CONFIG_SMP - .set_affinity = ir_set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} -#endif - -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) - return ret; - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irq_desc *desc = irq_to_desc(irq); - /* - * irq migration in process context - */ - desc->status |= IRQ_MOVE_PCNTXT; - set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); - } else -#endif - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - - return 0; -} - -static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) -{ - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; -} - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - unsigned int irq; - int ret, sub_handle; - struct msi_desc *desc; - unsigned int irq_want; - -#ifdef CONFIG_INTR_REMAP - struct intel_iommu *iommu = 0; - int index = 0; -#endif - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); - if (irq == 0) - return -1; -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_irte(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; - goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); - } -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) - goto error; - sub_handle++; - } - return 0; - -error: - destroy_irq(irq); - return ret; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#ifdef CONFIG_DMAR -#ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif /* CONFIG_SMP */ - -struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .unmask = dmar_msi_unmask, - .mask = dmar_msi_mask, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = dmar_msi_set_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_dmar_msi(unsigned int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} -#endif - -#endif /* CONFIG_PCI_MSI */ -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .mask = mask_ht_irq, - .unmask = unmask_ht_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_ht_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - struct irq_cfg *cfg; - int err; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((INT_DEST_MODE == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - } - return err; -} -#endif /* CONFIG_HT_IRQ */ - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -#ifdef CONFIG_X86_32 -int __init io_apic_get_unique_id(int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - tmp = apicid_to_cpu_present(apic_id); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - -int __init io_apic_get_version(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} -#endif - -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) -{ - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); - - return 0; -} - - -int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) -{ - int i; - - if (skip_ioapic_setup) - return -1; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) - break; - if (i >= mp_irq_entries) - return -1; - - *trigger = irq_trigger(i); - *polarity = irq_polarity(i); - return 0; -} - -#endif /* CONFIG_ACPI */ - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - struct irq_cfg *cfg; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - /* setup_IO_APIC_irqs could fail to get vector for some device - * when you have too many devices, because at that time only boot - * cpu is online. - */ - cfg = irq_cfg(irq); - if (!cfg->vector) - setup_IO_APIC_irq(ioapic, pin, irq, - irq_trigger(irq_entry), - irq_polarity(irq_entry)); -#ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif - else - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - -#ifdef CONFIG_X86_64 -#define IOAPIC_RESOURCE_NAME_SIZE 11 - -static struct resource *ioapic_resources; - -static struct resource * __init ioapic_setup_resources(void) -{ - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - } - - ioapic_resources = res; - - return res; -} -#endif - -void __init ioapic_init_mappings(void) -{ - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; -#ifdef CONFIG_X86_64 - struct resource *ioapic_res; - - ioapic_res = ioapic_setup_resources(); -#endif - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; -#ifdef CONFIG_X86_32 - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } -#endif - } else { -#ifdef CONFIG_X86_32 -fake_ioapic_page: -#endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - -#ifdef CONFIG_X86_64 - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } -#endif - } -} - -#ifdef CONFIG_X86_64 -static int __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - printk(KERN_ERR - "IO APIC resources could be not be allocated.\n"); - return -1; - } - - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; - } - - return 0; -} - -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); -#endif diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c deleted file mode 100644 index 940c4167b32..00000000000 --- a/arch/x86/kernel/io_apic_64.c +++ /dev/null @@ -1,3913 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku and - * Hidemi Kishimoto , - * further tested and cleaned up by Zach Brown - * and Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* time_after() */ -#ifdef CONFIG_ACPI -#include -#endif -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define __apicdebuginit(type) static type __init - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int first_free_entry; -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; - -/* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -int skip_ioapic_setup; - -static int __init parse_noapic(char *str) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -struct irq_cfg; -struct irq_pin_list; -struct irq_cfg { - unsigned int irq; - struct irq_cfg *next; - struct irq_pin_list *irq_2_pin; - cpumask_t domain; - cpumask_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, -}; - -static struct irq_cfg irq_cfg_init = { .irq = -1U, }; -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); - -static void init_one_irq_cfg(struct irq_cfg *cfg) -{ - memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); -} - -static struct irq_cfg *irq_cfgx; -static struct irq_cfg *irq_cfgx_free; -static void __init init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; - - cfg = *da->name; - - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); - for (i = legacy_count; i < *da->nr; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < *da->nr; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = &irq_cfgx[legacy_count]; - irq_cfgx[legacy_count - 1].next = NULL; -} - -#define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg; cfg = cfg->next) - -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); - -static struct irq_cfg *irq_cfg(unsigned int irq) -{ - struct irq_cfg *cfg; - - cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg = cfg->next; - } - - return NULL; -} - -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - struct irq_cfg *cfg, *cfg_pri; - int i; - int count = 0; - - cfg_pri = cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg_pri = cfg; - cfg = cfg->next; - count++; - } - - if (!irq_cfgx_free) { - unsigned long phys; - unsigned long total_bytes; - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - - total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; - if (after_bootmem) - cfg = kzalloc(total_bytes, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); - - phys = __pa(cfg); - printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = cfg; - } - - cfg = irq_cfgx_free; - irq_cfgx_free = irq_cfgx_free->next; - cfg->next = NULL; - if (cfg_pri) - cfg_pri->next = cfg; - else - irq_cfgx = cfg; - cfg->irq = irq; - printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); -#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG - { - /* dump the results */ - struct irq_cfg *cfg; - unsigned long phys; - unsigned long bytes = sizeof(struct irq_cfg); - - printk(KERN_DEBUG "=========================== %d\n", irq); - printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); - for_each_irq_cfg(cfg) { - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); - } - printk(KERN_DEBUG "===========================\n"); - } -#endif - return cfg; -} - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; - -static struct irq_pin_list *irq_2_pin_head; -/* fill one page ? */ -static int nr_irq_2_pin = 0x100; -static struct irq_pin_list *irq_2_pin_ptr; -static void __init irq_2_pin_init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_pin_list *pin; - int i; - - pin = *da->name; - - for (i = 1; i < *da->nr; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = &pin[0]; -} -DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); - -static struct irq_pin_list *get_one_free_irq_2_pin(void) -{ - struct irq_pin_list *pin; - int i; - - pin = irq_2_pin_ptr; - - if (pin) { - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; - } - - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); - - if (after_bootmem) - pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, - GFP_ATOMIC); - else - pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * - nr_irq_2_pin, PAGE_SIZE, 0); - - if (!pin) - panic("can not get more irq_2_pin\n"); - - for (i = 1; i < nr_irq_2_pin; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = pin->next; - pin->next = NULL; - - return pin; -} - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -#ifdef CONFIG_X86_64 -static bool io_apic_level_ack_pending(unsigned int irq) -{ - struct irq_pin_list *entry; - unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); - - spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - int pin; - - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - /* Is the remote IRR bit set? */ - if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); - return true; - } - if (!entry->next) - break; - entry = entry->next; - } - spin_unlock_irqrestore(&ioapic_lock, flags); - - return false; -} -#endif - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. - */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu; - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#ifdef CONFIG_SMP -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - int apic, pin; - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; -#ifdef CONFIG_INTR_REMAP - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); -#else - io_apic_write(apic, 0x11 + pin*2, dest); -#endif - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int assign_irq_vector(int irq, cpumask_t mask); - -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); - - desc = irq_to_desc(irq); - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - desc->affinity = mask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} -#endif /* CONFIG_SMP */ - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(); - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); - return; - } - - while (entry->next) { - /* not again, please */ - if (entry->apic == apic && entry->pin == pin) - return; - - entry = entry->next; - } - - entry->next = get_one_free_irq_2_pin(); - entry = entry->next; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_cfg *cfg = irq_cfg(irq); - struct irq_pin_list *entry = cfg->irq_2_pin; - int replaced = 0; - - while (entry) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - replaced = 1; - /* every one is different, right? */ - break; - } - entry = entry->next; - } - - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq(irq, newapic, newpin); -} - -#ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - -#define DO_ACTION(name,R,ACTION, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) - -/* mask = 0 */ -DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) - -#else - -static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - unsigned int pin, reg; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -/* mask = 1 */ -static void __mask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); -} - -/* mask = 0 */ -static void __unmask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); -} - -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED); -} - -#endif - -static void mask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) - return; - /* - * Disable it in the IO-APIC irq-routing table: - */ - ioapic_mask_entry(apic, pin); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP && CONFIG_X86_32*/ - -#ifdef CONFIG_X86_32 -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_INTR_REMAP -/* I/O APIC RTE contents at the OS boot up */ -static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - -/* - * Saves and masks all the unmasked IO-APIC RTE's - */ -int save_mask_IO_APIC_setup(void) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - int apic, pin; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - - for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - return -ENOMEM; - } - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - - entry = early_ioapic_entries[apic][pin] = - ioapic_read_entry(apic, pin); - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } - return 0; -} - -void restore_IO_APIC_setup(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); -} - -void reinit_intr_remapped_IO_APIC(int intr_remapping) -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(); -} -#endif - -/* - * Find the IRQ entry number of a certain pin. - */ -static int find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - - return mp_irqs[i].mp_dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - break; - } - if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) - return apic; - } - } - - return -1; -} - -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -#endif - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) -#define default_EISA_polarity(idx) default_ISA_polarity(idx) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) default_ISA_polarity(idx) - -static int MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mp_irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - polarity = default_ISA_polarity(idx); - else - polarity = default_PCI_polarity(idx); - break; - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif - break; - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -int (*ioapic_renumber_irq)(int ioapic, int irq); -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mp_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mp_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; - } else { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); - } - -#ifdef CONFIG_X86_32 - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. - */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } -#endif - - return irq; -} - -void lock_vector_lock(void) -{ - /* Used to the online set of cpus does not change - * during assign_irq_vector. - */ - spin_lock(&vector_lock); -} - -void unlock_vector_lock(void) -{ - spin_unlock(&vector_lock); -} - -static int __assign_irq_vector(int irq, cpumask_t mask) -{ - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - unsigned int old_vector; - int cpu; - struct irq_cfg *cfg; - - cfg = irq_cfg(irq); - - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); - - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - - old_vector = cfg->vector; - if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) - return 0; - } - - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; - int new_cpu; - int vector, offset; - - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (unlikely(current_vector == vector)) - continue; -#ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; -#else - if (vector == SYSCALL_VECTOR) - goto next; -#endif - for_each_cpu_mask_nr(new_cpu, new_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) - goto next; - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; - } - for_each_cpu_mask_nr(new_cpu, new_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cfg->domain = domain; - return 0; - } - return -ENOSPC; -} - -static int assign_irq_vector(int irq, cpumask_t mask) -{ - int err; - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); - spin_unlock_irqrestore(&vector_lock, flags); - return err; -} - -static void __clear_irq_vector(int irq) -{ - struct irq_cfg *cfg; - cpumask_t mask; - int cpu, vector; - - cfg = irq_cfg(irq); - BUG_ON(!cfg->vector); - - vector = cfg->vector; - cpus_and(mask, cfg->domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) - per_cpu(vector_irq, cpu)[vector] = -1; - - cfg->vector = 0; - cpus_clear(cfg->domain); -} - -void __setup_vector_irq(int cpu) -{ - /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ - int irq, vector; - struct irq_cfg *cfg; - - /* Mark the inuse vectors */ - for_each_irq_cfg(cfg) { - if (!cpu_isset(cpu, cfg->domain)) - continue; - vector = cfg->vector; - irq = cfg->irq; - per_cpu(vector_irq, cpu)[vector] = irq; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq < 0) - continue; - - cfg = irq_cfg(irq); - if (!cpu_isset(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = -1; - } -} - -static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip; -#endif - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -#ifdef CONFIG_X86_32 -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} -#else -static inline int IO_APIC_irq_trigger(int irq) -{ - return 1; -} -#endif - -static void ioapic_register_intr(int irq, unsigned long trigger) -{ - struct irq_desc *desc; - - /* first time to use this irq_desc */ - if (irq < 16) - desc = irq_to_desc(irq); - else - desc = irq_to_desc_alloc(irq); - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - desc->status |= IRQ_LEVEL; - else - desc->status &= ~IRQ_LEVEL; - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - desc->status |= IRQ_MOVE_PCNTXT; - if (trigger) - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_edge_irq, "edge"); - return; - } -#endif - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); -} - -static int setup_ioapic_entry(int apic, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) -{ - /* - * add it to the IO-APIC irq-routing table: - */ - memset(entry,0,sizeof(*entry)); - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); - struct irte irte; - struct IR_IO_APIC_route_entry *ir_entry = - (struct IR_IO_APIC_route_entry *) entry; - int index; - - if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); - - memset(&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = vector; - irte.dest_id = IRTE_DEST(destination); - - modify_irte(irq, &irte); - - ir_entry->index2 = (index >> 15) & 0x1; - ir_entry->zero = 0; - ir_entry->format = 1; - ir_entry->index = (index & 0x7fff); - } else -#endif - { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; - entry->dest = destination; - } - - entry->mask = 0; /* enable IRQ */ - entry->trigger = trigger; - entry->polarity = polarity; - entry->vector = vector; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry->mask = 1; - return 0; -} - -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, - int trigger, int polarity) -{ - struct irq_cfg *cfg; - struct IO_APIC_route_entry entry; - cpumask_t mask; - - if (!IO_APIC_IRQ(irq)) - return; - - cfg = irq_cfg(irq); - - mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(mask, cfg->domain, mask); - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, - irq, trigger, polarity); - - - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, - cfg->vector)) { - printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); - return; - } - - ioapic_register_intr(irq, trigger); - if (irq < 16) - disable_8259A_irq(irq); - - ioapic_write_entry(apic, pin, entry); -} - -static void __init setup_IO_APIC_irqs(void) -{ - int apic, pin, idx, irq, first_notcon = 1; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); - continue; - } - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } - - irq = pin_2_irq(idx, apic, pin); -#ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) - continue; -#endif - add_pin_to_irq(irq, apic, pin); - - setup_IO_APIC_irq(apic, pin, irq, - irq_trigger(idx), irq_polarity(idx)); - } - } - - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); -} - -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, - int vector) -{ - struct IO_APIC_route_entry entry; - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - return; -#endif - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = INT_DEST_MODE; - entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(apic, pin, entry); -} - - -__apicdebuginit(void) print_IO_APIC(void) -{ - int apic, i; - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - union IO_APIC_reg_03 reg_03; - unsigned long flags; - struct irq_cfg *cfg; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect: \n"); - - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - printk(KERN_DEBUG " %02x %03X ", - i, - entry.dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; - if (!entry) - continue; - printk(KERN_DEBUG "IRQ%d ", cfg->irq); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); - - return; -} - -__apicdebuginit(void) print_APIC_bitfield(int base) -{ - unsigned int v; - int i, j; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -__apicdebuginit(void) print_all_local_APICs(void) -{ - on_each_cpu(print_local_APIC, NULL, 1); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -__apicdebuginit(int) print_all_ICs(void) -{ - print_PIC(); - print_all_local_APICs(); - print_IO_APIC(); - - return 0; -} - -fs_initcall(print_all_ICs); - - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -void __init enable_IO_APIC(void) -{ - union IO_APIC_reg_01 reg_01; - int i8259_apic, i8259_pin; - int apic; - unsigned long flags; - -#ifdef CONFIG_X86_32 - int i; - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; -#endif - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; - /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); - - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. - */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } - } - } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic - * the i8259 probably is not connected the ioapic but give the - * mptable a chance anyway. - */ - i8259_pin = find_isa_irq_pin(0, mp_ExtINT); - i8259_apic = find_isa_irq_apic(0, mp_ExtINT); - /* Trust the MP table if nothing is setup in the hardware */ - if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); - ioapic_i8259.pin = i8259_pin; - ioapic_i8259.apic = i8259_apic; - } - /* Complain if the MP table and the hardware disagree */ - if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. - */ - if (ioapic_i8259.pin != -1) { - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); - } - - disconnect_bsp_APIC(ioapic_i8259.pin != -1); -} - -#ifdef CONFIG_X86_32 -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 - */ - -static void __init setup_ioapic_ids_from_mpc(void) -{ - union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int apic; - int i; - unsigned char old_id; - unsigned long flags; - - if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) - return; - - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. - */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); - - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (apic = 0; apic < nr_ioapics; apic++) { - - /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mp_apicid; - - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= get_physical_broadcast()) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; - } else { - physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); - physids_or(phys_id_present_map, phys_id_present_map, tmp); - } - - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mp_ioapics[apic].mp_apicid) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; - - /* - * Read the right value from the MPC table and - * write it into the ID register. - */ - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); - - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; - spin_lock_irqsave(&ioapic_lock, flags); - - /* - * Sanity check - */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) - printk("could not set ID!\n"); - else - apic_printk(APIC_VERBOSE, " ok.\n"); - } -} -#endif - -int no_timer_check __initdata; - -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - unsigned long flags; - - if (no_timer_check) - return 1; - - local_save_flags(flags); - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - local_irq_restore(flags); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - - /* jiffies wrap? */ - if (time_after(jiffies, t1 + 4)) - return 1; - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - */ - -static unsigned int startup_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -#ifdef CONFIG_X86_64 -static int ioapic_retrigger_irq(unsigned int irq) -{ - - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - send_IPI_self(irq_cfg(irq)->vector); - - return 1; -} -#endif - -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - -#ifdef CONFIG_SMP - -#ifdef CONFIG_INTR_REMAP -static void ir_irq_migration(struct work_struct *work); - -static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For edge triggered, irq migration is a simple atomic update(of vector - * and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we need to modify the io-apic RTE aswell with the update - * vector information, along with modifying IRTE with vector and destination. - * So irq migration for level triggered is little bit more complex compared to - * edge triggered migration. But the good news is, we use the same algorithm - * for level triggered migration as we have today, only difference being, - * we now initiate the irq migration from process context instead of the - * interrupt context. - * - * In future, when we do a directed EOI (combined with cpu EOI broadcast - * suppression) to the IO-APIC, level triggered irq migration will also be - * as simple as edge triggered migration and we can do the irq migration - * with a simple atomic update to IO-APIC RTE. - */ -static void migrate_ioapic_irq(int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct irq_desc *desc; - cpumask_t tmp, cleanup_mask; - struct irte irte; - int modify_ioapic_rte; - unsigned int dest; - unsigned long flags; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - desc = irq_to_desc(irq); - modify_ioapic_rte = desc->status & IRQ_LEVEL; - if (modify_ioapic_rte) { - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Modified the IRTE and flushes the Interrupt entry cache. - */ - modify_irte(irq, &irte); - - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc->affinity = mask; -} - -static int migrate_irq_remapped_level(int irq) -{ - int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); - - mask_IO_APIC_irq(irq); - - if (io_apic_level_ack_pending(irq)) { - /* - * Interrupt in progress. Migrating irq now will change the - * vector information in the IO-APIC RTE and that will confuse - * the EOI broadcast performed by cpu. - * So, delay the irq migration to the next instance. - */ - schedule_delayed_work(&ir_migration_work, 1); - goto unmask; - } - - /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); - - ret = 0; - desc->status &= ~IRQ_MOVE_PENDING; - cpus_clear(desc->pending_mask); - -unmask: - unmask_IO_APIC_irq(irq); - return ret; -} - -static void ir_irq_migration(struct work_struct *work) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - if (desc->status & IRQ_MOVE_PENDING) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->chip->set_affinity || - !(desc->status & IRQ_MOVE_PENDING)) { - desc->status &= ~IRQ_MOVE_PENDING; - spin_unlock_irqrestore(&desc->lock, flags); - continue; - } - - desc->chip->set_affinity(irq, desc->pending_mask); - spin_unlock_irqrestore(&desc->lock, flags); - } - } -} - -/* - * Migrates the IRQ destination in the process context. - */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc->status & IRQ_LEVEL) { - desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = mask; - migrate_irq_remapped_level(irq); - return; - } - - migrate_ioapic_irq(irq, mask); -} -#endif - -asmlinkage void smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - ack_APIC_irq(); -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; - - desc = irq_to_desc(irq); - if (!desc) - continue; - - cfg = irq_cfg(irq); - spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; - - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) - goto unlock; - - __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; -unlock: - spin_unlock(&desc->lock); - } - - irq_exit(); -} - -static void irq_complete_move(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - unsigned vector, me; - - if (likely(!cfg->move_in_progress)) - return; - - vector = ~get_irq_regs()->orig_ax; - me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; - - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } -} -#else -static inline void irq_complete_move(unsigned int irq) {} -#endif -#ifdef CONFIG_INTR_REMAP -static void ack_x2apic_level(unsigned int irq) -{ - ack_x2APIC_irq(); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif - -static void ack_apic_edge(unsigned int irq) -{ - irq_complete_move(irq); - move_native_irq(irq); - ack_APIC_irq(); -} - -#ifdef CONFIG_X86_64 -static void ack_apic_level(unsigned int irq) -{ - int do_unmask_irq = 0; - - irq_complete_move(irq); -#ifdef CONFIG_GENERIC_PENDING_IRQ - /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { - do_unmask_irq = 1; - mask_IO_APIC_irq(irq); - } -#endif - - /* - * We must acknowledge the irq before we move it or the acknowledge will - * not propagate properly. - */ - ack_APIC_irq(); - - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(irq)) - move_masked_irq(irq); - unmask_IO_APIC_irq(irq); - } -} -#else -atomic_t irq_mis_count; -static void ack_apic_level(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); - /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} -#endif - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_apic_edge, - .eoi = ack_apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; -#endif - -static inline void init_IO_APIC_traps(void) -{ - int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for_each_irq_cfg(cfg) { - irq = cfg->irq; - if (IO_APIC_IRQ(irq) && !cfg->vector) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); - /* Strange. Oh, well.. */ - desc->chip = &no_irq_chip; - } - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void mask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static void ack_lapic_irq (unsigned int irq) -{ - ack_APIC_irq(); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC", - .mask = mask_lapic_irq, - .unmask = unmask_lapic_irq, - .ack = ack_lapic_irq, -}; - -static void lapic_register_intr(int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->status &= ~IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, - "edge"); -} - -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. --macro - */ -static inline void __init unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -static int disable_timer_pin_1 __initdata; -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", disable_timer_pin_setup); - -int timer_through_8259 __initdata; - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for all platforms. - */ -static inline void __init check_timer(void) -{ - struct irq_cfg *cfg = irq_cfg(0); - int apic1, pin1, apic2, pin2; - unsigned long flags; - unsigned int ver; - int no_pin1 = 0; - - local_irq_save(flags); - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); - - /* - * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. Also - * timer interrupts need to be acknowledged manually in - * the 8259A for the i82489DX when using the NMI - * watchdog as that APIC treats NMIs as level-triggered. - * The AEOI mode will finish them in the 8259A - * automatically. - */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); -#ifdef CONFIG_X86_32 - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); -#endif - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " - "apic1=%d pin1=%d apic2=%d pin2=%d\n", - cfg->vector, apic1, pin1, apic2, pin2); - - /* - * Some BIOS writers are clueless and report the ExtINTA - * I/O APIC input from the cascaded 8259A as the timer - * interrupt input. So just in case, if only one pin - * was found above, try it both directly and through the - * 8259A. - */ - if (pin1 == -1) { -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); -#endif - pin1 = pin2; - apic1 = apic2; - no_pin1 = 1; - } else if (pin2 == -1) { - pin2 = pin1; - apic2 = apic1; - } - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); - } - unmask_IO_APIC_irq(0); - if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - goto out; - } -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); -#endif - clear_IO_APIC_pin(apic1, pin1); - if (!no_pin1) - apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); - - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " - "(IRQ0) through the 8259A ...\n"); - apic_printk(APIC_QUIET, KERN_INFO - "..... (found apic %d pin %d) ...\n", apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); - enable_8259A_irq(0); - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); - timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - goto out; - } - /* - * Cleanup, just in case ... - */ - disable_8259A_irq(0); - clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); - } - - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as Virtual Wire IRQ...\n"); - - lapic_register_intr(0); - apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - disable_8259A_irq(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); - apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as ExtINT IRQ...\n"); - - init_8259A(0); - make_8259A_irq(0); - apic_write(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option.\n"); -out: - local_irq_restore(flags); -} - -/* - * Traditionally ISA IRQ2 is the cascade IRQ, and is not available - * to devices. However there may be an I/O APIC pin available for - * this interrupt regardless. The pin may be left unconnected, but - * typically it will be reused as an ExtINT cascade interrupt for - * the master 8259A. In the MPS case such a pin will normally be - * reported as an ExtINT interrupt in the MP table. With ACPI - * there is no provision for ExtINT interrupts, and in the absence - * of an override it would be treated as an ordinary ISA I/O APIC - * interrupt, that is edge-triggered and unmasked by default. We - * used to do this, but it caused problems on some systems because - * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using - * the same ExtINT cascade interrupt to drive the local APIC of the - * bootstrap processor. Therefore we refrain from routing IRQ2 to - * the I/O APIC in all cases now. No actual device should request - * it anyway. --macro - */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - -#ifdef CONFIG_X86_32 - enable_IO_APIC(); -#else - /* - * calling enable_IO_APIC() is moved to setup_local_APIC for BP - */ -#endif - - io_apic_irqs = ~PIC_IRQS; - - apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - /* - * Set up IO-APIC IRQ routing. - */ -#ifdef CONFIG_X86_32 - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); -#endif - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); -} - -/* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) - *entry = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - unsigned long flags; - union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; - io_apic_write(dev->id, 0, reg_00.raw); - } - spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - ioapic_write_entry(dev->id, i, entry[i]); - - return 0; -} - -static struct sysdev_class ioapic_sysdev_class = { - .name = "ioapic", - .suspend = ioapic_suspend, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_sysfs(void) -{ - struct sys_device * dev; - int i, size, error; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } - - return 0; -} - -device_initcall(ioapic_init_sysfs); - -/* - * Dynamic irq allocate and deallocation - */ -unsigned int create_irq_nr(unsigned int irq_want) -{ - /* Allocate an unused irq */ - unsigned int irq; - unsigned int new; - unsigned long flags; - struct irq_cfg *cfg_new; - -#ifndef CONFIG_HAVE_SPARSE_IRQ - irq_want = nr_irqs - 1; -#endif - - irq = 0; - spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { - if (platform_legacy_irq(new)) - continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) - continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); - if (__assign_irq_vector(new, TARGET_CPUS) == 0) - irq = new; - break; - } - spin_unlock_irqrestore(&vector_lock, flags); - - if (irq > 0) { - dynamic_irq_init(irq); - } - return irq; -} - -int create_irq(void) -{ - int irq; - - irq = create_irq_nr(nr_irqs - 1); - - if (irq == 0) - irq = -1; - - return irq; -} - -void destroy_irq(unsigned int irq) -{ - unsigned long flags; - - dynamic_irq_cleanup(irq); - -#ifdef CONFIG_INTR_REMAP - free_irte(irq); -#endif - spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * MSI message composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (err) - return err; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - memset (&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - modify_irte(irq, &irte); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } - return err; -} - -#ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - read_msi_msg(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} - -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp, cleanup_mask; - struct irte irte; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_x2apic_edge, -#ifdef CONFIG_SMP - .set_affinity = ir_set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} -#endif - -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) - return ret; - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irq_desc *desc = irq_to_desc(irq); - /* - * irq migration in process context - */ - desc->status |= IRQ_MOVE_PCNTXT; - set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); - } else -#endif - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - - return 0; -} - -static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) -{ - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; -} - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - unsigned int irq; - int ret, sub_handle; - struct msi_desc *desc; - unsigned int irq_want; - -#ifdef CONFIG_INTR_REMAP - struct intel_iommu *iommu = 0; - int index = 0; -#endif - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); - if (irq == 0) - return -1; -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_irte(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; - goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); - } -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) - goto error; - sub_handle++; - } - return 0; - -error: - destroy_irq(irq); - return ret; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#ifdef CONFIG_DMAR -#ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif /* CONFIG_SMP */ - -struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .unmask = dmar_msi_unmask, - .mask = dmar_msi_mask, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = dmar_msi_set_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_dmar_msi(unsigned int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} -#endif - -#endif /* CONFIG_PCI_MSI */ -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .mask = mask_ht_irq, - .unmask = unmask_ht_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_ht_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - struct irq_cfg *cfg; - int err; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((INT_DEST_MODE == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - } - return err; -} -#endif /* CONFIG_HT_IRQ */ - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -#ifdef CONFIG_X86_32 -int __init io_apic_get_unique_id(int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - tmp = apicid_to_cpu_present(apic_id); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - -int __init io_apic_get_version(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} -#endif - -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) -{ - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); - - return 0; -} - - -int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) -{ - int i; - - if (skip_ioapic_setup) - return -1; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) - break; - if (i >= mp_irq_entries) - return -1; - - *trigger = irq_trigger(i); - *polarity = irq_polarity(i); - return 0; -} - -#endif /* CONFIG_ACPI */ - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - struct irq_cfg *cfg; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - /* setup_IO_APIC_irqs could fail to get vector for some device - * when you have too many devices, because at that time only boot - * cpu is online. - */ - cfg = irq_cfg(irq); - if (!cfg->vector) - setup_IO_APIC_irq(ioapic, pin, irq, - irq_trigger(irq_entry), - irq_polarity(irq_entry)); -#ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif - else - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - -#ifdef CONFIG_X86_64 -#define IOAPIC_RESOURCE_NAME_SIZE 11 - -static struct resource *ioapic_resources; - -static struct resource * __init ioapic_setup_resources(void) -{ - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - } - - ioapic_resources = res; - - return res; -} -#endif - -void __init ioapic_init_mappings(void) -{ - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; -#ifdef CONFIG_X86_64 - struct resource *ioapic_res; - - ioapic_res = ioapic_setup_resources(); -#endif - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; -#ifdef CONFIG_X86_32 - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } -#endif - } else { -#ifdef CONFIG_X86_32 -fake_ioapic_page: -#endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - -#ifdef CONFIG_X86_64 - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } -#endif - } -} - -#ifdef CONFIG_X86_64 -static int __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - printk(KERN_ERR - "IO APIC resources could be not be allocated.\n"); - return -1; - } - - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; - } - - return 0; -} - -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); -#endif -- cgit v1.2.3 From c691cc84529ec88ccb32b174535bb61875888c90 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:43 -0700 Subject: io_apic: make 32 bit have io_apic resource in /proc/iomem Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index fba6d6ee348..7e303e0967a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3802,7 +3802,6 @@ void __init setup_ioapic_dest(void) } #endif -#ifdef CONFIG_X86_64 #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; @@ -3838,17 +3837,14 @@ static struct resource * __init ioapic_setup_resources(void) return res; } -#endif void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; -#ifdef CONFIG_X86_64 struct resource *ioapic_res; ioapic_res = ioapic_setup_resources(); -#endif for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; @@ -3877,17 +3873,14 @@ fake_ioapic_page: __fix_to_virt(idx), ioapic_phys); idx++; -#ifdef CONFIG_X86_64 if (ioapic_res != NULL) { ioapic_res->start = ioapic_phys; ioapic_res->end = ioapic_phys + (4 * 1024) - 1; ioapic_res++; } -#endif } } -#ifdef CONFIG_X86_64 static int __init ioapic_insert_resources(void) { int i; @@ -3910,4 +3903,3 @@ static int __init ioapic_insert_resources(void) /* Insert the IO APIC resources after PCI initialization has occured to handle * IO APICS that are mapped in on a BAR in PCI space. */ late_initcall(ioapic_insert_resources); -#endif -- cgit v1.2.3 From 4e738e2f307113feaedebae147c3e0d072e39648 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:47 -0700 Subject: x86: unify mask_IO_APIC_irq use MACRO for 32 bit too Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 80 +++++++++++++---------------------------------- 1 file changed, 21 insertions(+), 59 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7e303e0967a..099696109ca 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -610,18 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } -#ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ +#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ \ { \ int pin; \ @@ -636,7 +625,8 @@ static inline void io_apic_sync(unsigned int apic) break; \ pin = entry->pin; \ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ + reg ACTION_DISABLE; \ + reg ACTION_ENABLE; \ io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ FINAL; \ if (!entry->next) \ @@ -645,66 +635,38 @@ static inline void io_apic_sync(unsigned int apic) } \ } -#define DO_ACTION(name,R,ACTION, FINAL) \ +#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ \ static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) /* mask = 0 */ -DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) +DO_ACTION(__unmask, 0, |= 0, &= ~IO_APIC_REDIR_MASKED, ) -#else - -static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) +#ifdef CONFIG_X86_64 +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) { - struct irq_cfg *cfg; - struct irq_pin_list *entry; - unsigned int pin, reg; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); } /* mask = 1 */ -static void __mask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); -} +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic)) -/* mask = 0 */ -static void __unmask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); -} +#else + +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, ) /* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER); -} +DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, ) /* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED); -} +DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, ) #endif -- cgit v1.2.3 From 3eb2cce84beae8fd41de950569cafd5bca7edd5d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:48 -0700 Subject: x86: unify ack_apic_edge use code in 64 to replace move_native_irq(irq, desc); in 32 bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 73 +++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 099696109ca..a466b04ad5a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -389,7 +389,6 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } -#ifdef CONFIG_X86_64 static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; @@ -419,7 +418,6 @@ static bool io_apic_level_ack_pending(unsigned int irq) return false; } -#endif union entry_union { struct { u32 w1, w2; }; @@ -2398,9 +2396,16 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_32 +atomic_t irq_mis_count; +#endif + static void ack_apic_level(unsigned int irq) { +#ifdef CONFIG_X86_32 + unsigned long v; + int i; +#endif int do_unmask_irq = 0; irq_complete_move(irq); @@ -2412,6 +2417,31 @@ static void ack_apic_level(unsigned int irq) } #endif +#ifdef CONFIG_X86_32 + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); +#endif + /* * We must acknowledge the irq before we move it or the acknowledge will * not propagate properly. @@ -2450,41 +2480,8 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq(irq); } -} -#else -atomic_t irq_mis_count; -static void ack_apic_level(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); - /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); +#ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); @@ -2492,8 +2489,8 @@ static void ack_apic_level(unsigned int irq) __unmask_and_level_IO_APIC_irq(irq); spin_unlock(&ioapic_lock); } -} #endif +} static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", -- cgit v1.2.3 From 29ccbbf232c035b8c7ff0c5060fbe30a66ed9b99 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:49 -0700 Subject: x86: remove first_free_entry/pin_map_size no user now Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 7 ------- arch/x86/kernel/setup.c | 4 ---- 2 files changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index a466b04ad5a..5de2d38812a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -72,13 +72,6 @@ int sis_apic_bug = -1; static DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); -int first_free_entry; -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - /* * # of IRQ routing registers */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 02e3a669797..d90c659b70e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1074,10 +1074,6 @@ void __init setup_arch(char **cmdline_p) nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif -#ifdef CONFIG_X86_IO_APIC - pin_map_size = nr_irqs * 2; - first_free_entry = nr_irqs; -#endif init_apic_mappings(); ioapic_init_mappings(); -- cgit v1.2.3 From ffd5aae7817fba22c5c3e304a31c44fa0a4e9a97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:50 -0700 Subject: x86: print local APIC of APs one by one instead of print that of all APs at the time Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 5de2d38812a..bf0e66d7303 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1777,7 +1777,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy) __apicdebuginit(void) print_all_local_APICs(void) { - on_each_cpu(print_local_APIC, NULL, 1); + int cpu; + + preempt_disable(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, print_local_APIC, NULL, 1); + preempt_enable(); } __apicdebuginit(void) print_PIC(void) -- cgit v1.2.3 From 8f09cd20a24c5f13c571bc73ddcd47be0af3b70f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:51 -0700 Subject: x86: make HAVE_SPARSE_IRQ support selectable Ingo said sparse_irq is some intrusive. need to make it selectable to make it simple, remove irq_desc as parameter in some functions. (ack, eoi, set_affinity). may need to make member if irq_chip to take irq_desc, or struct irq later. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/Kconfig | 3 -- arch/x86/Kconfig | 12 +++++++- arch/x86/kernel/io_apic.c | 71 ++++++++++++++++++++++++++++++++--------------- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 5 files changed, 62 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index b3676224626..c8a7c2eb649 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -106,6 +106,3 @@ config HAVE_CLK config HAVE_DYN_ARRAY def_bool n -config HAVE_SPARSE_IRQ - def_bool n - diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b157e94637b..600584b7a49 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,7 +34,6 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_DYN_ARRAY - select HAVE_SPARSE_IRQ config ARCH_DEFCONFIG string @@ -238,6 +237,17 @@ config SMP If you don't know what to do here, say N. +config HAVE_SPARSE_IRQ + bool "Support sparse irq numbering" + depends on PCI_MSI || HT_IRQ + default y + help + This enables support for sparse irq, esp for msi/msi-x. the irq + number will be bus/dev/fn + 12bit. You may need if you have lots of + cards supports msi-x installed. + + If you don't know what to do here, say Y. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index bf0e66d7303..f853b667fa5 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -107,7 +107,9 @@ struct irq_cfg; struct irq_pin_list; struct irq_cfg { unsigned int irq; +#ifdef CONFIG_HAVE_SPARSE_IRQ struct irq_cfg *next; +#endif struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; @@ -137,20 +139,6 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = { }; static struct irq_cfg irq_cfg_init = { .irq = -1U, }; -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); static void init_one_irq_cfg(struct irq_cfg *cfg) { @@ -158,7 +146,9 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) } static struct irq_cfg *irq_cfgx; +#ifdef CONFIG_HAVE_SPARSE_IRQ static struct irq_cfg *irq_cfgx_free; +#endif static void __init init_work(void *data) { struct dyn_array *da = data; @@ -174,15 +164,34 @@ static void __init init_work(void *data) for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); +#ifdef CONFIG_HAVE_SPARSE_IRQ for (i = 1; i < *da->nr; i++) cfg[i-1].next = &cfg[i]; irq_cfgx_free = &irq_cfgx[legacy_count]; irq_cfgx[legacy_count - 1].next = NULL; +#endif +} + +#ifdef CONFIG_HAVE_SPARSE_IRQ +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; } -#define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg; cfg = cfg->next) +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +#define for_each_irq_cfg(irqX, cfg) \ + for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U) + DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); @@ -273,7 +282,26 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) #endif return cfg; } +#else + +#define for_each_irq_cfg(irq, cfg) \ + for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq]) + +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); +struct irq_cfg *irq_cfg(unsigned int irq) +{ + if (irq < nr_irqs) + return &irq_cfgx[irq]; + + return NULL; +} +struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + return irq_cfg(irq); +} + +#endif /* * This is performance-critical, we want to do it O(1) * @@ -1282,11 +1310,10 @@ void __setup_vector_irq(int cpu) struct irq_cfg *cfg; /* Mark the inuse vectors */ - for_each_irq_cfg(cfg) { + for_each_irq_cfg(irq, cfg) { if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; - irq = cfg->irq; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ @@ -1563,6 +1590,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; + unsigned int irq; if (apic_verbosity == APIC_QUIET) return; @@ -1651,11 +1679,11 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(cfg) { + for_each_irq_cfg(irq, cfg) { struct irq_pin_list *entry = cfg->irq_2_pin; if (!entry) continue; - printk(KERN_DEBUG "IRQ%d ", cfg->irq); + printk(KERN_DEBUG "IRQ%d ", irq); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -2535,8 +2563,7 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_cfg(cfg) { - irq = cfg->irq; + for_each_irq_cfg(irq, cfg) { if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index cc929f2f84f..b2e1082cf5a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -269,7 +269,7 @@ int show_interrupts(struct seq_file *p, void *v) struct irqaction * action; unsigned long flags; unsigned int entries; - struct irq_desc *desc; + struct irq_desc *desc = NULL; int tail = 0; #ifdef CONFIG_HAVE_SPARSE_IRQ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 348a11168c2..c2fca89a3f7 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -74,7 +74,7 @@ int show_interrupts(struct seq_file *p, void *v) struct irqaction * action; unsigned long flags; unsigned int entries; - struct irq_desc *desc; + struct irq_desc *desc = NULL; int tail = 0; #ifdef CONFIG_HAVE_SPARSE_IRQ -- cgit v1.2.3 From 9d6a4d0823b3b8e29156f5e698b5a68687afad32 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:52 -0700 Subject: x86: probe nr_irqs even only mptable is used for !CONFIG_HAVE_SPARSE_IRQ fix: In file included from arch/x86/kernel/early-quirks.c:18: include/asm/io_apic.h: In function 'probe_nr_irqs': include/asm/io_apic.h:209: error: 'NR_IRQS' undeclared (first use in this function) include/asm/io_apic.h:209: error: (Each undeclared identifier is reported only once include/asm/io_apic.h:209: error: for each function it appears in.) v2: fix by Ingo Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 25 ------------------------- arch/x86/kernel/io_apic.c | 43 ++++++++++++++++++++++++++++++------------- arch/x86/kernel/setup.c | 6 +++--- 3 files changed, 33 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3e9d163fd92..5fef4fece4a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -957,29 +957,6 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } -int get_nr_irqs_via_madt(void) -{ - int idx; - int nr = 0; - - for (idx = 0; idx < nr_ioapics; idx++) { - if (mp_ioapic_routing[idx].gsi_end > nr) - nr = mp_ioapic_routing[idx].gsi_end; - } - - nr++; - - /* double it for hotplug and msi and nmi */ - nr <<= 1; - - /* something wrong ? */ - if (nr < 32) - nr = 32; - - return nr; - -} - static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { @@ -1278,8 +1255,6 @@ static int __init acpi_parse_madt_ioapic_entries(void) } - nr_irqs = get_nr_irqs_via_madt(); - count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, nr_irqs); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f853b667fa5..f7e80262cbb 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3596,6 +3596,36 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ +int __init io_apic_get_redir_entries (int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.entries; +} + +int __init probe_nr_irqs(void) +{ + int idx; + int nr = 0; + + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx); + + /* double it for hotplug and msi and nmi */ + nr <<= 1; + + /* something wrong ? */ + if (nr < 32) + nr = 32; + + return nr; +} + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -3690,19 +3720,6 @@ int __init io_apic_get_version(int ioapic) } #endif -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { if (!IO_APIC_IRQ(irq)) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d90c659b70e..61335306696 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1069,15 +1069,15 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); #ifdef CONFIG_X86_64 - /* need to wait for nr_cpu_ids settle down */ - if (nr_irqs == NR_IRQS) - nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif init_apic_mappings(); ioapic_init_mappings(); + /* need to wait for io_apic is mapped */ + nr_irqs = probe_nr_irqs(); + kvm_guest_init(); e820_reserve_resources(); -- cgit v1.2.3 From bf9d3cf73e8caf0b3ae0b7f508a9f251536f5ff4 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 18 Aug 2008 22:17:08 -0700 Subject: xen: Fix bug `do_IRQ: cannot handle IRQ -1 vector 0x6 cpu 1' Following commit 9c3f2468d8339866d9ef6a25aae31a8909c6be0d, do_IRQ() looks up the IRQ number in the per-cpu variable vector_irq. This commit makes Xen initialise an identity vector_irq map for both X86_32 and X86_64. Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/irq.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 28b85ab8422..bb042608c60 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void) static void __init __xen_init_IRQ(void) { -#ifdef CONFIG_X86_64 int i; /* Create identity vector->irq map */ @@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void) for_each_possible_cpu(cpu) per_cpu(vector_irq, cpu)[i] = i; } -#endif /* CONFIG_X86_64 */ xen_init_IRQ(); } -- cgit v1.2.3 From 0c425cec64eb0c0d0dd7037c21a25585cbe3636c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 18 Aug 2008 13:04:26 +0200 Subject: warning: fix arch x86 kernel io_apic c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix warning: arch/x86/kernel/io_apic.c: In function ‘print_local_APIC’: arch/x86/kernel/io_apic.c:1786: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘u64’ arch/x86/kernel/io_apic.c:1787: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘u64’ By creating uniform behavior on 32-bit and 64-bit and printing out the ICR value in two 32-bit words. Code has changed: text data bss dec hex filename 22901 19650 17040 59591 e8c7 io_apic.o.before 22899 19650 17040 59589 e8c5 io_apic.o.after Due to the 32-bit cast narrowing the printed out value on 64-bit. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f7e80262cbb..34c74cf5c24 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1774,8 +1774,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy) } icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v1.2.3 From e89eb43863c2d9f11a3bbe766766fe646e6c50d9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 20 Aug 2008 20:46:25 -0700 Subject: x86: sparse_irq needs spin_lock in allocations Suresh Siddha noticed that we should have a spinlock around it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 34c74cf5c24..7ca55669047 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -146,6 +146,12 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) } static struct irq_cfg *irq_cfgx; + +/* + * Protect the irq_cfgx_free freelist: + */ +static DEFINE_SPINLOCK(irq_cfg_lock); + #ifdef CONFIG_HAVE_SPARSE_IRQ static struct irq_cfg *irq_cfgx_free; #endif @@ -213,8 +219,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) static struct irq_cfg *irq_cfg_alloc(unsigned int irq) { struct irq_cfg *cfg, *cfg_pri; - int i; + unsigned long flags; int count = 0; + int i; cfg_pri = cfg = irq_cfgx; while (cfg) { @@ -226,6 +233,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) count++; } + spin_lock_irqsave(&irq_cfg_lock, flags); if (!irq_cfgx_free) { unsigned long phys; unsigned long total_bytes; @@ -263,6 +271,9 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) else irq_cfgx = cfg; cfg->irq = irq; + + spin_unlock_irqrestore(&irq_cfg_lock, flags); + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG { -- cgit v1.2.3 From a2d332fa3445160519de03c350a59602ac1c3df9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 12:56:32 -0700 Subject: x86: fix 32-bit ioapic lockup with sparseirqs Missed two lines when copying. Fix panic on one of Ingo's machines that need to adjust ioapic id when acpi off/ 32bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7ca55669047..4e44fd1f466 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2077,6 +2077,8 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0, reg_00.raw); + spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check -- cgit v1.2.3 From 052c0bff9b83a578654dfa513d6e3d0b3795f1e8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 13:10:09 -0700 Subject: x86: fix probe_nr_irqs for xen otherwise Xen is _completely_ unusable with 5 or more VCPUs. (when !CONFIG_HAVE_SPARSE_IRQ). based on Alex Nixon's patch. also add +1 offset after redir_entries Signed-off-by: Yinghai Lu Acked-by: Jeremy Fitzhardinge Acked-by: Alex Nixon Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4e44fd1f466..d28128e0392 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3625,16 +3625,21 @@ int __init probe_nr_irqs(void) { int idx; int nr = 0; +#ifndef CONFIG_XEN + int nr_min = 32; +#else + int nr_min = NR_IRQS; +#endif for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx); + nr += io_apic_get_redir_entries(idx) + 1; /* double it for hotplug and msi and nmi */ nr <<= 1; /* something wrong ? */ - if (nr < 32) - nr = 32; + if (nr < nr_min) + nr = nr_min; return nr; } -- cgit v1.2.3 From 2699574b3c04351f5a23c8a842e17c303d7ebb6f Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Thu, 21 Aug 2008 11:26:43 -0700 Subject: x86: VMI, initialize IRQ vector Initialize vector_irq for the vmi used vector, to point to correct irq. Signed-off-by: Alok N Kataria Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmiclock_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 6953859fe28..254ee07f863 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void) void __init vmi_time_init(void) { + unsigned int cpu; /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */ outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ vmi_time_init_clockevent(); setup_irq(0, &vmi_clock_action); + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; } #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.3 From db4b5525caafd846ec20f95afbc6403c792e22cf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:39 -0700 Subject: x86: apic_64.c - setup_APIC_timer has to be __cpuinit function There is no need to hold this code if CPU_HOTPLUG is not defined. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index cd860856a0b..8f551276681 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -400,7 +400,7 @@ static void lapic_timer_broadcast(cpumask_t mask) * Setup the local APIC timer for this CPU. Copy the initilized values * of the boot CPU and register the clock event in the framework. */ -static void setup_APIC_timer(void) +static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); -- cgit v1.2.3 From 7c37e48b5125fdb0acac846a0a3af42806175d44 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:40 -0700 Subject: x86: apic - introduce get_physical_broadcast for 64bit We don't really use it now on 64bit mode but could reserve it for future. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 8f551276681..bb46711a0b1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -244,6 +244,16 @@ void __cpuinit enable_NMI_through_LVT0(void) apic_write(APIC_LVT0, v); } +#ifdef CONFIG_X86_32 +/** + * get_physical_broadcast - Get number of physical broadcast IDs + */ +int get_physical_broadcast(void) +{ + return modern_apic() ? 0xff : 0xf; +} +#endif + /** * lapic_get_maxlvt - get the maximum number of local vector table entries */ -- cgit v1.2.3 From 920fa7a507c3b1004a9ebe07a2c9d38605b3406a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:41 -0700 Subject: x86: apic - unify setup_apicpmtimer Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 ++++++++++ arch/x86/kernel/apic_64.c | 2 ++ 2 files changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 21c831d96af..df6ed603e54 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1774,6 +1774,16 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); +#ifdef CONFIG_X86_64 +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + static int __init apic_set_verbosity(char *arg) { if (!arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index bb46711a0b1..ddc5b245faf 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1810,6 +1810,7 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); +#ifdef CONFIG_X86_64 static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; @@ -1817,6 +1818,7 @@ static __init int setup_apicpmtimer(char *s) return 0; } __setup("apicpmtimer", setup_apicpmtimer); +#endif static int __init apic_set_verbosity(char *arg) { -- cgit v1.2.3 From 80e5609cabd7e4321769701a70297f819a15b08d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:42 -0700 Subject: x86: apic_64.c - add sanity check for spurious vector definition Do not check for SPUTIOUS_APIC_VECTOR definition twice. Check it once - is what we need. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ddc5b245faf..4587e16f73e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,13 @@ #include #include +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; @@ -939,8 +946,6 @@ void __cpuinit setup_local_APIC(void) preempt_disable(); value = apic_read(APIC_LVR); - BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f); - /* * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. -- cgit v1.2.3 From 89c38c2867ebe37c4c5aee23e7fa1bffb025b171 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:43 -0700 Subject: x86: apic - unify setup_local_APIC - remove useless read of APIC_LVR - wrap with preempt_disable/enable - check for integrated APIC just in place v2: fix by Yinghai Lu. fix lapic_is_integrated using let 64-bit too have pic_mode Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 21 ++++++++++++++----- arch/x86/kernel/apic_64.c | 51 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index df6ed603e54..f8c1251984e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1033,9 +1033,10 @@ static void __cpuinit lapic_setup_esr(void) */ void __cpuinit setup_local_APIC(void) { - unsigned long value, integrated; + unsigned int value; int i, j; +#ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (esr_disable) { apic_write(APIC_ESR, 0); @@ -1043,14 +1044,16 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); } +#endif - integrated = lapic_is_integrated(); + preempt_disable(); /* * Double-check whether this APIC is really registered. + * This is meaningless in clustered apic mode, so we skip it. */ if (!apic_id_registered()) - WARN_ON_ONCE(1); + BUG(); /* * Intel recommends to set DFR, LDR and TPR before enabling @@ -1096,6 +1099,7 @@ void __cpuinit setup_local_APIC(void) */ value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* * Some unknown Intel IO/APIC (or APIC) errata is biting us with * certain networking cards. If high frequency interrupts are @@ -1116,8 +1120,13 @@ void __cpuinit setup_local_APIC(void) * See also the comment in end_level_ioapic_irq(). --macro */ - /* Enable focus processor (bit==0) */ + /* + * - enable focus processor (bit==0) + * - 64bit mode always use processor focus + * so no need to set it + */ value &= ~APIC_SPIV_FOCUS_DISABLED; +#endif /* * Set spurious IRQ vector @@ -1154,9 +1163,11 @@ void __cpuinit setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!integrated) /* 82489DX */ + if (!lapic_is_integrated()) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); + + preempt_enable(); } void __cpuinit end_local_APIC_setup(void) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4587e16f73e..283968d4e02 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -76,6 +76,8 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; */ unsigned int apic_verbosity; +int pic_mode; + /* Have we found an MP table */ int smp_found_config; @@ -943,8 +945,17 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; +#ifdef CONFIG_X86_32 + /* Pound the ESR really hard over the head with a big hammer - mbligh */ + if (esr_disable) { + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + } +#endif + preempt_disable(); - value = apic_read(APIC_LVR); /* * Double-check whether this APIC is really registered. @@ -997,7 +1008,34 @@ void __cpuinit setup_local_APIC(void) */ value |= APIC_SPIV_APIC_ENABLED; - /* We always use processor focus */ +#ifdef CONFIG_X86_32 + /* + * Some unknown Intel IO/APIC (or APIC) errata is biting us with + * certain networking cards. If high frequency interrupts are + * happening on a particular IOAPIC pin, plus the IOAPIC routing + * entry is masked/unmasked at a high rate as well then sooner or + * later IOAPIC line gets 'stuck', no more interrupts are received + * from the device. If focus CPU is disabled then the hang goes + * away, oh well :-( + * + * [ This bug can be reproduced easily with a level-triggered + * PCI Ne2000 networking cards and PII/PIII processors, dual + * BX chipset. ] + */ + /* + * Actually disabling the focus CPU check just makes the hang less + * frequent as it makes the interrupt distributon model be more + * like LRU than MRU (the short-term load is more even across CPUs). + * See also the comment in end_level_ioapic_irq(). --macro + */ + + /* + * - enable focus processor (bit==0) + * - 64bit mode always use processor focus + * so no need to set it + */ + value &= ~APIC_SPIV_FOCUS_DISABLED; +#endif /* * Set spurious IRQ vector @@ -1016,14 +1054,14 @@ void __cpuinit setup_local_APIC(void) * TODO: set up through-local-APIC from through-I/O-APIC? --macro */ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!smp_processor_id() && !value) { + if (!smp_processor_id() && (pic_mode || !value)) { value = APIC_DM_EXTINT; apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", - smp_processor_id()); + smp_processor_id()); } else { value = APIC_DM_EXTINT | APIC_LVT_MASKED; apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", - smp_processor_id()); + smp_processor_id()); } apic_write(APIC_LVT0, value); @@ -1034,7 +1072,10 @@ void __cpuinit setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); + preempt_enable(); } -- cgit v1.2.3 From 457cc52d4670bcf1470606a108bbf35aac28eb7f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:44 -0700 Subject: x86: apic_32.c should use __cpuinit section All callers are __init or __cpuinit so there is no need to hold this code without CPU_HOTPLUG being set. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f8c1251984e..4ca3717b302 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -383,7 +383,7 @@ static void lapic_timer_broadcast(cpumask_t mask) * Setup the local APIC timer for this CPU. Copy the initilized values * of the boot CPU and register the clock event in the framework. */ -static void __devinit setup_APIC_timer(void) +static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); @@ -652,7 +652,7 @@ void __init setup_boot_APIC_clock(void) setup_APIC_timer(); } -void __devinit setup_secondary_APIC_clock(void) +void __cpuinit setup_secondary_APIC_clock(void) { setup_APIC_timer(); } @@ -1713,7 +1713,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __devinit apic_pm_activate(void) +static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; } -- cgit v1.2.3 From 6460bc73aac970135104a0bc407c2c8b85394d59 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:45 -0700 Subject: x86: apic - unify smp_apic_timer_interrupt Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +++ arch/x86/kernel/apic_64.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4ca3717b302..913d9924b10 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -718,6 +718,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); local_apic_timer_interrupt(); irq_exit(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 283968d4e02..2e109119f64 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -625,7 +625,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); local_apic_timer_interrupt(); irq_exit(); -- cgit v1.2.3 From b3c5117050e8028d48b2fa0ea09c7a50dd7f3414 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:46 -0700 Subject: x86: apic_xx.c order variables Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 46 +++++++++++++++++++++++---------------------- arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 913d9924b10..a54512bea62 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -50,16 +50,37 @@ # error SPURIOUS_APIC_VECTOR definition error #endif -unsigned long mp_lapic_addr; - +#ifdef CONFIG_X86_32 /* * Knob to control our willingness to enable the local APIC. * * +1=force-enable */ static int force_enable_local_apic; -int disable_apic; +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + force_enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); +#endif +#ifdef CONFIG_X86_64 +static int apic_calibrate_pmtmr __initdata; +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + +unsigned long mp_lapic_addr; +int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -1742,15 +1763,6 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); static int __init setup_disableapic(char *arg) { @@ -1788,16 +1800,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -#ifdef CONFIG_X86_64 -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - static int __init apic_set_verbosity(char *arg) { if (!arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 2e109119f64..c40a900e155 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -53,16 +53,44 @@ # error SPURIOUS_APIC_VECTOR definition error #endif -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; +#ifdef CONFIG_X86_32 +/* + * Knob to control our willingness to enable the local APIC. + * + * +1=force-enable + */ +static int force_enable_local_apic; +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + force_enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); +#endif + +#ifdef CONFIG_X86_64 static int apic_calibrate_pmtmr __initdata; -int disable_apic; +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + int disable_x2apic; int x2apic; - /* x2apic enabled before OS handover */ int x2apic_preenabled; +unsigned long mp_lapic_addr; +int disable_apic; +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -113,8 +141,6 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; -unsigned long mp_lapic_addr; - /* * Get the LAPIC version */ @@ -1858,16 +1884,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -#ifdef CONFIG_X86_64 -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - static int __init apic_set_verbosity(char *arg) { if (!arg) { -- cgit v1.2.3 From 49899eacce79ce39faf531dad3e00f771eba2eb1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:47 -0700 Subject: x86: use HAVE_X2APIC in apic_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index c40a900e155..d3ec746aede 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -82,10 +82,23 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -int disable_x2apic; +#ifdef CONFIG_X86_64 +#define HAVE_X2APIC +#endif + +#ifdef HAVE_X2APIC int x2apic; /* x2apic enabled before OS handover */ int x2apic_preenabled; +int disable_x2apic; +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); +#endif unsigned long mp_lapic_addr; int disable_apic; @@ -228,6 +241,7 @@ static struct apic_ops xapic_ops = { struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +#ifdef HAVE_X2APIC static void x2apic_wait_icr_idle(void) { /* no need to wait for icr idle in x2apic */ @@ -261,6 +275,7 @@ static struct apic_ops x2apic_ops = { .wait_icr_idle = x2apic_wait_icr_idle, .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, }; +#endif /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 @@ -1125,6 +1140,7 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +#ifdef HAVE_X2APIC void check_x2apic(void) { int msr, msr2; @@ -1243,6 +1259,7 @@ end: return; } +#endif /* HAVE_X2APIC */ /* * Detect and enable local APICs on non-SMP boards. @@ -1291,10 +1308,12 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { +#ifdef HAVE_X2APIC if (x2apic) { boot_cpu_physical_apicid = read_apic_id(); return; } +#endif /* * If no local APIC can be found then set up a fake all @@ -1335,8 +1354,9 @@ int __init APIC_init_uniprocessor(void) printk(KERN_INFO "Apic disabled by BIOS\n"); return -1; } - +#ifdef HAVE_X2APIC enable_IR_x2apic(); +#endif setup_apic_routing(); verify_local_APIC(); @@ -1672,7 +1692,7 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef CONFIG_X86_64 +#ifdef HAVE_X2APIC if (x2apic) enable_x2apic(); else @@ -1836,15 +1856,6 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } -static __init int setup_nox2apic(char *str) -{ - disable_x2apic = 1; - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); - return 0; -} -early_param("nox2apic", setup_nox2apic); - - /* * APIC command line parameters */ -- cgit v1.2.3 From 3491998dd54f6d4ef7344518fe5463b299fdf537 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:48 -0700 Subject: x86: add hard_smp_prossor_id with MACRO in io_apic_xx.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 7 +++++++ arch/x86/kernel/apic_64.c | 2 ++ 2 files changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a54512bea62..93718ffb9b9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1600,6 +1600,13 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +#ifdef CONFIG_X86_64 +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} +#endif + /* * Power management */ diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d3ec746aede..eeb69838c2f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1612,10 +1612,12 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +#ifdef CONFIG_X86_64 int hard_smp_processor_id(void) { return read_apic_id(); } +#endif /* * Power management -- cgit v1.2.3 From f28c0ae21d80ffd6eb0987901c5273843387e341 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:49 -0700 Subject: x86: make apic_32/64.c more like except x2apic, detec_init_APIC, and calibrating_APIC_clock Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 125 +++++++++++++++++++++++++++++++++++++++++----- arch/x86/kernel/apic_64.c | 10 +++- 2 files changed, 122 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 93718ffb9b9..bfac26a1609 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -66,6 +66,9 @@ static int __init parse_lapic(char *arg) return 0; } early_param("lapic", parse_lapic); +/* Local APIC was disabled by the BIOS and enabled by the kernel */ +static int enabled_via_apicbase; + #endif #ifdef CONFIG_X86_64 @@ -131,9 +134,6 @@ static struct clock_event_device lapic_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - static unsigned long apic_phys; /* @@ -240,6 +240,7 @@ void __cpuinit enable_NMI_through_LVT0(void) apic_write(APIC_LVT0, v); } +#ifdef CONFIG_X86_32 /** * get_physical_broadcast - Get number of physical broadcast IDs */ @@ -247,6 +248,7 @@ int get_physical_broadcast(void) { return modern_apic() ? 0xff : 0xf; } +#endif /** * lapic_get_maxlvt - get the maximum number of local vector table entries @@ -1291,6 +1293,32 @@ no_apic: return -1; } +#ifdef CONFIG_X86_64 +void __init early_init_lapic_mapping(void) +{ + unsigned long phys_addr; + + /* + * If no local APIC can be found then go out + * : it means there is no mpatable and MADT + */ + if (!smp_found_config) + return; + + phys_addr = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, phys_addr); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + boot_cpu_physical_apicid = read_apic_id(); +} +#endif + /** * init_apic_mappings - initialize APIC mappings */ @@ -1308,8 +1336,8 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, - apic_phys); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, apic_phys); /* * Fetch the APIC ID of the BSP in case we have a @@ -1317,14 +1345,12 @@ void __init init_apic_mappings(void) */ if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = read_apic_id(); - } /* * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ - int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) @@ -1682,11 +1708,6 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef CONFIG_X86_64 - if (x2apic) - enable_x2apic(); - else -#endif { /* * Make sure the APICBASE points to the right address @@ -1770,7 +1791,87 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ +#ifdef CONFIG_X86_64 +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. Use available data to take a good guess. + * If in doubt, go HPET. + */ +__cpuinit int apic_is_clustered_box(void) +{ + int i, clusters, zeros; + unsigned id; + u16 *bios_cpu_apicid; + DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + + /* + * there is not this kind of box with AMD CPU yet. + * Some AMD box with quadcore cpu and 8 sockets apicid + * will be [4, 0x23] or [8, 0x27] could be thought to + * vsmp box still need checking... + */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) + return 0; + + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); + + for (i = 0; i < NR_CPUS; i++) { + /* are we being called early in kernel startup? */ + if (bios_cpu_apicid) { + id = bios_cpu_apicid[i]; + } + else if (i < nr_cpu_ids) { + if (cpu_present(i)) + id = per_cpu(x86_bios_cpu_apicid, i); + else + continue; + } + else + break; + if (id != BAD_APICID) + __set_bit(APIC_CLUSTERID(id), clustermap); + } + + /* Problem: Partially populated chassis may not have CPUs in some of + * the APIC clusters they have been allocated. Only present CPUs have + * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. + * Since clusters are allocated sequentially, count zeros only if + * they are bounded by ones. + */ + clusters = 0; + zeros = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (test_bit(i, clustermap)) { + clusters += 1 + zeros; + zeros = 0; + } else + ++zeros; + } + + /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards + */ + if (is_vsmp_box() && clusters > 1) + return 1; + + /* + * If clusters > 2, then should be multi-chassis. + * May have to revisit this when multi-core + hyperthreaded CPUs come + * out, but AFAIK this will work even for them. + */ + return (clusters > 2); +} +#endif + +/* + * APIC command line parameters + */ static int __init setup_disableapic(char *arg) { disable_apic = 1; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index eeb69838c2f..dca6c246e73 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -69,6 +69,9 @@ static int __init parse_lapic(char *arg) return 0; } early_param("lapic", parse_lapic); +/* Local APIC was disabled by the BIOS and enabled by the kernel */ +static int enabled_via_apicbase; + #endif #ifdef CONFIG_X86_64 @@ -1279,6 +1282,7 @@ static int __init detect_init_APIC(void) return 0; } +#ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) { unsigned long phys_addr; @@ -1302,6 +1306,7 @@ void __init early_init_lapic_mapping(void) */ boot_cpu_physical_apicid = read_apic_id(); } +#endif /** * init_apic_mappings - initialize APIC mappings @@ -1334,7 +1339,8 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = read_apic_id(); + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = read_apic_id(); } /* @@ -1782,6 +1788,7 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ +#ifdef CONFIG_X86_64 /* * apic_is_clustered_box() -- Check if we can expect good TSC * @@ -1857,6 +1864,7 @@ __cpuinit int apic_is_clustered_box(void) */ return (clusters > 2); } +#endif /* * APIC command line parameters -- cgit v1.2.3 From fa2bd35a8d5c88c03b638c72daf7f38a132d0e8c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:50 -0700 Subject: x86: merge APIC_init_uniprocessor Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 51 +++++++++++++++++++++++++++++++++++++++++------ arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index bfac26a1609..8f8b0e1f3eb 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1355,6 +1355,17 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { +#ifdef CONFIG_X86_64 + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } +#else if (!smp_found_config && !cpu_has_apic) return -1; @@ -1368,34 +1379,62 @@ int __init APIC_init_uniprocessor(void) clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } +#endif +#ifdef HAVE_X2APIC + enable_IR_x2apic(); +#endif +#ifdef CONFIG_X86_64 + setup_apic_routing(); +#endif verify_local_APIC(); - connect_bsp_APIC(); +#ifdef CONFIG_X86_64 + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +#else /* * Hack: In case of kdump, after a crash, kernel might be booting * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid * might be zero if read from MP tables. Get it from LAPIC. */ -#ifdef CONFIG_CRASH_DUMP +# ifdef CONFIG_CRASH_DUMP boot_cpu_physical_apicid = read_apic_id(); +# endif #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); +#ifdef CONFIG_X86_64 + /* + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP + */ + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); +#endif + #ifdef CONFIG_X86_IO_APIC if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) #endif localise_nmi_watchdog(); end_local_APIC_setup(); + #ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +# ifdef CONFIG_X86_64 + else + nr_ioapics = 0; +# endif #endif + +#ifdef CONFIG_X86_64 + setup_boot_APIC_clock(); + check_nmi_watchdog(); +#else setup_boot_clock(); +#endif return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index dca6c246e73..16a30c22b07 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1351,6 +1351,7 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { +#ifdef CONFIG_X86_64 if (disable_apic) { printk(KERN_INFO "Apic disabled\n"); return -1; @@ -1360,37 +1361,78 @@ int __init APIC_init_uniprocessor(void) printk(KERN_INFO "Apic disabled by BIOS\n"); return -1; } +#else + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. + */ + if (!cpu_has_apic && + APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + return -1; + } +#endif + #ifdef HAVE_X2APIC enable_IR_x2apic(); #endif +#ifdef CONFIG_X86_64 setup_apic_routing(); +#endif verify_local_APIC(); - connect_bsp_APIC(); - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); +#ifdef CONFIG_X86_64 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); - +#else + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +# ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = read_apic_id(); +# endif +#endif + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); setup_local_APIC(); +#ifdef CONFIG_X86_64 /* * Now enable IO-APICs, actually call clear_IO_APIC * We need clear_IO_APIC before enabling vector on BP */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); +#endif +#ifdef CONFIG_X86_IO_APIC if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) +#endif localise_nmi_watchdog(); end_local_APIC_setup(); +#ifdef CONFIG_X86_IO_APIC if (smp_found_config && !skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); +# ifdef CONFIG_X86_64 else nr_ioapics = 0; +# endif +#endif + +#ifdef CONFIG_X86_64 setup_boot_APIC_clock(); check_nmi_watchdog(); +#else + setup_boot_clock(); +#endif + return 0; } -- cgit v1.2.3 From be7a656fe131cba088912bcafb079b029320504d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:51 -0700 Subject: x86: copy detect_init_APIC to the other Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 20 ++++++++++++ arch/x86/kernel/apic_64.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8f8b0e1f3eb..1103e05aa1b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1214,6 +1214,25 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +#ifdef CONFIG_X86_64 +/* + * Detect and enable local APICs on non-SMP boards. + * Original code written by Keir Fraser. + * On AMD64 we trust the BIOS - if it says no APIC it is likely + * not correctly set up (usually the APIC timer won't work etc.) + */ +static int __init detect_init_APIC(void) +{ + if (!cpu_has_apic) { + printk(KERN_INFO "No local APIC present\n"); + return -1; + } + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_physical_apicid = 0; + return 0; +} +#else /* * Detect and initialize APIC */ @@ -1292,6 +1311,7 @@ no_apic: printk(KERN_INFO "No local APIC present or hardware disabled\n"); return -1; } +#endif #ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 16a30c22b07..b7268f5c8b1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -684,7 +684,6 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } - /* * Local APIC start and shutdown */ @@ -1264,6 +1263,7 @@ end: } #endif /* HAVE_X2APIC */ +#ifdef CONFIG_X86_64 /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -1281,6 +1281,86 @@ static int __init detect_init_APIC(void) boot_cpu_physical_apicid = 0; return 0; } +#else +/* + * Detect and initialize APIC + */ +static int __init detect_init_APIC(void) +{ + u32 h, l, features; + + /* Disabled by kernel option? */ + if (disable_apic) + return -1; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || + (boot_cpu_data.x86 == 15)) + break; + goto no_apic; + case X86_VENDOR_INTEL: + if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || + (boot_cpu_data.x86 == 5 && cpu_has_apic)) + break; + goto no_apic; + default: + goto no_apic; + } + + if (!cpu_has_apic) { + /* + * Over-ride BIOS and try to enable the local APIC only if + * "lapic" specified. + */ + if (!force_enable_local_apic) { + printk(KERN_INFO "Local APIC disabled by BIOS -- " + "you can enable it with \"lapic\"\n"); + return -1; + } + /* + * Some BIOSes disable the local APIC in the APIC_BASE + * MSR. This can only be done in software for Intel P6 or later + * and AMD K7 (Model > 1) or later. + */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + printk(KERN_INFO + "Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + } + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + printk(KERN_WARNING "Could not enable APIC!\n"); + return -1; + } + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* The BIOS may have set up the APIC at some other address */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + + printk(KERN_INFO "Found and enabled local APIC!\n"); + + apic_pm_activate(); + + return 0; + +no_apic: + printk(KERN_INFO "No local APIC present or hardware disabled\n"); + return -1; +} +#endif #ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) -- cgit v1.2.3 From 773763df7de881e65ff2600c024c9ce2dde64750 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:52 -0700 Subject: x86: merge header files in apic_xx.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++++ arch/x86/kernel/apic_64.c | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 1103e05aa1b..0ec8321e687 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -36,8 +38,14 @@ #include #include #include +#include #include #include +#include +#include +#include +#include +#include #include #include diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index b7268f5c8b1..ebe417b4d7f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -24,9 +24,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -34,8 +36,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -43,8 +47,9 @@ #include #include -#include #include +#include +#include /* * Sanity check -- cgit v1.2.3 From dc1528dd864a0b79fa67b60b3ca5674fe94fdce5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:53 -0700 Subject: x86: apic unify smp_spurious/error_interrupt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 24 +++++++++++++++++++++--- arch/x86/kernel/apic_64.c | 24 ++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 0ec8321e687..60a901b2d4f 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1474,10 +1474,17 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_spurious_interrupt(void) +#else void smp_spurious_interrupt(struct pt_regs *regs) +#endif { - unsigned long v; + u32 v; +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); /* * Check if this really is a spurious interrupt and ACK it @@ -1488,20 +1495,31 @@ void smp_spurious_interrupt(struct pt_regs *regs) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); +#ifdef CONFIG_X86_64 + add_pda(irq_spurious_count, 1); +#else /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); __get_cpu_var(irq_stat).irq_spurious_count++; +#endif irq_exit(); } /* * This interrupt should never happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_error_interrupt(void) +#else void smp_error_interrupt(struct pt_regs *regs) +#endif { - unsigned long v, v1; + u32 v, v1; +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); @@ -1520,7 +1538,7 @@ void smp_error_interrupt(struct pt_regs *regs) 6: Received illegal vector 7: Illegal register address */ - printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", smp_processor_id(), v , v1); irq_exit(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ebe417b4d7f..c728885e4f4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1528,10 +1528,17 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 asmlinkage void smp_spurious_interrupt(void) +#else +void smp_spurious_interrupt(struct pt_regs *regs) +#endif { - unsigned int v; + u32 v; + +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); /* * Check if this really is a spurious interrupt and ACK it @@ -1542,18 +1549,31 @@ asmlinkage void smp_spurious_interrupt(void) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); +#ifdef CONFIG_X86_64 add_pda(irq_spurious_count, 1); +#else + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " + "should never happen.\n", smp_processor_id()); + __get_cpu_var(irq_stat).irq_spurious_count++; +#endif irq_exit(); } /* * This interrupt should never happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 asmlinkage void smp_error_interrupt(void) +#else +void smp_error_interrupt(struct pt_regs *regs) +#endif { - unsigned int v, v1; + u32 v, v1; +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); -- cgit v1.2.3 From 2f04fa888d270951b9e0fe9e641ddd560d77ad1b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:54 -0700 Subject: x86: apic copy calibrate_APIC_clock to each other in apic_32/64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 86 +++++++++++++++++++ arch/x86/kernel/apic_64.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 60a901b2d4f..05498c37f8d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -424,6 +424,90 @@ static void __cpuinit setup_APIC_timer(void) clockevents_register_device(levt); } +#ifdef CONFIG_X86_64 +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. + * + * We want to do the calibration only once since we + * want to have local timer irqs syncron. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +#define TICK_COUNT 100000000 + +static int __init calibrate_APIC_clock(void) +{ + unsigned apic, apic_start; + unsigned long tsc, tsc_start; + int result; + + local_irq_disable(); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + * + * No interrupt enable ! + */ + __setup_APIC_LVTT(250000000, 0, 0); + + apic_start = apic_read(APIC_TMCCT); +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ + apic = apic_read(APIC_TMCCT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscll(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscll(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic_start - apic) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * tsc_khz / + (tsc - tsc_start); + } + + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + + printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", + result / 1000 / 1000, result / 1000 % 1000); + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (result * APIC_DIVISOR) / HZ; + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + return 0; +} + +#else /* * In this functions we calibrate APIC bus clocks to the external timer. * @@ -635,6 +719,8 @@ static int __init calibrate_APIC_clock(void) return 0; } +#endif + /* * Setup the boot APIC * diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index c728885e4f4..6e99af5ce67 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -478,6 +478,7 @@ static void __cpuinit setup_APIC_timer(void) clockevents_register_device(levt); } +#ifdef CONFIG_X86_64 /* * In this function we calibrate APIC bus clocks to the external * timer. Unfortunately we cannot use jiffies and the timer irq @@ -560,6 +561,220 @@ static int __init calibrate_APIC_clock(void) return 0; } +#else +/* + * In this functions we calibrate APIC bus clocks to the external timer. + * + * We want to do the calibration only once since we want to have local timer + * irqs syncron. CPUs connected by the same APIC bus have the very same bus + * frequency. + * + * This was previously done by reading the PIT/HPET and waiting for a wrap + * around to find out, that a tick has elapsed. I have a box, where the PIT + * readout is broken, so it never gets out of the wait loop again. This was + * also reported by others. + * + * Monitoring the jiffies value is inaccurate and the clockevents + * infrastructure allows us to do a simple substitution of the interrupt + * handler. + * + * The calibration routine also uses the pm_timer when possible, as the PIT + * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes + * back to normal later in the boot process). + */ + +#define LAPIC_CAL_LOOPS (HZ/10) + +static __initdata int lapic_cal_loops = -1; +static __initdata long lapic_cal_t1, lapic_cal_t2; +static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; +static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; + +/* + * Temporary interrupt handler. + */ +static void __init lapic_cal_handler(struct clock_event_device *dev) +{ + unsigned long long tsc = 0; + long tapic = apic_read(APIC_TMCCT); + unsigned long pm = acpi_pm_read_early(); + + if (cpu_has_tsc) + rdtscll(tsc); + + switch (lapic_cal_loops++) { + case 0: + lapic_cal_t1 = tapic; + lapic_cal_tsc1 = tsc; + lapic_cal_pm1 = pm; + lapic_cal_j1 = jiffies; + break; + + case LAPIC_CAL_LOOPS: + lapic_cal_t2 = tapic; + lapic_cal_tsc2 = tsc; + if (pm < lapic_cal_pm1) + pm += ACPI_PM_OVRRUN; + lapic_cal_pm2 = pm; + lapic_cal_j2 = jiffies; + break; + } +} + +static int __init calibrate_APIC_clock(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); + const long pm_100ms = PMTMR_TICKS_PER_SEC/10; + const long pm_thresh = pm_100ms/100; + void (*real_handler)(struct clock_event_device *dev); + unsigned long deltaj; + long delta, deltapm; + int pm_referenced = 0; + + local_irq_disable(); + + /* Replace the global interrupt handler */ + real_handler = global_clock_event->event_handler; + global_clock_event->event_handler = lapic_cal_handler; + + /* + * Setup the APIC counter to 1e9. There is no way the lapic + * can underflow in the 100ms detection time frame + */ + __setup_APIC_LVTT(1000000000, 0, 0); + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Restore the real event handler */ + global_clock_event->event_handler = real_handler; + + /* Build delta t1-t2 as apic timer counts down */ + delta = lapic_cal_t1 - lapic_cal_t2; + apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + + /* Check, if the PM timer is available */ + deltapm = lapic_cal_pm2 - lapic_cal_pm1; + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + if (deltapm) { + unsigned long mult; + u64 res; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64) deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64) delta) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long) res, delta); + delta = (long) res; + } + pm_referenced = 1; + } + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + + apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); + apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); + apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", + calibration_result); + + if (cpu_has_tsc) { + delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); + apic_printk(APIC_VERBOSE, "..... CPU clock speed is " + "%ld.%04ld MHz.\n", + (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + } + + apic_printk(APIC_VERBOSE, "..... host bus clock speed is " + "%u.%04u MHz.\n", + calibration_result / (1000000 / HZ), + calibration_result % (1000000 / HZ)); + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + local_irq_enable(); + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; + + /* We trust the pm timer based calibration */ + if (!pm_referenced) { + apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); + + /* + * Setup the apic timer manually + */ + levt->event_handler = lapic_cal_handler; + lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); + lapic_cal_loops = -1; + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Stop the lapic timer */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); + + local_irq_enable(); + + /* Jiffies delta */ + deltaj = lapic_cal_j2 - lapic_cal_j1; + apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); + + /* Check, if the jiffies result is consistent */ + if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) + apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); + else + levt->features |= CLOCK_EVT_FEAT_DUMMY; + } else + local_irq_enable(); + + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { + printk(KERN_WARNING + "APIC timer disabled due to verification failure.\n"); + return -1; + } + + return 0; +} + +#endif + /* * Setup the boot APIC * -- cgit v1.2.3 From 6c15822752a13748241e1a34068f2b15b660d28e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:55 -0700 Subject: x86: apic copy apic_64.c to apic_32.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 05498c37f8d..0b11d6e3f09 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -90,6 +90,24 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif +#ifdef CONFIG_X86_64 +#define HAVE_X2APIC +#endif + +#ifdef HAVE_X2APIC +int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; +int disable_x2apic; +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); +#endif + unsigned long mp_lapic_addr; int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ @@ -231,6 +249,42 @@ static struct apic_ops xapic_ops = { struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +#ifdef HAVE_X2APIC +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; +#endif + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -1308,6 +1362,127 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +#ifdef HAVE_X2APIC +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} +#endif /* HAVE_X2APIC */ + #ifdef CONFIG_X86_64 /* * Detect and enable local APICs on non-SMP boards. @@ -1438,6 +1613,13 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { +#ifdef HAVE_X2APIC + if (x2apic) { + boot_cpu_physical_apicid = read_apic_id(); + return; + } +#endif + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -1501,6 +1683,7 @@ int __init APIC_init_uniprocessor(void) #ifdef CONFIG_X86_64 setup_apic_routing(); #endif + verify_local_APIC(); connect_bsp_APIC(); @@ -1879,6 +2062,11 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); +#ifdef HAVE_X2APIC + if (x2apic) + enable_x2apic(); + else +#endif { /* * Make sure the APICBASE points to the right address -- cgit v1.2.3 From 0f611ffaea81b8e1c69682188ba1ccaf7683a2ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:56 -0700 Subject: x86: rename apic_32.c and apic_64.c to apic.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/apic.c | 2311 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/apic_32.c | 2312 --------------------------------------------- arch/x86/kernel/apic_64.c | 2311 -------------------------------------------- 4 files changed, 2312 insertions(+), 4624 deletions(-) create mode 100644 arch/x86/kernel/apic.c delete mode 100644 arch/x86/kernel/apic_32.c delete mode 100644 arch/x86/kernel/apic_64.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7264f9c3af8..45cecc59d89 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c new file mode 100644 index 00000000000..6e99af5ce67 --- /dev/null +++ b/arch/x86/kernel/apic.c @@ -0,0 +1,2311 @@ +/* + * Local APIC handling, local APIC timers + * + * (c) 1999, 2000 Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively. + * Maciej W. Rozycki : Various updates and fixes. + * Mikael Pettersson : Power Management for UP-APIC. + * Pavel Machek and + * Mikael Pettersson : PM converted to driver model. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + +#ifdef CONFIG_X86_32 +/* + * Knob to control our willingness to enable the local APIC. + * + * +1=force-enable + */ +static int force_enable_local_apic; +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + force_enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); +/* Local APIC was disabled by the BIOS and enabled by the kernel */ +static int enabled_via_apicbase; + +#endif + +#ifdef CONFIG_X86_64 +static int apic_calibrate_pmtmr __initdata; +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + +#ifdef CONFIG_X86_64 +#define HAVE_X2APIC +#endif + +#ifdef HAVE_X2APIC +int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; +int disable_x2apic; +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); +#endif + +unsigned long mp_lapic_addr; +int disable_apic; +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +static int disable_apic_timer __cpuinitdata; +/* Local APIC timer works in C2 */ +int local_apic_timer_c2_ok; +EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); + +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + +/* + * Debug level, exported for io_apic.c + */ +unsigned int apic_verbosity; + +int pic_mode; + +/* Have we found an MP table */ +int smp_found_config; + +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + +static unsigned int calibration_result; + +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt); +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt); +static void lapic_timer_broadcast(cpumask_t mask); +static void apic_pm_activate(void); + +/* + * The local apic timer can be used for any function which is CPU local. + */ +static struct clock_event_device lapic_clockevent = { + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT + | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, +}; +static DEFINE_PER_CPU(struct clock_event_device, lapic_events); + +static unsigned long apic_phys; + +/* + * Get the LAPIC version + */ +static inline int lapic_get_version(void) +{ + return GET_APIC_VERSION(apic_read(APIC_LVR)); +} + +/* + * Check, if the APIC is integrated or a separate chip + */ +static inline int lapic_is_integrated(void) +{ +#ifdef CONFIG_X86_64 + return 1; +#else + return APIC_INTEGRATED(lapic_get_version()); +#endif +} + +/* + * Check, whether this is a modern or a first generation APIC + */ +static int modern_apic(void) +{ + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0xf) + return 1; + return lapic_get_version() >= 0x14; +} + +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ +void xapic_wait_icr_idle(void) +{ + while (apic_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + +u32 safe_xapic_wait_icr_idle(void) +{ + u32 send_status; + int timeout; + + timeout = 0; + do { + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + if (!send_status) + break; + udelay(100); + } while (timeout++ < 1000); + + return send_status; +} + +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + +#ifdef HAVE_X2APIC +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; +#endif + +/** + * enable_NMI_through_LVT0 - enable NMI through local vector table 0 + */ +void __cpuinit enable_NMI_through_LVT0(void) +{ + unsigned int v; + + /* unmask and set to NMI */ + v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + + apic_write(APIC_LVT0, v); +} + +#ifdef CONFIG_X86_32 +/** + * get_physical_broadcast - Get number of physical broadcast IDs + */ +int get_physical_broadcast(void) +{ + return modern_apic() ? 0xff : 0xf; +} +#endif + +/** + * lapic_get_maxlvt - get the maximum number of local vector table entries + */ +int lapic_get_maxlvt(void) +{ + unsigned int v; + + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; +} + +/* + * Local APIC timer + */ + +/* Clock divisor */ +#ifdef CONFG_X86_64 +#define APIC_DIVISOR 1 +#else +#define APIC_DIVISOR 16 +#endif + +/* + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock. During calibration we actually call + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. + */ +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) +{ + unsigned int lvtt_value, tmp_value; + + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!lapic_is_integrated()) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + + if (!irqen) + lvtt_value |= APIC_LVT_MASKED; + + apic_write(APIC_LVTT, lvtt_value); + + /* + * Divide PICLK by 16 + */ + tmp_value = apic_read(APIC_TDCR); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); + + if (!oneshot) + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); +} + +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + * + * If mask=1, the LVT entry does not generate interrupts while mask=0 + * enables the vector. See also the BKDGs. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} +EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); + +/* + * Program the next event, relative to now + */ +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + apic_write(APIC_TMICT, delta); + return 0; +} + +/* + * Setup the lapic timer in periodic or oneshot mode + */ +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + unsigned long flags; + unsigned int v; + + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return; + + local_irq_save(flags); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC, 1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; + } + + local_irq_restore(flags); +} + +/* + * Local APIC timer broadcast function + */ +static void lapic_timer_broadcast(cpumask_t mask) +{ +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#endif +} + +/* + * Setup the local APIC timer for this CPU. Copy the initilized values + * of the boot CPU and register the clock event in the framework. + */ +static void __cpuinit setup_APIC_timer(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); + + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); + + clockevents_register_device(levt); +} + +#ifdef CONFIG_X86_64 +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. + * + * We want to do the calibration only once since we + * want to have local timer irqs syncron. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +#define TICK_COUNT 100000000 + +static int __init calibrate_APIC_clock(void) +{ + unsigned apic, apic_start; + unsigned long tsc, tsc_start; + int result; + + local_irq_disable(); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + * + * No interrupt enable ! + */ + __setup_APIC_LVTT(250000000, 0, 0); + + apic_start = apic_read(APIC_TMCCT); +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ + apic = apic_read(APIC_TMCCT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscll(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscll(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic_start - apic) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * tsc_khz / + (tsc - tsc_start); + } + + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + + printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", + result / 1000 / 1000, result / 1000 % 1000); + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (result * APIC_DIVISOR) / HZ; + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + return 0; +} + +#else +/* + * In this functions we calibrate APIC bus clocks to the external timer. + * + * We want to do the calibration only once since we want to have local timer + * irqs syncron. CPUs connected by the same APIC bus have the very same bus + * frequency. + * + * This was previously done by reading the PIT/HPET and waiting for a wrap + * around to find out, that a tick has elapsed. I have a box, where the PIT + * readout is broken, so it never gets out of the wait loop again. This was + * also reported by others. + * + * Monitoring the jiffies value is inaccurate and the clockevents + * infrastructure allows us to do a simple substitution of the interrupt + * handler. + * + * The calibration routine also uses the pm_timer when possible, as the PIT + * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes + * back to normal later in the boot process). + */ + +#define LAPIC_CAL_LOOPS (HZ/10) + +static __initdata int lapic_cal_loops = -1; +static __initdata long lapic_cal_t1, lapic_cal_t2; +static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; +static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; + +/* + * Temporary interrupt handler. + */ +static void __init lapic_cal_handler(struct clock_event_device *dev) +{ + unsigned long long tsc = 0; + long tapic = apic_read(APIC_TMCCT); + unsigned long pm = acpi_pm_read_early(); + + if (cpu_has_tsc) + rdtscll(tsc); + + switch (lapic_cal_loops++) { + case 0: + lapic_cal_t1 = tapic; + lapic_cal_tsc1 = tsc; + lapic_cal_pm1 = pm; + lapic_cal_j1 = jiffies; + break; + + case LAPIC_CAL_LOOPS: + lapic_cal_t2 = tapic; + lapic_cal_tsc2 = tsc; + if (pm < lapic_cal_pm1) + pm += ACPI_PM_OVRRUN; + lapic_cal_pm2 = pm; + lapic_cal_j2 = jiffies; + break; + } +} + +static int __init calibrate_APIC_clock(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); + const long pm_100ms = PMTMR_TICKS_PER_SEC/10; + const long pm_thresh = pm_100ms/100; + void (*real_handler)(struct clock_event_device *dev); + unsigned long deltaj; + long delta, deltapm; + int pm_referenced = 0; + + local_irq_disable(); + + /* Replace the global interrupt handler */ + real_handler = global_clock_event->event_handler; + global_clock_event->event_handler = lapic_cal_handler; + + /* + * Setup the APIC counter to 1e9. There is no way the lapic + * can underflow in the 100ms detection time frame + */ + __setup_APIC_LVTT(1000000000, 0, 0); + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Restore the real event handler */ + global_clock_event->event_handler = real_handler; + + /* Build delta t1-t2 as apic timer counts down */ + delta = lapic_cal_t1 - lapic_cal_t2; + apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + + /* Check, if the PM timer is available */ + deltapm = lapic_cal_pm2 - lapic_cal_pm1; + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + if (deltapm) { + unsigned long mult; + u64 res; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64) deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64) delta) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long) res, delta); + delta = (long) res; + } + pm_referenced = 1; + } + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + + apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); + apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); + apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", + calibration_result); + + if (cpu_has_tsc) { + delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); + apic_printk(APIC_VERBOSE, "..... CPU clock speed is " + "%ld.%04ld MHz.\n", + (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + } + + apic_printk(APIC_VERBOSE, "..... host bus clock speed is " + "%u.%04u MHz.\n", + calibration_result / (1000000 / HZ), + calibration_result % (1000000 / HZ)); + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + local_irq_enable(); + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; + + /* We trust the pm timer based calibration */ + if (!pm_referenced) { + apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); + + /* + * Setup the apic timer manually + */ + levt->event_handler = lapic_cal_handler; + lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); + lapic_cal_loops = -1; + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Stop the lapic timer */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); + + local_irq_enable(); + + /* Jiffies delta */ + deltaj = lapic_cal_j2 - lapic_cal_j1; + apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); + + /* Check, if the jiffies result is consistent */ + if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) + apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); + else + levt->features |= CLOCK_EVT_FEAT_DUMMY; + } else + local_irq_enable(); + + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { + printk(KERN_WARNING + "APIC timer disabled due to verification failure.\n"); + return -1; + } + + return 0; +} + +#endif + +/* + * Setup the boot APIC + * + * Calibrate and verify the result. + */ +void __init setup_boot_APIC_clock(void) +{ + /* + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. + */ + if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) { + lapic_clockevent.mult = 1; + setup_APIC_timer(); + } + return; + } + + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + + if (calibrate_APIC_clock()) { + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); + return; + } + + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=%d!\n", nmi_watchdog); + + /* Setup the lapic or request the broadcast */ + setup_APIC_timer(); +} + +void __cpuinit setup_secondary_APIC_clock(void) +{ + setup_APIC_timer(); +} + +/* + * The guts of the apic timer interrupt + */ +static void local_apic_timer_interrupt(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + + /* + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, its possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * new kernel the moment interrupts are enabled. + * + * Interrupts are enabled early and LAPIC is setup much later, hence + * its possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } + + /* + * the NMI deadlock-detector uses this. + */ +#ifdef CONFIG_X86_64 + add_pda(apic_timer_irqs, 1); +#else + per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif + + evt->event_handler(evt); +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesn't support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +void smp_apic_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + local_apic_timer_interrupt(); + irq_exit(); + + set_irq_regs(old_regs); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * Local APIC start and shutdown + */ + +/** + * clear_local_APIC - shutdown the local APIC + * + * This is called, when a CPU is disabled and before rebooting, so the state of + * the local APIC has no dangling leftovers. Also used to cleanout any BIOS + * leftovers during boot. + */ +void clear_local_APIC(void) +{ + int maxlvt; + u32 v; + + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + + maxlvt = lapic_get_maxlvt(); + /* + * Masking an LVT entry can trigger a local APIC error + * if the vector is zero. Mask LVTERR first to prevent this. + */ + if (maxlvt >= 3) { + v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + } + /* + * Careful: we have to set masks only first to deassert + * any level-triggered sources. + */ + v = apic_read(APIC_LVTT); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT1); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); + if (maxlvt >= 4) { + v = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); + } + + /* lets not touch this if we didn't frob it */ +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) + if (maxlvt >= 5) { + v = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); + } +#endif + /* + * Clean APIC state for other OSs: + */ + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); + if (maxlvt >= 3) + apic_write(APIC_LVTERR, APIC_LVT_MASKED); + if (maxlvt >= 4) + apic_write(APIC_LVTPC, APIC_LVT_MASKED); + + /* Integrated APIC (!82489DX) ? */ + if (lapic_is_integrated()) { + if (maxlvt > 3) + /* Clear ESR due to Pentium errata 3AP and 11AP */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ + unsigned int value; + + clear_local_APIC(); + + /* + * Disable APIC (implies clearing of registers + * for 82489DX!). + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* + * When LAPIC was disabled by the BIOS and enabled by the kernel, + * restore the disabled state. + */ + if (enabled_via_apicbase) { + unsigned int l, h; + + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +#endif +} + +/* + * If Linux enabled the LAPIC against the BIOS default disable it down before + * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and + * not power-off. Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. + */ +void lapic_shutdown(void) +{ + unsigned long flags; + + if (!cpu_has_apic) + return; + + local_irq_save(flags); + +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) + clear_local_APIC(); + else +#endif + disable_local_APIC(); + + + local_irq_restore(flags); +} + +/* + * This is to verify that we're looking at a real local APIC. + * Check these against your board if the CPUs aren't getting + * started for no apparent reason. + */ +int __init verify_local_APIC(void) +{ + unsigned int reg0, reg1; + + /* + * The version register is read-only in a real APIC. + */ + reg0 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); + apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); + reg1 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); + + /* + * The two version reads above should print the same + * numbers. If the second one is different, then we + * poke at a non-APIC. + */ + if (reg1 != reg0) + return 0; + + /* + * Check if the version looks reasonably. + */ + reg1 = GET_APIC_VERSION(reg0); + if (reg1 == 0x00 || reg1 == 0xff) + return 0; + reg1 = lapic_get_maxlvt(); + if (reg1 < 0x02 || reg1 == 0xff) + return 0; + + /* + * The ID register is read/write in a real APIC. + */ + reg0 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; + + /* + * The next two are just to see if we have sane values. + * They're only really relevant if we're in Virtual Wire + * compatibility mode, but most boxes are anymore. + */ + reg0 = apic_read(APIC_LVT0); + apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); + reg1 = apic_read(APIC_LVT1); + apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); + + return 1; +} + +/** + * sync_Arb_IDs - synchronize APIC bus arbitration IDs + */ +void __init sync_Arb_IDs(void) +{ + /* + * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not + * needed on AMD. + */ + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); +} + +/* + * An initial setup of the virtual wire mode. + */ +void __init init_bsp_APIC(void) +{ + unsigned int value; + + /* + * Don't do the setup now if we have a SMP BIOS as the + * through-I/O-APIC virtual wire mode might be active. + */ + if (smp_found_config || !cpu_has_apic) + return; + + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + + /* + * Enable APIC. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV, value); + + /* + * Set up the virtual wire mode. + */ + apic_write(APIC_LVT0, APIC_DM_EXTINT); + value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT1, value); +} + +static void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + +/** + * setup_local_APIC - setup the local APIC + */ +void __cpuinit setup_local_APIC(void) +{ + unsigned int value; + int i, j; + +#ifdef CONFIG_X86_32 + /* Pound the ESR really hard over the head with a big hammer - mbligh */ + if (esr_disable) { + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + } +#endif + + preempt_disable(); + + /* + * Double-check whether this APIC is really registered. + * This is meaningless in clustered apic mode, so we skip it. + */ + if (!apic_id_registered()) + BUG(); + + /* + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ + init_apic_ldr(); + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. + */ + value = apic_read(APIC_TASKPRI); + value &= ~APIC_TPRI_MASK; + apic_write(APIC_TASKPRI, value); + + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1< 1) || + (boot_cpu_data.x86 == 15)) + break; + goto no_apic; + case X86_VENDOR_INTEL: + if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || + (boot_cpu_data.x86 == 5 && cpu_has_apic)) + break; + goto no_apic; + default: + goto no_apic; + } + + if (!cpu_has_apic) { + /* + * Over-ride BIOS and try to enable the local APIC only if + * "lapic" specified. + */ + if (!force_enable_local_apic) { + printk(KERN_INFO "Local APIC disabled by BIOS -- " + "you can enable it with \"lapic\"\n"); + return -1; + } + /* + * Some BIOSes disable the local APIC in the APIC_BASE + * MSR. This can only be done in software for Intel P6 or later + * and AMD K7 (Model > 1) or later. + */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + printk(KERN_INFO + "Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + } + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + printk(KERN_WARNING "Could not enable APIC!\n"); + return -1; + } + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* The BIOS may have set up the APIC at some other address */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + + printk(KERN_INFO "Found and enabled local APIC!\n"); + + apic_pm_activate(); + + return 0; + +no_apic: + printk(KERN_INFO "No local APIC present or hardware disabled\n"); + return -1; +} +#endif + +#ifdef CONFIG_X86_64 +void __init early_init_lapic_mapping(void) +{ + unsigned long phys_addr; + + /* + * If no local APIC can be found then go out + * : it means there is no mpatable and MADT + */ + if (!smp_found_config) + return; + + phys_addr = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, phys_addr); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + boot_cpu_physical_apicid = read_apic_id(); +} +#endif + +/** + * init_apic_mappings - initialize APIC mappings + */ +void __init init_apic_mappings(void) +{ +#ifdef HAVE_X2APIC + if (x2apic) { + boot_cpu_physical_apicid = read_apic_id(); + return; + } +#endif + + /* + * If no local APIC can be found then set up a fake all + * zeroes page to simulate the local APIC and another + * one for the IO-APIC. + */ + if (!smp_found_config && detect_init_APIC()) { + apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + apic_phys = __pa(apic_phys); + } else + apic_phys = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, apic_phys); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = read_apic_id(); +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int apic_version[MAX_APICS]; + +int __init APIC_init_uniprocessor(void) +{ +#ifdef CONFIG_X86_64 + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } +#else + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. + */ + if (!cpu_has_apic && + APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + return -1; + } +#endif + +#ifdef HAVE_X2APIC + enable_IR_x2apic(); +#endif +#ifdef CONFIG_X86_64 + setup_apic_routing(); +#endif + + verify_local_APIC(); + connect_bsp_APIC(); + +#ifdef CONFIG_X86_64 + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +#else + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +# ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = read_apic_id(); +# endif +#endif + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); + setup_local_APIC(); + +#ifdef CONFIG_X86_64 + /* + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP + */ + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); +#endif + +#ifdef CONFIG_X86_IO_APIC + if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) +#endif + localise_nmi_watchdog(); + end_local_APIC_setup(); + +#ifdef CONFIG_X86_IO_APIC + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +# ifdef CONFIG_X86_64 + else + nr_ioapics = 0; +# endif +#endif + +#ifdef CONFIG_X86_64 + setup_boot_APIC_clock(); + check_nmi_watchdog(); +#else + setup_boot_clock(); +#endif + + return 0; +} + +/* + * Local APIC interrupts + */ + +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_spurious_interrupt(void) +#else +void smp_spurious_interrupt(struct pt_regs *regs) +#endif +{ + u32 v; + +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + /* + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. + */ + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); + +#ifdef CONFIG_X86_64 + add_pda(irq_spurious_count, 1); +#else + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " + "should never happen.\n", smp_processor_id()); + __get_cpu_var(irq_stat).irq_spurious_count++; +#endif + irq_exit(); +} + +/* + * This interrupt should never happen with our APIC/SMP architecture + */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_error_interrupt(void) +#else +void smp_error_interrupt(struct pt_regs *regs) +#endif +{ + u32 v, v1; + +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + /* First tickle the hardware, only then report what went on. -- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); + + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", + smp_processor_id(), v , v1); + irq_exit(); +} + +/** + * connect_bsp_APIC - attach the APIC to the interrupt system + */ +void __init connect_bsp_APIC(void) +{ +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's + * local APIC to INT and NMI lines. + */ + apic_printk(APIC_VERBOSE, "leaving PIC mode, " + "enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +#endif + enable_apic_mode(); +} + +/** + * disconnect_bsp_APIC - detach the APIC from the interrupt system + * @virt_wire_setup: indicates, whether virtual wire mode is selected + * + * Virtual wire mode is necessary to deliver legacy interrupts even when the + * APIC is disabled. + */ +void disconnect_bsp_APIC(int virt_wire_setup) +{ + unsigned int value; + +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect only on + * certain older boards). Note that APIC interrupts, including + * IPIs, won't work beyond this point! The only exception are + * INIT IPIs. + */ + apic_printk(APIC_VERBOSE, "disabling APIC mode, " + "entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + return; + } +#endif + + /* Go back to Virtual Wire compatibility mode */ + + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + + if (!virt_wire_setup) { + /* + * For LVT0 make it edge triggered, active high, + * external and enabled + */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); + } + + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); +} + +void __cpuinit generic_processor_info(int apicid, int version) +{ + int cpu; + cpumask_t tmp_map; + + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { + /* + * x86_bios_cpu_apicid is required to have processors listed + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. + */ + cpu = 0; + } + if (apicid > max_physical_apicid) + max_physical_apicid = apicid; + +#ifdef CONFIG_X86_32 + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) + /* are we being called early in kernel startup? */ + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); + + cpu_to_apicid[cpu] = apicid; + bios_cpu_apicid[cpu] = apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = apicid; + per_cpu(x86_bios_cpu_apicid, cpu) = apicid; + } +#endif + + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); +} + +#ifdef CONFIG_X86_64 +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} +#endif + +/* + * Power management + */ +#ifdef CONFIG_PM + +static struct { + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ + int active; + /* r/w apic fields */ + unsigned int apic_id; + unsigned int apic_taskpri; + unsigned int apic_ldr; + unsigned int apic_dfr; + unsigned int apic_spiv; + unsigned int apic_lvtt; + unsigned int apic_lvtpc; + unsigned int apic_lvt0; + unsigned int apic_lvt1; + unsigned int apic_lvterr; + unsigned int apic_tmict; + unsigned int apic_tdcr; + unsigned int apic_thmr; +} apic_pm_state; + +static int lapic_suspend(struct sys_device *dev, pm_message_t state) +{ + unsigned long flags; + int maxlvt; + + if (!apic_pm_state.active) + return 0; + + maxlvt = lapic_get_maxlvt(); + + apic_pm_state.apic_id = apic_read(APIC_ID); + apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); + apic_pm_state.apic_ldr = apic_read(APIC_LDR); + apic_pm_state.apic_dfr = apic_read(APIC_DFR); + apic_pm_state.apic_spiv = apic_read(APIC_SPIV); + apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); + apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); + apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); + apic_pm_state.apic_tmict = apic_read(APIC_TMICT); + apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif + + local_irq_save(flags); + disable_local_APIC(); + local_irq_restore(flags); + return 0; +} + +static int lapic_resume(struct sys_device *dev) +{ + unsigned int l, h; + unsigned long flags; + int maxlvt; + + if (!apic_pm_state.active) + return 0; + + maxlvt = lapic_get_maxlvt(); + + local_irq_save(flags); + +#ifdef HAVE_X2APIC + if (x2apic) + enable_x2apic(); + else +#endif + { + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } + + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); + apic_write(APIC_ID, apic_pm_state.apic_id); + apic_write(APIC_DFR, apic_pm_state.apic_dfr); + apic_write(APIC_LDR, apic_pm_state.apic_ldr); + apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); + apic_write(APIC_SPIV, apic_pm_state.apic_spiv); + apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); + apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); + apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); + apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); + apic_write(APIC_TMICT, apic_pm_state.apic_tmict); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + + local_irq_restore(flags); + + return 0; +} + +/* + * This device has no shutdown method - fully functioning local APICs + * are needed on every CPU up until machine_halt/restart/poweroff. + */ + +static struct sysdev_class lapic_sysclass = { + .name = "lapic", + .resume = lapic_resume, + .suspend = lapic_suspend, +}; + +static struct sys_device device_lapic = { + .id = 0, + .cls = &lapic_sysclass, +}; + +static void __cpuinit apic_pm_activate(void) +{ + apic_pm_state.active = 1; +} + +static int __init init_lapic_sysfs(void) +{ + int error; + + if (!cpu_has_apic) + return 0; + /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ + + error = sysdev_class_register(&lapic_sysclass); + if (!error) + error = sysdev_register(&device_lapic); + return error; +} +device_initcall(init_lapic_sysfs); + +#else /* CONFIG_PM */ + +static void apic_pm_activate(void) { } + +#endif /* CONFIG_PM */ + +#ifdef CONFIG_X86_64 +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. Use available data to take a good guess. + * If in doubt, go HPET. + */ +__cpuinit int apic_is_clustered_box(void) +{ + int i, clusters, zeros; + unsigned id; + u16 *bios_cpu_apicid; + DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + + /* + * there is not this kind of box with AMD CPU yet. + * Some AMD box with quadcore cpu and 8 sockets apicid + * will be [4, 0x23] or [8, 0x27] could be thought to + * vsmp box still need checking... + */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) + return 0; + + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); + + for (i = 0; i < NR_CPUS; i++) { + /* are we being called early in kernel startup? */ + if (bios_cpu_apicid) { + id = bios_cpu_apicid[i]; + } + else if (i < nr_cpu_ids) { + if (cpu_present(i)) + id = per_cpu(x86_bios_cpu_apicid, i); + else + continue; + } + else + break; + + if (id != BAD_APICID) + __set_bit(APIC_CLUSTERID(id), clustermap); + } + + /* Problem: Partially populated chassis may not have CPUs in some of + * the APIC clusters they have been allocated. Only present CPUs have + * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. + * Since clusters are allocated sequentially, count zeros only if + * they are bounded by ones. + */ + clusters = 0; + zeros = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (test_bit(i, clustermap)) { + clusters += 1 + zeros; + zeros = 0; + } else + ++zeros; + } + + /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards + */ + if (is_vsmp_box() && clusters > 1) + return 1; + + /* + * If clusters > 2, then should be multi-chassis. + * May have to revisit this when multi-core + hyperthreaded CPUs come + * out, but AFAIK this will work even for them. + */ + return (clusters > 2); +} +#endif + +/* + * APIC command line parameters + */ +static int __init setup_disableapic(char *arg) +{ + disable_apic = 1; + setup_clear_cpu_cap(X86_FEATURE_APIC); + return 0; +} +early_param("disableapic", setup_disableapic); + +/* same as disableapic, for compatibility */ +static int __init setup_nolapic(char *arg) +{ + return setup_disableapic(arg); +} +early_param("nolapic", setup_nolapic); + +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + +static int __init parse_disable_apic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); + +static int __init apic_set_verbosity(char *arg) +{ + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + return 0; +#endif + return -EINVAL; + } + + if (strcmp("debug", arg) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + +static int __init lapic_insert_resource(void) +{ + if (!apic_phys) + return -1; + + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + + return 0; +} + +/* + * need call insert after e820_reserve_resources() + * that is using request_resource + */ +late_initcall(lapic_insert_resource); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c deleted file mode 100644 index 0b11d6e3f09..00000000000 --- a/arch/x86/kernel/apic_32.c +++ /dev/null @@ -1,2312 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error -#endif - -#ifdef CONFIG_X86_32 -/* - * Knob to control our willingness to enable the local APIC. - * - * +1=force-enable - */ -static int force_enable_local_apic; -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - -#endif - -#ifdef CONFIG_X86_64 -static int apic_calibrate_pmtmr __initdata; -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - -#ifdef CONFIG_X86_64 -#define HAVE_X2APIC -#endif - -#ifdef HAVE_X2APIC -int x2apic; -/* x2apic enabled before OS handover */ -int x2apic_preenabled; -int disable_x2apic; -static __init int setup_nox2apic(char *str) -{ - disable_x2apic = 1; - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - return 0; -} -early_param("nox2apic", setup_nox2apic); -#endif - -unsigned long mp_lapic_addr; -int disable_apic; -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - -/* - * Debug level, exported for io_apic.c - */ -unsigned int apic_verbosity; - -int pic_mode; - -/* Have we found an MP table */ -int smp_found_config; - -static struct resource lapic_resource = { - .name = "Local APIC", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -static unsigned int calibration_result; - -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); -static void apic_pm_activate(void); - -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -static unsigned long apic_phys; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a separate chip - */ -static inline int lapic_is_integrated(void) -{ -#ifdef CONFIG_X86_64 - return 1; -#else - return APIC_INTEGRATED(lapic_get_version()); -#endif -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -u32 safe_xapic_wait_icr_idle(void) -{ - u32 send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -void xapic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 xapic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -#ifdef HAVE_X2APIC -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; -#endif - -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void __cpuinit enable_NMI_through_LVT0(void) -{ - unsigned int v; - - /* unmask and set to NMI */ - v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - - apic_write(APIC_LVT0, v); -} - -#ifdef CONFIG_X86_32 -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 0xff : 0xf; -} -#endif - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v; - - v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else -#define APIC_DIVISOR 16 -#endif - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -} - -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - * - * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} -EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(cpumask_t mask) -{ -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - -/* - * Setup the local APIC timer for this CPU. Copy the initilized values - * of the boot CPU and register the clock event in the framework. - */ -static void __cpuinit setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); - - clockevents_register_device(levt); -} - -#ifdef CONFIG_X86_64 -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -#define TICK_COUNT 100000000 - -static int __init calibrate_APIC_clock(void) -{ - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! - */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); - - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); - - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (result * APIC_DIVISOR) / HZ; - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - return 0; -} - -#else -/* - * In this functions we calibrate APIC bus clocks to the external timer. - * - * We want to do the calibration only once since we want to have local timer - * irqs syncron. CPUs connected by the same APIC bus have the very same bus - * frequency. - * - * This was previously done by reading the PIT/HPET and waiting for a wrap - * around to find out, that a tick has elapsed. I have a box, where the PIT - * readout is broken, so it never gets out of the wait loop again. This was - * also reported by others. - * - * Monitoring the jiffies value is inaccurate and the clockevents - * infrastructure allows us to do a simple substitution of the interrupt - * handler. - * - * The calibration routine also uses the pm_timer when possible, as the PIT - * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes - * back to normal later in the boot process). - */ - -#define LAPIC_CAL_LOOPS (HZ/10) - -static __initdata int lapic_cal_loops = -1; -static __initdata long lapic_cal_t1, lapic_cal_t2; -static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; -static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; - -/* - * Temporary interrupt handler. - */ -static void __init lapic_cal_handler(struct clock_event_device *dev) -{ - unsigned long long tsc = 0; - long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); - - if (cpu_has_tsc) - rdtscll(tsc); - - switch (lapic_cal_loops++) { - case 0: - lapic_cal_t1 = tapic; - lapic_cal_tsc1 = tsc; - lapic_cal_pm1 = pm; - lapic_cal_j1 = jiffies; - break; - - case LAPIC_CAL_LOOPS: - lapic_cal_t2 = tapic; - lapic_cal_tsc2 = tsc; - if (pm < lapic_cal_pm1) - pm += ACPI_PM_OVRRUN; - lapic_cal_pm2 = pm; - lapic_cal_j2 = jiffies; - break; - } -} - -static int __init calibrate_APIC_clock(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; - void (*real_handler)(struct clock_event_device *dev); - unsigned long deltaj; - long delta, deltapm; - int pm_referenced = 0; - - local_irq_disable(); - - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - - /* - * Setup the APIC counter to 1e9. There is no way the lapic - * can underflow in the 100ms detection time frame - */ - __setup_APIC_LVTT(1000000000, 0, 0); - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - - /* Build delta t1-t2 as apic timer counts down */ - delta = lapic_cal_t1 - lapic_cal_t2; - apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; - - apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - calibration_result); - - if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); - apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); - } - - apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%u.%04u MHz.\n", - calibration_result / (1000000 / HZ), - calibration_result % (1000000 / HZ)); - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - local_irq_enable(); - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - - /* We trust the pm timer based calibration */ - if (!pm_referenced) { - apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); - - /* - * Setup the apic timer manually - */ - levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); - lapic_cal_loops = -1; - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); - - local_irq_enable(); - - /* Jiffies delta */ - deltaj = lapic_cal_j2 - lapic_cal_j1; - apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); - - /* Check, if the jiffies result is consistent */ - if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) - apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); - else - levt->features |= CLOCK_EVT_FEAT_DUMMY; - } else - local_irq_enable(); - - if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - printk(KERN_WARNING - "APIC timer disabled due to verification failure.\n"); - return -1; - } - - return 0; -} - -#endif - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) { - lapic_clockevent.mult = 1; - setup_APIC_timer(); - } - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - if (calibrate_APIC_clock()) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __cpuinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - /* - * the NMI deadlock-detector uses this. - */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt; - u32 v; - - /* APIC hasn't been mapped yet */ - if (!apic_phys) - return; - - maxlvt = lapic_get_maxlvt(); - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif - /* - * Clean APIC state for other OSs: - */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned int value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -#endif -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic) - return; - - local_irq_save(flags); - -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif - disable_local_APIC(); - - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned int value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else -#endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. - */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT1, value); -} - -static void __cpuinit lapic_setup_esr(void) -{ - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); - } -} - - -/** - * setup_local_APIC - setup the local APIC - */ -void __cpuinit setup_local_APIC(void) -{ - unsigned int value; - int i, j; - -#ifdef CONFIG_X86_32 - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } -#endif - - preempt_disable(); - - /* - * Double-check whether this APIC is really registered. - * This is meaningless in clustered apic mode, so we skip it. - */ - if (!apic_id_registered()) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - init_apic_ldr(); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1< 1) || - (boot_cpu_data.x86 == 15)) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Over-ride BIOS and try to enable the local APIC only if - * "lapic" specified. - */ - if (!force_enable_local_apic) { - printk(KERN_INFO "Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); - return -1; - } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - printk(KERN_INFO - "Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - printk(KERN_WARNING "Could not enable APIC!\n"); - return -1; - } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - printk(KERN_INFO "Found and enabled local APIC!\n"); - - apic_pm_activate(); - - return 0; - -no_apic: - printk(KERN_INFO "No local APIC present or hardware disabled\n"); - return -1; -} -#endif - -#ifdef CONFIG_X86_64 -void __init early_init_lapic_mapping(void) -{ - unsigned long phys_addr; - - /* - * If no local APIC can be found then go out - * : it means there is no mpatable and MADT - */ - if (!smp_found_config) - return; - - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - boot_cpu_physical_apicid = read_apic_id(); -} -#endif - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ -#ifdef HAVE_X2APIC - if (x2apic) { - boot_cpu_physical_apicid = read_apic_id(); - return; - } -#endif - - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = read_apic_id(); -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int apic_version[MAX_APICS]; - -int __init APIC_init_uniprocessor(void) -{ -#ifdef CONFIG_X86_64 - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; - } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - return -1; - } -#endif - -#ifdef HAVE_X2APIC - enable_IR_x2apic(); -#endif -#ifdef CONFIG_X86_64 - setup_apic_routing(); -#endif - - verify_local_APIC(); - connect_bsp_APIC(); - -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. - */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); - -#ifdef CONFIG_X86_64 - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling vector on BP - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); -#endif - -#ifdef CONFIG_X86_IO_APIC - if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) -#endif - localise_nmi_watchdog(); - end_local_APIC_setup(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -# ifdef CONFIG_X86_64 - else - nr_ioapics = 0; -# endif -#endif - -#ifdef CONFIG_X86_64 - setup_boot_APIC_clock(); - check_nmi_watchdog(); -#else - setup_boot_clock(); -#endif - - return 0; -} - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_spurious_interrupt(void) -#else -void smp_spurious_interrupt(struct pt_regs *regs) -#endif -{ - u32 v; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - -#ifdef CONFIG_X86_64 - add_pda(irq_spurious_count, 1); -#else - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); - __get_cpu_var(irq_stat).irq_spurious_count++; -#endif - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_error_interrupt(void) -#else -void smp_error_interrupt(struct pt_regs *regs) -#endif -{ - u32 v, v1; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -#endif - enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - unsigned int value; - -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - return; - } -#endif - - /* Go back to Virtual Wire compatibility mode */ - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); -} - -void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu; - cpumask_t tmp_map; - - /* - * Validate version - */ - if (version == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - physid_set(apicid, phys_cpu_present_map); - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - } - if (apicid > max_physical_apicid) - max_physical_apicid = apicid; - -#ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } -#endif - - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); -} - -#ifdef CONFIG_X86_64 -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} -#endif - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - local_irq_save(flags); - -#ifdef HAVE_X2APIC - if (x2apic) - enable_x2apic(); - else -#endif - { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - local_irq_restore(flags); - - return 0; -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct sysdev_class lapic_sysclass = { - .name = "lapic", - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -#ifdef CONFIG_X86_64 -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < NR_CPUS; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } - else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } - else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. - */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) - return 1; - - /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. - */ - return (clusters > 2); -} -#endif - -/* - * APIC command line parameters - */ -static int __init setup_disableapic(char *arg) -{ - disable_apic = 1; - setup_clear_cpu_cap(X86_FEATURE_APIC); - return 0; -} -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ - return setup_disableapic(arg); -} -early_param("nolapic", setup_nolapic); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("nolapic_timer", parse_nolapic_timer); - -static int __init apic_set_verbosity(char *arg) -{ - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; -#endif - return -EINVAL; - } - - if (strcmp("debug", arg) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - -static int __init lapic_insert_resource(void) -{ - if (!apic_phys) - return -1; - - /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; - insert_resource(&iomem_resource, &lapic_resource); - - return 0; -} - -/* - * need call insert after e820_reserve_resources() - * that is using request_resource - */ -late_initcall(lapic_insert_resource); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c deleted file mode 100644 index 6e99af5ce67..00000000000 --- a/arch/x86/kernel/apic_64.c +++ /dev/null @@ -1,2311 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error -#endif - -#ifdef CONFIG_X86_32 -/* - * Knob to control our willingness to enable the local APIC. - * - * +1=force-enable - */ -static int force_enable_local_apic; -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - -#endif - -#ifdef CONFIG_X86_64 -static int apic_calibrate_pmtmr __initdata; -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - -#ifdef CONFIG_X86_64 -#define HAVE_X2APIC -#endif - -#ifdef HAVE_X2APIC -int x2apic; -/* x2apic enabled before OS handover */ -int x2apic_preenabled; -int disable_x2apic; -static __init int setup_nox2apic(char *str) -{ - disable_x2apic = 1; - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - return 0; -} -early_param("nox2apic", setup_nox2apic); -#endif - -unsigned long mp_lapic_addr; -int disable_apic; -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - -/* - * Debug level, exported for io_apic.c - */ -unsigned int apic_verbosity; - -int pic_mode; - -/* Have we found an MP table */ -int smp_found_config; - -static struct resource lapic_resource = { - .name = "Local APIC", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -static unsigned int calibration_result; - -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); -static void apic_pm_activate(void); - -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -static unsigned long apic_phys; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a separate chip - */ -static inline int lapic_is_integrated(void) -{ -#ifdef CONFIG_X86_64 - return 1; -#else - return APIC_INTEGRATED(lapic_get_version()); -#endif -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -u32 safe_xapic_wait_icr_idle(void) -{ - u32 send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -void xapic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 xapic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -#ifdef HAVE_X2APIC -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; -#endif - -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void __cpuinit enable_NMI_through_LVT0(void) -{ - unsigned int v; - - /* unmask and set to NMI */ - v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - - apic_write(APIC_LVT0, v); -} - -#ifdef CONFIG_X86_32 -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 0xff : 0xf; -} -#endif - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v; - - v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else -#define APIC_DIVISOR 16 -#endif - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -} - -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - * - * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} -EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(cpumask_t mask) -{ -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - -/* - * Setup the local APIC timer for this CPU. Copy the initilized values - * of the boot CPU and register the clock event in the framework. - */ -static void __cpuinit setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); - - clockevents_register_device(levt); -} - -#ifdef CONFIG_X86_64 -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -#define TICK_COUNT 100000000 - -static int __init calibrate_APIC_clock(void) -{ - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! - */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); - - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); - - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (result * APIC_DIVISOR) / HZ; - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - return 0; -} - -#else -/* - * In this functions we calibrate APIC bus clocks to the external timer. - * - * We want to do the calibration only once since we want to have local timer - * irqs syncron. CPUs connected by the same APIC bus have the very same bus - * frequency. - * - * This was previously done by reading the PIT/HPET and waiting for a wrap - * around to find out, that a tick has elapsed. I have a box, where the PIT - * readout is broken, so it never gets out of the wait loop again. This was - * also reported by others. - * - * Monitoring the jiffies value is inaccurate and the clockevents - * infrastructure allows us to do a simple substitution of the interrupt - * handler. - * - * The calibration routine also uses the pm_timer when possible, as the PIT - * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes - * back to normal later in the boot process). - */ - -#define LAPIC_CAL_LOOPS (HZ/10) - -static __initdata int lapic_cal_loops = -1; -static __initdata long lapic_cal_t1, lapic_cal_t2; -static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; -static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; - -/* - * Temporary interrupt handler. - */ -static void __init lapic_cal_handler(struct clock_event_device *dev) -{ - unsigned long long tsc = 0; - long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); - - if (cpu_has_tsc) - rdtscll(tsc); - - switch (lapic_cal_loops++) { - case 0: - lapic_cal_t1 = tapic; - lapic_cal_tsc1 = tsc; - lapic_cal_pm1 = pm; - lapic_cal_j1 = jiffies; - break; - - case LAPIC_CAL_LOOPS: - lapic_cal_t2 = tapic; - lapic_cal_tsc2 = tsc; - if (pm < lapic_cal_pm1) - pm += ACPI_PM_OVRRUN; - lapic_cal_pm2 = pm; - lapic_cal_j2 = jiffies; - break; - } -} - -static int __init calibrate_APIC_clock(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; - void (*real_handler)(struct clock_event_device *dev); - unsigned long deltaj; - long delta, deltapm; - int pm_referenced = 0; - - local_irq_disable(); - - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - - /* - * Setup the APIC counter to 1e9. There is no way the lapic - * can underflow in the 100ms detection time frame - */ - __setup_APIC_LVTT(1000000000, 0, 0); - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - - /* Build delta t1-t2 as apic timer counts down */ - delta = lapic_cal_t1 - lapic_cal_t2; - apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; - - apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - calibration_result); - - if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); - apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); - } - - apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%u.%04u MHz.\n", - calibration_result / (1000000 / HZ), - calibration_result % (1000000 / HZ)); - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - local_irq_enable(); - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - - /* We trust the pm timer based calibration */ - if (!pm_referenced) { - apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); - - /* - * Setup the apic timer manually - */ - levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); - lapic_cal_loops = -1; - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); - - local_irq_enable(); - - /* Jiffies delta */ - deltaj = lapic_cal_j2 - lapic_cal_j1; - apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); - - /* Check, if the jiffies result is consistent */ - if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) - apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); - else - levt->features |= CLOCK_EVT_FEAT_DUMMY; - } else - local_irq_enable(); - - if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - printk(KERN_WARNING - "APIC timer disabled due to verification failure.\n"); - return -1; - } - - return 0; -} - -#endif - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) { - lapic_clockevent.mult = 1; - setup_APIC_timer(); - } - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - if (calibrate_APIC_clock()) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __cpuinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - /* - * the NMI deadlock-detector uses this. - */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt; - u32 v; - - /* APIC hasn't been mapped yet */ - if (!apic_phys) - return; - - maxlvt = lapic_get_maxlvt(); - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif - /* - * Clean APIC state for other OSs: - */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned int value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -#endif -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic) - return; - - local_irq_save(flags); - -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif - disable_local_APIC(); - - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned int value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else -#endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. - */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT1, value); -} - -static void __cpuinit lapic_setup_esr(void) -{ - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); - } -} - - -/** - * setup_local_APIC - setup the local APIC - */ -void __cpuinit setup_local_APIC(void) -{ - unsigned int value; - int i, j; - -#ifdef CONFIG_X86_32 - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } -#endif - - preempt_disable(); - - /* - * Double-check whether this APIC is really registered. - * This is meaningless in clustered apic mode, so we skip it. - */ - if (!apic_id_registered()) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - init_apic_ldr(); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1< 1) || - (boot_cpu_data.x86 == 15)) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Over-ride BIOS and try to enable the local APIC only if - * "lapic" specified. - */ - if (!force_enable_local_apic) { - printk(KERN_INFO "Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); - return -1; - } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - printk(KERN_INFO - "Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - printk(KERN_WARNING "Could not enable APIC!\n"); - return -1; - } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - printk(KERN_INFO "Found and enabled local APIC!\n"); - - apic_pm_activate(); - - return 0; - -no_apic: - printk(KERN_INFO "No local APIC present or hardware disabled\n"); - return -1; -} -#endif - -#ifdef CONFIG_X86_64 -void __init early_init_lapic_mapping(void) -{ - unsigned long phys_addr; - - /* - * If no local APIC can be found then go out - * : it means there is no mpatable and MADT - */ - if (!smp_found_config) - return; - - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - boot_cpu_physical_apicid = read_apic_id(); -} -#endif - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ -#ifdef HAVE_X2APIC - if (x2apic) { - boot_cpu_physical_apicid = read_apic_id(); - return; - } -#endif - - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = read_apic_id(); -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int apic_version[MAX_APICS]; - -int __init APIC_init_uniprocessor(void) -{ -#ifdef CONFIG_X86_64 - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; - } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - return -1; - } -#endif - -#ifdef HAVE_X2APIC - enable_IR_x2apic(); -#endif -#ifdef CONFIG_X86_64 - setup_apic_routing(); -#endif - - verify_local_APIC(); - connect_bsp_APIC(); - -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. - */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); - -#ifdef CONFIG_X86_64 - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling vector on BP - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); -#endif - -#ifdef CONFIG_X86_IO_APIC - if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) -#endif - localise_nmi_watchdog(); - end_local_APIC_setup(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -# ifdef CONFIG_X86_64 - else - nr_ioapics = 0; -# endif -#endif - -#ifdef CONFIG_X86_64 - setup_boot_APIC_clock(); - check_nmi_watchdog(); -#else - setup_boot_clock(); -#endif - - return 0; -} - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_spurious_interrupt(void) -#else -void smp_spurious_interrupt(struct pt_regs *regs) -#endif -{ - u32 v; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - -#ifdef CONFIG_X86_64 - add_pda(irq_spurious_count, 1); -#else - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); - __get_cpu_var(irq_stat).irq_spurious_count++; -#endif - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_error_interrupt(void) -#else -void smp_error_interrupt(struct pt_regs *regs) -#endif -{ - u32 v, v1; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -#endif - enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - unsigned int value; - -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - return; - } -#endif - - /* Go back to Virtual Wire compatibility mode */ - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); -} - -void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu; - cpumask_t tmp_map; - - /* - * Validate version - */ - if (version == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - physid_set(apicid, phys_cpu_present_map); - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - } - if (apicid > max_physical_apicid) - max_physical_apicid = apicid; - -#ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } -#endif - - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); -} - -#ifdef CONFIG_X86_64 -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} -#endif - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - local_irq_save(flags); - -#ifdef HAVE_X2APIC - if (x2apic) - enable_x2apic(); - else -#endif - { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - local_irq_restore(flags); - - return 0; -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct sysdev_class lapic_sysclass = { - .name = "lapic", - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -#ifdef CONFIG_X86_64 -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < NR_CPUS; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } - else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } - else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. - */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) - return 1; - - /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. - */ - return (clusters > 2); -} -#endif - -/* - * APIC command line parameters - */ -static int __init setup_disableapic(char *arg) -{ - disable_apic = 1; - setup_clear_cpu_cap(X86_FEATURE_APIC); - return 0; -} -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ - return setup_disableapic(arg); -} -early_param("nolapic", setup_nolapic); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("nolapic_timer", parse_nolapic_timer); - -static int __init apic_set_verbosity(char *arg) -{ - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - return 0; -#endif - return -EINVAL; - } - - if (strcmp("debug", arg) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - -static int __init lapic_insert_resource(void) -{ - if (!apic_phys) - return -1; - - /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; - insert_resource(&iomem_resource, &lapic_resource); - - return 0; -} - -/* - * need call insert after e820_reserve_resources() - * that is using request_resource - */ -late_initcall(lapic_insert_resource); -- cgit v1.2.3 From e492c5ae85428d4a3815d06bf308c590120b928b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 22:41:26 -0700 Subject: x86: let 64 bit to use 32 bit calibrate_apic_clock Use the 32-bit APIC calibration code - it's more mature. Signed-of-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 88 ++------------------------------------------------ 1 file changed, 2 insertions(+), 86 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 6e99af5ce67..ca5ef71f420 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -478,90 +478,6 @@ static void __cpuinit setup_APIC_timer(void) clockevents_register_device(levt); } -#ifdef CONFIG_X86_64 -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -#define TICK_COUNT 100000000 - -static int __init calibrate_APIC_clock(void) -{ - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! - */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); - - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); - - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (result * APIC_DIVISOR) / HZ; - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - return 0; -} - -#else /* * In this functions we calibrate APIC bus clocks to the external timer. * @@ -659,6 +575,7 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); +#ifdef CONFIG_X86_PM_TIMER /* Check, if the PM timer is available */ deltapm = lapic_cal_pm2 - lapic_cal_pm1; apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); @@ -687,6 +604,7 @@ static int __init calibrate_APIC_clock(void) } pm_referenced = 1; } +#endif /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -773,8 +691,6 @@ static int __init calibrate_APIC_clock(void) return 0; } -#endif - /* * Setup the boot APIC * -- cgit v1.2.3 From 1dd6ba2e179773597e20f17f66049a64e6c4b2ec Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 25 Aug 2008 21:27:26 +0400 Subject: x86: apic - unify smp_spurious/error_interrupt declaration According to entry_64.S we do pass pt_regs pointer into interrupt handlers but don't use them. So we safely may merge the declarations. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index ca5ef71f420..0556f375e40 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1659,11 +1659,7 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_spurious_interrupt(void) -#else void smp_spurious_interrupt(struct pt_regs *regs) -#endif { u32 v; @@ -1694,11 +1690,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) /* * This interrupt should never happen with our APIC/SMP architecture */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_error_interrupt(void) -#else void smp_error_interrupt(struct pt_regs *regs) -#endif { u32 v, v1; -- cgit v1.2.3 From c59d85a7b7822b83fc9783314543eea0ca860480 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 28 Aug 2008 08:56:33 +0200 Subject: sparseirq: export nr_irqs on m68k/sparc/s390 Stephen Rothwell reported such build failures on m68k/sparc/s390: > ERROR: "nr_irqs" [drivers/net/hamradio/baycom_ser_fdx.ko] undefined! > ERROR: "nr_irqs" [drivers/net/3c59x.ko] undefined! export nr_irqs on these architectures too. Reported-by: Stephen Rothwell Signed-off-by: Ingo Molnar --- arch/m68k/kernel/ints.c | 1 + arch/s390/kernel/irq.c | 1 + arch/sparc/kernel/irq.c | 1 + 3 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c index 74453d15692..44169e4cd91 100644 --- a/arch/m68k/kernel/ints.c +++ b/arch/m68k/kernel/ints.c @@ -47,6 +47,7 @@ #endif int nr_irqs = NR_IRQS; +EXPORT_SYMBOL(nr_irqs); extern u32 auto_irqhandler_fixup[]; extern u32 user_irqhandler_fixup[]; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 14eb5496c8a..3624c4a0037 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -18,6 +18,7 @@ #include int nr_irqs = NR_IRQS; +EXPORT_SYMBOL(nr_irqs); /* * show_interrupts is needed by /proc/interrupts. diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c index 059598b7e0f..4b99e3ce391 100644 --- a/arch/sparc/kernel/irq.c +++ b/arch/sparc/kernel/irq.c @@ -57,6 +57,7 @@ #endif /* SMP */ int nr_irqs = NR_IRQS; +EXPORT_SYMBOL(nr_irqs); unsigned long __raw_local_irq_save(void) { -- cgit v1.2.3 From a11b5abef50722e42a7d13f6b799c4f606fcb797 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 3 Sep 2008 16:58:31 -0700 Subject: x2apic: fix reserved APIC register accesses in print_local_APIC() APIC_ARBPRI is a reserved register for XAPIC and beyond. APIC_RRR is a reserved register except for 82489DX, APIC for Pentium processors. APIC_EOI is a write only register. APIC_DFR is reserved in x2apic mode. Access to these registers in x2apic will result in #GP fault. Fix these apic register accesses. Signed-off-by: Yinghai Lu Signed-off-by: Suresh Siddha Cc: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index d28128e0392..93ceb19d1e9 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1751,21 +1751,30 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); if (APIC_INTEGRATED(ver)) { /* !82489DX */ - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); + if (!APIC_XAPIC(ver)) { + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + } v = apic_read(APIC_PROCPRI); printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); } - v = apic_read(APIC_EOI); - printk(KERN_DEBUG "... APIC EOI: %08x\n", v); - v = apic_read(APIC_RRR); - printk(KERN_DEBUG "... APIC RRR: %08x\n", v); + /* + * Remote read supported only in the 82489DX and local APIC for + * Pentium processors. + */ + if (!APIC_INTEGRATED(ver) || maxlvt == 3) { + v = apic_read(APIC_RRR); + printk(KERN_DEBUG "... APIC RRR: %08x\n", v); + } + v = apic_read(APIC_LDR); printk(KERN_DEBUG "... APIC LDR: %08x\n", v); - v = apic_read(APIC_DFR); - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); + if (!x2apic_enabled()) { + v = apic_read(APIC_DFR); + printk(KERN_DEBUG "... APIC DFR: %08x\n", v); + } v = apic_read(APIC_SPIV); printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); -- cgit v1.2.3 From 02c1df199c990cd21c09a4dffaa06d4e0b7bf2bf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:57:11 +0200 Subject: x86: print out if acpi want physical flat of all Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 9eca5ba7a6b..2ec2de8d8c4 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) * is an example). */ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { + printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; + } #endif return 0; -- cgit v1.2.3 From 676f4a920be27160747439fe71026aa15ec78e5a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 4 Sep 2008 22:37:49 +0400 Subject: x86: io-apic - use ARRAY_SIZE macro Make the code width a bit shorter with ARRAY_SIZE macro. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 93ceb19d1e9..9b01fdadcb9 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -166,7 +166,7 @@ static void __init init_work(void *data) memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + legacy_count = ARRAY_SIZE(irq_cfg_legacy); for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); -- cgit v1.2.3 From ac54a6c9371bacb86bee1db23f7d82e8685c7e17 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 4 Sep 2008 22:37:50 +0400 Subject: x86: io-apic - declare irq_cfg_lock for SPARSE_IRQ only We use irq_cfg_lock lock in SPARSE_IRQ only context so move it under #ifdef and compiler will be happy. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9b01fdadcb9..d22fecf828b 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -147,14 +147,15 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) static struct irq_cfg *irq_cfgx; +#ifdef CONFIG_HAVE_SPARSE_IRQ /* * Protect the irq_cfgx_free freelist: */ static DEFINE_SPINLOCK(irq_cfg_lock); -#ifdef CONFIG_HAVE_SPARSE_IRQ static struct irq_cfg *irq_cfgx_free; #endif + static void __init init_work(void *data) { struct dyn_array *da = data; -- cgit v1.2.3 From b40d575bf0679c45aaf9e1161fc51a6b041b7210 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 5 Sep 2008 18:02:16 -0700 Subject: x86: HPET_MSI Refactor code in preparation for HPET_MSI Preparatory patch before the actual HPET MSI changes. Sets up hpet_set_mode and hpet_next_event for the MSI related changes. Just the code refactoring and should be zero functional change. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index acf62fc233d..f7cb5e9e261 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -227,44 +227,44 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } -static void hpet_legacy_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +static void hpet_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt, int timer) { unsigned long cfg, cmp, now; uint64_t delta; switch(mode) { case CLOCK_EVT_MODE_PERIODIC: - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; - delta >>= hpet_clockevent.shift; + delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; + delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); /* * The first write after writing TN_SETVAL to the * config register sets the counter value, the second * write sets the period. */ - hpet_writel(cmp, HPET_T0_CMP); + hpet_writel(cmp, HPET_Tn_CMP(timer)); udelay(1); - hpet_writel((unsigned long) delta, HPET_T0_CMP); + hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); break; case CLOCK_EVT_MODE_ONESHOT: - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); break; case CLOCK_EVT_MODE_RESUME: @@ -273,14 +273,14 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode, } } -static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt) +static int hpet_next_event(unsigned long delta, + struct clock_event_device *evt, int timer) { u32 cnt; cnt = hpet_readl(HPET_COUNTER); cnt += (u32) delta; - hpet_writel(cnt, HPET_T0_CMP); + hpet_writel(cnt, HPET_Tn_CMP(timer)); /* * We need to read back the CMP register to make sure that @@ -292,6 +292,18 @@ static int hpet_legacy_next_event(unsigned long delta, return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } +static void hpet_legacy_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + hpet_set_mode(mode, evt, 0); +} + +static int hpet_legacy_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + return hpet_next_event(delta, evt, 0); +} + /* * Clock source related code */ -- cgit v1.2.3 From 58ac1e76ce77d515bd5cb65dbc465a040da341c6 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 5 Sep 2008 18:02:17 -0700 Subject: x86: HPET_MSI Basic HPET_MSI setup code Basic HPET MSI setup code. Routines to perform basic MSI read write in HPET memory map and setting up irq_chip for HPET MSI. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 54 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f7cb5e9e261..3f10d16a834 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -25,6 +27,15 @@ unsigned long hpet_address; static void __iomem *hpet_virt_address; +struct hpet_dev { + struct clock_event_device evt; + unsigned int num; + int cpu; + unsigned int irq; + unsigned int flags; + char name[10]; +}; + unsigned long hpet_readl(unsigned long a) { return readl(hpet_virt_address + a); @@ -304,6 +315,49 @@ static int hpet_legacy_next_event(unsigned long delta, return hpet_next_event(delta, evt, 0); } +/* + * HPET MSI Support + */ + +void hpet_msi_unmask(unsigned int irq) +{ + struct hpet_dev *hdev = get_irq_data(irq); + unsigned long cfg; + + /* unmask it */ + cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg |= HPET_TN_FSB; + hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); +} + +void hpet_msi_mask(unsigned int irq) +{ + unsigned long cfg; + struct hpet_dev *hdev = get_irq_data(irq); + + /* mask it */ + cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg &= ~HPET_TN_FSB; + hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); +} + +void hpet_msi_write(unsigned int irq, struct msi_msg *msg) +{ + struct hpet_dev *hdev = get_irq_data(irq); + + hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); + hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); +} + +void hpet_msi_read(unsigned int irq, struct msi_msg *msg) +{ + struct hpet_dev *hdev = get_irq_data(irq); + + msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); + msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); + msg->address_hi = 0; +} + /* * Clock source related code */ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index d22fecf828b..77fa155becf 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -41,6 +41,7 @@ #endif #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include #include #include @@ -3522,6 +3524,68 @@ int arch_setup_dmar_msi(unsigned int irq) } #endif +#ifdef CONFIG_HPET_TIMER + +#ifdef CONFIG_SMP +static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + hpet_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + hpet_msi_write(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip hpet_msi_type = { + .name = "HPET_MSI", + .unmask = hpet_msi_unmask, + .mask = hpet_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = hpet_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_hpet_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + + hpet_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + #endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support -- cgit v1.2.3 From 4588c1f0354ac96a358b3f9e8e4331c51cf3336f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Sep 2008 14:19:17 +0200 Subject: x86: HPET_MSI Basic HPET_MSI setup code, cleanups small style cleanups. Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3f10d16a834..03d3655734b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1,39 +1,39 @@ #include #include +#include +#include #include #include #include #include -#include -#include -#include #include +#include +#include #include -#include #include -#include +#include -#define HPET_MASK CLOCKSOURCE_MASK(32) -#define HPET_SHIFT 22 +#define HPET_MASK CLOCKSOURCE_MASK(32) +#define HPET_SHIFT 22 /* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000L +#define FSEC_PER_NSEC 1000000L /* * HPET address is set in acpi/boot.c, when an ACPI entry exists */ -unsigned long hpet_address; -static void __iomem *hpet_virt_address; +unsigned long hpet_address; +static void __iomem *hpet_virt_address; struct hpet_dev { - struct clock_event_device evt; - unsigned int num; - int cpu; - unsigned int irq; - unsigned int flags; - char name[10]; + struct clock_event_device evt; + unsigned int num; + int cpu; + unsigned int irq; + unsigned int flags; + char name[10]; }; unsigned long hpet_readl(unsigned long a) @@ -70,7 +70,7 @@ static inline void hpet_clear_mapping(void) static int boot_hpet_disable; int hpet_force_user; -static int __init hpet_setup(char* str) +static int __init hpet_setup(char *str) { if (str) { if (!strncmp("disable", str, 7)) @@ -91,7 +91,7 @@ __setup("nohpet", disable_hpet); static inline int is_hpet_capable(void) { - return (!boot_hpet_disable && hpet_address); + return !boot_hpet_disable && hpet_address; } /* @@ -122,10 +122,10 @@ static void hpet_reserve_platform_timers(unsigned long id) nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - memset(&hd, 0, sizeof (hd)); - hd.hd_phys_address = hpet_address; - hd.hd_address = hpet; - hd.hd_nirqs = nrtimers; + memset(&hd, 0, sizeof(hd)); + hd.hd_phys_address = hpet_address; + hd.hd_address = hpet; + hd.hd_nirqs = nrtimers; hpet_reserve_timer(&hd, 0); #ifdef CONFIG_HPET_EMULATE_RTC @@ -141,8 +141,8 @@ static void hpet_reserve_platform_timers(unsigned long id) hd.hd_irq[1] = HPET_LEGACY_RTC; for (i = 2; i < nrtimers; timer++, i++) { - hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; + hd.hd_irq[i] = (readl(&timer->hpet_config) & + Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; } hpet_alloc(&hd); @@ -244,7 +244,7 @@ static void hpet_set_mode(enum clock_event_mode mode, unsigned long cfg, cmp, now; uint64_t delta; - switch(mode) { + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; -- cgit v1.2.3 From 26afe5f2fbf06ea0765aaa316640c4dd472310c0 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 5 Sep 2008 18:02:18 -0700 Subject: x86: HPET_MSI Initialise per-cpu HPET timers Initialize a per CPU HPET MSI timer when possible. We retain the HPET timer 0 (IRQ 0) and timer 1 (IRQ 8) as is when legacy mode is being used. We setup the remaining HPET timers as per CPU MSI based timers. This per CPU timer will eliminate the need for timer broadcasting with IRQ 0 when there is non-functional LAPIC timer across CPU deep C-states. If there are more CPUs than number of available timers, CPUs that do not find any timer to use will continue using LAPIC and IRQ 0 broadcast. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 295 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 293 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 03d3655734b..31e9191b7e1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -21,10 +21,19 @@ NSEC = 10^-9 */ #define FSEC_PER_NSEC 1000000L +#define HPET_DEV_USED_BIT 2 +#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) +#define HPET_DEV_VALID 0x8 +#define HPET_DEV_FSB_CAP 0x1000 +#define HPET_DEV_PERI_CAP 0x2000 + +#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) + /* * HPET address is set in acpi/boot.c, when an ACPI entry exists */ unsigned long hpet_address; +unsigned long hpet_num_timers; static void __iomem *hpet_virt_address; struct hpet_dev { @@ -36,6 +45,10 @@ struct hpet_dev { char name[10]; }; +static struct hpet_dev *hpet_devs; + +static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); + unsigned long hpet_readl(unsigned long a) { return readl(hpet_virt_address + a); @@ -145,6 +158,16 @@ static void hpet_reserve_platform_timers(unsigned long id) Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; } + for (i = 0; i < nrtimers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + + hd.hd_irq[hdev->num] = hdev->irq; + hpet_reserve_timer(&hd, hdev->num); + } + hpet_alloc(&hd); } @@ -238,6 +261,8 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } +static int hpet_setup_msi_irq(unsigned int irq); + static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { @@ -279,7 +304,15 @@ static void hpet_set_mode(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_RESUME: - hpet_enable_legacy_int(); + if (timer == 0) { + hpet_enable_legacy_int(); + } else { + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + hpet_setup_msi_irq(hdev->irq); + disable_irq(hdev->irq); + irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); + enable_irq(hdev->irq); + } break; } } @@ -318,7 +351,7 @@ static int hpet_legacy_next_event(unsigned long delta, /* * HPET MSI Support */ - +#ifdef CONFIG_PCI_MSI void hpet_msi_unmask(unsigned int irq) { struct hpet_dev *hdev = get_irq_data(irq); @@ -358,6 +391,253 @@ void hpet_msi_read(unsigned int irq, struct msi_msg *msg) msg->address_hi = 0; } +static void hpet_msi_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + hpet_set_mode(mode, evt, hdev->num); +} + +static int hpet_msi_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + return hpet_next_event(delta, evt, hdev->num); +} + +static int hpet_setup_msi_irq(unsigned int irq) +{ + if (arch_setup_hpet_msi(irq)) { + destroy_irq(irq); + return -EINVAL; + } + return 0; +} + +static int hpet_assign_irq(struct hpet_dev *dev) +{ + unsigned int irq; + + irq = create_irq(); + if (!irq) + return -EINVAL; + + set_irq_data(irq, dev); + + if (hpet_setup_msi_irq(irq)) + return -EINVAL; + + dev->irq = irq; + return 0; +} + +static irqreturn_t hpet_interrupt_handler(int irq, void *data) +{ + struct hpet_dev *dev = (struct hpet_dev *)data; + struct clock_event_device *hevt = &dev->evt; + + if (!hevt->event_handler) { + printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n", + dev->num); + return IRQ_HANDLED; + } + + hevt->event_handler(hevt); + return IRQ_HANDLED; +} + +static int hpet_setup_irq(struct hpet_dev *dev) +{ + + if (request_irq(dev->irq, hpet_interrupt_handler, + IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) + return -1; + + disable_irq(dev->irq); + irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); + enable_irq(dev->irq); + + return 0; +} + +/* This should be called in specific @cpu */ +static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) +{ + struct clock_event_device *evt = &hdev->evt; + uint64_t hpet_freq; + + WARN_ON(cpu != smp_processor_id()); + if (!(hdev->flags & HPET_DEV_VALID)) + return; + + if (hpet_setup_msi_irq(hdev->irq)) + return; + + hdev->cpu = cpu; + per_cpu(cpu_hpet_dev, cpu) = hdev; + evt->name = hdev->name; + hpet_setup_irq(hdev); + evt->irq = hdev->irq; + + evt->rating = 110; + evt->features = CLOCK_EVT_FEAT_ONESHOT; + if (hdev->flags & HPET_DEV_PERI_CAP) + evt->features |= CLOCK_EVT_FEAT_PERIODIC; + + evt->set_mode = hpet_msi_set_mode; + evt->set_next_event = hpet_msi_next_event; + evt->shift = 32; + + /* + * The period is a femto seconds value. We need to calculate the + * scaled math multiplication factor for nanosecond to hpet tick + * conversion. + */ + hpet_freq = 1000000000000000ULL; + do_div(hpet_freq, hpet_period); + evt->mult = div_sc((unsigned long) hpet_freq, + NSEC_PER_SEC, evt->shift); + /* Calculate the max delta */ + evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt); + /* 5 usec minimum reprogramming delta. */ + evt->min_delta_ns = 5000; + + evt->cpumask = cpumask_of_cpu(hdev->cpu); + clockevents_register_device(evt); +} + +#ifdef CONFIG_HPET +/* Reserve at least one timer for userspace (/dev/hpet) */ +#define RESERVE_TIMERS 1 +#else +#define RESERVE_TIMERS 0 +#endif +void hpet_msi_capability_lookup(unsigned int start_timer) +{ + unsigned int id; + unsigned int num_timers; + unsigned int num_timers_used = 0; + int i; + + id = hpet_readl(HPET_ID); + + num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); + num_timers++; /* Value read out starts from 0 */ + + hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); + if (!hpet_devs) + return; + + hpet_num_timers = num_timers; + + for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { + struct hpet_dev *hdev = &hpet_devs[num_timers_used]; + unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); + + /* Only consider HPET timer with MSI support */ + if (!(cfg & HPET_TN_FSB_CAP)) + continue; + + hdev->flags = 0; + if (cfg & HPET_TN_PERIODIC_CAP) + hdev->flags |= HPET_DEV_PERI_CAP; + hdev->num = i; + + sprintf(hdev->name, "hpet%d", i); + if (hpet_assign_irq(hdev)) + continue; + + hdev->flags |= HPET_DEV_FSB_CAP; + hdev->flags |= HPET_DEV_VALID; + num_timers_used++; + if (num_timers_used == num_possible_cpus()) + break; + } + + printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n", + num_timers, num_timers_used); +} + +static struct hpet_dev *hpet_get_unused_timer(void) +{ + int i; + + if (!hpet_devs) + return NULL; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + if (test_and_set_bit(HPET_DEV_USED_BIT, + (unsigned long *)&hdev->flags)) + continue; + return hdev; + } + return NULL; +} + +struct hpet_work_struct { + struct delayed_work work; + struct completion complete; +}; + +static void hpet_work(struct work_struct *w) +{ + struct hpet_dev *hdev; + int cpu = smp_processor_id(); + struct hpet_work_struct *hpet_work; + + hpet_work = container_of(w, struct hpet_work_struct, work.work); + + hdev = hpet_get_unused_timer(); + if (hdev) + init_one_hpet_msi_clockevent(hdev, cpu); + + complete(&hpet_work->complete); +} + +static int hpet_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct hpet_work_struct work; + struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); + + switch (action & 0xf) { + case CPU_ONLINE: + INIT_DELAYED_WORK(&work.work, hpet_work); + init_completion(&work.complete); + /* FIXME: add schedule_work_on() */ + schedule_delayed_work_on(cpu, &work.work, 0); + wait_for_completion(&work.complete); + break; + case CPU_DEAD: + if (hdev) { + free_irq(hdev->irq, hdev); + hdev->flags &= ~HPET_DEV_USED; + per_cpu(cpu_hpet_dev, cpu) = NULL; + } + break; + } + return NOTIFY_OK; +} +#else + +void hpet_msi_capability_lookup(unsigned int start_timer) +{ + return; +} + +static int hpet_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + return NOTIFY_OK; +} + +#endif + /* * Clock source related code */ @@ -493,8 +773,10 @@ int __init hpet_enable(void) if (id & HPET_ID_LEGSUP) { hpet_legacy_clockevent_register(); + hpet_msi_capability_lookup(2); return 1; } + hpet_msi_capability_lookup(0); return 0; out_nohpet: @@ -511,6 +793,8 @@ out_nohpet: */ static __init int hpet_late_init(void) { + int cpu; + if (boot_hpet_disable) return -ENODEV; @@ -526,6 +810,13 @@ static __init int hpet_late_init(void) hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + for_each_online_cpu(cpu) { + hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); + } + + /* This notifier should be called after workqueue is ready */ + hotcpu_notifier(hpet_cpuhp_notify, -20); + return 0; } fs_initcall(hpet_late_init); -- cgit v1.2.3 From 3c2cbd2490656fb4b6ede586c557a2b09811a432 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 6 Sep 2008 14:15:33 +0400 Subject: x86: io-apic - code style cleaning for setup_IO_APIC_irqs By changing printout form we are able to shrink (and clean up) code a bit. Former printout example: init IO_APIC IRQs IO-APIC (apicid-pin) 1-1, 1-2, 1-3 not connected. IO-APIC (apicid-pin) 2-1, 2-2, 2-3 not connected. New printout example: init IO_APIC IRQs 1-1 1-2 1-3 (apicid-pin) not connected 2-1 2-2 2-3 (apicid-pin) not connected Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 53 +++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 77fa155becf..4040d575a21 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1518,41 +1518,44 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, static void __init setup_IO_APIC_irqs(void) { - int apic, pin, idx, irq, first_notcon = 1; + int apic, pin, idx, irq; + int notcon = 0; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); - continue; - } - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + idx = find_irq_entry(apic, pin, mp_INT); + if (idx == -1) { + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic].mp_apicid, pin); + if (!notcon) + notcon = 1; + continue; + } - irq = pin_2_irq(idx, apic, pin); + irq = pin_2_irq(idx, apic, pin); #ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) - continue; + if (multi_timer_check(apic, irq)) + continue; #endif - add_pin_to_irq(irq, apic, pin); + add_pin_to_irq(irq, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, - irq_trigger(idx), irq_polarity(idx)); - } + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); + } + if (notcon) { + apic_printk(APIC_VERBOSE, + KERN_DEBUG " (apicid-pin) not connected\n"); + notcon = 0; + } } - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); + if (notcon) + apic_printk(APIC_VERBOSE, + KERN_DEBUG " (apicid-pin) not connected\n"); } /* -- cgit v1.2.3 From 79c09698cac8df16c5539447d5e1047fde9742e5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:57 -0700 Subject: x86: lapic address print out like io apic addr Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0556f375e40..f3f50858048 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1548,7 +1548,7 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); /* -- cgit v1.2.3 From 2a554fb132cf804477087057b9b0ff2162984507 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 8 Sep 2008 19:38:06 +0400 Subject: x86: io-apic - do not use KERN_DEBUG marker too much Do not use KERN_DEBUG several times on the same line being printed. Introduced by mine previous patch, sorry. Reported-by: Yinghai Lu Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4040d575a21..12e59df026d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1528,11 +1528,16 @@ static void __init setup_IO_APIC_irqs(void) idx = find_irq_entry(apic, pin, mp_INT); if (idx == -1) { - apic_printk(APIC_VERBOSE, - KERN_DEBUG " %d-%d", - mp_ioapics[apic].mp_apicid, pin); - if (!notcon) + if (!notcon) { notcon = 1; + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic].mp_apicid, + pin); + } else + apic_printk(APIC_VERBOSE, " %d-%d", + mp_ioapics[apic].mp_apicid, + pin); continue; } @@ -1548,14 +1553,14 @@ static void __init setup_IO_APIC_irqs(void) } if (notcon) { apic_printk(APIC_VERBOSE, - KERN_DEBUG " (apicid-pin) not connected\n"); + " (apicid-pin) not connected\n"); notcon = 0; } } if (notcon) apic_printk(APIC_VERBOSE, - KERN_DEBUG " (apicid-pin) not connected\n"); + " (apicid-pin) not connected\n"); } /* -- cgit v1.2.3 From f0ed4e695faf6766927c8cfbda2bc7530c7210c2 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Mon, 8 Sep 2008 10:18:40 -0700 Subject: x86: using HPET in MSI mode and setting up per CPU HPET timers, fix On Sat, Sep 06, 2008 at 06:03:53AM -0700, Ingo Molnar wrote: > > it crashes two testsystems, the fault on a NULL pointer in hpet init, > with: > > initcall print_all_ICs+0x0/0x520 returned 0 after 26 msecs > calling hpet_late_init+0x0/0x1c0 > BUG: unable to handle kernel NULL pointer dereference at 000000000000008c > IP: [] hpet_late_init+0xfe/0x1c0 > PGD 0 > Oops: 0000 [1] SMP > CPU 0 > Modules linked in: > Pid: 1, comm: swapper Not tainted 2.6.27-rc5 #29725 > RIP: 0010:[] [] hpet_late_init+0xfe/0x1c0 > RSP: 0018:ffff88003fa07dd0 EFLAGS: 00010246 > RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000 > RDX: ffffc20000000160 RSI: 0000000000000000 RDI: 0000000000000003 > RBP: ffff88003fa07e90 R08: 0000000000000000 R09: ffff88003fa07dd0 > R10: 0000000000000001 R11: 0000000000000000 R12: ffff88003fa07dd0 > R13: 0000000000000002 R14: ffffc20000000000 R15: 000000006f57e511 > FS: 0000000000000000(0000) GS:ffffffff80cf6a80(0000) knlGS:0000000000000000 > CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b > CR2: 000000000000008c CR3: 0000000000201000 CR4: 00000000000006e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process swapper (pid: 1, threadinfo ffff88003fa06000, task ffff88003fa08000) > Stack: 00000000fed00000 ffffc20000000000 0000000100000003 0000000800000002 > 0000000000000000 0000000000000000 0000000000000000 0000000000000000 > 0000000000000000 0000000000000000 0000000000000000 0000000000000000 > Call Trace: > [] ? hpet_late_init+0x0/0x1c0 > [] do_one_initcall+0x45/0x190 > [] ? register_irq_proc+0x19/0xe0 > [] ? early_idt_handler+0x0/0x73 > [] kernel_init+0x14c/0x1b0 > [] ? trace_hardirqs_on_thunk+0x3a/0x3f > [] child_rip+0xa/0x11 > [] ? restore_args+0x0/0x30 > [] ? kernel_init+0x0/0x1b0 > [] ? child_rip+0x0/0x11 > Code: 20 48 83 c1 01 48 39 f1 75 e3 44 89 e8 4c 8b 05 29 29 22 00 31 f6 48 8d 78 01 66 66 90 89 f0 48 8d 04 80 48 c1 e0 05 4a 8d 0c 00 81 8c 00 00 00 08 74 26 8b 81 80 00 00 00 8b 91 88 00 00 00 > RIP [] hpet_late_init+0xfe/0x1c0 > RSP > CR2: 000000000000008c > Kernel panic - not syncing: Fatal exception There was one code path, with CONFIG_PCI_MSI disabled, where we were accessing hpet_devs without initialization. That resulted in the above crash. The change below adds a check for hpet_devs. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 31e9191b7e1..01005aeda7d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -126,6 +126,24 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) +{ + int i; + + if (!hpet_devs) + return; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + + hd->hd_irq[hdev->num] = hdev->irq; + hpet_reserve_timer(hd, hdev->num); + } +} + static void hpet_reserve_platform_timers(unsigned long id) { struct hpet __iomem *hpet = hpet_virt_address; @@ -158,15 +176,7 @@ static void hpet_reserve_platform_timers(unsigned long id) Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; } - for (i = 0; i < nrtimers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; - - if (!(hdev->flags & HPET_DEV_VALID)) - continue; - - hd.hd_irq[hdev->num] = hdev->irq; - hpet_reserve_timer(&hd, hdev->num); - } + hpet_reserve_msi_timers(&hd); hpet_alloc(&hd); -- cgit v1.2.3 From ba374c9baef910fbc5373541d98c50f15e82c3f8 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Mon, 8 Sep 2008 16:19:09 -0700 Subject: x86: fix HPET compiler error when not using CONFIG_PCI_MSI Added dummy function for hpet_setup_msi_irq(). Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 01005aeda7d..422c577ef69 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -635,6 +635,10 @@ static int hpet_cpuhp_notify(struct notifier_block *n, } #else +static int hpet_setup_msi_irq(unsigned int irq) +{ + return 0; +} void hpet_msi_capability_lookup(unsigned int start_timer) { return; -- cgit v1.2.3 From 87783be4c26d79f5cde245e6e8a9fd2b295e92d7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 10 Sep 2008 22:19:50 +0400 Subject: x86: io-apic - get rid of __DO_ACTION macro Replace __DO_ACTION macro with io_apic_modify_irq function. This allow us to 'grep' definitions being hided by __DO_ACTION macro: __unmask_IO_APIC_irq __mask_IO_APIC_irq __mask_and_edge_IO_APIC_irq __unmask_and_level_IO_APIC_irq Signed-off-by: Cyrill Gorcunov Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 103 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 51 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 12e59df026d..ac47384724e 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -643,65 +643,66 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } -#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION_DISABLE; \ - reg ACTION_ENABLE; \ - io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - -#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) - -/* mask = 0 */ -DO_ACTION(__unmask, 0, |= 0, &= ~IO_APIC_REDIR_MASKED, ) +static inline void io_apic_modify_irq(unsigned int irq, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + int pin; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + cfg = irq_cfg(irq); + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + unsigned int reg; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); + } +} + +static void __unmask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); +} #ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) +void io_apic_sync(struct irq_pin_list *entry) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + /* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ + struct io_apic __iomem *io_apic; + io_apic = io_apic_base(entry->apic); readl(&io_apic->data); } -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic)) - -#else - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, ) - -/* mask = 1, trigger = 0 */ -DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, ) +static void __mask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); +} +#else /* CONFIG_X86_32 */ +static void __mask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); +} -/* mask = 0, trigger = 1 */ -DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, ) +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} -#endif +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); +} +#endif /* CONFIG_X86_32 */ static void mask_IO_APIC_irq (unsigned int irq) { -- cgit v1.2.3 From 823b259b80158a5fb694f6784e18b5bae669c599 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Sep 2008 21:56:46 -0700 Subject: x86: print out apic id in hex format Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/smpboot.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index f3f50858048..11cb1863958 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1586,7 +1586,7 @@ int __init APIC_init_uniprocessor(void) */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 32e73520adf..8f1e31db2ad 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) } numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); + printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 99468dbd08d..cce0b6118d5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void) node = first_node(node_online_map); numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); + printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c3aca7cb34..f84de2ff933 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid) int timeout; u32 status; - printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); + printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); + printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); /* * Wait for idle. @@ -874,7 +874,7 @@ do_rest: start_ip = setup_trampoline(); /* So we see what's up */ - printk(KERN_INFO "Booting processor %d/%d ip %lx\n", + printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", cpu, apicid, start_ip); /* -- cgit v1.2.3 From 9df08f109572e88fbdab9a61d5cb0f0eae4ac9fa Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 14 Sep 2008 11:55:37 +0400 Subject: x86: apic - lapic_setup_esr does not handle esr_disable - fix it lapic_setup_esr doesn't handle esr_disable inquire. The error brought in during unification process. Fix it. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 63 ++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 11cb1863958..59550cc017d 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1081,40 +1081,43 @@ void __init init_bsp_APIC(void) static void __cpuinit lapic_setup_esr(void) { - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); + unsigned int oldvalue, value, maxlvt; + + if (!lapic_is_integrated()) { + printk(KERN_INFO "No ESR for 82489DX.\n"); + return; + } - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); + if (esr_disable) { /* - * spec says clear errors after enabling vector. + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; } + + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08x after: 0x%08x\n", + oldvalue, value); } -- cgit v1.2.3 From 08ad776e3c54e6fe8b50939f176538063b69f89e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 14 Sep 2008 11:55:38 +0400 Subject: x86: apic - skip writting ESR register if we dont have on On 82489DX we don't have ESR register so we should not write it. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 59550cc017d..d730e8d3172 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1131,7 +1131,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { + if (lapic_is_integrated() && esr_disable) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); -- cgit v1.2.3 From b189892de4da4634560657aedd774752094dbfa0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 12 Sep 2008 23:58:24 +0400 Subject: x86: apic - fix unused vars warning in calibrate_APIC_clock If we don't have CONFIG_X86_PM_TIMER=y compiler warns about unused variables. Move PM timer based calibration into a separate function and make the code cleaner and the compiler happy as well. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 81 +++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index d730e8d3172..784116933c7 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -538,14 +538,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } } +static int __init calibrate_by_pmtimer(long deltapm, long *delta) +{ + const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; + const long pm_thresh = pm_100ms / 100; + unsigned long mult; + u64 res; + +#ifndef CONFIG_X86_PM_TIMER + return -1; +#endif + + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + /* Check, if the PM timer is available */ + if (!deltapm) + return -1; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64)deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64)(*delta)) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long)res, *delta); + *delta = (long)res; + } + + return 0; +} + static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; - long delta, deltapm; + long delta; int pm_referenced = 0; local_irq_disable(); @@ -575,36 +612,9 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); -#ifdef CONFIG_X86_PM_TIMER - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } -#endif + /* we trust the PM based calibration if possible */ + pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, + &delta); /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -646,7 +656,10 @@ static int __init calibrate_APIC_clock(void) levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - /* We trust the pm timer based calibration */ + /* + * PM timer calibration failed or not turned on + * so lets try APIC timer based calibration + */ if (!pm_referenced) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); -- cgit v1.2.3 From 56ffa1a028b9fce3860a247c6fe79fce7cbf425b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 13 Sep 2008 13:11:16 +0400 Subject: x86: io-apic - do not use KERN_DEBUG marker too much, fix Yinghai Lu reported: | > 0 add_pin_to_irq: irq 15 --> apic 0 pin 15 | > IOAPIC[0]: Set routing entry (8-15 -> 0x3f -> IRQ 15 Mode:0 Active:0) | > 8-16 8-17 8-18 8-19 8-20 8-21 8-22 8-23 (apicid-pin) not connected | > 9-0 9-1 9-2 9-3 9-4 9-5 9-6 9-7 9-8 9-9 9-10 9-11 9-12 9-13 9-14 9-15 | > 9-16 9-17 9-18 9-19 9-20 9-21 9-22 9-23 (apicid-pin) not connected | > | | only first one not connected at first, and ... here is a quick fix for this. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ac47384724e..6ce5873a406 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1541,6 +1541,11 @@ static void __init setup_IO_APIC_irqs(void) pin); continue; } + if (notcon) { + apic_printk(APIC_VERBOSE, + " (apicid-pin) not connected\n"); + notcon = 0; + } irq = pin_2_irq(idx, apic, pin); #ifdef CONFIG_X86_32 @@ -1552,11 +1557,6 @@ static void __init setup_IO_APIC_irqs(void) setup_IO_APIC_irq(apic, pin, irq, irq_trigger(idx), irq_polarity(idx)); } - if (notcon) { - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); - notcon = 0; - } } if (notcon) -- cgit v1.2.3 From 9d98598d2fc286c8dbcd0b681168639528428db0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Sep 2008 00:12:26 -0700 Subject: sparseirq: remove some debug print out Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 6ce5873a406..01419fdc1d5 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -277,23 +277,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) spin_unlock_irqrestore(&irq_cfg_lock, flags); - printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); -#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG - { - /* dump the results */ - struct irq_cfg *cfg; - unsigned long phys; - unsigned long bytes = sizeof(struct irq_cfg); - - printk(KERN_DEBUG "=========================== %d\n", irq); - printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); - for_each_irq_cfg(cfg) { - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); - } - printk(KERN_DEBUG "===========================\n"); - } -#endif return cfg; } #else @@ -597,7 +580,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; - printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); return; } @@ -613,7 +595,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry = entry->next; entry->apic = apic; entry->pin = pin; - printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); } /* -- cgit v1.2.3 From 5ffa4eb2224343ec3fbd492ffe71e28e797435b7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 18 Sep 2008 23:37:57 +0400 Subject: x86: io-apic - interrupt remapping fix Interrupt remapping could lead to NULL dereference in case of kzalloc failed and memory leak in other way. So fix the both cases. Signed-off-by: Cyrill Gorcunov Cc: "Maciej W. Rozycki" Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 13 ++++++++++--- arch/x86/kernel/io_apic.c | 19 +++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 784116933c7..1f48bd1c9f2 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1360,7 +1360,12 @@ void enable_IR_x2apic(void) local_irq_save(flags); mask_8259A(); - save_mask_IO_APIC_setup(); + + ret = save_mask_IO_APIC_setup(); + if (ret) { + printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); + goto end; + } ret = enable_intr_remapping(1); @@ -1370,14 +1375,15 @@ void enable_IR_x2apic(void) } if (ret) - goto end; + goto end_restore; if (!x2apic) { x2apic = 1; apic_ops = &x2apic_ops; enable_x2apic(); } -end: + +end_restore: if (ret) /* * IR enabling failed @@ -1386,6 +1392,7 @@ end: else reinit_intr_remapped_IO_APIC(x2apic_preenabled); +end: unmask_8259A(); local_irq_restore(flags); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 01419fdc1d5..4cc9cb64c81 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -812,7 +812,7 @@ int save_mask_IO_APIC_setup(void) kzalloc(sizeof(struct IO_APIC_route_entry) * nr_ioapic_registers[apic], GFP_KERNEL); if (!early_ioapic_entries[apic]) - return -ENOMEM; + goto nomem; } for (apic = 0; apic < nr_ioapics; apic++) @@ -826,17 +826,32 @@ int save_mask_IO_APIC_setup(void) ioapic_write_entry(apic, pin, entry); } } + return 0; + +nomem: + for (; apic > 0; apic--) + kfree(early_ioapic_entries[apic]); + kfree(early_ioapic_entries[apic]); + memset(early_ioapic_entries, 0, + ARRAY_SIZE(early_ioapic_entries)); + + return -ENOMEM; } void restore_IO_APIC_setup(void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) + for (apic = 0; apic < nr_ioapics; apic++) { + if (!early_ioapic_entries[apic]) + break; for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ioapic_write_entry(apic, pin, early_ioapic_entries[apic][pin]); + kfree(early_ioapic_entries[apic]); + early_ioapic_entries[apic] = NULL; + } } void reinit_intr_remapped_IO_APIC(int intr_remapping) -- cgit v1.2.3 From 8da077d6f31da291ee3a7dd559671cb8ca48cbe2 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 23 Sep 2008 08:42:05 -0500 Subject: x86, uv: fix ordering of calls to uv_system_init & uv_cpu_init Fix problem caused by reordering of the calls to uv_cpu_init() & uv_system_init. Originally, uv_cpu_init() was called AFTER uv_system_init. This order was recently broken as a side-effect of other patches. With this patch, initialization of cpu 0 is now done by the system_init call. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 33581d94a90..b689075bbe2 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -356,7 +356,22 @@ static __init void uv_rtc_init(void) sn_rtc_cycles_per_second = ticks_per_sec; } -static bool uv_system_inited; +/* + * Called on each cpu to initialize the per_cpu UV data area. + * ZZZ hotplug not supported yet + */ +void __cpuinit uv_cpu_init(void) +{ + /* CPU 0 initilization will be done via uv_system_init. */ + if (!uv_blade_info) + return; + + uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; + + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) + set_x2apic_extra_bits(uv_hub_info->pnode); +} + void __init uv_system_init(void) { @@ -448,21 +463,6 @@ void __init uv_system_init(void) map_mmr_high(max_pnode); map_config_high(max_pnode); map_mmioh_high(max_pnode); - uv_system_inited = true; -} -/* - * Called on each cpu to initialize the per_cpu UV data area. - * ZZZ hotplug not supported yet - */ -void __cpuinit uv_cpu_init(void) -{ - BUG_ON(!uv_system_inited); - - uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; - - if (get_uv_system_type() == UV_NON_UNIQUE_APIC) - set_x2apic_extra_bits(uv_hub_info->pnode); + uv_cpu_init(); } - - -- cgit v1.2.3 From 7564676813780e0ba4dacf95338202cb72d2172d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 24 Sep 2008 19:04:36 -0700 Subject: x86: irq no should not use hex in /proc/interrupts Arjan van de Ven noticed that we changed IRQ numbers from decimal to hex in /proc/interrupts - that can break user-space utilities like irqbalanced. Reported-by: Arjan van de Ven Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index b2e1082cf5a..001772ffc91 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -307,7 +307,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%#x: ",i); + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index c2fca89a3f7..ec266109128 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%#x: ",i); + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else -- cgit v1.2.3 From c1370b49cc4f09de5b447ddf688a3988a297dbfc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 23 Sep 2008 23:00:02 +0400 Subject: x86: io-apic - interrupt remapping fix Clean up obscure for() cycle with straight while() form Signed-off-by: Cyrill Gorcunov Cc: "Maciej W. Rozycki" Acked-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4cc9cb64c81..ed68a6f99dc 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -830,9 +830,8 @@ int save_mask_IO_APIC_setup(void) return 0; nomem: - for (; apic > 0; apic--) - kfree(early_ioapic_entries[apic]); - kfree(early_ioapic_entries[apic]); + while (apic >= 0) + kfree(early_ioapic_entries[apic--]); memset(early_ioapic_entries, 0, ARRAY_SIZE(early_ioapic_entries)); -- cgit v1.2.3 From c81bba49a13cb3376654d0cc93dc069fd619ed76 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 25 Sep 2008 11:53:11 -0700 Subject: x86: print out irq nr for msi/ht, v3 v2: fix hpet compiling error v3: Bjorn want to use dev_printk instead Signed-off-by: Yinghai Lu Cc: Bjorn Helgaas Cc: Jesse Barnes Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 3 +++ arch/x86/kernel/io_apic.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 422c577ef69..2913913f4a4 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -467,6 +467,9 @@ static int hpet_setup_irq(struct hpet_dev *dev) irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); enable_irq(dev->irq); + printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", + dev->name, dev->irq); + return 0; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ed68a6f99dc..4ee270d3035 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3354,6 +3354,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) #endif set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); + return 0; } @@ -3586,6 +3588,7 @@ int arch_setup_hpet_msi(unsigned int irq) hpet_msi_write(irq, &msg); set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, "edge"); + return 0; } #endif @@ -3682,6 +3685,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); + + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); } return err; } -- cgit v1.2.3 From 5f79f2f2ad39b5177c52ed08ffd066ea0c1da924 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Wed, 24 Sep 2008 10:03:17 -0700 Subject: hpet: clean up warning Fix the below compile warnings due to recent HPET MSI changes arch/x86/kernel/hpet.c:48: warning: 'hpet_devs' defined but not used arch/x86/kernel/hpet.c:50: warning: 'per_cpu__cpu_hpet_dev' defined but not used Reported-by: Andrew Morton Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 57 +++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 2913913f4a4..77017e834cf 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -45,10 +45,6 @@ struct hpet_dev { char name[10]; }; -static struct hpet_dev *hpet_devs; - -static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); - unsigned long hpet_readl(unsigned long a) { return readl(hpet_virt_address + a); @@ -126,23 +122,8 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd) -{ - int i; - if (!hpet_devs) - return; - - for (i = 0; i < hpet_num_timers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; - - if (!(hdev->flags & HPET_DEV_VALID)) - continue; - - hd->hd_irq[hdev->num] = hdev->irq; - hpet_reserve_timer(hd, hdev->num); - } -} +static void hpet_reserve_msi_timers(struct hpet_data *hd); static void hpet_reserve_platform_timers(unsigned long id) { @@ -362,6 +343,10 @@ static int hpet_legacy_next_event(unsigned long delta, * HPET MSI Support */ #ifdef CONFIG_PCI_MSI + +static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); +static struct hpet_dev *hpet_devs; + void hpet_msi_unmask(unsigned int irq) { struct hpet_dev *hdev = get_irq_data(irq); @@ -525,7 +510,8 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) #else #define RESERVE_TIMERS 0 #endif -void hpet_msi_capability_lookup(unsigned int start_timer) + +static void hpet_msi_capability_lookup(unsigned int start_timer) { unsigned int id; unsigned int num_timers; @@ -571,6 +557,26 @@ void hpet_msi_capability_lookup(unsigned int start_timer) num_timers, num_timers_used); } +#ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) +{ + int i; + + if (!hpet_devs) + return; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + + hd->hd_irq[hdev->num] = hdev->irq; + hpet_reserve_timer(hd, hdev->num); + } +} +#endif + static struct hpet_dev *hpet_get_unused_timer(void) { int i; @@ -642,10 +648,17 @@ static int hpet_setup_msi_irq(unsigned int irq) { return 0; } -void hpet_msi_capability_lookup(unsigned int start_timer) +static void hpet_msi_capability_lookup(unsigned int start_timer) +{ + return; +} + +#ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) { return; } +#endif static int hpet_cpuhp_notify(struct notifier_block *n, unsigned long action, void *hcpu) -- cgit v1.2.3 From 4173a0e7371ece227559b44943c6fd456ee470d1 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Thu, 2 Oct 2008 12:18:21 -0500 Subject: x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3 Provide a means for UV interrupt MMRs to be setup with the message to be sent when an MSI is raised. Signed-off-by: Dean Nelson Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/io_apic.c | 68 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/uv_irq.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/uv_irq.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 45cecc59d89..acc0e8bf043 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o + obj-y += bios_uv.o uv_irq.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4ee270d3035..260c95a5e6d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -58,6 +58,8 @@ #include #include #include +#include +#include #include #include @@ -3692,6 +3694,72 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ +#ifdef CONFIG_X86_64 +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset) +{ + const cpumask_t *eligible_cpu = get_cpu_mask(cpu); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long flags; + int err; + + err = assign_irq_vector(irq, *eligible_cpu); + if (err != 0) + return err; + + spin_lock_irqsave(&vector_lock, flags); + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + spin_unlock_irqrestore(&vector_lock, flags); + + cfg = irq_cfg(irq); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + + entry->vector = cfg->vector; + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = cpu_mask_to_apicid(*eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int mmr_pnode; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + + entry->mask = 1; + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} +#endif /* CONFIG_X86_64 */ + int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c new file mode 100644 index 00000000000..6bd26c91c30 --- /dev/null +++ b/arch/x86/kernel/uv_irq.c @@ -0,0 +1,77 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV IRQ functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include + +static void uv_noop(unsigned int irq) +{ +} + +static unsigned int uv_noop_ret(unsigned int irq) +{ + return 0; +} + +static void uv_ack_apic(unsigned int irq) +{ + ack_APIC_irq(); +} + +struct irq_chip uv_irq_chip = { + .name = "UV-CORE", + .startup = uv_noop_ret, + .shutdown = uv_noop, + .enable = uv_noop, + .disable = uv_noop, + .ack = uv_noop, + .mask = uv_noop, + .unmask = uv_noop, + .eoi = uv_ack_apic, + .end = uv_noop, +}; + +/* + * Set up a mapping of an available irq and vector, and enable the specified + * MMR that defines the MSI that is to be sent to the specified CPU when an + * interrupt is raised. + */ +int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, + unsigned long mmr_offset) +{ + int irq; + int ret; + + irq = create_irq(); + if (irq <= 0) + return -EBUSY; + + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); + if (ret != irq) + destroy_irq(irq); + + return ret; +} +EXPORT_SYMBOL_GPL(uv_setup_irq); + +/* + * Tear down a mapping of an irq and vector, and disable the specified MMR that + * defined the MSI that was to be sent to the specified CPU when an interrupt + * was raised. + * + * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). + */ +void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +{ + arch_disable_uv_irq(mmr_blade, mmr_offset); + destroy_irq(irq); +} +EXPORT_SYMBOL_GPL(uv_teardown_irq); -- cgit v1.2.3 From 37762b6ffb37f9e21cbc6f80902aa06b7a053fd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Oct 2008 11:38:37 +0200 Subject: x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3, fix fix: arch/x86/kernel/uv_irq.c: In function 'uv_ack_apic': arch/x86/kernel/uv_irq.c:26: error: implicit declaration of function 'ack_APIC_irq' Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_irq.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 6bd26c91c30..aeef529917e 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -10,6 +10,8 @@ #include #include + +#include #include static void uv_noop(unsigned int irq) -- cgit v1.2.3 From a50f70b17541c0060967c6df61133e968bad3652 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:58:54 -0500 Subject: x86: Add UV EFI table entry v4 Look for a UV entry in the EFI tables. Signed-off-by: Russ Anderson Signed-off-by: Paul Jackson Acked-by: Huang Ying Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 945a31cdd81..1119d247fe1 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -366,6 +366,10 @@ void __init efi_init(void) SMBIOS_TABLE_GUID)) { efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, + UV_SYSTEM_TABLE_GUID)) { + efi.uv_systab = config_tables[i].table; + printk(" UVsystab=0x%lx ", config_tables[i].table); } else if (!efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID)) { efi.hcdp = config_tables[i].table; -- cgit v1.2.3 From 7f5942329e0787087a5e4dced838cee711ac2b58 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:59:15 -0500 Subject: x86: Add UV bios call infrastructure v4 Add the EFI callback function and associated wrapper code. Initialize SAL system table entry info at boot time. Signed-off-by: Russ Anderson Signed-off-by: Paul Jackson Acked-by: Huang Ying Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/bios_uv.c | 101 +++++++++++++++++++++++++++++++-------- arch/x86/kernel/genx2apic_uv_x.c | 1 + 2 files changed, 81 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index fdd585f9c53..5481eb59f78 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -1,8 +1,6 @@ /* * BIOS run time interface routines. * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,33 +14,94 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson */ +#include +#include +#include #include -const char * -x86_bios_strerror(long status) +struct uv_systab uv_systab; + +s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { - const char *str; - switch (status) { - case 0: str = "Call completed without error"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - default: str = "Unknown BIOS status code"; break; - } - return str; + struct uv_systab *tab = &uv_systab; + + if (!tab->function) + /* + * BIOS does not support UV systab + */ + return BIOS_STATUS_UNIMPLEMENTED; + + return efi_call6((void *)__va(tab->function), + (u64)which, a1, a2, a3, a4, a5); } -long -x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, - unsigned long *drift_info) +s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { - struct uv_bios_retval isrv; + unsigned long bios_flags; + s64 ret; + + local_irq_save(bios_flags); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + local_irq_restore(bios_flags); + + return ret; +} + +s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) +{ + s64 ret; + + preempt_disable(); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + preempt_enable(); - BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); - *ticks_per_second = isrv.v0; - *drift_info = isrv.v1; - return isrv.status; + return ret; +} + +long +x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second, + unsigned long *drift_info) +{ + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, + (u64)ticks_per_second, 0, 0, 0); } EXPORT_SYMBOL_GPL(x86_bios_freq_base); + + +#ifdef CONFIG_EFI +void uv_bios_init(void) +{ + struct uv_systab *tab; + + if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || + (efi.uv_systab == (unsigned long)NULL)) { + printk(KERN_CRIT "No EFI UV System Table.\n"); + uv_systab.function = (unsigned long)NULL; + return; + } + + tab = (struct uv_systab *)ioremap(efi.uv_systab, + sizeof(struct uv_systab)); + if (strncmp(tab->signature, "UVST", 4) != 0) + printk(KERN_ERR "bad signature in UV system table!"); + + /* + * Copy table to permanent spot for later use. + */ + memcpy(&uv_systab, tab, sizeof(struct uv_systab)); + iounmap(tab); + + printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision); +} +#else /* !CONFIG_EFI */ + +void uv_bios_init(void) { } +#endif + diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b689075bbe2..aa2e5e15bf6 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -427,6 +427,7 @@ void __init uv_system_init(void) gnode_upper = (((unsigned long)node_id.s.node_id) & ~((1 << n_val) - 1)) << m_val; + uv_bios_init(); uv_rtc_init(); for_each_present_cpu(cpu) { -- cgit v1.2.3 From 922402f15a85f7a064926eb1db68cc52bc4d4a91 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:59:33 -0500 Subject: x86: Add UV partition call v4 Add a bios call to return partitioning related info. Signed-off-by: Russ Anderson Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/bios_uv.c | 44 +++++++++++++++++++++++++++++++++++----- arch/x86/kernel/genx2apic_uv_x.c | 14 +++++++------ 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 5481eb59f78..f0dfe6f17e7 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -23,6 +23,7 @@ #include #include #include +#include struct uv_systab uv_systab; @@ -65,14 +66,47 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return ret; } -long -x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second, - unsigned long *drift_info) + +long sn_partition_id; +EXPORT_SYMBOL_GPL(sn_partition_id); +long uv_coherency_id; +EXPORT_SYMBOL_GPL(uv_coherency_id); +long uv_region_size; +EXPORT_SYMBOL_GPL(uv_region_size); +int uv_type; + + +s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, + long *region) +{ + s64 ret; + u64 v0, v1; + union partition_info_u part; + + ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, + (u64)(&v0), (u64)(&v1), 0, 0); + if (ret != BIOS_STATUS_SUCCESS) + return ret; + + part.val = v0; + if (uvtype) + *uvtype = part.hub_version; + if (partid) + *partid = part.partition_id; + if (coher) + *coher = part.coherence_id; + if (region) + *region = part.region_size; + return ret; +} + + +s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) { return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, - (u64)ticks_per_second, 0, 0, 0); + (u64)ticks_per_second, 0, 0, 0); } -EXPORT_SYMBOL_GPL(x86_bios_freq_base); +EXPORT_SYMBOL_GPL(uv_bios_freq_base); #ifdef CONFIG_EFI diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index aa2e5e15bf6..bfd532843df 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode) static __init void uv_rtc_init(void) { - long status, ticks_per_sec, drift; + long status; + u64 ticks_per_sec; - status = - x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, - &drift); - if (status != 0 || ticks_per_sec < 100000) { + status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, + &ticks_per_sec); + if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { printk(KERN_WARNING "unable to determine platform RTC clock frequency, " "guessing.\n"); @@ -428,6 +428,8 @@ void __init uv_system_init(void) ~((1 << n_val) - 1)) << m_val; uv_bios_init(); + uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, + &uv_coherency_id, &uv_region_size); uv_rtc_init(); for_each_present_cpu(cpu) { @@ -449,7 +451,7 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; - uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ + uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); -- cgit v1.2.3 From fc8c2d763bacc02962048fa042e287debb1416aa Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:59:50 -0500 Subject: x86: Add sysfs entries for UV v4 Create /sys/firmware/sgi_uv sysfs entries for partition_id and coherence_id. Signed-off-by: Russ Anderson Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/uv_sysfs.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/uv_sysfs.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index acc0e8bf043..cb6ade443f0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o uv_irq.o + obj-y += bios_uv.o uv_irq.o uv_sysfs.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c new file mode 100644 index 00000000000..67f9b9dbf80 --- /dev/null +++ b/arch/x86/kernel/uv_sysfs.c @@ -0,0 +1,72 @@ +/* + * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson + */ + +#include +#include + +struct kobject *sgi_uv_kobj; + +static ssize_t partition_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); +} + +static ssize_t coherence_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); +} + +static struct kobj_attribute partition_id_attr = + __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); + +static struct kobj_attribute coherence_id_attr = + __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); + + +static int __init sgi_uv_sysfs_init(void) +{ + unsigned long ret; + + if (!sgi_uv_kobj) + sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); + if (!sgi_uv_kobj) { + printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); + return -EINVAL; + } + + ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); + return ret; + } + + ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); + return ret; + } + + return 0; +} + +device_initcall(sgi_uv_sysfs_init); -- cgit v1.2.3 From 4c66a73f0796dacc2ff0d4af75794ec843ceb3d1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 15:52:15 -0700 Subject: x86: sparse_irq: fix typo in debug print out Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 260c95a5e6d..f959acbc0db 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -257,7 +257,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) panic("please boot with nr_irq_cfg= %d\n", count * 2); phys = __pa(cfg); - printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); + printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes); for (i = 0; i < nr_irq_cfg; i++) init_one_irq_cfg(&cfg[i]); -- cgit v1.2.3 From 81608f3c254512b906ab78082ec5966b376aacd5 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 10 Oct 2008 19:00:17 +0400 Subject: x86: apic - unify APIC_DIVISOR Use APIC_DIVISOR being set to 16 for both 32/64bit mode. To escape APIC timer underflow during calibration set it to the maximum possible value. Also typo error (CONFG instead of proper CONFIG) fixed. The error was caught by Venkatesh Pallipadi, thanks a lot Venkatesh! See details on http://lkml.org/lkml/2008/10/9/425 Reported-by: Venkatesh Pallipad Signed-off-by: Cyrill Gorcunov Acked-by: "Maciej W. Rozycki" Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 1f48bd1c9f2..04a7f960bbc 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -332,11 +332,7 @@ int lapic_get_maxlvt(void) */ /* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else #define APIC_DIVISOR 16 -#endif /* * This function sets up the local APIC timer, with a timeout of @@ -592,10 +588,10 @@ static int __init calibrate_APIC_clock(void) global_clock_event->event_handler = lapic_cal_handler; /* - * Setup the APIC counter to 1e9. There is no way the lapic + * Setup the APIC counter to maximum. There is no way the lapic * can underflow in the 100ms detection time frame */ - __setup_APIC_LVTT(1000000000, 0, 0); + __setup_APIC_LVTT(0xffffffff, 0, 0); /* Let the interrupts run */ local_irq_enable(); -- cgit v1.2.3 From 7ef0c30dbf96a8d9a234e90c248eb19df3c031be Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 13:07:35 +0200 Subject: genirq: define nr_irqs for architectures with GENERIC_HARDIRQS=n Revert the sparse irq changes in m68k/s390/sparc and just define nr_irqs as NR_IRQS for those architectures. Signed-off-by: Thomas Gleixner --- arch/m68k/kernel/ints.c | 3 --- arch/s390/kernel/irq.c | 3 --- arch/sparc/kernel/irq.c | 4 ---- 3 files changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c index 44169e4cd91..7e8a0d394e6 100644 --- a/arch/m68k/kernel/ints.c +++ b/arch/m68k/kernel/ints.c @@ -46,9 +46,6 @@ #include #endif -int nr_irqs = NR_IRQS; -EXPORT_SYMBOL(nr_irqs); - extern u32 auto_irqhandler_fixup[]; extern u32 user_irqhandler_fixup[]; extern u16 user_irqvec_fixup[]; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 3624c4a0037..e7c5bfb7c75 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -17,9 +17,6 @@ #include #include -int nr_irqs = NR_IRQS; -EXPORT_SYMBOL(nr_irqs); - /* * show_interrupts is needed by /proc/interrupts. */ diff --git a/arch/sparc/kernel/irq.c b/arch/sparc/kernel/irq.c index 4b99e3ce391..93e1d1c6529 100644 --- a/arch/sparc/kernel/irq.c +++ b/arch/sparc/kernel/irq.c @@ -55,10 +55,6 @@ #define SMP_NOP2 #define SMP_NOP3 #endif /* SMP */ - -int nr_irqs = NR_IRQS; -EXPORT_SYMBOL(nr_irqs); - unsigned long __raw_local_irq_save(void) { unsigned long retval; -- cgit v1.2.3 From 3235e936c0cc3589309280b6f59e5096779adae3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 13:16:00 +0200 Subject: x86: remove sparse irq from Kconfig This code is not ready yet. Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 600584b7a49..8636ddf2f4a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -237,17 +237,6 @@ config SMP If you don't know what to do here, say N. -config HAVE_SPARSE_IRQ - bool "Support sparse irq numbering" - depends on PCI_MSI || HT_IRQ - default y - help - This enables support for sparse irq, esp for msi/msi-x. the irq - number will be bus/dev/fn + 12bit. You may need if you have lots of - cards supports msi-x installed. - - If you don't know what to do here, say Y. - config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER -- cgit v1.2.3 From 2cc21ef843d4fb7da122239b644a1f6f0aca60a6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 14:16:55 +0200 Subject: genirq: remove sparse irq code This code is not ready, but we need to rip it out instead of rebasing as we would lose the APIC/IO_APIC unification otherwise. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_apic.c | 130 ++-------------------------------------------- arch/x86/kernel/irq_32.c | 8 --- arch/x86/kernel/irq_64.c | 8 --- 3 files changed, 4 insertions(+), 142 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f959acbc0db..683610517d2 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -111,9 +111,6 @@ struct irq_cfg; struct irq_pin_list; struct irq_cfg { unsigned int irq; -#ifdef CONFIG_HAVE_SPARSE_IRQ - struct irq_cfg *next; -#endif struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; @@ -151,15 +148,6 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) static struct irq_cfg *irq_cfgx; -#ifdef CONFIG_HAVE_SPARSE_IRQ -/* - * Protect the irq_cfgx_free freelist: - */ -static DEFINE_SPINLOCK(irq_cfg_lock); - -static struct irq_cfg *irq_cfgx_free; -#endif - static void __init init_work(void *data) { struct dyn_array *da = data; @@ -174,114 +162,7 @@ static void __init init_work(void *data) legacy_count = ARRAY_SIZE(irq_cfg_legacy); for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); - -#ifdef CONFIG_HAVE_SPARSE_IRQ - for (i = 1; i < *da->nr; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = &irq_cfgx[legacy_count]; - irq_cfgx[legacy_count - 1].next = NULL; -#endif -} - -#ifdef CONFIG_HAVE_SPARSE_IRQ -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); - -#define for_each_irq_cfg(irqX, cfg) \ - for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U) - - -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); - -static struct irq_cfg *irq_cfg(unsigned int irq) -{ - struct irq_cfg *cfg; - - cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg = cfg->next; - } - - return NULL; -} - -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - struct irq_cfg *cfg, *cfg_pri; - unsigned long flags; - int count = 0; - int i; - - cfg_pri = cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg_pri = cfg; - cfg = cfg->next; - count++; - } - - spin_lock_irqsave(&irq_cfg_lock, flags); - if (!irq_cfgx_free) { - unsigned long phys; - unsigned long total_bytes; - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - - total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; - if (after_bootmem) - cfg = kzalloc(total_bytes, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); - - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = cfg; - } - - cfg = irq_cfgx_free; - irq_cfgx_free = irq_cfgx_free->next; - cfg->next = NULL; - if (cfg_pri) - cfg_pri->next = cfg; - else - irq_cfgx = cfg; - cfg->irq = irq; - - spin_unlock_irqrestore(&irq_cfg_lock, flags); - - return cfg; } -#else #define for_each_irq_cfg(irq, cfg) \ for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq]) @@ -290,17 +171,16 @@ DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work struct irq_cfg *irq_cfg(unsigned int irq) { - if (irq < nr_irqs) - return &irq_cfgx[irq]; + if (irq < nr_irqs) + return &irq_cfgx[irq]; - return NULL; + return NULL; } struct irq_cfg *irq_cfg_alloc(unsigned int irq) { - return irq_cfg(irq); + return irq_cfg(irq); } -#endif /* * This is performance-critical, we want to do it O(1) * @@ -3068,9 +2948,7 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned long flags; struct irq_cfg *cfg_new; -#ifndef CONFIG_HAVE_SPARSE_IRQ irq_want = nr_irqs - 1; -#endif irq = 0; spin_lock_irqsave(&vector_lock, flags); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 001772ffc91..ccf6c1bf712 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -272,20 +272,12 @@ int show_interrupts(struct seq_file *p, void *v) struct irq_desc *desc = NULL; int tail = 0; -#ifdef CONFIG_HAVE_SPARSE_IRQ - desc = (struct irq_desc *)v; - entries = -1U; - i = desc->irq; - if (!desc->next) - tail = 1; -#else entries = nr_irqs - 1; i = *(loff_t *) v; if (i == nr_irqs) tail = 1; else desc = irq_to_desc(i); -#endif if (i == 0) { seq_printf(p, " "); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index ec266109128..21f53b91111 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -77,20 +77,12 @@ int show_interrupts(struct seq_file *p, void *v) struct irq_desc *desc = NULL; int tail = 0; -#ifdef CONFIG_HAVE_SPARSE_IRQ - desc = (struct irq_desc *)v; - entries = -1U; - i = desc->irq; - if (!desc->next) - tail = 1; -#else entries = nr_irqs - 1; i = *(loff_t *) v; if (i == nr_irqs) tail = 1; else desc = irq_to_desc(i); -#endif if (i == 0) { seq_printf(p, " "); -- cgit v1.2.3 From ee32c9732244bde4b9b59eeac2814c23e2b71f8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 14:34:09 +0200 Subject: genirq: remove irq_to_desc_alloc Remove the leftover of sparseirqs. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_apic.c | 6 +----- arch/x86/kernel/irqinit_32.c | 2 +- arch/x86/kernel/irqinit_64.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 683610517d2..e03bc0f87ee 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1257,11 +1257,7 @@ static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; - /* first time to use this irq_desc */ - if (irq < 16) - desc = irq_to_desc(irq); - else - desc = irq_to_desc_alloc(irq); + desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 9092103a18e..a8d35998d30 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -70,7 +70,7 @@ void __init init_ISA_irqs (void) */ for (i = 0; i < 16; i++) { /* first time call this irq_desc */ - struct irq_desc *desc = irq_to_desc_alloc(i); + struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; desc->action = NULL; diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index d17fbc26d96..ff023539128 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -144,7 +144,7 @@ void __init init_ISA_irqs(void) for (i = 0; i < 16; i++) { /* first time call this irq_desc */ - struct irq_desc *desc = irq_to_desc_alloc(i); + struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; desc->action = NULL; -- cgit v1.2.3 From d6c88a507ef0b6afdb013cba4e7804ba7324d99a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 15:27:23 +0200 Subject: genirq: revert dynarray Revert the dynarray changes. They need more thought and polishing. Signed-off-by: Thomas Gleixner --- arch/Kconfig | 4 - arch/x86/Kconfig | 1 - arch/x86/kernel/io_apic.c | 199 +++++++++++++++------------------------ arch/x86/kernel/setup_percpu.c | 8 +- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/kernel/vmlinux_32.lds.S | 1 - arch/x86/kernel/vmlinux_64.lds.S | 2 - arch/x86/xen/spinlock.c | 2 +- 8 files changed, 78 insertions(+), 141 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index c8a7c2eb649..071004d3a1b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -102,7 +102,3 @@ config HAVE_CLK help The calls support software clock gating and thus are a key power management tool on many systems. - -config HAVE_DYN_ARRAY - def_bool n - diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8636ddf2f4a..8da6123a60d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -33,7 +33,6 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS - select HAVE_DYN_ARRAY config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e03bc0f87ee..6f80dc2f137 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -107,7 +107,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -struct irq_cfg; struct irq_pin_list; struct irq_cfg { unsigned int irq; @@ -120,7 +119,7 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg_legacy[] __initdata = { +static struct irq_cfg irq_cfgx[NR_IRQS] = { [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, @@ -139,48 +138,26 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = { [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -static struct irq_cfg irq_cfg_init = { .irq = -1U, }; - -static void init_one_irq_cfg(struct irq_cfg *cfg) -{ - memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); -} - -static struct irq_cfg *irq_cfgx; - -static void __init init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; - - cfg = *da->name; - - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - - legacy_count = ARRAY_SIZE(irq_cfg_legacy); - for (i = legacy_count; i < *da->nr; i++) - init_one_irq_cfg(&cfg[i]); -} - #define for_each_irq_cfg(irq, cfg) \ - for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq]) + for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); - -struct irq_cfg *irq_cfg(unsigned int irq) +static struct irq_cfg *irq_cfg(unsigned int irq) { - if (irq < nr_irqs) - return &irq_cfgx[irq]; - - return NULL; + return irq < nr_irqs ? irq_cfgx + irq : NULL; } -struct irq_cfg *irq_cfg_alloc(unsigned int irq) + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) { return irq_cfg(irq); } +/* + * Rough estimation of how many shared IRQs there are, can be changed + * anytime. + */ +#define MAX_PLUS_SHARED_IRQS NR_IRQS +#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + /* * This is performance-critical, we want to do it O(1) * @@ -193,59 +170,29 @@ struct irq_pin_list { struct irq_pin_list *next; }; -static struct irq_pin_list *irq_2_pin_head; -/* fill one page ? */ -static int nr_irq_2_pin = 0x100; +static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; static struct irq_pin_list *irq_2_pin_ptr; -static void __init irq_2_pin_init_work(void *data) + +static void __init irq_2_pin_init(void) { - struct dyn_array *da = data; - struct irq_pin_list *pin; + struct irq_pin_list *pin = irq_2_pin_head; int i; - pin = *da->name; - - for (i = 1; i < *da->nr; i++) + for (i = 1; i < PIN_MAP_SIZE; i++) pin[i-1].next = &pin[i]; irq_2_pin_ptr = &pin[0]; } -DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); static struct irq_pin_list *get_one_free_irq_2_pin(void) { - struct irq_pin_list *pin; - int i; - - pin = irq_2_pin_ptr; - - if (pin) { - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; - } - - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); - - if (after_bootmem) - pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, - GFP_ATOMIC); - else - pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * - nr_irq_2_pin, PAGE_SIZE, 0); + struct irq_pin_list *pin = irq_2_pin_ptr; if (!pin) panic("can not get more irq_2_pin\n"); - for (i = 1; i < nr_irq_2_pin; i++) - pin[i-1].next = &pin[i]; - irq_2_pin_ptr = pin->next; pin->next = NULL; - return pin; } @@ -284,8 +231,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); + + if (sis_apic_bug) + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -1044,11 +992,11 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; - /* + /* * For MPS mode, so far only needed by ES7000 platform */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } #ifdef CONFIG_X86_32 @@ -1232,19 +1180,19 @@ static struct irq_chip ir_ioapic_chip; #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) { - int apic, idx, pin; + int apic, idx, pin; - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* * nonexistent IRQs are edge default */ - return 0; + return 0; } #else static inline int IO_APIC_irq_trigger(int irq) @@ -1509,8 +1457,8 @@ __apicdebuginit(void) print_IO_APIC(void) reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); @@ -2089,9 +2037,9 @@ static int ioapic_retrigger_irq(unsigned int irq) #else static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_cfg(irq)->vector); + send_IPI_self(irq_cfg(irq)->vector); - return 1; + return 1; } #endif @@ -2189,7 +2137,7 @@ static int migrate_irq_remapped_level(int irq) if (io_apic_level_ack_pending(irq)) { /* - * Interrupt in progress. Migrating irq now will change the + * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse * the EOI broadcast performed by cpu. * So, delay the irq migration to the next instance. @@ -2426,28 +2374,28 @@ static void ack_apic_level(unsigned int irq) } static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_apic_edge, - .eoi = ack_apic_level, + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, #ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, + .set_affinity = set_ioapic_affinity_irq, #endif .retrigger = ioapic_retrigger_irq, }; #ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, #ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, + .set_affinity = set_ir_ioapic_affinity_irq, #endif .retrigger = ioapic_retrigger_irq, }; @@ -2636,8 +2584,8 @@ static inline void __init check_timer(void) local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2822,12 +2770,12 @@ void __init setup_IO_APIC(void) io_apic_irqs = ~PIC_IRQS; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - /* + /* * Set up IO-APIC IRQ routing. */ #ifdef CONFIG_X86_32 - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); #endif sync_Arb_IDs(); setup_IO_APIC_irqs(); @@ -2842,9 +2790,9 @@ void __init setup_IO_APIC(void) static int __init io_apic_bug_finalize(void) { - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; } late_initcall(io_apic_bug_finalize); @@ -3199,7 +3147,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) if (index < 0) { printk(KERN_ERR "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); + pci_name(dev)); return -ENOSPC; } return index; @@ -3885,23 +3833,24 @@ static struct resource * __init ioapic_setup_resources(void) void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; struct resource *ioapic_res; + int i; + irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; #ifdef CONFIG_X86_32 - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } #endif } else { #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 2b7dab699e8..410c88f0bfe 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size, da_size; + ssize_t size, old_size; char *ptr; int cpu; unsigned long align = 1; @@ -150,9 +150,8 @@ void __init setup_per_cpu_areas(void) /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; - da_size = per_cpu_dyn_array_size(&align); align = max_t(unsigned long, PAGE_SIZE, align); - size = roundup(old_size + da_size, align); + size = roundup(old_size, align); printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -182,9 +181,6 @@ void __init setup_per_cpu_areas(void) #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - - per_cpu_alloc_dyn_array(cpu, ptr + old_size); - } printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 817aa55a120..0c9667f0752 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) /* * handle this 'virtual interrupt' as a Cobalt one now. */ - kstat_irqs_this_cpu(desc)++; + kstat_incr_irqs_this_cpu(realirq, desc); if (likely(desc->action != NULL)) handle_IRQ_event(realirq, desc->action); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index c36007ab394..a9b8560adbc 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -145,7 +145,6 @@ SECTIONS *(.x86_cpu_dev.init) __x86_cpu_dev_end = .; } - DYN_ARRAY_INIT(8) SECURITY_INIT . = ALIGN(4); .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 30973dbac8c..3245ad72594 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -174,8 +174,6 @@ SECTIONS } __x86_cpu_dev_end = .; - DYN_ARRAY_INIT(8) - SECURITY_INIT . = ALIGN(8); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index bb6bc721b13..5601506f2dd 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ - kstat_irqs_this_cpu(irq_to_desc(irq))++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); out: raw_local_irq_restore(flags); -- cgit v1.2.3 From a1aca5de08a0cb840a90fb3f729a5940f8d21185 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 15 Oct 2008 19:29:15 +0200 Subject: genirq: remove artifacts from sparseirq removal Signed-off-by: Ingo Molnar --- arch/Kconfig | 1 + arch/x86/kernel/acpi/boot.c | 1 - arch/x86/kernel/irqinit_32.c | 1 - arch/x86/kernel/vmlinux_64.lds.S | 1 - arch/x86/mm/init_32.c | 3 --- 5 files changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 071004d3a1b..0267babe5eb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -102,3 +102,4 @@ config HAVE_CLK help The calls support software clock gating and thus are a key power management tool on many systems. + diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5fef4fece4a..0d1c26a583c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1254,7 +1254,6 @@ static int __init acpi_parse_madt_ioapic_entries(void) return count; } - count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, nr_irqs); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index a8d35998d30..845aa9803e8 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -184,4 +184,3 @@ void __init native_init_IRQ(void) irq_ctx_init(smp_processor_id()); } - diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 3245ad72594..46e05447405 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -173,7 +173,6 @@ SECTIONS *(.x86_cpu_dev.init) } __x86_cpu_dev_end = .; - SECURITY_INIT . = ALIGN(8); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 91343c2694b..8396868e82c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -66,7 +66,6 @@ static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; static int __initdata after_init_bootmem; -int after_bootmem; static __init void *alloc_low_page(unsigned long *phys) { @@ -989,8 +988,6 @@ void __init mem_init(void) set_highmem_pages_init(); - after_bootmem = 1; - codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; -- cgit v1.2.3 From c0c168ca26b54a4a6ad34fc813fe00f275fbc94c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 11:15:28 +0200 Subject: x86: cleanup show_interrupts The sparseirq patches introduced some more ugliness in show_interrupts(). Clean it up all together and make the code easier to read by splitting out the "tail" function which prints the special interrupts. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq_32.c | 160 +++++++++++++++++++++++------------------------ arch/x86/kernel/irq_64.c | 153 ++++++++++++++++++++++---------------------- 2 files changed, 154 insertions(+), 159 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index ccf6c1bf712..8d525765a6c 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -263,22 +263,67 @@ atomic_t irq_err_count; * /proc/interrupts printing: */ +static int show_other_interrupts(struct seq_file *p) +{ + int j; + + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", nmi_count(j)); + seq_printf(p, " Non-maskable interrupts\n"); +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "LOC: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); +#endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count); + seq_printf(p, " Function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#if defined(CONFIG_X86_IO_APIC) + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif + return 0; +} + int show_interrupts(struct seq_file *p, void *v) { + unsigned long flags, any_count = 0; int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - unsigned int entries; - struct irq_desc *desc = NULL; - int tail = 0; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; - entries = nr_irqs - 1; - i = *(loff_t *) v; if (i == nr_irqs) - tail = 1; - else - desc = irq_to_desc(i); + return show_other_interrupts(p); + /* print header */ if (i == 0) { seq_printf(p, " "); for_each_online_cpu(j) @@ -286,88 +331,37 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i <= entries) { - unsigned any_count = 0; - - spin_lock_irqsave(&desc->lock, flags); + desc = irq_to_desc(i); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP - any_count = kstat_irqs(i); + any_count = kstat_irqs(i); #else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); #endif - action = desc->action; - if (!action && !any_count) - goto skip; - seq_printf(p, "%3d: ", i); + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); + seq_printf(p, "%10u ", kstat_irqs(i)); #else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); } - if (tail) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); - seq_printf(p, " Non-maskable interrupts\n"); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#endif -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); -#endif -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - } + seq_putc(p, '\n'); +out: + spin_unlock_irqrestore(&desc->lock, flags); return 0; } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 21f53b91111..4f374294f29 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -68,22 +68,65 @@ static inline void stack_overflow_check(struct pt_regs *regs) * Generic, controller-independent functions: */ +static int show_other_interrupts(struct seq_file *p) +{ + int j; + + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); + seq_printf(p, "LOC: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); + seq_printf(p, " Function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); +#endif + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + + return 0; +} + int show_interrupts(struct seq_file *p, void *v) { - int i, j; - struct irqaction * action; - unsigned long flags; - unsigned int entries; - struct irq_desc *desc = NULL; - int tail = 0; - - entries = nr_irqs - 1; - i = *(loff_t *) v; + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; + if (i == nr_irqs) - tail = 1; - else - desc = irq_to_desc(i); + return show_other_interrupts(p); + /* print header */ if (i == 0) { seq_printf(p, " "); for_each_online_cpu(j) @@ -91,79 +134,37 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i <= entries) { - unsigned any_count = 0; - - spin_lock_irqsave(&desc->lock, flags); + desc = irq_to_desc(i); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP - any_count = kstat_irqs(i); + any_count = kstat_irqs(i); #else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); #endif - action = desc->action; - if (!action && !any_count) - goto skip; - seq_printf(p, "%3d: ", i); + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); + seq_printf(p, "%10u ", kstat_irqs(i)); #else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); - } - - if (tail) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_printf(p, " Non-maskable interrupts\n"); - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); - seq_printf(p, "THR: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); - seq_printf(p, " Threshold APIC interrupts\n"); -#endif - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); } + seq_putc(p, '\n'); +out: + spin_unlock_irqrestore(&desc->lock, flags); return 0; } -- cgit v1.2.3 From 6b39ba771e3c78d00e0abcebad270bd4212b28bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 11:32:24 +0200 Subject: x86: unify show_interrupts() and proc helpers show_interrupts() and proc helpers are basically the same for 32 and 64 bit. Move them to a shared source file. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/irq.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 146 ----------------------------------------- arch/x86/kernel/irq_64.c | 132 ------------------------------------- 4 files changed, 167 insertions(+), 279 deletions(-) create mode 100644 arch/x86/kernel/irq.c (limited to 'arch') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cb6ade443f0..d7e5a58ee22 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o -obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o +obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c new file mode 100644 index 00000000000..3b912849897 --- /dev/null +++ b/arch/x86/kernel/irq.c @@ -0,0 +1,166 @@ +/* + * Common interrupt code for 32 and 64 bit + */ +#include +#include +#include +#include + +#include +#include +#include + +atomic_t irq_err_count; + +#ifdef CONFIG_X86_32 +# define irq_stats(x) (&per_cpu(irq_stat,x)) +#else +# define irq_stats(x) cpu_pda(x) +#endif +/* + * /proc/interrupts printing: + */ +static int show_other_interrupts(struct seq_file *p) +{ + int j; + + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "LOC: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); +#endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, " Function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); +# ifdef CONFIG_X86_64 + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); +# endif +#endif +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#if defined(CONFIG_X86_IO_APIC) + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif + return 0; +} + +int show_interrupts(struct seq_file *p, void *v) +{ + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; + + if (i == nr_irqs) + return show_other_interrupts(p); + + /* print header */ + if (i == 0) { + seq_printf(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%-8d",j); + seq_putc(p, '\n'); + } + + desc = irq_to_desc(i); + spin_lock_irqsave(&desc->lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); +#endif + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%3d: ", i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); +#endif + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); + + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } + + seq_putc(p, '\n'); +out: + spin_unlock_irqrestore(&desc->lock, flags); + return 0; +} + +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = irq_stats(cpu)->__nmi_count; + +#ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += irq_stats(cpu)->irq_resched_count; + sum += irq_stats(cpu)->irq_call_count; + sum += irq_stats(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += irq_stats(cpu)->irq_thermal_count; +# ifdef CONFIG_X86_64 + sum += irq_stats(cpu)->irq_threshold_count; +#endif +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 8d525765a6c..6d9bf3936c7 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -253,152 +253,6 @@ unsigned int do_IRQ(struct pt_regs *regs) return 1; } -/* - * Interrupt statistics: - */ - -atomic_t irq_err_count; - -/* - * /proc/interrupts printing: - */ - -static int show_other_interrupts(struct seq_file *p) -{ - int j; - - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); - seq_printf(p, " Non-maskable interrupts\n"); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#endif -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); -#endif -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - return 0; -} - -int show_interrupts(struct seq_file *p, void *v) -{ - unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j; - struct irqaction *action; - struct irq_desc *desc; - - if (i > nr_irqs) - return 0; - - if (i == nr_irqs) - return show_other_interrupts(p); - - /* print header */ - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - desc = irq_to_desc(i); - spin_lock_irqsave(&desc->lock, flags); -#ifndef CONFIG_SMP - any_count = kstat_irqs(i); -#else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); -#endif - action = desc->action; - if (!action && !any_count) - goto out; - - seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -out: - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -/* - * /proc/stat helpers - */ -u64 arch_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = nmi_count(cpu); - -#ifdef CONFIG_X86_LOCAL_APIC - sum += per_cpu(irq_stat, cpu).apic_timer_irqs; -#endif -#ifdef CONFIG_SMP - sum += per_cpu(irq_stat, cpu).irq_resched_count; - sum += per_cpu(irq_stat, cpu).irq_call_count; - sum += per_cpu(irq_stat, cpu).irq_tlb_count; -#endif -#ifdef CONFIG_X86_MCE - sum += per_cpu(irq_stat, cpu).irq_thermal_count; -#endif -#ifdef CONFIG_X86_LOCAL_APIC - sum += per_cpu(irq_stat, cpu).irq_spurious_count; -#endif - return sum; -} - -u64 arch_irq_stat(void) -{ - u64 sum = atomic_read(&irq_err_count); - -#ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); -#endif - return sum; -} - #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4f374294f29..39ef7feb9ea 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,8 +18,6 @@ #include #include -atomic_t irq_err_count; - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -64,136 +62,6 @@ static inline void stack_overflow_check(struct pt_regs *regs) } #endif -/* - * Generic, controller-independent functions: - */ - -static int show_other_interrupts(struct seq_file *p) -{ - int j; - - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_printf(p, " Non-maskable interrupts\n"); - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); - seq_printf(p, "THR: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); - seq_printf(p, " Threshold APIC interrupts\n"); -#endif - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); - - return 0; -} - -int show_interrupts(struct seq_file *p, void *v) -{ - unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j; - struct irqaction *action; - struct irq_desc *desc; - - if (i > nr_irqs) - return 0; - - if (i == nr_irqs) - return show_other_interrupts(p); - - /* print header */ - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - desc = irq_to_desc(i); - spin_lock_irqsave(&desc->lock, flags); -#ifndef CONFIG_SMP - any_count = kstat_irqs(i); -#else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); -#endif - action = desc->action; - if (!action && !any_count) - goto out; - - seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -out: - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -/* - * /proc/stat helpers - */ -u64 arch_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = cpu_pda(cpu)->__nmi_count; - - sum += cpu_pda(cpu)->apic_timer_irqs; -#ifdef CONFIG_SMP - sum += cpu_pda(cpu)->irq_resched_count; - sum += cpu_pda(cpu)->irq_call_count; - sum += cpu_pda(cpu)->irq_tlb_count; -#endif -#ifdef CONFIG_X86_MCE - sum += cpu_pda(cpu)->irq_thermal_count; - sum += cpu_pda(cpu)->irq_threshold_count; -#endif - sum += cpu_pda(cpu)->irq_spurious_count; - return sum; -} - -u64 arch_irq_stat(void) -{ - return atomic_read(&irq_err_count); -} - /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific -- cgit v1.2.3 From 249f6d9eab372790579ada8991bba3384c204e06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 12:18:50 +0200 Subject: x86: move ack_bad_irq() to irq.c Share more duplicated code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq.c | 23 +++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 23 ----------------------- arch/x86/kernel/irq_64.c | 20 -------------------- 3 files changed, 23 insertions(+), 43 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3b912849897..ccf6c503fc3 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,29 @@ atomic_t irq_err_count; +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); + +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But only ack when the APIC is enabled -AK + */ + if (cpu_has_apic) + ack_APIC_irq(); +#endif +} + #ifdef CONFIG_X86_32 # define irq_stats(x) (&per_cpu(irq_stat,x)) #else diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 6d9bf3936c7..a51382672de 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); EXPORT_PER_CPU_SYMBOL(irq_regs); -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But only ack when the APIC is enabled -AK - */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif -} - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ static int check_stack_overflow(void) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 39ef7feb9ea..60eb84eb77a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,26 +18,6 @@ #include #include -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But don't ack when the APIC is disabled. -AK - */ - if (!disable_apic) - ack_APIC_irq(); -} - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* * Probabilistic stack overflow check: -- cgit v1.2.3 From 4b1135a277f4b38f60b9c9f28adae467feb07856 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 15:33:18 +0200 Subject: genirq: fix name space collisions of nr_irqs in arch/* local shadows of global variables are _bad_ Signed-off-by: Thomas Gleixner --- arch/alpha/kernel/sys_sable.c | 6 +++--- arch/arm/mach-ixp2000/ixdp2x00.c | 4 ++-- arch/arm/mach-omap2/irq.c | 8 ++++---- arch/avr32/mach-at32ap/extint.c | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 99a7f19da13..a4555f49763 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -47,7 +47,7 @@ typedef struct irq_swizzle_struct static irq_swizzle_t *sable_lynx_irq_swizzle; -static void sable_lynx_init_irq(int nr_irqs); +static void sable_lynx_init_irq(int nr_of_irqs); #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SABLE) @@ -530,11 +530,11 @@ sable_lynx_srm_device_interrupt(unsigned long vector) } static void __init -sable_lynx_init_irq(int nr_irqs) +sable_lynx_init_irq(int nr_of_irqs) { long i; - for (i = 0; i < nr_irqs; ++i) { + for (i = 0; i < nr_of_irqs; ++i) { irq_desc[i].status = IRQ_DISABLED | IRQ_LEVEL; irq_desc[i].chip = &sable_lynx_irq_type; } diff --git a/arch/arm/mach-ixp2000/ixdp2x00.c b/arch/arm/mach-ixp2000/ixdp2x00.c index b0653a87159..30451300751 100644 --- a/arch/arm/mach-ixp2000/ixdp2x00.c +++ b/arch/arm/mach-ixp2000/ixdp2x00.c @@ -143,7 +143,7 @@ static struct irq_chip ixdp2x00_cpld_irq_chip = { .unmask = ixdp2x00_irq_unmask }; -void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_irqs) +void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigned long *mask_reg, unsigned long nr_of_irqs) { unsigned int irq; @@ -154,7 +154,7 @@ void __init ixdp2x00_init_irq(volatile unsigned long *stat_reg, volatile unsigne board_irq_stat = stat_reg; board_irq_mask = mask_reg; - board_irq_count = nr_irqs; + board_irq_count = nr_of_irqs; *board_irq_mask = 0xffffffff; diff --git a/arch/arm/mach-omap2/irq.c b/arch/arm/mach-omap2/irq.c index 196a9565a8d..003901f1e2d 100644 --- a/arch/arm/mach-omap2/irq.c +++ b/arch/arm/mach-omap2/irq.c @@ -111,7 +111,7 @@ static void __init omap_irq_bank_init_one(struct omap_irq_bank *bank) void __init omap_init_irq(void) { - unsigned long nr_irqs = 0; + unsigned long nr_of_irqs = 0; unsigned int nr_banks = 0; int i; @@ -124,14 +124,14 @@ void __init omap_init_irq(void) omap_irq_bank_init_one(bank); - nr_irqs += bank->nr_irqs; + nr_of_irqs += bank->nr_irqs; nr_banks++; } printk(KERN_INFO "Total of %ld interrupts on %d active controller%s\n", - nr_irqs, nr_banks, nr_banks > 1 ? "s" : ""); + nr_of_irqs, nr_banks, nr_banks > 1 ? "s" : ""); - for (i = 0; i < nr_irqs; i++) { + for (i = 0; i < nr_of_irqs; i++) { set_irq_chip(i, &omap_irq_chip); set_irq_handler(i, handle_level_irq); set_irq_flags(i, IRQF_VALID); diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c index c36a6d59d6f..310477ba1bb 100644 --- a/arch/avr32/mach-at32ap/extint.c +++ b/arch/avr32/mach-at32ap/extint.c @@ -191,7 +191,7 @@ static int __init eic_probe(struct platform_device *pdev) struct eic *eic; struct resource *regs; unsigned int i; - unsigned int nr_irqs; + unsigned int nr_of_irqs; unsigned int int_irq; int ret; u32 pattern; @@ -224,7 +224,7 @@ static int __init eic_probe(struct platform_device *pdev) eic_writel(eic, IDR, ~0UL); eic_writel(eic, MODE, ~0UL); pattern = eic_readl(eic, MODE); - nr_irqs = fls(pattern); + nr_of_irqs = fls(pattern); /* Trigger on low level unless overridden by driver */ eic_writel(eic, EDGE, 0UL); @@ -232,7 +232,7 @@ static int __init eic_probe(struct platform_device *pdev) eic->chip = &eic_chip; - for (i = 0; i < nr_irqs; i++) { + for (i = 0; i < nr_of_irqs; i++) { set_irq_chip_and_handler(eic->first_irq + i, &eic_chip, handle_level_irq); set_irq_chip_data(eic->first_irq + i, eic); @@ -256,7 +256,7 @@ static int __init eic_probe(struct platform_device *pdev) eic->regs, int_irq); dev_info(&pdev->dev, "Handling %u external IRQs, starting with IRQ %u\n", - nr_irqs, eic->first_irq); + nr_of_irqs, eic->first_irq); return 0; -- cgit v1.2.3 From 10e0298686be6249b0665465737a6b83446c4d35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 16 Oct 2008 17:04:22 +0200 Subject: io_apic: make irq_mis_count available on 64-bit too Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 6f80dc2f137..b764d7429c6 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2277,9 +2277,7 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } -#ifdef CONFIG_X86_32 atomic_t irq_mis_count; -#endif static void ack_apic_level(unsigned int irq) { -- cgit v1.2.3 From 3baf63a507094992a5bf238ba3bcea71f458b1e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 16 Oct 2008 19:00:57 +0200 Subject: m32r: fix build due to notify_cpu_starting() change fix: arch/m32r/kernel/smpboot.c:501: err or: implicit declaration of function 'notify_cpu_starting' make[2]: *** [arch/m32r/kernel/smpboot.o] Error 1 make[2]: *** Waiting for unfinished jobs.... Signed-off-by: Ingo Molnar --- arch/m32r/kernel/smpboot.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index fc2994811f1..39cb6da72dc 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -40,6 +40,7 @@ */ #include +#include #include #include #include -- cgit v1.2.3 From 3038edabf48f01421c621cb77a712b446d3a5d67 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Oct 2008 01:26:27 +0200 Subject: x86 ACPI: fix breakage of resume on 64-bit UP systems with SMP kernel x86 ACPI: Fix breakage of resume on 64-bit UP systems with SMP kernel We are now using per CPU GDT tables in head_64.S and the original early_gdt_descr.address is invalidated after boot by setup_per_cpu_areas(). This breaks resume from suspend to RAM on x86_64 UP systems using SMP kernels, because this part of head_64.S is also executed during the resume and the invalid GDT address causes the system to crash. It doesn't break on 'true' SMP systems, because early_gdt_descr.address is modified every time native_cpu_up() runs. However, during resume it should point to the GDT of the boot CPU rather than to another CPU's GDT. For this reason, during suspend to RAM always make early_gdt_descr.address point to the boot CPU's GDT. This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11568, which is a regression from 2.6.26. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Signed-off-by: Ingo Molnar Reported-and-tested-by: Andy Wettstein --- arch/x86/kernel/acpi/sleep.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 426e5d91b63..c44cd6dbfa1 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "realmode/wakeup.h" #include "sleep.h" @@ -98,6 +99,8 @@ int acpi_save_state_mem(void) header->trampoline_segment = setup_trampoline() >> 4; #ifdef CONFIG_SMP stack_start.sp = temp_stack + 4096; + early_gdt_descr.address = + (unsigned long)get_cpu_gdt_table(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; -- cgit v1.2.3 From c36167de652f8acdf0b374c78805e662646cd327 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 17 Oct 2008 18:09:15 +0200 Subject: ide: remove dead It has been dead for more than 3 years and needs to be converted to use platform PATA support anyway. Cc: Russell King Signed-off-by: Bartlomiej Zolnierkiewicz --- arch/arm/mach-sa1100/include/mach/ide.h | 75 --------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 arch/arm/mach-sa1100/include/mach/ide.h (limited to 'arch') diff --git a/arch/arm/mach-sa1100/include/mach/ide.h b/arch/arm/mach-sa1100/include/mach/ide.h deleted file mode 100644 index 4c99c8f5e61..00000000000 --- a/arch/arm/mach-sa1100/include/mach/ide.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * arch/arm/mach-sa1100/include/mach/ide.h - * - * Copyright (c) 1998 Hugo Fiennes & Nicolas Pitre - * - * 18-aug-2000: Cleanup by Erik Mouw (J.A.K.Mouw@its.tudelft.nl) - * Get rid of the special ide_init_hwif_ports() functions - * and make a generalised function that can be used by all - * architectures. - */ - -#include -#include -#include - -#error "This code is broken and needs update to match with current ide support" - - -/* - * Set up a hw structure for a specified data port, control port and IRQ. - * This should follow whatever the default interface uses. - */ -static inline void ide_init_hwif_ports(hw_regs_t *hw, unsigned long data_port, - unsigned long ctrl_port, int *irq) -{ - unsigned long reg = data_port; - int i; - int regincr = 1; - - /* The Empeg board has the first two address lines unused */ - if (machine_is_empeg()) - regincr = 1 << 2; - - /* The LART doesn't use A0 for IDE */ - if (machine_is_lart()) - regincr = 1 << 1; - - memset(hw, 0, sizeof(*hw)); - - for (i = 0; i <= 7; i++) { - hw->io_ports_array[i] = reg; - reg += regincr; - } - - hw->io_ports.ctl_addr = ctrl_port; - - if (irq) - *irq = 0; -} - -/* - * This registers the standard ports for this architecture with the IDE - * driver. - */ -static __inline__ void -ide_init_default_hwifs(void) -{ - if (machine_is_lart()) { -#ifdef CONFIG_SA1100_LART - hw_regs_t hw; - - /* Enable GPIO as interrupt line */ - GPDR &= ~LART_GPIO_IDE; - set_irq_type(LART_IRQ_IDE, IRQ_TYPE_EDGE_RISING); - - /* set PCMCIA interface timing */ - MECR = 0x00060006; - - /* init the interface */ - ide_init_hwif_ports(&hw, PCMCIA_IO_0_BASE + 0x0000, PCMCIA_IO_0_BASE + 0x1000, NULL); - hw.irq = LART_IRQ_IDE; - ide_register_hw(&hw); -#endif - } -} -- cgit v1.2.3 From 81e192d6ce303b6792aa38ff35f41a1a7357f23a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 17 Oct 2008 18:48:36 +0000 Subject: parisc: convert to generic compat_sys_ptrace This patch does the compat_sys_ptrace conversion for parisc. In addition it does convert the parisc ptrace code to use the architecture-independent ptrace infrastructure instead of own coding. Signed-off-by: Helge Deller Signed-off-by: Kyle McMartin --- arch/parisc/include/asm/ptrace.h | 10 + arch/parisc/kernel/ptrace.c | 429 +++++++++++++++---------------------- arch/parisc/kernel/syscall_table.S | 2 +- 3 files changed, 183 insertions(+), 258 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h index 3e94c5d85ff..afa5333187b 100644 --- a/arch/parisc/include/asm/ptrace.h +++ b/arch/parisc/include/asm/ptrace.h @@ -47,6 +47,16 @@ struct pt_regs { #define task_regs(task) ((struct pt_regs *) ((char *)(task) + TASK_REGS)) +#define __ARCH_WANT_COMPAT_SYS_PTRACE + +struct task_struct; +#define arch_has_single_step() 1 +void user_disable_single_step(struct task_struct *task); +void user_enable_single_step(struct task_struct *task); + +#define arch_has_block_step() 1 +void user_enable_block_step(struct task_struct *task); + /* XXX should we use iaoq[1] or iaoq[0] ? */ #define user_mode(regs) (((regs)->iaoq[0] & 3) ? 1 : 0) #define user_space(regs) (((regs)->iasq[1] != 0) ? 1 : 0) diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 49c63797078..90904f9dfc5 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Hewlett-Packard Co, Linuxcare Inc. * Copyright (C) 2000 Matthew Wilcox * Copyright (C) 2000 David Huggins-Daines + * Copyright (C) 2008 Helge Deller */ #include @@ -27,15 +28,149 @@ /* PSW bits we allow the debugger to modify */ #define USER_PSW_BITS (PSW_N | PSW_V | PSW_CB) -#undef DEBUG_PTRACE +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure single step bits etc are not set. + */ +void ptrace_disable(struct task_struct *task) +{ + task->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP); -#ifdef DEBUG_PTRACE -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif + /* make sure the trap bits are not set */ + pa_psw(task)->r = 0; + pa_psw(task)->t = 0; + pa_psw(task)->h = 0; + pa_psw(task)->l = 0; +} + +/* + * The following functions are called by ptrace_resume() when + * enabling or disabling single/block tracing. + */ +void user_disable_single_step(struct task_struct *task) +{ + ptrace_disable(task); +} + +void user_enable_single_step(struct task_struct *task) +{ + task->ptrace &= ~PT_BLOCKSTEP; + task->ptrace |= PT_SINGLESTEP; + + if (pa_psw(task)->n) { + struct siginfo si; + + /* Nullified, just crank over the queue. */ + task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1]; + task_regs(task)->iasq[0] = task_regs(task)->iasq[1]; + task_regs(task)->iaoq[1] = task_regs(task)->iaoq[0] + 4; + pa_psw(task)->n = 0; + pa_psw(task)->x = 0; + pa_psw(task)->y = 0; + pa_psw(task)->z = 0; + pa_psw(task)->b = 0; + ptrace_disable(task); + /* Don't wake up the task, but let the + parent know something happened. */ + si.si_code = TRAP_TRACE; + si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3); + si.si_signo = SIGTRAP; + si.si_errno = 0; + force_sig_info(SIGTRAP, &si, task); + /* notify_parent(task, SIGCHLD); */ + return; + } + + /* Enable recovery counter traps. The recovery counter + * itself will be set to zero on a task switch. If the + * task is suspended on a syscall then the syscall return + * path will overwrite the recovery counter with a suitable + * value such that it traps once back in user space. We + * disable interrupts in the tasks PSW here also, to avoid + * interrupts while the recovery counter is decrementing. + */ + pa_psw(task)->r = 1; + pa_psw(task)->t = 0; + pa_psw(task)->h = 0; + pa_psw(task)->l = 0; +} + +void user_enable_block_step(struct task_struct *task) +{ + task->ptrace &= ~PT_SINGLESTEP; + task->ptrace |= PT_BLOCKSTEP; + + /* Enable taken branch trap. */ + pa_psw(task)->r = 0; + pa_psw(task)->t = 1; + pa_psw(task)->h = 0; + pa_psw(task)->l = 0; +} + +long arch_ptrace(struct task_struct *child, long request, long addr, long data) +{ + unsigned long tmp; + long ret = -EIO; -#ifdef CONFIG_64BIT + switch (request) { + + /* Read the word at location addr in the USER area. For ptraced + processes, the kernel saves all regs on a syscall. */ + case PTRACE_PEEKUSR: + if ((addr & (sizeof(long)-1)) || + (unsigned long) addr >= sizeof(struct pt_regs)) + break; + tmp = *(unsigned long *) ((char *) task_regs(child) + addr); + ret = put_user(tmp, (unsigned long *) data); + break; + + /* Write the word at location addr in the USER area. This will need + to change when the kernel no longer saves all regs on a syscall. + FIXME. There is a problem at the moment in that r3-r18 are only + saved if the process is ptraced on syscall entry, and even then + those values are overwritten by actual register values on syscall + exit. */ + case PTRACE_POKEUSR: + /* Some register values written here may be ignored in + * entry.S:syscall_restore_rfi; e.g. iaoq is written with + * r31/r31+4, and not with the values in pt_regs. + */ + if (addr == PT_PSW) { + /* Allow writing to Nullify, Divide-step-correction, + * and carry/borrow bits. + * BEWARE, if you set N, and then single step, it won't + * stop on the nullified instruction. + */ + data &= USER_PSW_BITS; + task_regs(child)->gr[0] &= ~USER_PSW_BITS; + task_regs(child)->gr[0] |= data; + ret = 0; + break; + } + + if ((addr & (sizeof(long)-1)) || + (unsigned long) addr >= sizeof(struct pt_regs)) + break; + if ((addr >= PT_GR1 && addr <= PT_GR31) || + addr == PT_IAOQ0 || addr == PT_IAOQ1 || + (addr >= PT_FR0 && addr <= PT_FR31 + 4) || + addr == PT_SAR) { + *(unsigned long *) ((char *) task_regs(child) + addr) = data; + ret = 0; + } + break; + + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + + +#ifdef CONFIG_COMPAT /* This function is needed to translate 32 bit pt_regs offsets in to * 64 bit pt_regs offsets. For example, a 32 bit gdb under a 64 bit kernel @@ -61,106 +196,25 @@ static long translate_usr_offset(long offset) else return -1; } -#endif -/* - * Called by kernel/ptrace.c when detaching.. - * - * Make sure single step bits etc are not set. - */ -void ptrace_disable(struct task_struct *child) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t addr, compat_ulong_t data) { - /* make sure the trap bits are not set */ - pa_psw(child)->r = 0; - pa_psw(child)->t = 0; - pa_psw(child)->h = 0; - pa_psw(child)->l = 0; -} - -long arch_ptrace(struct task_struct *child, long request, long addr, long data) -{ - long ret; -#ifdef DEBUG_PTRACE - long oaddr=addr, odata=data; -#endif + compat_uint_t tmp; + long ret = -EIO; switch (request) { - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: { -#ifdef CONFIG_64BIT - if (__is_compat_task(child)) { - int copied; - unsigned int tmp; - - addr &= 0xffffffffL; - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - goto out_tsk; - ret = put_user(tmp,(unsigned int *) data); - DBG("sys_ptrace(PEEK%s, %d, %lx, %lx) returning %ld, data %x\n", - request == PTRACE_PEEKTEXT ? "TEXT" : "DATA", - pid, oaddr, odata, ret, tmp); - } - else -#endif - ret = generic_ptrace_peekdata(child, addr, data); - goto out_tsk; - } - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = 0; -#ifdef CONFIG_64BIT - if (__is_compat_task(child)) { - unsigned int tmp = (unsigned int)data; - DBG("sys_ptrace(POKE%s, %d, %lx, %lx)\n", - request == PTRACE_POKETEXT ? "TEXT" : "DATA", - pid, oaddr, odata); - addr &= 0xffffffffL; - if (access_process_vm(child, addr, &tmp, sizeof(tmp), 1) == sizeof(tmp)) - goto out_tsk; - } - else -#endif - { - if (access_process_vm(child, addr, &data, sizeof(data), 1) == sizeof(data)) - goto out_tsk; - } - ret = -EIO; - goto out_tsk; - - /* Read the word at location addr in the USER area. For ptraced - processes, the kernel saves all regs on a syscall. */ - case PTRACE_PEEKUSR: { - ret = -EIO; -#ifdef CONFIG_64BIT - if (__is_compat_task(child)) { - unsigned int tmp; - - if (addr & (sizeof(int)-1)) - goto out_tsk; - if ((addr = translate_usr_offset(addr)) < 0) - goto out_tsk; - - tmp = *(unsigned int *) ((char *) task_regs(child) + addr); - ret = put_user(tmp, (unsigned int *) data); - DBG("sys_ptrace(PEEKUSR, %d, %lx, %lx) returning %ld, addr %lx, data %x\n", - pid, oaddr, odata, ret, addr, tmp); - } - else -#endif - { - unsigned long tmp; + case PTRACE_PEEKUSR: + if (addr & (sizeof(compat_uint_t)-1)) + break; + addr = translate_usr_offset(addr); + if (addr < 0) + break; - if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs)) - goto out_tsk; - tmp = *(unsigned long *) ((char *) task_regs(child) + addr); - ret = put_user(tmp, (unsigned long *) data); - } - goto out_tsk; - } + tmp = *(compat_uint_t *) ((char *) task_regs(child) + addr); + ret = put_user(tmp, (compat_uint_t *) (unsigned long) data); + break; /* Write the word at location addr in the USER area. This will need to change when the kernel no longer saves all regs on a syscall. @@ -169,185 +223,46 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) those values are overwritten by actual register values on syscall exit. */ case PTRACE_POKEUSR: - ret = -EIO; /* Some register values written here may be ignored in * entry.S:syscall_restore_rfi; e.g. iaoq is written with * r31/r31+4, and not with the values in pt_regs. */ - /* PT_PSW=0, so this is valid for 32 bit processes under 64 - * bit kernels. - */ if (addr == PT_PSW) { - /* PT_PSW=0, so this is valid for 32 bit processes - * under 64 bit kernels. - * - * Allow writing to Nullify, Divide-step-correction, - * and carry/borrow bits. - * BEWARE, if you set N, and then single step, it won't - * stop on the nullified instruction. + /* Since PT_PSW==0, it is valid for 32 bit processes + * under 64 bit kernels as well. */ - DBG("sys_ptrace(POKEUSR, %d, %lx, %lx)\n", - pid, oaddr, odata); - data &= USER_PSW_BITS; - task_regs(child)->gr[0] &= ~USER_PSW_BITS; - task_regs(child)->gr[0] |= data; - ret = 0; - goto out_tsk; - } -#ifdef CONFIG_64BIT - if (__is_compat_task(child)) { - if (addr & (sizeof(int)-1)) - goto out_tsk; - if ((addr = translate_usr_offset(addr)) < 0) - goto out_tsk; - DBG("sys_ptrace(POKEUSR, %d, %lx, %lx) addr %lx\n", - pid, oaddr, odata, addr); + ret = arch_ptrace(child, request, addr, data); + } else { + if (addr & (sizeof(compat_uint_t)-1)) + break; + addr = translate_usr_offset(addr); + if (addr < 0) + break; if (addr >= PT_FR0 && addr <= PT_FR31 + 4) { /* Special case, fp regs are 64 bits anyway */ - *(unsigned int *) ((char *) task_regs(child) + addr) = data; + *(__u64 *) ((char *) task_regs(child) + addr) = data; ret = 0; } else if ((addr >= PT_GR1+4 && addr <= PT_GR31+4) || addr == PT_IAOQ0+4 || addr == PT_IAOQ1+4 || addr == PT_SAR+4) { /* Zero the top 32 bits */ - *(unsigned int *) ((char *) task_regs(child) + addr - 4) = 0; - *(unsigned int *) ((char *) task_regs(child) + addr) = data; + *(__u32 *) ((char *) task_regs(child) + addr - 4) = 0; + *(__u32 *) ((char *) task_regs(child) + addr) = data; ret = 0; } - goto out_tsk; } - else -#endif - { - if ((addr & (sizeof(long)-1)) || (unsigned long) addr >= sizeof(struct pt_regs)) - goto out_tsk; - if ((addr >= PT_GR1 && addr <= PT_GR31) || - addr == PT_IAOQ0 || addr == PT_IAOQ1 || - (addr >= PT_FR0 && addr <= PT_FR31 + 4) || - addr == PT_SAR) { - *(unsigned long *) ((char *) task_regs(child) + addr) = data; - ret = 0; - } - goto out_tsk; - } - - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: - ret = -EIO; - DBG("sys_ptrace(%s)\n", - request == PTRACE_SYSCALL ? "SYSCALL" : "CONT"); - if (!valid_signal(data)) - goto out_tsk; - child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP); - if (request == PTRACE_SYSCALL) { - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } else { - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - child->exit_code = data; - goto out_wake_notrap; - - case PTRACE_KILL: - /* - * make the child exit. Best I can do is send it a - * sigkill. perhaps it should be put in the status - * that it wants to exit. - */ - ret = 0; - DBG("sys_ptrace(KILL)\n"); - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - goto out_tsk; - child->exit_code = SIGKILL; - goto out_wake_notrap; - - case PTRACE_SINGLEBLOCK: - DBG("sys_ptrace(SINGLEBLOCK)\n"); - ret = -EIO; - if (!valid_signal(data)) - goto out_tsk; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->ptrace &= ~PT_SINGLESTEP; - child->ptrace |= PT_BLOCKSTEP; - child->exit_code = data; - - /* Enable taken branch trap. */ - pa_psw(child)->r = 0; - pa_psw(child)->t = 1; - pa_psw(child)->h = 0; - pa_psw(child)->l = 0; - goto out_wake; - - case PTRACE_SINGLESTEP: - DBG("sys_ptrace(SINGLESTEP)\n"); - ret = -EIO; - if (!valid_signal(data)) - goto out_tsk; - - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->ptrace &= ~PT_BLOCKSTEP; - child->ptrace |= PT_SINGLESTEP; - child->exit_code = data; - - if (pa_psw(child)->n) { - struct siginfo si; - - /* Nullified, just crank over the queue. */ - task_regs(child)->iaoq[0] = task_regs(child)->iaoq[1]; - task_regs(child)->iasq[0] = task_regs(child)->iasq[1]; - task_regs(child)->iaoq[1] = task_regs(child)->iaoq[0] + 4; - pa_psw(child)->n = 0; - pa_psw(child)->x = 0; - pa_psw(child)->y = 0; - pa_psw(child)->z = 0; - pa_psw(child)->b = 0; - ptrace_disable(child); - /* Don't wake up the child, but let the - parent know something happened. */ - si.si_code = TRAP_TRACE; - si.si_addr = (void __user *) (task_regs(child)->iaoq[0] & ~3); - si.si_signo = SIGTRAP; - si.si_errno = 0; - force_sig_info(SIGTRAP, &si, child); - //notify_parent(child, SIGCHLD); - //ret = 0; - goto out_wake; - } - - /* Enable recovery counter traps. The recovery counter - * itself will be set to zero on a task switch. If the - * task is suspended on a syscall then the syscall return - * path will overwrite the recovery counter with a suitable - * value such that it traps once back in user space. We - * disable interrupts in the childs PSW here also, to avoid - * interrupts while the recovery counter is decrementing. - */ - pa_psw(child)->r = 1; - pa_psw(child)->t = 0; - pa_psw(child)->h = 0; - pa_psw(child)->l = 0; - /* give it a chance to run. */ - goto out_wake; - - case PTRACE_GETEVENTMSG: - ret = put_user(child->ptrace_message, (unsigned int __user *) data); - goto out_tsk; + break; default: - ret = ptrace_request(child, request, addr, data); - goto out_tsk; + ret = compat_ptrace_request(child, request, addr, data); + break; } -out_wake_notrap: - ptrace_disable(child); -out_wake: - wake_up_process(child); - ret = 0; -out_tsk: - DBG("arch_ptrace(%ld, %d, %lx, %lx) returning %ld\n", - request, pid, oaddr, odata, ret); return ret; } +#endif + void syscall_trace(void) { diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index 6084667eacf..4ed01f2d655 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -87,7 +87,7 @@ ENTRY_SAME(setuid) ENTRY_SAME(getuid) ENTRY_COMP(stime) /* 25 */ - ENTRY_SAME(ptrace) + ENTRY_COMP(ptrace) ENTRY_SAME(alarm) /* see stat comment */ ENTRY_COMP(newfstat) -- cgit v1.2.3 From d768cb6929773060171eee8397a63883f60ddc07 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Aug 2008 15:44:59 -0600 Subject: x86/PCI: follow lspci device/vendor style Use "[%04x:%04x]" for PCI vendor/device IDs to follow the format used by lspci(8). Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 006599db0dc..52a1de1128c 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq if (pirq <= 4) irq = read_config_nybble(router, 0x56, pirq - 1); dev_info(&dev->dev, - "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", + "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n", dev->vendor, dev->device, pirq, irq); return irq; } @@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { dev_info(&dev->dev, - "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", + "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n", dev->vendor, dev->device, pirq, irq); if (pirq <= 4) write_config_nybble(router, 0x56, pirq - 1, irq); @@ -823,7 +823,7 @@ static void __init pirq_find_router(struct irq_router *r) r->get = NULL; r->set = NULL; - DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n", rt->rtr_vendor, rt->rtr_device); pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); @@ -843,7 +843,7 @@ static void __init pirq_find_router(struct irq_router *r) h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } - dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n", pirq_router.name, pirq_router_dev->vendor, pirq_router_dev->device); -- cgit v1.2.3 From 37a84ec668ba251ae02cf2c2c664baf6b247ae1f Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Thu, 28 Aug 2008 15:40:59 -0700 Subject: x86/PCI: irq and pci_ids patch for Intel Ibex Peak DeviceIDs This patch updates the Intel Ibex Peak (PCH) LPC and SMBus Controller DeviceIDs. The LPC Controller ID is set by Firmware within the range of 0x3b00-3b1f. This range is included in pci_ids.h using min and max values, and irq.c now has code to handle the range (in lieu of 32 additions to a SWITCH statement). The SMBus Controller ID is a fixed-value and will not change. Signed-off-by: Seth Heasley Acked-by: Jean Delvare Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 52a1de1128c..bf69dbe08bf 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: - case PCI_DEVICE_ID_INTEL_PCH_0: - case PCI_DEVICE_ID_INTEL_PCH_1: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; return 1; } + + if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && + (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) { + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } + return 0; } -- cgit v1.2.3 From f19aeb1f3638b7bb4ca21eb361f004fac2bfe259 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 3 Oct 2008 19:49:32 +1000 Subject: PCI: Add ability to mmap legacy_io on some platforms This adds the ability to mmap legacy IO space to the legacy_io files in sysfs on platforms that support it. This will allow to clean up X to use this instead of /dev/mem for legacy IO accesses such as those performed by Int10. While at it I moved pci_create/remove_legacy_files() to pci-sysfs.c where I think they belong, thus making more things statis in there and cleaned up some spurrious prototypes in the ia64 pci.h file Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- arch/ia64/include/asm/pci.h | 12 ++---------- arch/ia64/pci/pci.c | 7 ++++++- 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index 0149097b736..ce342fb7424 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -95,16 +95,8 @@ extern int pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); #define HAVE_PCI_LEGACY extern int pci_mmap_legacy_page_range(struct pci_bus *bus, - struct vm_area_struct *vma); -extern ssize_t pci_read_legacy_io(struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count); -extern ssize_t pci_write_legacy_io(struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t off, size_t count); -extern int pci_mmap_legacy_mem(struct kobject *kobj, - struct bin_attribute *attr, - struct vm_area_struct *vma); + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); #define pci_get_legacy_mem platform_pci_get_legacy_mem #define pci_legacy_read platform_pci_legacy_read diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 7545037a862..211fcfd115f 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -614,12 +614,17 @@ char *ia64_pci_get_legacy_mem(struct pci_bus *bus) * vector to get the base address. */ int -pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma) +pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) { unsigned long size = vma->vm_end - vma->vm_start; pgprot_t prot; char *addr; + /* We only support mmap'ing of legacy memory space */ + if (mmap_state != pci_mmap_mem) + return -ENOSYS; + /* * Avoid attribute aliasing. See Documentation/ia64/aliasing.txt * for more details. -- cgit v1.2.3 From e9f82cb75096ae30658a72d473bf170bf4d3bb2e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 14 Oct 2008 11:55:31 +1100 Subject: powerpc/PCI: Add legacy PCI access via sysfs This patch adds support for legacy_io and legacy_mem files in bus class directories in sysfs for powerpc Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- arch/powerpc/include/asm/pci-bridge.h | 7 ++ arch/powerpc/include/asm/pci.h | 11 +++ arch/powerpc/kernel/pci-common.c | 136 +++++++++++++++++++++++++++++++++- 3 files changed, 153 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index ae2ea803a0f..9047af7baa6 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -74,6 +74,13 @@ struct pci_controller { unsigned long pci_io_size; #endif + /* Some machines have a special region to forward the ISA + * "memory" cycles such as VGA memory regions. Left to 0 + * if unsupported + */ + resource_size_t isa_mem_phys; + resource_size_t isa_mem_size; + struct pci_ops *ops; unsigned int __iomem *cfg_addr; void __iomem *cfg_data; diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 0e52c7828ea..39d547fde95 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -123,6 +123,16 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, /* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */ #define HAVE_PCI_MMAP 1 +extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, + size_t count); +extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, + size_t count); +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); + +#define HAVE_PCI_LEGACY 1 + #if defined(CONFIG_PPC64) || defined(CONFIG_NOT_COHERENT_CACHE) /* * For 64-bit kernels, pci_unmap_{single,page} is not a nop. @@ -226,5 +236,6 @@ extern void pci_resource_to_user(const struct pci_dev *dev, int bar, extern void pcibios_do_bus_setup(struct pci_bus *bus); extern void pcibios_fixup_of_probed_bus(struct pci_bus *bus); + #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_PCI_H */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 01ce8c38bae..3815d84a1ef 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -451,7 +451,8 @@ pgprot_t pci_phys_mem_access_prot(struct file *file, pci_dev_put(pdev); } - DBG("non-PCI map for %lx, prot: %lx\n", offset, prot); + DBG("non-PCI map for %llx, prot: %lx\n", + (unsigned long long)offset, prot); return __pgprot(prot); } @@ -490,6 +491,131 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return ret; } +/* This provides legacy IO read access on a bus */ +int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size) +{ + unsigned long offset; + struct pci_controller *hose = pci_bus_to_host(bus); + struct resource *rp = &hose->io_resource; + void __iomem *addr; + + /* Check if port can be supported by that bus. We only check + * the ranges of the PHB though, not the bus itself as the rules + * for forwarding legacy cycles down bridges are not our problem + * here. So if the host bridge supports it, we do it. + */ + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + offset += port; + + if (!(rp->flags & IORESOURCE_IO)) + return -ENXIO; + if (offset < rp->start || (offset + size) > rp->end) + return -ENXIO; + addr = hose->io_base_virt + port; + + switch(size) { + case 1: + *((u8 *)val) = in_8(addr); + return 1; + case 2: + if (port & 1) + return -EINVAL; + *((u16 *)val) = in_le16(addr); + return 2; + case 4: + if (port & 3) + return -EINVAL; + *((u32 *)val) = in_le32(addr); + return 4; + } + return -EINVAL; +} + +/* This provides legacy IO write access on a bus */ +int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, size_t size) +{ + unsigned long offset; + struct pci_controller *hose = pci_bus_to_host(bus); + struct resource *rp = &hose->io_resource; + void __iomem *addr; + + /* Check if port can be supported by that bus. We only check + * the ranges of the PHB though, not the bus itself as the rules + * for forwarding legacy cycles down bridges are not our problem + * here. So if the host bridge supports it, we do it. + */ + offset = (unsigned long)hose->io_base_virt - _IO_BASE; + offset += port; + + if (!(rp->flags & IORESOURCE_IO)) + return -ENXIO; + if (offset < rp->start || (offset + size) > rp->end) + return -ENXIO; + addr = hose->io_base_virt + port; + + /* WARNING: The generic code is idiotic. It gets passed a pointer + * to what can be a 1, 2 or 4 byte quantity and always reads that + * as a u32, which means that we have to correct the location of + * the data read within those 32 bits for size 1 and 2 + */ + switch(size) { + case 1: + out_8(addr, val >> 24); + return 1; + case 2: + if (port & 1) + return -EINVAL; + out_le16(addr, val >> 16); + return 2; + case 4: + if (port & 3) + return -EINVAL; + out_le32(addr, val); + return 4; + } + return -EINVAL; +} + +/* This provides legacy IO or memory mmap access on a bus */ +int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + struct pci_controller *hose = pci_bus_to_host(bus); + resource_size_t offset = + ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT; + resource_size_t size = vma->vm_end - vma->vm_start; + struct resource *rp; + + pr_debug("pci_mmap_legacy_page_range(%04x:%02x, %s @%llx..%llx)\n", + pci_domain_nr(bus), bus->number, + mmap_state == pci_mmap_mem ? "MEM" : "IO", + (unsigned long long)offset, + (unsigned long long)(offset + size - 1)); + + if (mmap_state == pci_mmap_mem) { + if ((offset + size) > hose->isa_mem_size) + return -ENXIO; + offset += hose->isa_mem_phys; + } else { + unsigned long io_offset = (unsigned long)hose->io_base_virt - _IO_BASE; + unsigned long roffset = offset + io_offset; + rp = &hose->io_resource; + if (!(rp->flags & IORESOURCE_IO)) + return -ENXIO; + if (roffset < rp->start || (roffset + size) > rp->end) + return -ENXIO; + offset += hose->io_base_phys; + } + pr_debug(" -> mapping phys %llx\n", (unsigned long long)offset); + + vma->vm_pgoff = offset >> PAGE_SHIFT; + vma->vm_page_prot |= _PAGE_NO_CACHE | _PAGE_GUARDED; + return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + void pci_resource_to_user(const struct pci_dev *dev, int bar, const struct resource *rsrc, resource_size_t *start, resource_size_t *end) @@ -592,6 +718,12 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, cpu_addr = of_translate_address(dev, ranges + 3); size = of_read_number(ranges + pna + 3, 2); ranges += np; + + /* If we failed translation or got a zero-sized region + * (some FW try to feed us with non sensical zero sized regions + * such as power3 which look like some kind of attempt at exposing + * the VGA memory hole) + */ if (cpu_addr == OF_BAD_ADDR || size == 0) continue; @@ -665,6 +797,8 @@ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose, isa_hole = memno; if (primary || isa_mem_base == 0) isa_mem_base = cpu_addr; + hose->isa_mem_phys = cpu_addr; + hose->isa_mem_size = size; } /* We get the PCI/Mem offset from the first range or -- cgit v1.2.3 From f4432c5caec5fa95ea7eefd00f8e6cee17e2e023 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 20 Oct 2008 13:31:45 -0400 Subject: Update email addresses. Update assorted email addresses and related info to point to a single current, valid address. additionally - trivial CREDITS entry updates. (Not that this file means much any more) - remove arjans dead redhat.com address from powernow driver Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 4 ++-- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2 +- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 4 ++-- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2 +- arch/x86/kernel/cpu/mcheck/k7.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_32.c | 2 +- arch/x86/kernel/cpu/mcheck/non-fatal.c | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 06fcce516d5..b0461856acf 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -1,5 +1,5 @@ /* - * (C) 2001-2004 Dave Jones. + * (C) 2001-2004 Dave Jones. * (C) 2002 Padraig Brady. * * Licensed under the terms of the GNU GPL License version 2. @@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); module_param(revid_errata, int, 0644); MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); -MODULE_AUTHOR ("Dave Jones "); +MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index b5ced806a31..c1ac5790c63 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void) } -MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_AUTHOR("Arjan van de Ven, Dave Jones , Dominik Brodowski "); MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 0a61159d7b7..7c7d56b4313 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -1,6 +1,6 @@ /* * AMD K7 Powernow driver. - * (C) 2003 Dave Jones on behalf of SuSE Labs. + * (C) 2003 Dave Jones on behalf of SuSE Labs. * (C) 2003-2004 Dave Jones * * Licensed under the terms of the GNU GPL License version 2. @@ -692,7 +692,7 @@ static void __exit powernow_exit (void) module_param(acpi_force, int, 0444); MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); -MODULE_AUTHOR ("Dave Jones "); +MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 84bb395038d..008d23ba491 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -7,7 +7,7 @@ * Support : mark.langsdorf@amd.com * * Based on the powernow-k7.c module written by Dave Jones. - * (C) 2003 Dave Jones on behalf of SuSE Labs + * (C) 2003 Dave Jones on behalf of SuSE Labs * (C) 2004 Dominik Brodowski * (C) 2004 Pavel Machek * Licensed under the terms of the GNU GPL License version 2. diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 191f7263c61..04d0376b64b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -431,7 +431,7 @@ static void __exit speedstep_exit(void) } -MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); +MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index f390c9f6635..dd3af6e7b39 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -1,6 +1,6 @@ /* - * Athlon/Hammer specific Machine Check Exception Reporting - * (C) Copyright 2002 Dave Jones + * Athlon specific Machine Check Exception Reporting + * (C) Copyright 2002 Dave Jones */ #include diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 774d87cfd8c..0ebf3fc6a61 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -1,6 +1,6 @@ /* * mce.c - x86 Machine Check Exception Reporting - * (c) 2002 Alan Cox , Dave Jones + * (c) 2002 Alan Cox , Dave Jones */ #include diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index cc1fccdd31e..a74af128efc 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -1,7 +1,7 @@ /* * Non Fatal Machine Check Exception Reporting * - * (C) Copyright 2002 Dave Jones. + * (C) Copyright 2002 Dave Jones. * * This file contains routines to check for non-fatal MCEs every 15s * -- cgit v1.2.3 From 653c03168348ac7aebb969931f87ba281749d7dd Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Mon, 20 Oct 2008 16:00:08 -0700 Subject: misc: replace remaining __FUNCTION__ with __func__ __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Acked-by: Randy Dunlap Signed-off-by: Linus Torvalds --- arch/arm/mach-iop13xx/include/mach/time.h | 4 ++-- arch/arm/mach-pxa/include/mach/zylonite.h | 4 ++-- arch/powerpc/include/asm/ptrace.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-iop13xx/include/mach/time.h b/arch/arm/mach-iop13xx/include/mach/time.h index 49213d9d7ca..d6d52527589 100644 --- a/arch/arm/mach-iop13xx/include/mach/time.h +++ b/arch/arm/mach-iop13xx/include/mach/time.h @@ -41,7 +41,7 @@ static inline unsigned long iop13xx_core_freq(void) return 1200000000; default: printk("%s: warning unknown frequency, defaulting to 800Mhz\n", - __FUNCTION__); + __func__); } return 800000000; @@ -60,7 +60,7 @@ static inline unsigned long iop13xx_xsi_bus_ratio(void) return 4; default: printk("%s: warning unknown ratio, defaulting to 2\n", - __FUNCTION__); + __func__); } return 2; diff --git a/arch/arm/mach-pxa/include/mach/zylonite.h b/arch/arm/mach-pxa/include/mach/zylonite.h index 0d35ca04731..bf6785adccf 100644 --- a/arch/arm/mach-pxa/include/mach/zylonite.h +++ b/arch/arm/mach-pxa/include/mach/zylonite.h @@ -30,7 +30,7 @@ extern void zylonite_pxa300_init(void); static inline void zylonite_pxa300_init(void) { if (cpu_is_pxa300() || cpu_is_pxa310()) - panic("%s: PXA300/PXA310 not supported\n", __FUNCTION__); + panic("%s: PXA300/PXA310 not supported\n", __func__); } #endif @@ -40,7 +40,7 @@ extern void zylonite_pxa320_init(void); static inline void zylonite_pxa320_init(void) { if (cpu_is_pxa320()) - panic("%s: PXA320 not supported\n", __FUNCTION__); + panic("%s: PXA320 not supported\n", __func__); } #endif diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 734e0754fb9..280a90cc989 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -129,7 +129,7 @@ extern int ptrace_put_reg(struct task_struct *task, int regno, #define CHECK_FULL_REGS(regs) \ do { \ if ((regs)->trap & 1) \ - printk(KERN_CRIT "%s: partial register set\n", __FUNCTION__); \ + printk(KERN_CRIT "%s: partial register set\n", __func__); \ } while (0) #endif /* __powerpc64__ */ -- cgit v1.2.3