From 611b1597680dd4a57896bcca1af0484be463c55e Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 21 Jul 2008 18:49:56 +0300 Subject: x86: fix mmiotrace 8-bit register decoding When SIL, DIL, BPL or SPL registers were used in MMIO, the datum was extracted from AH, BH, CH, or DH, which are incorrect. Signed-off-by: Pekka Paalanen Cc: "Vegard Nossum" Cc: "Steven Rostedt" Cc: proski@gnu.org Cc: "Pekka Enberg" Signed-off-by: Ingo Molnar --- arch/x86/mm/pf_in.c | 121 ++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c index efa1911e20c..df3d5c861cd 100644 --- a/arch/x86/mm/pf_in.c +++ b/arch/x86/mm/pf_in.c @@ -79,25 +79,34 @@ static unsigned int mw32[] = { 0xC7 }; static unsigned int mw64[] = { 0x89, 0x8B }; #endif /* not __i386__ */ -static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, - int *rexr) +struct prefix_bits { + unsigned shorted:1; + unsigned enlarged:1; + unsigned rexr:1; + unsigned rex:1; +}; + +static int skip_prefix(unsigned char *addr, struct prefix_bits *prf) { int i; unsigned char *p = addr; - *shorted = 0; - *enlarged = 0; - *rexr = 0; + prf->shorted = 0; + prf->enlarged = 0; + prf->rexr = 0; + prf->rex = 0; restart: for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { if (*p == prefix_codes[i]) { if (*p == 0x66) - *shorted = 1; + prf->shorted = 1; #ifdef __amd64__ if ((*p & 0xf8) == 0x48) - *enlarged = 1; + prf->enlarged = 1; if ((*p & 0xf4) == 0x44) - *rexr = 1; + prf->rexr = 1; + if ((*p & 0xf0) == 0x40) + prf->rex = 1; #endif p++; goto restart; @@ -135,12 +144,12 @@ enum reason_type get_ins_type(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int shorted, enlarged, rexr; + struct prefix_bits prf; int i; enum reason_type rv = OTHERS; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); CHECK_OP_TYPE(opcode, reg_rop, REG_READ); @@ -156,10 +165,11 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(rw8); i++) @@ -168,7 +178,7 @@ static unsigned int get_ins_reg_width(unsigned long ins_addr) for (i = 0; i < ARRAY_SIZE(rw32); i++) if (rw32[i] == opcode) - return (shorted ? 2 : (enlarged ? 8 : 4)); + return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); return 0; @@ -178,10 +188,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) { unsigned int opcode; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(mw8); i++) @@ -194,11 +205,11 @@ unsigned int get_ins_mem_width(unsigned long ins_addr) for (i = 0; i < ARRAY_SIZE(mw32); i++) if (mw32[i] == opcode) - return shorted ? 2 : 4; + return prf.shorted ? 2 : 4; for (i = 0; i < ARRAY_SIZE(mw64); i++) if (mw64[i] == opcode) - return shorted ? 2 : (enlarged ? 8 : 4); + return prf.shorted ? 2 : (prf.enlarged ? 8 : 4); printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); return 0; @@ -238,7 +249,7 @@ enum { #endif }; -static unsigned char *get_reg_w8(int no, struct pt_regs *regs) +static unsigned char *get_reg_w8(int no, int rex, struct pt_regs *regs) { unsigned char *rv = NULL; @@ -255,18 +266,6 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs) case arg_DL: rv = (unsigned char *)®s->dx; break; - case arg_AH: - rv = 1 + (unsigned char *)®s->ax; - break; - case arg_BH: - rv = 1 + (unsigned char *)®s->bx; - break; - case arg_CH: - rv = 1 + (unsigned char *)®s->cx; - break; - case arg_DH: - rv = 1 + (unsigned char *)®s->dx; - break; #ifdef __amd64__ case arg_R8: rv = (unsigned char *)®s->r8; @@ -294,9 +293,55 @@ static unsigned char *get_reg_w8(int no, struct pt_regs *regs) break; #endif default: - printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); break; } + + if (rv) + return rv; + + if (rex) { + /* + * If REX prefix exists, access low bytes of SI etc. + * instead of AH etc. + */ + switch (no) { + case arg_SI: + rv = (unsigned char *)®s->si; + break; + case arg_DI: + rv = (unsigned char *)®s->di; + break; + case arg_BP: + rv = (unsigned char *)®s->bp; + break; + case arg_SP: + rv = (unsigned char *)®s->sp; + break; + default: + break; + } + } else { + switch (no) { + case arg_AH: + rv = 1 + (unsigned char *)®s->ax; + break; + case arg_BH: + rv = 1 + (unsigned char *)®s->bx; + break; + case arg_CH: + rv = 1 + (unsigned char *)®s->cx; + break; + case arg_DH: + rv = 1 + (unsigned char *)®s->dx; + break; + default: + break; + } + } + + if (!rv) + printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); + return rv; } @@ -368,11 +413,12 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) unsigned char mod_rm; int reg; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; unsigned long rv; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(reg_rop); i++) if (reg_rop[i] == opcode) { @@ -392,10 +438,10 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) do_work: mod_rm = *p; - reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); + reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3); switch (get_ins_reg_width(ins_addr)) { case 1: - return *get_reg_w8(reg, regs); + return *get_reg_w8(reg, prf.rex, regs); case 2: return *(unsigned short *)get_reg_w32(reg, regs); @@ -422,11 +468,12 @@ unsigned long get_ins_imm_val(unsigned long ins_addr) unsigned char mod_rm; unsigned char mod; unsigned char *p; - int i, shorted, enlarged, rexr; + struct prefix_bits prf; + int i; unsigned long rv; p = (unsigned char *)ins_addr; - p += skip_prefix(p, &shorted, &enlarged, &rexr); + p += skip_prefix(p, &prf); p += get_opcode(p, &opcode); for (i = 0; i < ARRAY_SIZE(imm_wop); i++) if (imm_wop[i] == opcode) { -- cgit v1.2.3 From 8b1fa1d7b22f386747c7b78b918d4c680c16066f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 29 Jul 2008 12:36:02 +0200 Subject: ftrace: mark lapic_wd_event() notrace it can be called in the NMI path: [ 0.645999] calling ftrace_dynamic_init+0x0/0xd6 [ 0.647521] ------------[ cut here ]------------ [ 0.647521] WARNING: at kernel/trace/ftrace.c:348 ftrace_record_ip+0x4e/0x252() [ 0.647521] Modules linked in: [ 0.647521] Pid: 15, comm: kstop1 Not tainted 2.6.27-rc1-tip #22686 [ 0.647521] [ 0.647521] Call Trace: [ 0.647521] [] warn_on_slowpath+0x5d/0x84 [ 0.647521] [] ? lapic_wd_event+0xb/0x5c [ 0.647521] [] ftrace_record_ip+0x4e/0x252 [ 0.647521] [] mcount_call+0x5/0x31 [ 0.647521] [] ? lapic_wd_event+0x10/0x5c [ 0.647521] [] nmi_watchdog_tick+0x19d/0x1ad [ 0.647521] [] default_do_nmi+0x75/0x1e3 [ 0.647521] [] do_nmi+0x5d/0x94 [ 0.647521] [] nmi+0xa2/0xc2 [ 0.647521] [] ? check_bytes_and_report+0x11/0xcc [ 0.647521] <> [] ? mcount_call+0x5/0x31 [ 0.647521] [] check_object+0x61/0x1b0 [ 0.647521] [] __slab_free+0x169/0x2ae [ 0.647521] [] ? __cleanup_sighand+0x25/0x27 [ 0.647521] [] ? __cleanup_sighand+0x25/0x27 [ 0.647521] [] kmem_cache_free+0x85/0xb9 [ 0.647521] [] __cleanup_sighand+0x25/0x27 [ 0.647521] [] release_task+0x256/0x339 [ 0.647521] [] do_exit+0x764/0x7ef [ 0.647521] [] __xchg+0x0/0x38 [ 0.647521] [] ? stop_cpu+0x0/0xb2 [ 0.647521] [] ? stop_cpu+0x0/0xb2 [ 0.647521] [] kthread+0x4e/0x7b [ 0.647521] [] child_rip+0xa/0x11 [ 0.647521] [] ? restore_args+0x0/0x30 [ 0.647521] [] ? native_load_tls+0x14/0x2e [ 0.647521] [] ? kthread+0x0/0x7b [ 0.647521] [] ? child_rip+0x0/0x11 [ 0.647521] [ 0.647521] ---[ end trace 4eaa2a86a8e2da22 ]--- [ 0.672032] initcall ftrace_dynamic_init+0x0/0xd6 returned 0 after 19 msecs also mark it no-kprobes while at it. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perfctr-watchdog.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 6bff382094f..9abd48b2267 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -17,6 +17,8 @@ #include #include #include +#include + #include #include @@ -336,7 +338,8 @@ static void single_msr_unreserve(void) release_perfctr_nmi(wd_ops->perfctr); } -static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes +single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { /* start the cycle over again */ write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); @@ -401,7 +404,7 @@ static int setup_p6_watchdog(unsigned nmi_hz) return 1; } -static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { /* * P6 based Pentium M need to re-unmask @@ -605,7 +608,7 @@ static void p4_unreserve(void) release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); } -static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) +static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) { unsigned dummy; /* @@ -784,7 +787,7 @@ unsigned lapic_adjust_nmi_hz(unsigned hz) return hz; } -int lapic_wd_event(unsigned nmi_hz) +int __kprobes lapic_wd_event(unsigned nmi_hz) { struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); u64 ctr; -- cgit v1.2.3 From e4b2b8866121bd06d5f3d9de0f79a04339ffa252 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 15:45:11 -0400 Subject: ftrace: enable using mcount recording on x86 Enable the use of the __mcount_loc infrastructure on x86_64 and i386. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index fc8351f374f..5a34c5427a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -25,6 +25,7 @@ config X86 select HAVE_KPROBES select ARCH_WANT_OPTIONAL_GPIOLIB select HAVE_KRETPROBES + select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) -- cgit v1.2.3 From 0a37605c2261a06d8cafc62dee11374ad676c8c4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 15:45:12 -0400 Subject: ftrace: x86 mcount stub x86 now sets up the mcount locations through the build and no longer needs to record the ip when the function is executed. This patch changes the initial mcount to simply return. There's no need to do any other work. If the ftrace start up test fails, the original mcount will be what everything will use, so having this as fast as possible is a good thing. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 14 -------------- arch/x86/kernel/entry_64.S | 26 -------------------------- arch/x86/kernel/ftrace.c | 14 ++------------ 3 files changed, 2 insertions(+), 52 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index b21fbfaffe3..4e4269c73bb 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1153,20 +1153,6 @@ ENDPROC(xen_failsafe_callback) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) - pushl %eax - pushl %ecx - pushl %edx - movl 0xc(%esp), %eax - subl $MCOUNT_INSN_SIZE, %eax - -.globl mcount_call -mcount_call: - call ftrace_stub - - popl %edx - popl %ecx - popl %eax - ret END(mcount) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1db6ce4314e..09e7145484c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -64,32 +64,6 @@ #ifdef CONFIG_FTRACE #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) - - subq $0x38, %rsp - movq %rax, (%rsp) - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %rsi, 24(%rsp) - movq %rdi, 32(%rsp) - movq %r8, 40(%rsp) - movq %r9, 48(%rsp) - - movq 0x38(%rsp), %rdi - subq $MCOUNT_INSN_SIZE, %rdi - -.globl mcount_call -mcount_call: - call ftrace_stub - - movq 48(%rsp), %r9 - movq 40(%rsp), %r8 - movq 32(%rsp), %rdi - movq 24(%rsp), %rsi - movq 16(%rsp), %rdx - movq 8(%rsp), %rcx - movq (%rsp), %rax - addq $0x38, %rsp - retq END(mcount) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index ab115cd15fd..96aadbfedcc 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -112,18 +112,8 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) notrace int ftrace_mcount_set(unsigned long *data) { - unsigned long ip = (long)(&mcount_call); - unsigned long *addr = data; - unsigned char old[MCOUNT_INSN_SIZE], *new; - - /* - * Replace the mcount stub with a pointer to the - * ip recorder function. - */ - memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); - new = ftrace_call_replace(ip, *addr); - *addr = ftrace_modify_code(ip, old, new); - + /* mcount is initialized as a nop */ + *data = 0; return 0; } -- cgit v1.2.3 From 732f3ca7d4ba3c1be8d051d52302ef441ee7748b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Aug 2008 18:05:05 -0400 Subject: ftrace: use only 5 byte nops for x86 Mathieu Desnoyers revealed a bug in the original code. The nop that is used to relpace the mcount caller can be a two part nop. This runs the risk where a process can be preempted after executing the first nop, but before the second part of the nop. The ftrace code calls kstop_machine to keep multiple CPUs from executing code that is being modified, but it does not protect against a task preempting in the middle of a two part nop. If the above preemption happens and the tracer is enabled, after the kstop_machine runs, all those nops will be calls to the trace function. If the preempted process that was preempted between the two nops is executed again, it will execute half of the call to the trace function, and this might crash the system. This patch instead uses what both the latest Intel and AMD spec suggests. That is the P6_NOP5 sequence of "0x0f 0x1f 0x44 0x00 0x00". Note, some older CPUs and QEMU might fault on this nop, so this nop is executed with fault handling first. If it detects a fault, it will then use the code "0x66 0x66 0x66 0x66 0x90". If that faults, it will then default to a simple "jmp 1f; .byte 0x00 0x00 0x00; 1:". The jmp is not optimal but will do if the first two can not be executed. TODO: Examine the cpuid to determine the nop to use. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 68 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 96aadbfedcc..4151c91254e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -16,8 +16,8 @@ #include #include -#include #include +#include /* Long is fine, even if it is only 4 bytes ;-) */ @@ -119,13 +119,67 @@ notrace int ftrace_mcount_set(unsigned long *data) int __init ftrace_dyn_arch_init(void *data) { - const unsigned char *const *noptable = find_nop_table(); - - /* This is running in kstop_machine */ - - ftrace_mcount_set(data); + extern const unsigned char ftrace_test_p6nop[]; + extern const unsigned char ftrace_test_nop5[]; + extern const unsigned char ftrace_test_jmp[]; + int faulted = 0; - ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; + /* + * There is no good nop for all x86 archs. + * We will default to using the P6_NOP5, but first we + * will test to make sure that the nop will actually + * work on this CPU. If it faults, we will then + * go to a lesser efficient 5 byte nop. If that fails + * we then just use a jmp as our nop. This isn't the most + * efficient nop, but we can not use a multi part nop + * since we would then risk being preempted in the middle + * of that nop, and if we enabled tracing then, it might + * cause a system crash. + * + * TODO: check the cpuid to determine the best nop. + */ + asm volatile ( + "jmp ftrace_test_jmp\n" + /* This code needs to stay around */ + ".section .text, \"ax\"\n" + "ftrace_test_jmp:" + "jmp ftrace_test_p6nop\n" + ".byte 0x00,0x00,0x00\n" /* 2 byte jmp + 3 bytes */ + "ftrace_test_p6nop:" + P6_NOP5 + "jmp 1f\n" + "ftrace_test_nop5:" + ".byte 0x66,0x66,0x66,0x66,0x90\n" + "jmp 1f\n" + ".previous\n" + "1:" + ".section .fixup, \"ax\"\n" + "2: movl $1, %0\n" + " jmp ftrace_test_nop5\n" + "3: movl $2, %0\n" + " jmp 1b\n" + ".previous\n" + _ASM_EXTABLE(ftrace_test_p6nop, 2b) + _ASM_EXTABLE(ftrace_test_nop5, 3b) + : "=r"(faulted) : "0" (faulted)); + + switch (faulted) { + case 0: + pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); + ftrace_nop = (unsigned long *)ftrace_test_p6nop; + break; + case 1: + pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); + ftrace_nop = (unsigned long *)ftrace_test_nop5; + break; + case 2: + pr_info("ftrace: converting mcount calls to jmp 1f\n"); + ftrace_nop = (unsigned long *)ftrace_test_jmp; + break; + } + + /* The return code is retured via data */ + *(unsigned long *)data = 0; return 0; } -- cgit v1.2.3 From 6f93fc076a464bfe24e8d4c5fea3f6ca5bdb264d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 20 Aug 2008 12:55:07 -0400 Subject: ftrace: x86 use copy to and from user functions The modification of code is performed either by kstop_machine, before SMP starts, or on module code before the module is executed. There is no reason to do the modifications from assembly. The copy to and from user functions are sufficient and produces cleaner and easier to read code. Thanks to Benjamin Herrenschmidt for suggesting the idea. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 4151c91254e..082d9964262 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -60,11 +61,7 @@ notrace int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { - unsigned replaced; - unsigned old = *(unsigned *)old_code; /* 4 bytes */ - unsigned new = *(unsigned *)new_code; /* 4 bytes */ - unsigned char newch = new_code[4]; - int faulted = 0; + unsigned char replaced[MCOUNT_INSN_SIZE]; /* * Note: Due to modules and __init, code can @@ -72,29 +69,20 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * as well as code changing. * * No real locking needed, this code is run through - * kstop_machine. + * kstop_machine, or before SMP starts. */ - asm volatile ( - "1: lock\n" - " cmpxchg %3, (%2)\n" - " jnz 2f\n" - " movb %b4, 4(%2)\n" - "2:\n" - ".section .fixup, \"ax\"\n" - "3: movl $1, %0\n" - " jmp 2b\n" - ".previous\n" - _ASM_EXTABLE(1b, 3b) - : "=r"(faulted), "=a"(replaced) - : "r"(ip), "r"(new), "c"(newch), - "0"(faulted), "a"(old) - : "memory"); - sync_core(); + if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) + return 1; - if (replaced != old && replaced != new) - faulted = 2; + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return 2; - return faulted; + WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code, + MCOUNT_INSN_SIZE)); + + sync_core(); + + return 0; } notrace int ftrace_update_ftrace_func(ftrace_func_t func) -- cgit v1.2.3 From bbe5c7830c6dbde58726d44ec0337bc8b2d95d37 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 21:54:16 +0300 Subject: x86 mmiotrace: fix a rare memory leak Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 635b50e8558..754bd1eaf4f 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -307,8 +307,10 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, map.map_id = trace->id; spin_lock_irq(&trace_lock); - if (!is_enabled()) + if (!is_enabled()) { + kfree(trace); goto not_enabled; + } mmio_trace_mapping(&map); list_add_tail(&trace->list, &trace_list); -- cgit v1.2.3 From 9e57fb35d711331a9b1410c5c56ebeb3733428a0 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:00:34 +0300 Subject: x86 mmiotrace: implement mmiotrace_printk() Offer mmiotrace users a function to inject markers from inside the kernel. This depends on the trace_vprintk() patch. Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 19 ++++++++++++++++++- arch/x86/mm/testmmiotrace.c | 4 ++++ 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 754bd1eaf4f..5e2e2e72ee8 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -75,7 +75,7 @@ static LIST_HEAD(trace_list); /* struct remap_trace */ * and trace_lock. * - Routines depending on is_enabled() must take trace_lock. * - trace_list users must hold trace_lock. - * - is_enabled() guarantees that mmio_trace_record is allowed. + * - is_enabled() guarantees that mmio_trace_{rw,mapping} are allowed. * - pre/post callbacks assume the effect of is_enabled() being true. */ @@ -379,6 +379,23 @@ void mmiotrace_iounmap(volatile void __iomem *addr) iounmap_trace_core(addr); } +int mmiotrace_printk(const char *fmt, ...) +{ + int ret = 0; + va_list args; + unsigned long flags; + va_start(args, fmt); + + spin_lock_irqsave(&trace_lock, flags); + if (is_enabled()) + ret = mmio_trace_printk(fmt, args); + spin_unlock_irqrestore(&trace_lock, flags); + + va_end(args); + return ret; +} +EXPORT_SYMBOL(mmiotrace_printk); + static void clear_trace_list(void) { struct remap_trace *trace; diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index d877c5b423e..ab50a8d7402 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -3,6 +3,7 @@ */ #include #include +#include #define MODULE_NAME "testmmiotrace" @@ -13,6 +14,7 @@ MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); static void do_write_test(void __iomem *p) { unsigned int i; + mmiotrace_printk("Write test.\n"); for (i = 0; i < 256; i++) iowrite8(i, p + i); for (i = 1024; i < (5 * 1024); i += 2) @@ -24,6 +26,7 @@ static void do_write_test(void __iomem *p) static void do_read_test(void __iomem *p) { unsigned int i; + mmiotrace_printk("Read test.\n"); for (i = 0; i < 256; i++) ioread8(p + i); for (i = 1024; i < (5 * 1024); i += 2) @@ -39,6 +42,7 @@ static void do_test(void) pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); return; } + mmiotrace_printk("ioremap returned %p.\n", p); do_write_test(p); do_read_test(p); iounmap(p); -- cgit v1.2.3 From 4427414170a63331a9cc36b9598502c5cdfe453b Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Tue, 16 Sep 2008 22:03:56 +0300 Subject: mmiotrace: remove left-over marker cruft Signed-off-by: Pekka Paalanen Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/mm/mmio-mod.c | 64 -------------------------------------------------- 1 file changed, 64 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 5e2e2e72ee8..2c4baa88f2c 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -56,13 +56,6 @@ struct remap_trace { static DEFINE_PER_CPU(struct trap_reason, pf_reason); static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); -#if 0 /* XXX: no way gather this info anymore */ -/* Access to this is not per-cpu. */ -static DEFINE_PER_CPU(atomic_t, dropped); -#endif - -static struct dentry *marker_file; - static DEFINE_MUTEX(mmiotrace_mutex); static DEFINE_SPINLOCK(trace_lock); static atomic_t mmiotrace_enabled; @@ -97,44 +90,6 @@ static bool is_enabled(void) return atomic_read(&mmiotrace_enabled); } -#if 0 /* XXX: needs rewrite */ -/* - * Write callback for the debugfs entry: - * Read a marker and write it to the mmio trace log - */ -static ssize_t write_marker(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) -{ - char *event = NULL; - struct mm_io_header *headp; - ssize_t len = (count > 65535) ? 65535 : count; - - event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); - if (!event) - return -ENOMEM; - - headp = (struct mm_io_header *)event; - headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); - headp->data_len = len; - - if (copy_from_user(event + sizeof(*headp), buffer, len)) { - kfree(event); - return -EFAULT; - } - - spin_lock_irq(&trace_lock); -#if 0 /* XXX: convert this to use tracing */ - if (is_enabled()) - relay_write(chan, event, sizeof(*headp) + len); - else -#endif - len = -EINVAL; - spin_unlock_irq(&trace_lock); - kfree(event); - return len; -} -#endif - static void print_pte(unsigned long address) { unsigned int level; @@ -481,26 +436,12 @@ static void leave_uniprocessor(void) } #endif -#if 0 /* XXX: out of order */ -static struct file_operations fops_marker = { - .owner = THIS_MODULE, - .write = write_marker -}; -#endif - void enable_mmiotrace(void) { mutex_lock(&mmiotrace_mutex); if (is_enabled()) goto out; -#if 0 /* XXX: tracing does not support text entries */ - marker_file = debugfs_create_file("marker", 0660, dir, NULL, - &fops_marker); - if (!marker_file) - pr_err(NAME "marker file creation failed.\n"); -#endif - if (nommiotrace) pr_info(NAME "MMIO tracing disabled.\n"); enter_uniprocessor(); @@ -525,11 +466,6 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); - if (marker_file) { - debugfs_remove(marker_file); - marker_file = NULL; - } - pr_info(NAME "disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); -- cgit v1.2.3 From 37a52f5ef120b93734bb2461744512b55695f69c Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 23 Sep 2008 15:05:46 -0700 Subject: x86: suppress trivial sparse signedness warnings Could just as easily change the three casts to cast to the correct type...this patch changes the type of ftrace_nop instead. Supresses sparse warnings: arch/x86/kernel/ftrace.c:157:14: warning: incorrect type in assignment (different signedness) arch/x86/kernel/ftrace.c:157:14: expected long *static [toplevel] ftrace_nop arch/x86/kernel/ftrace.c:157:14: got unsigned long * arch/x86/kernel/ftrace.c:161:14: warning: incorrect type in assignment (different signedness) arch/x86/kernel/ftrace.c:161:14: expected long *static [toplevel] ftrace_nop arch/x86/kernel/ftrace.c:161:14: got unsigned long * arch/x86/kernel/ftrace.c:165:14: warning: incorrect type in assignment (different signedness) arch/x86/kernel/ftrace.c:165:14: expected long *static [toplevel] ftrace_nop arch/x86/kernel/ftrace.c:165:14: got unsigned long * Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 082d9964262..66d900248fc 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -22,7 +22,7 @@ /* Long is fine, even if it is only 4 bytes ;-) */ -static long *ftrace_nop; +static unsigned long *ftrace_nop; union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; -- cgit v1.2.3 From ac2b86fdef5b44f194eefaa6b7b6aea9423d1bc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Weisbecker?= Date: Wed, 24 Sep 2008 16:31:56 +0100 Subject: x86/ftrace: use uaccess in atomic context With latest -tip I get this bug: [ 49.439988] in_atomic():0, irqs_disabled():1 [ 49.440118] INFO: lockdep is turned off. [ 49.440118] Pid: 2814, comm: modprobe Tainted: G W 2.6.27-rc7 #4 [ 49.440118] [] __might_sleep+0xe1/0x120 [ 49.440118] [] ftrace_modify_code+0x2a/0xd0 [ 49.440118] [] ? ftrace_test_p6nop+0x0/0xa [ 49.440118] [] __ftrace_update_code+0xfe/0x2f0 [ 49.440118] [] ? ftrace_test_p6nop+0x0/0xa [ 49.440118] [] ftrace_convert_nops+0x50/0x80 [ 49.440118] [] ftrace_init_module+0x16/0x20 [ 49.440118] [] load_module+0x185b/0x1d30 [ 49.440118] [] ? find_get_page+0x0/0xf0 [ 49.440118] [] ? sprintf+0x0/0x30 [ 49.440118] [] ? mutex_lock_interruptible_nested+0x1f2/0x350 [ 49.440118] [] sys_init_module+0x53/0x1b0 [ 49.440118] [] ? do_page_fault+0x0/0x740 [ 49.440118] [] syscall_call+0x7/0xb [ 49.440118] ======================= It is because ftrace_modify_code() calls copy_to_user and copy_from_user. These functions have been inserted after guessing that there couldn't be any race condition but copy_[to/from]_user might sleep and __ftrace_update_code is called with local_irq_saved. These function have been inserted since this commit: d5e92e8978fd2574e415dc2792c5eb592978243d: "ftrace: x86 use copy from user function" Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 66d900248fc..222507e8157 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -71,13 +71,13 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ - if (__copy_from_user(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) + if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) return 1; if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) return 2; - WARN_ON_ONCE(__copy_to_user((char __user *)ip, new_code, + WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code, MCOUNT_INSN_SIZE)); sync_core(); -- cgit v1.2.3 From 8b27386a9ce9c7f0f8702cff7565a46802ad57d1 Mon Sep 17 00:00:00 2001 From: Anders Kaseorg Date: Thu, 9 Oct 2008 22:19:08 -0400 Subject: ftrace: make ftrace_test_p6nop disassembler-friendly Commit 4c3dc21b136f8cb4b72afee16c3ba7e961656c0b in tip introduced the 5-byte NOP ftrace_test_p6nop: jmp . + 5 .byte 0x00, 0x00, 0x00 This is not friendly to disassemblers because an odd number of 0x00s ends in the middle of an instruction boundary. This changes the 0x00s to 1-byte NOPs (0x90). Signed-off-by: Anders Kaseorg Acked-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 222507e8157..d073d981a73 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -132,7 +132,9 @@ int __init ftrace_dyn_arch_init(void *data) ".section .text, \"ax\"\n" "ftrace_test_jmp:" "jmp ftrace_test_p6nop\n" - ".byte 0x00,0x00,0x00\n" /* 2 byte jmp + 3 bytes */ + "nop\n" + "nop\n" + "nop\n" /* 2 byte jmp + 3 bytes */ "ftrace_test_p6nop:" P6_NOP5 "jmp 1f\n" @@ -161,7 +163,7 @@ int __init ftrace_dyn_arch_init(void *data) ftrace_nop = (unsigned long *)ftrace_test_nop5; break; case 2: - pr_info("ftrace: converting mcount calls to jmp 1f\n"); + pr_info("ftrace: converting mcount calls to jmp . + 5\n"); ftrace_nop = (unsigned long *)ftrace_test_jmp; break; } -- cgit v1.2.3 From fe648be019119722ec0ac54e3a4b2e5bf5168589 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:41 -0700 Subject: x86: add after_bootmem flag for 32bit to prepare to use dyn_array support etc. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8396868e82c..91343c2694b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -66,6 +66,7 @@ static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; static int __initdata after_init_bootmem; +int after_bootmem; static __init void *alloc_low_page(unsigned long *phys) { @@ -988,6 +989,8 @@ void __init mem_init(void) set_highmem_pages_init(); + after_bootmem = 1; + codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; -- cgit v1.2.3 From 1f3fcd4b1adc972d5c6a34cfed98931c46575b49 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:44 -0700 Subject: add per_cpu_dyn_array support allow dyn-array in per_cpu area, allocated dynamically. usage: | /* in .h */ | struct kernel_stat { | struct cpu_usage_stat cpustat; | unsigned int *irqs; | }; | | /* in .c */ | DEFINE_PER_CPU(struct kernel_stat, kstat); | | DEFINE_PER_CPU_DYN_ARRAY_ADDR(per_cpu__kstat_irqs, per_cpu__kstat.irqs, sizeof(unsigned int), nr_irqs, sizeof(unsigned long), NULL); after setup_percpu()/per_cpu_alloc_dyn_array(), the dyn_array in per_cpu area is ready to use. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 0e67f72d931..13ba7a83808 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size = PERCPU_ENOUGH_ROOM; + ssize_t size, old_size; char *ptr; int cpu; @@ -148,7 +148,8 @@ void __init setup_per_cpu_areas(void) setup_cpu_pda_map(); /* Copy section for each CPU (we discard the original) */ - size = PERCPU_ENOUGH_ROOM; + old_size = PERCPU_ENOUGH_ROOM; + size = old_size + per_cpu_dyn_array_size(); printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -176,6 +177,8 @@ void __init setup_per_cpu_areas(void) per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + per_cpu_alloc_dyn_array(cpu, ptr + old_size); + } printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", -- cgit v1.2.3 From 1f8ff037a871690c762d267d8a052529d3102fc9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:45 -0700 Subject: x86: alloc dyn_array all together so could spare some memory with small alignment in bootmem also tighten the alignment checking, and make print out less debug info. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 13ba7a83808..2b7dab699e8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -140,26 +140,31 @@ static void __init setup_cpu_pda_map(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size; + ssize_t size, old_size, da_size; char *ptr; int cpu; + unsigned long align = 1; /* Setup cpu_pda map */ setup_cpu_pda_map(); /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; - size = old_size + per_cpu_dyn_array_size(); + da_size = per_cpu_dyn_array_size(&align); + align = max_t(unsigned long, PAGE_SIZE, align); + size = roundup(old_size + da_size, align); printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); for_each_possible_cpu(cpu) { #ifndef CONFIG_NEED_MULTIPLE_NODES - ptr = alloc_bootmem_pages(size); + ptr = __alloc_bootmem(size, align, + __pa(MAX_DMA_ADDRESS)); #else int node = early_cpu_to_node(cpu); if (!node_online(node) || !NODE_DATA(node)) { - ptr = alloc_bootmem_pages(size); + ptr = __alloc_bootmem(size, align, + __pa(MAX_DMA_ADDRESS)); printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); @@ -168,7 +173,8 @@ void __init setup_per_cpu_areas(void) cpu, __pa(ptr)); } else { - ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, + __pa(MAX_DMA_ADDRESS)); if (ptr) printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", cpu, node, __pa(ptr)); -- cgit v1.2.3 From 6da55c3e8da88e8a7cb6452160776ad6706798ad Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:46 -0700 Subject: x86: enable dyn_array support Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/kernel/vmlinux_32.lds.S | 1 + arch/x86/kernel/vmlinux_64.lds.S | 3 +++ 3 files changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f65c2744d57..42f98009d75 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -33,6 +33,7 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS + select HAVE_DYN_ARRAY config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index a9b8560adbc..c36007ab394 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -145,6 +145,7 @@ SECTIONS *(.x86_cpu_dev.init) __x86_cpu_dev_end = .; } + DYN_ARRAY_INIT(8) SECURITY_INIT . = ALIGN(4); .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 46e05447405..30973dbac8c 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -173,6 +173,9 @@ SECTIONS *(.x86_cpu_dev.init) } __x86_cpu_dev_end = .; + + DYN_ARRAY_INIT(8) + SECURITY_INIT . = ALIGN(8); -- cgit v1.2.3 From 0799e432acfda879eaeef9622426bfa1434f3786 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:49:48 -0700 Subject: x86: use nr_irqs also add first_free_entry and pin_map_size, which were NR_IRQS derived constants. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 26 ++++++++++++++------------ arch/x86/kernel/io_apic_64.c | 33 +++++++++++++++++---------------- arch/x86/kernel/irq_32.c | 8 ++++---- arch/x86/kernel/irq_64.c | 8 ++++---- arch/x86/kernel/irqinit_32.c | 2 +- arch/x86/kernel/irqinit_64.c | 2 +- 6 files changed, 41 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index e710289f673..d382990244f 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -70,6 +70,7 @@ int timer_through_8259 __initdata; */ int sis_apic_bug = -1; +int first_free_entry = NR_IRQS; /* * # of IRQ routing registers */ @@ -100,6 +101,8 @@ static int disable_timer_pin_1 __initdata; #define MAX_PLUS_SHARED_IRQS NR_IRQS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +int pin_map_size = PIN_MAP_SIZE; + /* * This is performance-critical, we want to do it O(1) * @@ -213,7 +216,6 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; while (entry->next) @@ -222,7 +224,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) if (entry->pin != -1) { entry->next = first_free_entry; entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) + if (++first_free_entry >= pin_map_size) panic("io_apic.c: whoops"); } entry->apic = apic; @@ -457,7 +459,7 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) int i, j; for_each_online_cpu(i) { - for (j = 0; j < NR_IRQS; j++) { + for (j = 0; j < nr_irqs; j++) { if (!irq_desc[j].action) continue; /* Is it a significant load ? */ @@ -492,7 +494,7 @@ static void do_irq_balance(void) if (!cpu_online(i)) continue; package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < NR_IRQS; j++) { + for (j = 0; j < nr_irqs; j++) { unsigned long value_now, delta; /* Is this an active IRQ or balancing disabled ? */ if (!irq_desc[j].action || irq_balancing_disabled(j)) @@ -587,7 +589,7 @@ tryanotherirq: */ move_this_load = 0; selected_irq = -1; - for (j = 0; j < NR_IRQS; j++) { + for (j = 0; j < nr_irqs; j++) { /* Is this an active IRQ? */ if (!irq_desc[j].action) continue; @@ -664,7 +666,7 @@ static int balanced_irq(void *unused) long time_remaining = balanced_irq_interval; /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < NR_IRQS ; i++) { + for (i = 0 ; i < nr_irqs ; i++) { irq_desc[i].pending_mask = cpumask_of_cpu(0); set_pending_irq(i, cpumask_of_cpu(0)); } @@ -712,8 +714,8 @@ static int __init balanced_irq_init(void) physical_balance = 1; for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); - irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); + irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); + irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { printk(KERN_ERR "balanced_irq_init: out of memory"); goto failed; @@ -1441,7 +1443,7 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { + for (i = 0; i < nr_irqs; i++) { struct irq_pin_list *entry = irq_2_pin + i; if (entry->pin < 0) continue; @@ -1621,7 +1623,7 @@ static void __init enable_IO_APIC(void) int i, apic; unsigned long flags; - for (i = 0; i < PIN_MAP_SIZE; i++) { + for (i = 0; i < pin_map_size; i++) { irq_2_pin[i].pin = -1; irq_2_pin[i].next = 0; } @@ -2005,7 +2007,7 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < NR_IRQS ; irq++) { + for (irq = 0; irq < nr_irqs ; irq++) { if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { /* * Hmm.. We don't have an entry for this, @@ -2449,7 +2451,7 @@ int create_irq(void) irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { + for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; if (irq_vector[new] != 0) diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 02063ae042f..448384c7c1e 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -132,6 +132,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); #define MAX_PLUS_SHARED_IRQS NR_IRQS #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) +int pin_map_size = PIN_MAP_SIZE; /* * This is performance-critical, we want to do it O(1) * @@ -224,7 +225,7 @@ static inline void io_apic_sync(unsigned int apic) int pin; \ struct irq_pin_list *entry = irq_2_pin + irq; \ \ - BUG_ON(irq >= NR_IRQS); \ + BUG_ON(irq >= nr_irqs); \ for (;;) { \ unsigned int reg; \ pin = entry->pin; \ @@ -301,7 +302,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) int apic, pin; struct irq_pin_list *entry = irq_2_pin + irq; - BUG_ON(irq >= NR_IRQS); + BUG_ON(irq >= nr_irqs); for (;;) { unsigned int reg; apic = entry->apic; @@ -358,19 +359,19 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ +int first_free_entry = NR_IRQS; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - static int first_free_entry = NR_IRQS; struct irq_pin_list *entry = irq_2_pin + irq; - BUG_ON(irq >= NR_IRQS); + BUG_ON(irq >= nr_irqs); while (entry->next) entry = irq_2_pin + entry->next; if (entry->pin != -1) { entry->next = first_free_entry; entry = irq_2_pin + entry->next; - if (++first_free_entry >= PIN_MAP_SIZE) + if (++first_free_entry >= pin_map_size) panic("io_apic.c: ran out of irq_2_pin entries!"); } entry->apic = apic; @@ -634,7 +635,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) best_guess = irq; } } - BUG_ON(best_guess >= NR_IRQS); + BUG_ON(best_guess >= nr_irqs); return best_guess; } @@ -766,7 +767,7 @@ static int pin_2_irq(int idx, int apic, int pin) irq += nr_ioapic_registers[i++]; irq += pin; } - BUG_ON(irq >= NR_IRQS); + BUG_ON(irq >= nr_irqs); return irq; } @@ -801,7 +802,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) int cpu; struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON((unsigned)irq >= nr_irqs); cfg = &irq_cfg[irq]; /* Only try and allocate irqs on cpus that are present */ @@ -875,7 +876,7 @@ static void __clear_irq_vector(int irq) cpumask_t mask; int cpu, vector; - BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON((unsigned)irq >= nr_irqs); cfg = &irq_cfg[irq]; BUG_ON(!cfg->vector); @@ -895,7 +896,7 @@ void __setup_vector_irq(int cpu) int irq, vector; /* Mark the inuse vectors */ - for (irq = 0; irq < NR_IRQS; ++irq) { + for (irq = 0; irq < nr_irqs; ++irq) { if (!cpu_isset(cpu, irq_cfg[irq].domain)) continue; vector = irq_cfg[irq].vector; @@ -1193,7 +1194,7 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < NR_IRQS; i++) { + for (i = 0; i < nr_irqs; i++) { struct irq_pin_list *entry = irq_2_pin + i; if (entry->pin < 0) continue; @@ -1366,7 +1367,7 @@ void __init enable_IO_APIC(void) int i, apic; unsigned long flags; - for (i = 0; i < PIN_MAP_SIZE; i++) { + for (i = 0; i < pin_map_size; i++) { irq_2_pin[i].pin = -1; irq_2_pin[i].next = 0; } @@ -1658,7 +1659,7 @@ static void ir_irq_migration(struct work_struct *work) { int irq; - for (irq = 0; irq < NR_IRQS; irq++) { + for (irq = 0; irq < nr_irqs; irq++) { struct irq_desc *desc = irq_desc + irq; if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -1707,7 +1708,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_desc *desc; struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; - if (irq >= NR_IRQS) + if (irq >= nr_irqs) continue; desc = irq_desc + irq; @@ -1865,7 +1866,7 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < NR_IRQS ; irq++) { + for (irq = 0; irq < nr_irqs ; irq++) { if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { /* * Hmm.. We don't have an entry for this, @@ -2279,7 +2280,7 @@ int create_irq(void) irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); - for (new = (NR_IRQS - 1); new >= 0; new--) { + for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; if (irq_cfg[new].vector != 0) diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index b71e02d42f4..4c7ffb32854 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -226,7 +226,7 @@ unsigned int do_IRQ(struct pt_regs *regs) int overflow, irq = ~regs->orig_ax; struct irq_desc *desc = irq_desc + irq; - if (unlikely((unsigned)irq >= NR_IRQS)) { + if (unlikely((unsigned)irq >= nr_irqs)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", __func__, irq); BUG(); @@ -271,7 +271,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < NR_IRQS) { + if (i < nr_irqs) { unsigned any_count = 0; spin_lock_irqsave(&irq_desc[i].lock, flags); @@ -303,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { + } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); @@ -396,7 +396,7 @@ void fixup_irqs(cpumask_t map) unsigned int irq; static int warned; - for (irq = 0; irq < NR_IRQS; irq++) { + for (irq = 0; irq < nr_irqs; irq++) { cpumask_t mask; if (irq == 2) continue; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f065fe9071b..e1f0839430d 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -81,7 +81,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < NR_IRQS) { + if (i < nr_irqs) { unsigned any_count = 0; spin_lock_irqsave(&irq_desc[i].lock, flags); @@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } else if (i == NR_IRQS) { + } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); @@ -201,7 +201,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - if (likely(irq < NR_IRQS)) + if (likely(irq < nr_irqs)) generic_handle_irq(irq); else { if (!disable_apic) @@ -224,7 +224,7 @@ void fixup_irqs(cpumask_t map) unsigned int irq; static int warned; - for (irq = 0; irq < NR_IRQS; irq++) { + for (irq = 0; irq < nr_irqs; irq++) { cpumask_t mask; int break_affinity = 0; int set_affinity = 1; diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 9200a1e2752..65c1c950770 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -100,7 +100,7 @@ void __init native_init_IRQ(void) */ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= NR_IRQS) + if (i >= nr_irqs) break; /* SYSCALL_VECTOR was reserved in trap_init. */ if (!test_bit(vector, used_vectors)) diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 5b5be9d43c2..165c5d9b0d1 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -142,7 +142,7 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < NR_IRQS; i++) { + for (i = 0; i < nr_irqs; i++) { irq_desc[i].status = IRQ_DISABLED; irq_desc[i].action = NULL; irq_desc[i].depth = 1; -- cgit v1.2.3 From 301e619020dd67bde7e7e64bb9ffb7f30d26c979 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:02 -0700 Subject: x86: use dyn_array in io_apic_xx.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 54 +++++++++++++++++++++++++++++++++++--------- arch/x86/kernel/io_apic_64.c | 28 +++++++++++++++++------ arch/x86/kernel/setup.c | 6 +++++ 3 files changed, 70 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index d382990244f..7f2bcc3dad8 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -70,7 +70,7 @@ int timer_through_8259 __initdata; */ int sis_apic_bug = -1; -int first_free_entry = NR_IRQS; +int first_free_entry; /* * # of IRQ routing registers */ @@ -98,10 +98,7 @@ static int disable_timer_pin_1 __initdata; * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) - -int pin_map_size = PIN_MAP_SIZE; +int pin_map_size; /* * This is performance-critical, we want to do it O(1) @@ -112,7 +109,9 @@ int pin_map_size = PIN_MAP_SIZE; static struct irq_pin_list { int apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; +} *irq_2_pin; + +DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL); struct io_apic { unsigned int index; @@ -403,9 +402,28 @@ static struct irq_cpu_info { #define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) -static cpumask_t balance_irq_affinity[NR_IRQS] = { - [0 ... NR_IRQS-1] = CPU_MASK_ALL -}; +static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL; + +static cpumask_t *balance_irq_affinity; + + +static void __init irq_affinity_init_work(void *data) +{ + struct dyn_array *da = data; + + int i; + struct balance_irq_affinity *affinity; + + affinity = *da->name; + + for (i = 0; i < *da->nr; i++) + memcpy(&affinity[i], &balance_irq_affinity_init, + sizeof(struct balance_irq_affinity)); + +} + +DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work); + void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) { @@ -1170,14 +1188,28 @@ static inline int IO_APIC_irq_trigger(int irq) } /* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly = { FIRST_DEVICE_VECTOR , 0 }; +static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR; +static u8 *irq_vector; + +static void __init irq_vector_init_work(void *data) +{ + struct dyn_array *da = data; + + u8 *irq_vec; + + irq_vec = *da->name; + + irq_vec[0] = irq_vector_init_first; +} + +DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work); static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, current_offset; int vector, offset; - BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); + BUG_ON((unsigned)irq >= nr_irqs); if (irq_vector[irq] > 0) return irq_vector[irq]; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 448384c7c1e..93a3ffabfe6 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -66,7 +66,7 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { +static struct irq_cfg irq_cfg_legacy[] __initdata = { [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, @@ -85,6 +85,17 @@ static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; +static struct irq_cfg *irq_cfg; + +static void __init init_work(void *data) +{ + struct dyn_array *da = data; + + memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy)); +} + +DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); + static int assign_irq_vector(int irq, cpumask_t mask); int first_system_vector = 0xfe; @@ -129,10 +140,9 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); * Rough estimation of how many shared IRQs there are, can * be changed anytime. */ -#define MAX_PLUS_SHARED_IRQS NR_IRQS -#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) -int pin_map_size = PIN_MAP_SIZE; +int pin_map_size; + /* * This is performance-critical, we want to do it O(1) * @@ -141,8 +151,12 @@ int pin_map_size = PIN_MAP_SIZE; */ static struct irq_pin_list { - short apic, pin, next; -} irq_2_pin[PIN_MAP_SIZE]; + short apic, pin; + int next; +} *irq_2_pin; + +DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL); + struct io_apic { unsigned int index; @@ -359,7 +373,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -int first_free_entry = NR_IRQS; +int first_free_entry; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { struct irq_pin_list *entry = irq_2_pin + irq; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2255782e8d4..558ec26b08e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1067,9 +1067,15 @@ void __init setup_arch(char **cmdline_p) #endif prefill_possible_map(); + #ifdef CONFIG_X86_64 + /* need to wait for nr_cpu_ids settle down */ + if (nr_irqs == NR_IRQS) + nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif + pin_map_size = nr_irqs * 2; + first_free_entry = nr_irqs; init_apic_mappings(); ioapic_init_mappings(); -- cgit v1.2.3 From a84488c213a8cfc29200344a6fb6357d48c8ed85 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 19 Aug 2008 20:50:31 -0700 Subject: irq: sparse irqs, fix #3 fix non-APIC UP build: arch/x86/kernel/built-in.o: In function `setup_arch': : undefined reference to `pin_map_size' arch/x86/kernel/built-in.o: In function `setup_arch': : undefined reference to `first_free_entry' Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 558ec26b08e..02e3a669797 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1074,8 +1074,10 @@ void __init setup_arch(char **cmdline_p) nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif +#ifdef CONFIG_X86_IO_APIC pin_map_size = nr_irqs * 2; first_free_entry = nr_irqs; +#endif init_apic_mappings(); ioapic_init_mappings(); -- cgit v1.2.3 From 71f521bbaf375b685aeea20c6b0ed8600cd6edfe Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:03 -0700 Subject: x86, irq: get nr_irqs from madt Until now, NR_IRQS was derived from black magic defines that had to be "large enough" to both accomodate NR_CPUS and MAX_NR_IO_APICs. This resulted in a way too large irq_desc[] array on most x86 systems. Especially with larger CPU masks, the size of irq_desc can spiral out of control quickly. So be smarter about it and use precise allocation instead: determine the default maximum possible IRQ number from the ACPI MADT. Use a minimum limit of at least 32 IRQs for broken BIOSes. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index eb875cdc736..3e9d163fd92 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -957,6 +957,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int get_nr_irqs_via_madt(void) +{ + int idx; + int nr = 0; + + for (idx = 0; idx < nr_ioapics; idx++) { + if (mp_ioapic_routing[idx].gsi_end > nr) + nr = mp_ioapic_routing[idx].gsi_end; + } + + nr++; + + /* double it for hotplug and msi and nmi */ + nr <<= 1; + + /* something wrong ? */ + if (nr < 32) + nr = 32; + + return nr; + +} + static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { @@ -1254,9 +1277,12 @@ static int __init acpi_parse_madt_ioapic_entries(void) return count; } + + nr_irqs = get_nr_irqs_via_madt(); + count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, - NR_IRQ_VECTORS); + nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); @@ -1276,7 +1302,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, - NR_IRQ_VECTORS); + nr_irqs); if (count < 0) { printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); /* TBD: Cleanup to allow fallback to MPS */ -- cgit v1.2.3 From 08678b0841267c1d00d771fe01548d86043d065e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:05 -0700 Subject: generic: sparse irqs: use irq_desc() together with dyn_array, instead of irq_desc[] add CONFIG_HAVE_SPARSE_IRQ to for use condensed array. Get rid of irq_desc[] array assumptions. Preallocate 32 irq_desc, and irq_desc() will try to get more. ( No change in functionality is expected anywhere, except the odd build failure where we missed a code site or where a crossing commit itroduces new irq_desc[] usage. ) v2: according to Eric, change get_irq_desc() to irq_desc() Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + arch/x86/kernel/io_apic_32.c | 46 +++++++++++++++++------ arch/x86/kernel/io_apic_64.c | 75 ++++++++++++++++++++++++------------- arch/x86/kernel/irq_32.c | 24 +++++++----- arch/x86/kernel/irq_64.c | 35 +++++++++-------- arch/x86/kernel/irqinit_64.c | 10 +++-- arch/x86/kernel/visws_quirks.c | 30 ++++++++------- arch/x86/mach-voyager/voyager_smp.c | 4 +- 8 files changed, 142 insertions(+), 83 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 42f98009d75..1004888e9b1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,6 +34,7 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_DYN_ARRAY + select HAVE_SPARSE_IRQ if X86_64 config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 7f2bcc3dad8..c2160cfdec9 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) @@ -365,7 +366,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = irq_2_pin + entry->next; } - irq_desc[irq].affinity = cpumask; + desc = irq_to_desc(irq); + desc->affinity = cpumask; spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -475,10 +477,12 @@ static inline void balance_irq(int cpu, int irq) static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) { int i, j; + struct irq_desc *desc; for_each_online_cpu(i) { for (j = 0; j < nr_irqs; j++) { - if (!irq_desc[j].action) + desc = irq_to_desc(j); + if (!desc->action) continue; /* Is it a significant load ? */ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < @@ -505,6 +509,7 @@ static void do_irq_balance(void) unsigned long tmp_cpu_irq; unsigned long imbalance = 0; cpumask_t allowed_mask, target_cpu_mask, tmp; + struct irq_desc *desc; for_each_possible_cpu(i) { int package_index; @@ -515,7 +520,8 @@ static void do_irq_balance(void) for (j = 0; j < nr_irqs; j++) { unsigned long value_now, delta; /* Is this an active IRQ or balancing disabled ? */ - if (!irq_desc[j].action || irq_balancing_disabled(j)) + desc = irq_to_desc(j); + if (!desc->action || irq_balancing_disabled(j)) continue; if (package_index == i) IRQ_DELTA(package_index, j) = 0; @@ -609,7 +615,8 @@ tryanotherirq: selected_irq = -1; for (j = 0; j < nr_irqs; j++) { /* Is this an active IRQ? */ - if (!irq_desc[j].action) + desc = irq_to_desc(j); + if (!desc->action) continue; if (imbalance <= IRQ_DELTA(max_loaded, j)) continue; @@ -682,10 +689,12 @@ static int balanced_irq(void *unused) int i; unsigned long prev_balance_time = jiffies; long time_remaining = balanced_irq_interval; + struct irq_desc *desc; /* push everything to CPU 0 to give us a starting point. */ for (i = 0 ; i < nr_irqs ; i++) { - irq_desc[i].pending_mask = cpumask_of_cpu(0); + desc = irq_to_desc(i); + desc->pending_mask = cpumask_of_cpu(0); set_pending_irq(i, cpumask_of_cpu(0)); } @@ -1254,13 +1263,16 @@ static struct irq_chip ioapic_chip; static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { + struct irq_desc *desc; + + desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) { - irq_desc[irq].status |= IRQ_LEVEL; + desc->status |= IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); } else { - irq_desc[irq].status &= ~IRQ_LEVEL; + desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); } @@ -2027,6 +2039,7 @@ static struct irq_chip ioapic_chip __read_mostly = { static inline void init_IO_APIC_traps(void) { int irq; + struct irq_desc *desc; /* * NOTE! The local APIC isn't very good at handling @@ -2048,9 +2061,11 @@ static inline void init_IO_APIC_traps(void) */ if (irq < 16) make_8259A_irq(irq); - else + else { + desc = irq_to_desc(irq); /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_chip; + desc->chip = &no_irq_chip; + } } } } @@ -2089,7 +2104,10 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq, int vector) { - irq_desc[irq].status &= ~IRQ_LEVEL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); set_intr_gate(vector, interrupt[irq]); @@ -2556,6 +2574,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp; int vector; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2575,7 +2594,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -2649,6 +2669,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2659,7 +2680,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(mask); target_ht_irq(irq, dest); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 93a3ffabfe6..cab5a25d81b 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -345,6 +345,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) unsigned long flags; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -361,9 +362,10 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ dest = SET_APIC_LOGICAL_ID(dest); + desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); - irq_desc[irq].affinity = mask; + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } #endif @@ -933,14 +935,17 @@ static struct irq_chip ir_ioapic_chip; static void ioapic_register_intr(int irq, unsigned long trigger) { + struct irq_desc *desc; + + desc = irq_to_desc(irq); if (trigger) - irq_desc[irq].status |= IRQ_LEVEL; + desc->status |= IRQ_LEVEL; else - irq_desc[irq].status &= ~IRQ_LEVEL; + desc->status &= ~IRQ_LEVEL; #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { - irq_desc[irq].status |= IRQ_MOVE_PCNTXT; + desc->status |= IRQ_MOVE_PCNTXT; if (trigger) set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, handle_fasteoi_irq, @@ -1596,10 +1601,10 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); static void migrate_ioapic_irq(int irq, cpumask_t mask) { struct irq_cfg *cfg = irq_cfg + irq; - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; - int modify_ioapic_rte = desc->status & IRQ_LEVEL; + int modify_ioapic_rte; unsigned int dest; unsigned long flags; @@ -1616,6 +1621,8 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); + desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; if (modify_ioapic_rte) { spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); @@ -1637,12 +1644,13 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) cfg->move_in_progress = 0; } - irq_desc[irq].affinity = mask; + desc->affinity = mask; } static int migrate_irq_remapped_level(int irq) { int ret = -1; + struct irq_desc *desc = irq_to_desc(irq); mask_IO_APIC_irq(irq); @@ -1658,11 +1666,11 @@ static int migrate_irq_remapped_level(int irq) } /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, irq_desc[irq].pending_mask); + migrate_ioapic_irq(irq, desc->pending_mask); ret = 0; - irq_desc[irq].status &= ~IRQ_MOVE_PENDING; - cpus_clear(irq_desc[irq].pending_mask); + desc->status &= ~IRQ_MOVE_PENDING; + cpus_clear(desc->pending_mask); unmask: unmask_IO_APIC_irq(irq); @@ -1674,7 +1682,7 @@ static void ir_irq_migration(struct work_struct *work) int irq; for (irq = 0; irq < nr_irqs; irq++) { - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; @@ -1686,8 +1694,7 @@ static void ir_irq_migration(struct work_struct *work) continue; } - desc->chip->set_affinity(irq, - irq_desc[irq].pending_mask); + desc->chip->set_affinity(irq, desc->pending_mask); spin_unlock_irqrestore(&desc->lock, flags); } } @@ -1698,9 +1705,11 @@ static void ir_irq_migration(struct work_struct *work) */ static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - if (irq_desc[irq].status & IRQ_LEVEL) { - irq_desc[irq].status |= IRQ_MOVE_PENDING; - irq_desc[irq].pending_mask = mask; + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = mask; migrate_irq_remapped_level(irq); return; } @@ -1725,7 +1734,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) if (irq >= nr_irqs) continue; - desc = irq_desc + irq; + desc = irq_to_desc(irq); cfg = irq_cfg + irq; spin_lock(&desc->lock); if (!cfg->move_cleanup_count) @@ -1791,7 +1800,7 @@ static void ack_apic_level(unsigned int irq) irq_complete_move(irq); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; mask_IO_APIC_irq(irq); } @@ -1868,6 +1877,7 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { static inline void init_IO_APIC_traps(void) { int irq; + struct irq_desc *desc; /* * NOTE! The local APIC isn't very good at handling @@ -1889,9 +1899,11 @@ static inline void init_IO_APIC_traps(void) */ if (irq < 16) make_8259A_irq(irq); - else + else { + desc = irq_to_desc(irq); /* Strange. Oh, well.. */ - irq_desc[irq].chip = &no_irq_chip; + desc->chip = &no_irq_chip; + } } } } @@ -1926,7 +1938,10 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq) { - irq_desc[irq].status &= ~IRQ_LEVEL; + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); } @@ -2402,6 +2417,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2421,7 +2437,8 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #ifdef CONFIG_INTR_REMAP @@ -2435,6 +2452,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2469,7 +2487,8 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) cfg->move_in_progress = 0; } - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif #endif /* CONFIG_SMP */ @@ -2543,7 +2562,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) #ifdef CONFIG_INTR_REMAP if (irq_remapped(irq)) { - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); /* * irq migration in process context */ @@ -2655,6 +2674,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2674,7 +2694,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); dmar_msi_write(irq, &msg); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif /* CONFIG_SMP */ @@ -2731,6 +2752,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) struct irq_cfg *cfg = irq_cfg + irq; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2743,7 +2765,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - irq_desc[irq].affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 4c7ffb32854..ede513be517 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -224,7 +224,7 @@ unsigned int do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs; /* high bit used in ret_from_ code */ int overflow, irq = ~regs->orig_ax; - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_to_desc(irq); if (unlikely((unsigned)irq >= nr_irqs)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", @@ -273,15 +273,16 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; + struct irq_desc *desc = irq_to_desc(i); - spin_lock_irqsave(&irq_desc[i].lock, flags); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); #else for_each_online_cpu(j) any_count |= kstat_cpu(j).irqs[i]; #endif - action = irq_desc[i].action; + action = desc->action; if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); @@ -291,8 +292,8 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); if (action) { seq_printf(p, " %s", action->name); @@ -302,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -398,17 +399,20 @@ void fixup_irqs(cpumask_t map) for (irq = 0; irq < nr_irqs; irq++) { cpumask_t mask; + struct irq_desc *desc; + if (irq == 2) continue; - cpus_and(mask, irq_desc[irq].affinity, map); + desc = irq_to_desc(irq); + cpus_and(mask, desc->affinity, map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); mask = map; } - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); - else if (irq_desc[irq].action && !(warned++)) + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, mask); + else if (desc->action && !(warned++)) printk("Cannot set affinity for irq %i\n", irq); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index e1f0839430d..738eb65a924 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -83,15 +83,16 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; + struct irq_desc *desc = irq_to_desc(i); - spin_lock_irqsave(&irq_desc[i].lock, flags); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP any_count = kstat_irqs(i); #else for_each_online_cpu(j) any_count |= kstat_cpu(j).irqs[i]; #endif - action = irq_desc[i].action; + action = desc->action; if (!action && !any_count) goto skip; seq_printf(p, "%3d: ",i); @@ -101,8 +102,8 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif - seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%-8s", irq_desc[i].name); + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); if (action) { seq_printf(p, " %s", action->name); @@ -111,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) } seq_putc(p, '\n'); skip: - spin_unlock_irqrestore(&irq_desc[i].lock, flags); + spin_unlock_irqrestore(&desc->lock, flags); } else if (i == nr_irqs) { seq_printf(p, "NMI: "); for_each_online_cpu(j) @@ -228,37 +229,39 @@ void fixup_irqs(cpumask_t map) cpumask_t mask; int break_affinity = 0; int set_affinity = 1; + struct irq_desc *desc; if (irq == 2) continue; + desc = irq_to_desc(irq); /* interrupt's are disabled at this point */ - spin_lock(&irq_desc[irq].lock); + spin_lock(&desc->lock); if (!irq_has_action(irq) || - cpus_equal(irq_desc[irq].affinity, map)) { - spin_unlock(&irq_desc[irq].lock); + cpus_equal(desc->affinity, map)) { + spin_unlock(&desc->lock); continue; } - cpus_and(mask, irq_desc[irq].affinity, map); + cpus_and(mask, desc->affinity, map); if (cpus_empty(mask)) { break_affinity = 1; mask = map; } - if (irq_desc[irq].chip->mask) - irq_desc[irq].chip->mask(irq); + if (desc->chip->mask) + desc->chip->mask(irq); - if (irq_desc[irq].chip->set_affinity) - irq_desc[irq].chip->set_affinity(irq, mask); + if (desc->chip->set_affinity) + desc->chip->set_affinity(irq, mask); else if (!(warned++)) set_affinity = 0; - if (irq_desc[irq].chip->unmask) - irq_desc[irq].chip->unmask(irq); + if (desc->chip->unmask) + desc->chip->unmask(irq); - spin_unlock(&irq_desc[irq].lock); + spin_unlock(&desc->lock); if (break_affinity && set_affinity) printk("Broke affinity for irq %i\n", irq); diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 165c5d9b0d1..0744b49b4d1 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -143,9 +143,11 @@ void __init init_ISA_irqs(void) init_8259A(0); for (i = 0; i < nr_irqs; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = NULL; - irq_desc[i].depth = 1; + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; if (i < 16) { /* @@ -157,7 +159,7 @@ void __init init_ISA_irqs(void) /* * 'high' PCI IRQs filled in on demand */ - irq_desc[i].chip = &no_irq_chip; + desc->chip = &no_irq_chip; } } } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 61a97e616f7..9d85ab38443 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -484,10 +484,11 @@ static void disable_cobalt_irq(unsigned int irq) static unsigned int startup_cobalt_irq(unsigned int irq) { unsigned long flags; + struct irq_desc *desc = irq_to_desc(irq); spin_lock_irqsave(&cobalt_lock, flags); - if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) - irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); + if ((desc->status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) + desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); enable_cobalt_irq(irq); spin_unlock_irqrestore(&cobalt_lock, flags); return 0; @@ -506,9 +507,10 @@ static void ack_cobalt_irq(unsigned int irq) static void end_cobalt_irq(unsigned int irq) { unsigned long flags; + struct irq_desc *desc = irq_to_desc(irq); spin_lock_irqsave(&cobalt_lock, flags); - if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) + if (!(desc->status & (IRQ_DISABLED | IRQ_INPROGRESS))) enable_cobalt_irq(irq); spin_unlock_irqrestore(&cobalt_lock, flags); } @@ -626,7 +628,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) spin_unlock_irqrestore(&i8259A_lock, flags); - desc = irq_desc + realirq; + desc = irq_to_desc(realirq); /* * handle this 'virtual interrupt' as a Cobalt one now. @@ -662,27 +664,29 @@ void init_VISWS_APIC_irqs(void) int i; for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { - irq_desc[i].status = IRQ_DISABLED; - irq_desc[i].action = 0; - irq_desc[i].depth = 1; + struct irq_desc *desc = irq_to_desc(i); + + desc->status = IRQ_DISABLED; + desc->action = 0; + desc->depth = 1; if (i == 0) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_IDE0) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_IDE1) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } else if (i == CO_IRQ_8259) { - irq_desc[i].chip = &piix4_master_irq_type; + desc->chip = &piix4_master_irq_type; } else if (i < CO_IRQ_APIC0) { - irq_desc[i].chip = &piix4_virtual_irq_type; + desc->chip = &piix4_virtual_irq_type; } else if (IS_CO_APIC(i)) { - irq_desc[i].chip = &cobalt_irq_type; + desc->chip = &cobalt_irq_type; } } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 199a5f4a873..0f6e8a6523a 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -1483,7 +1483,7 @@ static void disable_local_vic_irq(unsigned int irq) * the interrupt off to another CPU */ static void before_handle_vic_irq(unsigned int irq) { - irq_desc_t *desc = irq_desc + irq; + irq_desc_t *desc = irq_to_desc(irq); __u8 cpu = smp_processor_id(); _raw_spin_lock(&vic_irq_lock); @@ -1518,7 +1518,7 @@ static void before_handle_vic_irq(unsigned int irq) /* Finish the VIC interrupt: basically mask */ static void after_handle_vic_irq(unsigned int irq) { - irq_desc_t *desc = irq_desc + irq; + irq_desc_t *desc = irq_to_desc(irq); _raw_spin_lock(&vic_irq_lock); { -- cgit v1.2.3 From 3ac2de48ed3c998df7f366e039c97eedb27e7c3d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:06 -0700 Subject: x86: add irq_cfg in io_apic_64.c preallocate size is 32, and if it is not enough, irq_cfg will more via alloc_bootmem() or kzalloc(). (depending on how early we are in system setup) v2: fix typo about size of init_one_irq_cfg ... should use sizeof(struct irq_cfg) v3: according to Eric, change get_irq_cfg() to irq_cfg() v4: squash add irq_cfg_alloc in Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 209 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 169 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index cab5a25d81b..858c37a31a2 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -57,7 +57,11 @@ #define __apicdebuginit(type) static type __init +struct irq_cfg; + struct irq_cfg { + unsigned int irq; + struct irq_cfg *next; cpumask_t domain; cpumask_t old_domain; unsigned move_cleanup_count; @@ -67,34 +71,132 @@ struct irq_cfg { /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -static struct irq_cfg *irq_cfg; +static struct irq_cfg irq_cfg_init = { .irq = -1U, }; +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; +} + +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +static void init_one_irq_cfg(struct irq_cfg *cfg) +{ + memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); +} static void __init init_work(void *data) { struct dyn_array *da = data; + struct irq_cfg *cfg; + int i; - memcpy(*da->name, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + cfg = *da->name; + + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + + i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (; i < *da->nr; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < *da->nr; i++) + cfg[i-1].next = &cfg[i]; } -DEFINE_DYN_ARRAY(irq_cfg, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); +static struct irq_cfg *irq_cfgx; +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); + +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + struct irq_cfg *cfg; + + BUG_ON(irq == -1U); + + cfg = &irq_cfgx[0]; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + if (cfg->irq == -1U) + return NULL; + + cfg = cfg->next; + } + + return NULL; +} + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + struct irq_cfg *cfg, *cfg_pri; + int i; + int count = 0; + + BUG_ON(irq == -1U); + + cfg_pri = cfg = &irq_cfgx[0]; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + if (cfg->irq == -1U) { + cfg->irq = irq; + return cfg; + } + cfg_pri = cfg; + cfg = cfg->next; + count++; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + + if (after_bootmem) + cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0); + + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); + + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + cfg->irq = irq; + cfg_pri->next = cfg; + + return cfg; +} static int assign_irq_vector(int irq, cpumask_t mask); @@ -341,7 +443,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; unsigned int dest; cpumask_t tmp; @@ -381,6 +483,8 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) struct irq_pin_list *entry = irq_2_pin + irq; BUG_ON(irq >= nr_irqs); + irq_cfg_alloc(irq); + while (entry->next) entry = irq_2_pin + entry->next; @@ -819,7 +923,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) struct irq_cfg *cfg; BUG_ON((unsigned)irq >= nr_irqs); - cfg = &irq_cfg[irq]; + cfg = irq_cfg(irq); /* Only try and allocate irqs on cpus that are present */ cpus_and(mask, mask, cpu_online_map); @@ -893,7 +997,7 @@ static void __clear_irq_vector(int irq) int cpu, vector; BUG_ON((unsigned)irq >= nr_irqs); - cfg = &irq_cfg[irq]; + cfg = irq_cfg(irq); BUG_ON(!cfg->vector); vector = cfg->vector; @@ -913,17 +1017,23 @@ void __setup_vector_irq(int cpu) /* Mark the inuse vectors */ for (irq = 0; irq < nr_irqs; ++irq) { - if (!cpu_isset(cpu, irq_cfg[irq].domain)) + struct irq_cfg *cfg = irq_cfg(irq); + + if (!cpu_isset(cpu, cfg->domain)) continue; - vector = irq_cfg[irq].vector; + vector = cfg->vector; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ for (vector = 0; vector < NR_VECTORS; ++vector) { + struct irq_cfg *cfg; + irq = per_cpu(vector_irq, cpu)[vector]; if (irq < 0) continue; - if (!cpu_isset(cpu, irq_cfg[irq].domain)) + + cfg = irq_cfg(irq); + if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } } @@ -1029,13 +1139,15 @@ static int setup_ioapic_entry(int apic, int irq, static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, int trigger, int polarity) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct IO_APIC_route_entry entry; cpumask_t mask; if (!IO_APIC_IRQ(irq)) return; + cfg = irq_cfg(irq); + mask = TARGET_CPUS; if (assign_irq_vector(irq, mask)) return; @@ -1553,7 +1665,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) static int ioapic_retrigger_irq(unsigned int irq) { - struct irq_cfg *cfg = &irq_cfg[irq]; + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; spin_lock_irqsave(&vector_lock, flags); @@ -1600,7 +1712,7 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); */ static void migrate_ioapic_irq(int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct irq_desc *desc; cpumask_t tmp, cleanup_mask; struct irte irte; @@ -1618,6 +1730,7 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -1735,7 +1848,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) continue; desc = irq_to_desc(irq); - cfg = irq_cfg + irq; + cfg = irq_cfg(irq); spin_lock(&desc->lock); if (!cfg->move_cleanup_count) goto unlock; @@ -1754,7 +1867,7 @@ unlock: static void irq_complete_move(unsigned int irq) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg = irq_cfg(irq); unsigned vector, me; if (likely(!cfg->move_in_progress)) @@ -1891,7 +2004,10 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for (irq = 0; irq < nr_irqs ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) { + struct irq_cfg *cfg; + + cfg = irq_cfg(irq); + if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2028,7 +2144,7 @@ static inline void __init unlock_ExtINT_logic(void) */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = irq_cfg + 0; + struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; unsigned long flags; int no_pin1 = 0; @@ -2306,14 +2422,19 @@ int create_irq(void) int irq; int new; unsigned long flags; + struct irq_cfg *cfg_new; irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_cfg[new].vector != 0) + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) continue; + /* check if need to create one */ + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; @@ -2346,7 +2467,7 @@ void destroy_irq(unsigned int irq) #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; int err; unsigned dest; cpumask_t tmp; @@ -2356,6 +2477,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms if (err) return err; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -2413,7 +2535,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; @@ -2426,6 +2548,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2448,7 +2571,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) */ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp, cleanup_mask; struct irte irte; @@ -2464,6 +2587,7 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2670,7 +2794,7 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; @@ -2683,6 +2807,7 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2749,7 +2874,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; struct irq_desc *desc; @@ -2761,6 +2886,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) if (assign_irq_vector(irq, mask)) return; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); @@ -2783,7 +2909,7 @@ static struct irq_chip ht_irq_chip = { int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { - struct irq_cfg *cfg = irq_cfg + irq; + struct irq_cfg *cfg; int err; cpumask_t tmp; @@ -2793,6 +2919,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) struct ht_irq_msg msg; unsigned dest; + cfg = irq_cfg(irq); cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); @@ -2891,6 +3018,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_cfg *cfg; if (skip_ioapic_setup == 1) return; @@ -2906,7 +3034,8 @@ void __init setup_ioapic_dest(void) * when you have too many devices, because at that time only boot * cpu is online. */ - if (!irq_cfg[irq].vector) + cfg = irq_cfg(irq); + if (!cfg->vector) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); -- cgit v1.2.3 From e5a53714acfc7b5f868d07d27c5f02cb00b118db Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:07 -0700 Subject: x86: put irq_2_pin pointer into irq_cfg preallocate 32 irq_2_pin entries, and use get_one_free_irq_2_pin() to get one more and link to irq_cfg if needed. so don't waste one where no irq is enabled. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 161 ++++++++++++++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 858c37a31a2..51ef7eb75f2 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -58,10 +58,11 @@ #define __apicdebuginit(type) static type __init struct irq_cfg; - +struct irq_pin_list; struct irq_cfg { unsigned int irq; struct irq_cfg *next; + struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; unsigned move_cleanup_count; @@ -252,13 +253,66 @@ int pin_map_size; * between pins and IRQs. */ -static struct irq_pin_list { - short apic, pin; - int next; -} *irq_2_pin; +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *irq_2_pin_head; +/* fill one page ? */ +static int nr_irq_2_pin = 0x100; +static struct irq_pin_list *irq_2_pin_ptr; +static void __init irq_2_pin_init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_pin_list *pin; + int i; + + pin = *da->name; + + for (i = 1; i < *da->nr; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = &pin[0]; +} +DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); + +static struct irq_pin_list *get_one_free_irq_2_pin(void) +{ + struct irq_pin_list *pin; + int i; + + pin = irq_2_pin_ptr; + + if (pin) { + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); + + if (after_bootmem) + pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, + GFP_ATOMIC); + else + pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * + nr_irq_2_pin, PAGE_SIZE, 0); + + if (!pin) + panic("can not get more irq_2_pin\n"); -DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, sizeof(struct irq_pin_list), NULL); + for (i = 1; i < nr_irq_2_pin; i++) + pin[i-1].next = &pin[i]; + irq_2_pin_ptr = pin->next; + pin->next = NULL; + + return pin; +} struct io_apic { unsigned int index; @@ -300,16 +354,17 @@ static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); spin_lock_irqsave(&ioapic_lock, flags); - entry = irq_2_pin + irq; + entry = cfg->irq_2_pin; for (;;) { unsigned int reg; int pin; - pin = entry->pin; - if (pin == -1) + if (!entry) break; + pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (reg & IO_APIC_REDIR_REMOTE_IRR) { @@ -318,7 +373,7 @@ static bool io_apic_level_ack_pending(unsigned int irq) } if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } spin_unlock_irqrestore(&ioapic_lock, flags); @@ -339,21 +394,24 @@ static inline void io_apic_sync(unsigned int apic) \ { \ int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ \ BUG_ON(irq >= nr_irqs); \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ for (;;) { \ unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ + if (!entry) \ break; \ + pin = entry->pin; \ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ io_apic_modify(entry->apic, reg); \ FINAL; \ if (!entry->next) \ break; \ - entry = irq_2_pin + entry->next; \ + entry = entry->next; \ } \ } @@ -416,15 +474,20 @@ static void ioapic_mask_entry(int apic, int pin) static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) { int apic, pin; - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; BUG_ON(irq >= nr_irqs); + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; for (;;) { unsigned int reg; + + if (!entry) + break; + apic = entry->apic; pin = entry->pin; - if (pin == -1) - break; /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. @@ -437,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) io_apic_modify(apic, reg); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } } @@ -480,22 +543,35 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) int first_free_entry; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; BUG_ON(irq >= nr_irqs); - irq_cfg_alloc(irq); + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { + entry = get_one_free_irq_2_pin(); + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); + return; + } - while (entry->next) - entry = irq_2_pin + entry->next; + while (entry->next) { + /* not again, please */ + if (entry->apic == apic && entry->pin == pin) + return; - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= pin_map_size) - panic("io_apic.c: ran out of irq_2_pin entries!"); + entry = entry->next; } + + entry->next = get_one_free_irq_2_pin(); + entry = entry->next; entry->apic = apic; entry->pin = pin; + printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); } /* @@ -505,17 +581,24 @@ static void __init replace_pin_at_irq(unsigned int irq, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; - while (1) { + while (entry) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - } - if (!entry->next) + replaced = 1; + /* every one is different, right? */ break; - entry = irq_2_pin + entry->next; + } + entry = entry->next; } + + /* why? call replace before add? */ + if (!replaced) + add_pin_to_irq(irq, newapic, newpin); } @@ -1326,15 +1409,16 @@ __apicdebuginit(void) print_IO_APIC(void) } printk(KERN_DEBUG "IRQ to pin mappings:\n"); for (i = 0; i < nr_irqs; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) + struct irq_cfg *cfg = irq_cfg(i); + struct irq_pin_list *entry = cfg->irq_2_pin; + if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", i); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } printk("\n"); } @@ -1495,14 +1579,9 @@ void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; - int i, apic; + int apic; unsigned long flags; - for (i = 0; i < pin_map_size; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } - /* * The number of IO-APIC IRQ registers (== #pins): */ -- cgit v1.2.3 From 7f95ec9e4c12fd067febfd57532da1166d75d858 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:09 -0700 Subject: x86: move kstat_irqs from kstat to irq_desc based on Eric's patch ... together mold it with dyn_array for irq_desc, will allcate kstat_irqs for nr_irq_desc alltogether if needed. -- at that point nr_cpus is known already. v2: make sure system without generic_hardirqs works they don't have irq_desc v3: fix merging v4: [mingo@elte.hu] fix typo [ mingo@elte.hu ] irq: build fix fix: arch/x86/xen/spinlock.c: In function 'xen_spin_lock_slow': arch/x86/xen/spinlock.c:90: error: 'struct kernel_stat' has no member named 'irqs' Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 2 +- arch/x86/kernel/irq_32.c | 4 ++-- arch/x86/kernel/irq_64.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/xen/spinlock.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index c2160cfdec9..204884b1415 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -526,7 +526,7 @@ static void do_irq_balance(void) if (package_index == i) IRQ_DELTA(package_index, j) = 0; /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_cpu(i).irqs[j]; + value_now = (unsigned long) kstat_irqs_cpu(j, i); /* Determine the activity per processor per IRQ */ delta = value_now - LAST_CPU_IRQ(i, j); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index ede513be517..576c5df6cad 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -280,7 +280,7 @@ int show_interrupts(struct seq_file *p, void *v) any_count = kstat_irqs(i); #else for_each_online_cpu(j) - any_count |= kstat_cpu(j).irqs[i]; + any_count |= kstat_irqs_cpu(i, j); #endif action = desc->action; if (!action && !any_count) @@ -290,7 +290,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %8s", desc->chip->name); seq_printf(p, "-%-8s", desc->name); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 738eb65a924..4a0a4eb44dc 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -90,7 +90,7 @@ int show_interrupts(struct seq_file *p, void *v) any_count = kstat_irqs(i); #else for_each_online_cpu(j) - any_count |= kstat_cpu(j).irqs[i]; + any_count |= kstat_irqs_cpu(i, j); #endif action = desc->action; if (!action && !any_count) @@ -100,7 +100,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_irqs(i)); #else for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif seq_printf(p, " %8s", desc->chip->name); seq_printf(p, "-%-8s", desc->name); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 9d85ab38443..817aa55a120 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) /* * handle this 'virtual interrupt' as a Cobalt one now. */ - kstat_cpu(smp_processor_id()).irqs[realirq]++; + kstat_irqs_this_cpu(desc)++; if (likely(desc->action != NULL)) handle_IRQ_event(realirq, desc->action); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index dd71e3a021c..bb6bc721b13 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ - kstat_this_cpu.irqs[irq]++; + kstat_irqs_this_cpu(irq_to_desc(irq))++; out: raw_local_irq_restore(flags); -- cgit v1.2.3 From 2c6927a38f65b53b62f86158fba29a068c4e8b6a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:11 -0700 Subject: irq: replace loop with nr_irqs with for_each_irq_desc There are a handful of loops that go from 0 to nr_irqs and use get_irq_desc() on them. These would allocate all the irq_desc entries, regardless of the need for them. Use the smarter for_each_irq_desc() iterator that will only iterate over the present ones. v2: make sure arch without GENERIC_HARDIRQS work too Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 6 +++--- arch/x86/kernel/irq_64.c | 5 ++--- arch/x86/kernel/irqinit_64.c | 17 +++++------------ 3 files changed, 10 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 51ef7eb75f2..708be9724da 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1871,10 +1871,10 @@ unmask: static void ir_irq_migration(struct work_struct *work) { - int irq; + unsigned int irq; + struct irq_desc *desc; - for (irq = 0; irq < nr_irqs; irq++) { - struct irq_desc *desc = irq_to_desc(irq); + for_each_irq_desc(irq, desc) { if (desc->status & IRQ_MOVE_PENDING) { unsigned long flags; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4a0a4eb44dc..b3cf55e325f 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -224,17 +224,16 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; + struct irq_desc *desc; - for (irq = 0; irq < nr_irqs; irq++) { + for_each_irq_desc(irq, desc) { cpumask_t mask; int break_affinity = 0; int set_affinity = 1; - struct irq_desc *desc; if (irq == 2) continue; - desc = irq_to_desc(irq); /* interrupt's are disabled at this point */ spin_lock(&desc->lock); diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 0744b49b4d1..cd9f42d028d 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -142,25 +142,18 @@ void __init init_ISA_irqs(void) init_bsp_APIC(); init_8259A(0); - for (i = 0; i < nr_irqs; i++) { + for (i = 0; i < 16; i++) { struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; desc->action = NULL; desc->depth = 1; - if (i < 16) { - /* - * 16 old-style INTA-cycle interrupts: - */ - set_irq_chip_and_handler_name(i, &i8259A_chip, + /* + * 16 old-style INTA-cycle interrupts: + */ + set_irq_chip_and_handler_name(i, &i8259A_chip, handle_level_irq, "XT"); - } else { - /* - * 'high' PCI IRQs filled in on demand - */ - desc->chip = &no_irq_chip; - } } } -- cgit v1.2.3 From 46b8214d12c274bd4265aae482ab7ffe69d94818 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:13 -0700 Subject: x86, ioapic: replace loop with nr_irqs with for_each_irq_icfg Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 708be9724da..60d60061659 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -129,6 +129,9 @@ static void __init init_work(void *data) cfg[i-1].next = &cfg[i]; } +#define for_each_irq_cfg(cfg) \ + for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next) + static struct irq_cfg *irq_cfgx; DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); @@ -1097,20 +1100,18 @@ void __setup_vector_irq(int cpu) /* Initialize vector_irq on a new cpu */ /* This function must be called with vector_lock held */ int irq, vector; + struct irq_cfg *cfg; /* Mark the inuse vectors */ - for (irq = 0; irq < nr_irqs; ++irq) { - struct irq_cfg *cfg = irq_cfg(irq); - + for_each_irq_cfg(cfg) { if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; + irq = cfg->irq; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ for (vector = 0; vector < NR_VECTORS; ++vector) { - struct irq_cfg *cfg; - irq = per_cpu(vector_irq, cpu)[vector]; if (irq < 0) continue; @@ -1340,6 +1341,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; unsigned long flags; + struct irq_cfg *cfg; if (apic_verbosity == APIC_QUIET) return; @@ -1408,12 +1410,11 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < nr_irqs; i++) { - struct irq_cfg *cfg = irq_cfg(i); + for_each_irq_cfg(cfg) { struct irq_pin_list *entry = cfg->irq_2_pin; if (!entry) continue; - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", cfg->irq); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -2070,6 +2071,7 @@ static inline void init_IO_APIC_traps(void) { int irq; struct irq_desc *desc; + struct irq_cfg *cfg; /* * NOTE! The local APIC isn't very good at handling @@ -2082,10 +2084,8 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < nr_irqs ; irq++) { - struct irq_cfg *cfg; - - cfg = irq_cfg(irq); + for_each_irq_cfg(cfg) { + irq = cfg->irq; if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, -- cgit v1.2.3 From 7d94f7ca401dd7f445fda9a971a48aa5427b3e55 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:14 -0700 Subject: irq: remove >= nr_irqs checking with config_have_sparse_irq remove irq limit checks - nr_irqs is dynamic and we expand anytime. v2: fix checking about result irq_cfg_without_new, so could use msi again v3: use irq_desc_without_new to check irq is valid Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 9 --------- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 60d60061659..1b8cccb5ba2 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -400,7 +400,6 @@ static inline void io_apic_sync(unsigned int apic) struct irq_cfg *cfg; \ struct irq_pin_list *entry; \ \ - BUG_ON(irq >= nr_irqs); \ cfg = irq_cfg(irq); \ entry = cfg->irq_2_pin; \ for (;;) { \ @@ -480,7 +479,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) struct irq_cfg *cfg; struct irq_pin_list *entry; - BUG_ON(irq >= nr_irqs); cfg = irq_cfg(irq); entry = cfg->irq_2_pin; for (;;) { @@ -549,7 +547,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) struct irq_cfg *cfg; struct irq_pin_list *entry; - BUG_ON(irq >= nr_irqs); /* first time to refer irq_cfg, so with new */ cfg = irq_cfg_alloc(irq); entry = cfg->irq_2_pin; @@ -841,7 +838,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) best_guess = irq; } } - BUG_ON(best_guess >= nr_irqs); return best_guess; } @@ -973,7 +969,6 @@ static int pin_2_irq(int idx, int apic, int pin) irq += nr_ioapic_registers[i++]; irq += pin; } - BUG_ON(irq >= nr_irqs); return irq; } @@ -1008,7 +1003,6 @@ static int __assign_irq_vector(int irq, cpumask_t mask) int cpu; struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= nr_irqs); cfg = irq_cfg(irq); /* Only try and allocate irqs on cpus that are present */ @@ -1082,7 +1076,6 @@ static void __clear_irq_vector(int irq) cpumask_t mask; int cpu, vector; - BUG_ON((unsigned)irq >= nr_irqs); cfg = irq_cfg(irq); BUG_ON(!cfg->vector); @@ -1924,8 +1917,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) struct irq_desc *desc; struct irq_cfg *cfg; irq = __get_cpu_var(vector_irq)[vector]; - if (irq >= nr_irqs) - continue; desc = irq_to_desc(irq); cfg = irq_cfg(irq); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index b3cf55e325f..a3e36336d91 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -202,7 +202,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - if (likely(irq < nr_irqs)) + if (likely(__irq_to_desc(irq))) generic_handle_irq(irq); else { if (!disable_apic) -- cgit v1.2.3 From 46926b67fc663d357a1a8174328998a9e49da0b8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:15 -0700 Subject: generic: add irq_desc in function in parameter So we could remove some duplicated calling to irq_desc v2: make sure irq_desc in init/main.c is not used without generic_hardirqs Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index a3e36336d91..f58b995b30e 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -189,6 +189,7 @@ u64 arch_irq_stat(void) asmlinkage unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; /* high bit used in ret_from_ code */ unsigned vector = ~regs->orig_ax; @@ -202,8 +203,9 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - if (likely(__irq_to_desc(irq))) - generic_handle_irq(irq); + desc = __irq_to_desc(irq); + if (likely(desc)) + generic_handle_irq_desc(irq, desc); else { if (!disable_apic) ack_APIC_irq(); -- cgit v1.2.3 From 1d5f6b36c4736af1dac396d6267eb53dcc8c0021 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:16 -0700 Subject: x86: check with without_new in show_interrupts so we don't get new one that we don't need it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f58b995b30e..f337f87c1e1 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -83,7 +83,10 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; - struct irq_desc *desc = irq_to_desc(i); + struct irq_desc *desc = __irq_to_desc(i); + + if (!desc) + return 0; spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP -- cgit v1.2.3 From cb5bc83225a86ca53bbb889ed8439e4fd6cf44ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:17 -0700 Subject: x86_64: rename irq_desc/irq_desc_alloc change names: irq_desc() ==> irq_desc_alloc __irq_desc() ==> irq_desc Also split a few of the uses in lowlevel x86 code. v2: need to check if desc is null in smp_irq_move_cleanup Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 10 +++++++++- arch/x86/kernel/irq_64.c | 4 ++-- arch/x86/kernel/irqinit_64.c | 3 ++- 3 files changed, 13 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 1b8cccb5ba2..a054db9ef19 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1124,7 +1124,12 @@ static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; - desc = irq_to_desc(irq); + /* first time to use this irq_desc */ + if (irq < 16) + desc = irq_to_desc(irq); + else + desc = irq_to_desc_alloc(irq); + if (trigger) desc->status |= IRQ_LEVEL; else @@ -1919,6 +1924,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) irq = __get_cpu_var(vector_irq)[vector]; desc = irq_to_desc(irq); + if (!desc) + continue; + cfg = irq_cfg(irq); spin_lock(&desc->lock); if (!cfg->move_cleanup_count) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index f337f87c1e1..5d5976e0311 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -83,7 +83,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i < nr_irqs) { unsigned any_count = 0; - struct irq_desc *desc = __irq_to_desc(i); + struct irq_desc *desc = irq_to_desc(i); if (!desc) return 0; @@ -206,7 +206,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) stack_overflow_check(regs); #endif - desc = __irq_to_desc(irq); + desc = irq_to_desc(irq); if (likely(desc)) generic_handle_irq_desc(irq, desc); else { diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index cd9f42d028d..d17fbc26d96 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -143,7 +143,8 @@ void __init init_ISA_irqs(void) init_8259A(0); for (i = 0; i < 16; i++) { - struct irq_desc *desc = irq_to_desc(i); + /* first time call this irq_desc */ + struct irq_desc *desc = irq_to_desc_alloc(i); desc->status = IRQ_DISABLED; desc->action = NULL; -- cgit v1.2.3 From a2f9f43858db64cb8b45c4f6746d7a52b80d4dcb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:19 -0700 Subject: x86_64: separate irq_cfgx from irq_cfgx_free so later don't need to compare with -1U Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 92 ++++++++++++++++++++++++++++---------------- 1 file changed, 59 insertions(+), 33 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index a054db9ef19..8ab7ae01773 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -111,44 +111,44 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); } +static struct irq_cfg *irq_cfgx; +static struct irq_cfg *irq_cfgx_free; static void __init init_work(void *data) { struct dyn_array *da = data; struct irq_cfg *cfg; + int legacy_count; int i; cfg = *da->name; memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - i = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); - for (; i < *da->nr; i++) + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); for (i = 1; i < *da->nr; i++) cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = &irq_cfgx[legacy_count]; + irq_cfgx[legacy_count - 1].next = NULL; } #define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg && cfg->irq != -1U; cfg = cfg->next) + for (cfg = irq_cfgx; cfg; cfg = cfg->next) -static struct irq_cfg *irq_cfgx; DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); static struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg; - BUG_ON(irq == -1U); - - cfg = &irq_cfgx[0]; + cfg = irq_cfgx; while (cfg) { if (cfg->irq == irq) return cfg; - if (cfg->irq == -1U) - return NULL; - cfg = cfg->next; } @@ -161,44 +161,70 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) int i; int count = 0; - BUG_ON(irq == -1U); - - cfg_pri = cfg = &irq_cfgx[0]; + cfg_pri = cfg = irq_cfgx; while (cfg) { if (cfg->irq == irq) return cfg; - if (cfg->irq == -1U) { - cfg->irq = irq; - return cfg; - } cfg_pri = cfg; cfg = cfg->next; count++; } - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + if (!irq_cfgx_free) { + unsigned long phys; + unsigned long total_bytes; + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - if (after_bootmem) - cfg = kzalloc(sizeof(struct irq_cfg)*nr_irq_cfg, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(sizeof(struct irq_cfg)*nr_irq_cfg, PAGE_SIZE, 0); + total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; + if (after_bootmem) + cfg = kzalloc(total_bytes, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); + phys = __pa(cfg); + printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); - cfg->irq = irq; - cfg_pri->next = cfg; + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = cfg; + } + cfg = irq_cfgx_free; + irq_cfgx_free = irq_cfgx_free->next; + cfg->next = NULL; + if (cfg_pri) + cfg_pri->next = cfg; + else + irq_cfgx = cfg; + cfg->irq = irq; + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); +#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG + { + /* dump the results */ + struct irq_cfg *cfg; + unsigned long phys; + unsigned long bytes = sizeof(struct irq_cfg); + + printk(KERN_DEBUG "=========================== %d\n", irq); + printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); + for_each_irq_cfg(cfg) { + phys = __pa(cfg); + printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); + } + printk(KERN_DEBUG "===========================\n"); + } +#endif return cfg; } -- cgit v1.2.3 From 52b17329d6d0a4824b89206803a430915031ff23 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:20 -0700 Subject: x86_64: make /proc/interrupts work with dyn irq_desc loop with irq_desc list Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_64.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 5d5976e0311..7bd841a9c64 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -70,9 +70,27 @@ static inline void stack_overflow_check(struct pt_regs *regs) int show_interrupts(struct seq_file *p, void *v) { - int i = *(loff_t *) v, j; + int i, j; struct irqaction * action; unsigned long flags; + unsigned int entries; + struct irq_desc *desc; + int tail = 0; + +#ifdef CONFIG_HAVE_SPARSE_IRQ + desc = (struct irq_desc *)v; + entries = -1U; + i = desc->irq; + if (!desc->next) + tail = 1; +#else + entries = nr_irqs - 1; + i = *(loff_t *) v; + if (i == nr_irqs) + tail = 1; + else + desc = irq_to_desc(i); +#endif if (i == 0) { seq_printf(p, " "); @@ -81,12 +99,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < nr_irqs) { + if (i <= entries) { unsigned any_count = 0; - struct irq_desc *desc = irq_to_desc(i); - - if (!desc) - return 0; spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP @@ -116,7 +130,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == nr_irqs) { + } + + if (tail) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); @@ -155,6 +171,7 @@ skip: seq_printf(p, " Spurious interrupts\n"); seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); } + return 0; } -- cgit v1.2.3 From 6d50bc26836e16a9589e0b128d527c29e30d722a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:22 -0700 Subject: x86: use 28 bits irq NR for pci msi/msix and ht also print out irq no in /proc/interrups and /proc/stat in hex, so could tell bus/dev/func. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 64 ++++++++++++++++++++++++++++++++++---------- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 51 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 8ab7ae01773..b0d4abc55a1 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -2520,17 +2520,21 @@ device_initcall(ioapic_init_sysfs); /* * Dynamic irq allocate and deallocation */ -int create_irq(void) +unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - int irq; - int new; + unsigned int irq; + unsigned int new; unsigned long flags; struct irq_cfg *cfg_new; - irq = -ENOSPC; +#ifndef CONFIG_HAVE_SPARSE_IRQ + irq_want = nr_irqs - 1; +#endif + + irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = (nr_irqs - 1); new >= 0; new--) { + for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; cfg_new = irq_cfg(new); @@ -2545,12 +2549,24 @@ int create_irq(void) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq >= 0) { + if (irq > 0) { dynamic_irq_init(irq); } return irq; } +int create_irq(void) +{ + int irq; + + irq = create_irq_nr(nr_irqs - 1); + + if (irq == 0) + irq = -1; + + return irq; +} + void destroy_irq(unsigned int irq) { unsigned long flags; @@ -2803,13 +2819,29 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) return 0; } +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { - int irq, ret; + unsigned int irq; + int ret; + unsigned int irq_want; - irq = create_irq(); - if (irq < 0) - return irq; + irq_want = build_irq_for_pci_dev(dev) + 0x100; + + irq = create_irq_nr(irq_want); + if (irq == 0) + return -1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) @@ -2836,18 +2868,22 @@ error: int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, ret, sub_handle; + unsigned int irq; + int ret, sub_handle; struct msi_desc *desc; + unsigned int irq_want; + #ifdef CONFIG_INTR_REMAP struct intel_iommu *iommu = 0; int index = 0; #endif + irq_want = build_irq_for_pci_dev(dev) + 0x100; sub_handle = 0; list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq(); - if (irq < 0) - return irq; + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; #ifdef CONFIG_INTR_REMAP if (!intr_remapping_enabled) goto no_ir; diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 7bd841a9c64..348a11168c2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%3d: ",i); + seq_printf(p, "%#x: ",i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else -- cgit v1.2.3 From 8b8e8c1bf7275eca859fe551dfa484134eaf013b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:23 -0700 Subject: x86: remove irqbalance in kernel for 32 bit This has been deprecated for years, the user space irqbalanced utility works better with numa, has configurable policies, etc... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 8 - arch/x86/configs/i386_defconfig | 1 - arch/x86/kernel/io_apic_32.c | 402 ---------------------------------------- arch/x86/kernel/quirks.c | 3 - 4 files changed, 414 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1004888e9b1..3e0eaaa1a33 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,14 +1254,6 @@ config EFI resultant kernel should continue to boot on existing non-EFI platforms. -config IRQBALANCE - def_bool y - prompt "Enable kernel irq balancing" - depends on X86_32 && SMP && X86_IO_APIC - help - The default yes will allow the kernel to do irq load balancing. - Saying no will keep the kernel from doing irq load balancing. - config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 52d0359719d..13b8c86ae98 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -287,7 +287,6 @@ CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set CONFIG_X86_PAT=y CONFIG_EFI=y -# CONFIG_IRQBALANCE is not set CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set # CONFIG_HZ_250 is not set diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 204884b1415..668edf22606 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -371,408 +371,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) spin_unlock_irqrestore(&ioapic_lock, flags); } -#if defined(CONFIG_IRQBALANCE) -# include /* kernel_thread() */ -# include /* kstat */ -# include /* kmalloc() */ -# include - -#define IRQBALANCE_CHECK_ARCH -999 -#define MAX_BALANCED_IRQ_INTERVAL (5*HZ) -#define MIN_BALANCED_IRQ_INTERVAL (HZ/2) -#define BALANCED_IRQ_MORE_DELTA (HZ/10) -#define BALANCED_IRQ_LESS_DELTA (HZ) - -static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH; -static int physical_balance __read_mostly; -static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL; - -static struct irq_cpu_info { - unsigned long *last_irq; - unsigned long *irq_delta; - unsigned long irq; -} irq_cpu_data[NR_CPUS]; - -#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq) -#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq]) -#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq]) - -#define IDLE_ENOUGH(cpu,now) \ - (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1)) - -#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) - -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) - -static cpumask_t balance_irq_affinity_init __initdata = CPU_MASK_ALL; - -static cpumask_t *balance_irq_affinity; - - -static void __init irq_affinity_init_work(void *data) -{ - struct dyn_array *da = data; - - int i; - struct balance_irq_affinity *affinity; - - affinity = *da->name; - - for (i = 0; i < *da->nr; i++) - memcpy(&affinity[i], &balance_irq_affinity_init, - sizeof(struct balance_irq_affinity)); - -} - -DEFINE_DYN_ARRAY(balance_irq_affinity, sizeof(struct balance_irq_affinity), nr_irqs, PAGE_SIZE, irq_affinity_init_work); - - -void set_balance_irq_affinity(unsigned int irq, cpumask_t mask) -{ - balance_irq_affinity[irq] = mask; -} - -static unsigned long move(int curr_cpu, cpumask_t allowed_mask, - unsigned long now, int direction) -{ - int search_idle = 1; - int cpu = curr_cpu; - - goto inside; - - do { - if (unlikely(cpu == curr_cpu)) - search_idle = 0; -inside: - if (direction == 1) { - cpu++; - if (cpu >= NR_CPUS) - cpu = 0; - } else { - cpu--; - if (cpu == -1) - cpu = NR_CPUS-1; - } - } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) || - (search_idle && !IDLE_ENOUGH(cpu, now))); - - return cpu; -} - -static inline void balance_irq(int cpu, int irq) -{ - unsigned long now = jiffies; - cpumask_t allowed_mask; - unsigned int new_cpu; - - if (irqbalance_disabled) - return; - - cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]); - new_cpu = move(cpu, allowed_mask, now, 1); - if (cpu != new_cpu) - set_pending_irq(irq, cpumask_of_cpu(new_cpu)); -} - -static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) -{ - int i, j; - struct irq_desc *desc; - - for_each_online_cpu(i) { - for (j = 0; j < nr_irqs; j++) { - desc = irq_to_desc(j); - if (!desc->action) - continue; - /* Is it a significant load ? */ - if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) < - useful_load_threshold) - continue; - balance_irq(i, j); - } - } - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; -} - -static void do_irq_balance(void) -{ - int i, j; - unsigned long max_cpu_irq = 0, min_cpu_irq = (~0); - unsigned long move_this_load = 0; - int max_loaded = 0, min_loaded = 0; - int load; - unsigned long useful_load_threshold = balanced_irq_interval + 10; - int selected_irq; - int tmp_loaded, first_attempt = 1; - unsigned long tmp_cpu_irq; - unsigned long imbalance = 0; - cpumask_t allowed_mask, target_cpu_mask, tmp; - struct irq_desc *desc; - - for_each_possible_cpu(i) { - int package_index; - CPU_IRQ(i) = 0; - if (!cpu_online(i)) - continue; - package_index = CPU_TO_PACKAGEINDEX(i); - for (j = 0; j < nr_irqs; j++) { - unsigned long value_now, delta; - /* Is this an active IRQ or balancing disabled ? */ - desc = irq_to_desc(j); - if (!desc->action || irq_balancing_disabled(j)) - continue; - if (package_index == i) - IRQ_DELTA(package_index, j) = 0; - /* Determine the total count per processor per IRQ */ - value_now = (unsigned long) kstat_irqs_cpu(j, i); - - /* Determine the activity per processor per IRQ */ - delta = value_now - LAST_CPU_IRQ(i, j); - - /* Update last_cpu_irq[][] for the next time */ - LAST_CPU_IRQ(i, j) = value_now; - - /* Ignore IRQs whose rate is less than the clock */ - if (delta < useful_load_threshold) - continue; - /* update the load for the processor or package total */ - IRQ_DELTA(package_index, j) += delta; - - /* Keep track of the higher numbered sibling as well */ - if (i != package_index) - CPU_IRQ(i) += delta; - /* - * We have sibling A and sibling B in the package - * - * cpu_irq[A] = load for cpu A + load for cpu B - * cpu_irq[B] = load for cpu B - */ - CPU_IRQ(package_index) += delta; - } - } - /* Find the least loaded processor package */ - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (min_cpu_irq > CPU_IRQ(i)) { - min_cpu_irq = CPU_IRQ(i); - min_loaded = i; - } - } - max_cpu_irq = ULONG_MAX; - -tryanothercpu: - /* - * Look for heaviest loaded processor. - * We may come back to get the next heaviest loaded processor. - * Skip processors with trivial loads. - */ - tmp_cpu_irq = 0; - tmp_loaded = -1; - for_each_online_cpu(i) { - if (i != CPU_TO_PACKAGEINDEX(i)) - continue; - if (max_cpu_irq <= CPU_IRQ(i)) - continue; - if (tmp_cpu_irq < CPU_IRQ(i)) { - tmp_cpu_irq = CPU_IRQ(i); - tmp_loaded = i; - } - } - - if (tmp_loaded == -1) { - /* - * In the case of small number of heavy interrupt sources, - * loading some of the cpus too much. We use Ingo's original - * approach to rotate them around. - */ - if (!first_attempt && imbalance >= useful_load_threshold) { - rotate_irqs_among_cpus(useful_load_threshold); - return; - } - goto not_worth_the_effort; - } - - first_attempt = 0; /* heaviest search */ - max_cpu_irq = tmp_cpu_irq; /* load */ - max_loaded = tmp_loaded; /* processor */ - imbalance = (max_cpu_irq - min_cpu_irq) / 2; - - /* - * if imbalance is less than approx 10% of max load, then - * observe diminishing returns action. - quit - */ - if (imbalance < (max_cpu_irq >> 3)) - goto not_worth_the_effort; - -tryanotherirq: - /* if we select an IRQ to move that can't go where we want, then - * see if there is another one to try. - */ - move_this_load = 0; - selected_irq = -1; - for (j = 0; j < nr_irqs; j++) { - /* Is this an active IRQ? */ - desc = irq_to_desc(j); - if (!desc->action) - continue; - if (imbalance <= IRQ_DELTA(max_loaded, j)) - continue; - /* Try to find the IRQ that is closest to the imbalance - * without going over. - */ - if (move_this_load < IRQ_DELTA(max_loaded, j)) { - move_this_load = IRQ_DELTA(max_loaded, j); - selected_irq = j; - } - } - if (selected_irq == -1) - goto tryanothercpu; - - imbalance = move_this_load; - - /* For physical_balance case, we accumulated both load - * values in the one of the siblings cpu_irq[], - * to use the same code for physical and logical processors - * as much as possible. - * - * NOTE: the cpu_irq[] array holds the sum of the load for - * sibling A and sibling B in the slot for the lowest numbered - * sibling (A), _AND_ the load for sibling B in the slot for - * the higher numbered sibling. - * - * We seek the least loaded sibling by making the comparison - * (A+B)/2 vs B - */ - load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { - if (load > CPU_IRQ(j)) { - /* This won't change cpu_sibling_map[min_loaded] */ - load = CPU_IRQ(j); - min_loaded = j; - } - } - - cpus_and(allowed_mask, - cpu_online_map, - balance_irq_affinity[selected_irq]); - target_cpu_mask = cpumask_of_cpu(min_loaded); - cpus_and(tmp, target_cpu_mask, allowed_mask); - - if (!cpus_empty(tmp)) { - /* mark for change destination */ - set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); - - /* Since we made a change, come back sooner to - * check for more variation. - */ - balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL, - balanced_irq_interval - BALANCED_IRQ_LESS_DELTA); - return; - } - goto tryanotherirq; - -not_worth_the_effort: - /* - * if we did not find an IRQ to move, then adjust the time interval - * upward - */ - balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL, - balanced_irq_interval + BALANCED_IRQ_MORE_DELTA); - return; -} - -static int balanced_irq(void *unused) -{ - int i; - unsigned long prev_balance_time = jiffies; - long time_remaining = balanced_irq_interval; - struct irq_desc *desc; - - /* push everything to CPU 0 to give us a starting point. */ - for (i = 0 ; i < nr_irqs ; i++) { - desc = irq_to_desc(i); - desc->pending_mask = cpumask_of_cpu(0); - set_pending_irq(i, cpumask_of_cpu(0)); - } - - set_freezable(); - for ( ; ; ) { - time_remaining = schedule_timeout_interruptible(time_remaining); - try_to_freeze(); - if (time_after(jiffies, - prev_balance_time+balanced_irq_interval)) { - preempt_disable(); - do_irq_balance(); - prev_balance_time = jiffies; - time_remaining = balanced_irq_interval; - preempt_enable(); - } - } - return 0; -} - -static int __init balanced_irq_init(void) -{ - int i; - struct cpuinfo_x86 *c; - cpumask_t tmp; - - cpus_shift_right(tmp, cpu_online_map, 2); - c = &boot_cpu_data; - /* When not overwritten by the command line ask subarchitecture. */ - if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH) - irqbalance_disabled = NO_BALANCE_IRQ; - if (irqbalance_disabled) - return 0; - - /* disable irqbalance completely if there is only one processor online */ - if (num_online_cpus() < 2) { - irqbalance_disabled = 1; - return 0; - } - /* - * Enable physical balance only if more than 1 physical processor - * is present - */ - if (smp_num_siblings > 1 && !cpus_empty(tmp)) - physical_balance = 1; - - for_each_online_cpu(i) { - irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); - irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * nr_irqs, GFP_KERNEL); - if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { - printk(KERN_ERR "balanced_irq_init: out of memory"); - goto failed; - } - } - - printk(KERN_INFO "Starting balanced_irq\n"); - if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) - return 0; - printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); -failed: - for_each_possible_cpu(i) { - kfree(irq_cpu_data[i].irq_delta); - irq_cpu_data[i].irq_delta = NULL; - kfree(irq_cpu_data[i].last_irq); - irq_cpu_data[i].last_irq = NULL; - } - return 0; -} - -int __devinit irqbalance_disable(char *str) -{ - irqbalance_disabled = 1; - return 1; -} - -__setup("noirqbalance", irqbalance_disable); - -late_initcall(balanced_irq_init); -#endif /* CONFIG_IRQBALANCE */ #endif /* CONFIG_SMP */ #ifndef CONFIG_SMP diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index f6a11b9b1f9..67465ed8931 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -35,9 +35,6 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) if (!(word & (1 << 13))) { dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " "disabling irq balancing and affinity\n"); -#ifdef CONFIG_IRQBALANCE - irqbalance_disable(""); -#endif noirqdebug_setup(""); #ifdef CONFIG_PROC_FS no_irq_affinity = 1; -- cgit v1.2.3 From a1420f395d7721895c05ba3dedf150294d2c0e4d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:24 -0700 Subject: x86: add irq_cfg for 32bit it only contains vector ... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 71 ++++++++++++++++++++++++++++---------------- 1 file changed, 46 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 668edf22606..033ad953bee 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -94,6 +94,43 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); static int disable_timer_pin_1 __initdata; +struct irq_cfg { + u8 vector; +}; + + +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +static struct irq_cfg irq_cfg_legacy[] __initdata = { + [0] = { .vector = FIRST_DEVICE_VECTOR, }, +}; + +static void __init init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_cfg *cfg; + int legacy_count; + int i; + + cfg = *da->name; + + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + + BUG_ON(legacy_count > nr_irqs); + + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); +} + +static struct irq_cfg *irq_cfgx; +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); + +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + if (irq >= nr_irqs) + return NULL; + + return &irq_cfgx[irq]; +} + /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -794,22 +831,6 @@ static inline int IO_APIC_irq_trigger(int irq) return 0; } -/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */ -static u8 irq_vector_init_first __initdata = FIRST_DEVICE_VECTOR; -static u8 *irq_vector; - -static void __init irq_vector_init_work(void *data) -{ - struct dyn_array *da = data; - - u8 *irq_vec; - - irq_vec = *da->name; - - irq_vec[0] = irq_vector_init_first; -} - -DEFINE_DYN_ARRAY(irq_vector, sizeof(u8), nr_irqs, PAGE_SIZE, irq_vector_init_work); static int __assign_irq_vector(int irq) { @@ -818,8 +839,8 @@ static int __assign_irq_vector(int irq) BUG_ON((unsigned)irq >= nr_irqs); - if (irq_vector[irq] > 0) - return irq_vector[irq]; + if (irq_cfg(irq)->vector > 0) + return irq_cfg(irq)->vector; vector = current_vector; offset = current_offset; @@ -836,7 +857,7 @@ next: current_vector = vector; current_offset = offset; - irq_vector[irq] = vector; + irq_cfg(irq)->vector = vector; return vector; } @@ -1598,7 +1619,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro */ - i = irq_vector[irq]; + i = irq_cfg(irq)->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); @@ -1615,7 +1636,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_vector[irq]); + send_IPI_self(irq_cfg(irq)->vector); return 1; } @@ -1651,7 +1672,7 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for (irq = 0; irq < nr_irqs ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_vector[irq]) { + if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2102,7 +2123,7 @@ int create_irq(void) for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_vector[new] != 0) + if (irq_cfg(new)->vector != 0) continue; vector = __assign_irq_vector(new); if (likely(vector > 0)) @@ -2125,8 +2146,8 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - clear_bit(irq_vector[irq], used_vectors); - irq_vector[irq] = 0; + clear_bit(irq_cfg(irq)->vector, used_vectors); + irq_cfg(irq)->vector = 0; spin_unlock_irqrestore(&vector_lock, flags); } -- cgit v1.2.3 From da51a821314dd0ec7126f2bf9a62117146fbb6b9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:25 -0700 Subject: x86: make 32bit use irq_cfg_alloc, etc Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 180 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 160 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 033ad953bee..5a83d7f5b14 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -94,41 +94,172 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); static int disable_timer_pin_1 __initdata; +struct irq_cfg; + struct irq_cfg { + unsigned int irq; + struct irq_cfg *next; u8 vector; }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .vector = FIRST_DEVICE_VECTOR, }, + [0] = { .irq = 0, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .vector = IRQ15_VECTOR, }, }; +static struct irq_cfg irq_cfg_init = { .irq = -1U, }; +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; +} + +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +static void init_one_irq_cfg(struct irq_cfg *cfg) +{ + memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); +} + +static struct irq_cfg *irq_cfgx; +static struct irq_cfg *irq_cfgx_free; static void __init init_work(void *data) { - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; + struct dyn_array *da = data; + struct irq_cfg *cfg; + int legacy_count; + int i; + + cfg = *da->name; - cfg = *da->name; + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (i = legacy_count; i < *da->nr; i++) + init_one_irq_cfg(&cfg[i]); - BUG_ON(legacy_count > nr_irqs); + for (i = 1; i < *da->nr; i++) + cfg[i-1].next = &cfg[i]; - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + irq_cfgx_free = &irq_cfgx[legacy_count]; + irq_cfgx[legacy_count - 1].next = NULL; } -static struct irq_cfg *irq_cfgx; -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); +#define for_each_irq_cfg(cfg) \ + for (cfg = irq_cfgx; cfg; cfg = cfg->next) + +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); static struct irq_cfg *irq_cfg(unsigned int irq) { - if (irq >= nr_irqs) - return NULL; + struct irq_cfg *cfg; + + cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; - return &irq_cfgx[irq]; + cfg = cfg->next; + } + + return NULL; +} + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + struct irq_cfg *cfg, *cfg_pri; + int i; + int count = 0; + + cfg_pri = cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + cfg_pri = cfg; + cfg = cfg->next; + count++; + } + + if (!irq_cfgx_free) { + unsigned long phys; + unsigned long total_bytes; + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + + total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; + if (after_bootmem) + cfg = kzalloc(total_bytes, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); + + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); + + phys = __pa(cfg); + printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); + + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = cfg; + } + + cfg = irq_cfgx_free; + irq_cfgx_free = irq_cfgx_free->next; + cfg->next = NULL; + if (cfg_pri) + cfg_pri->next = cfg; + else + irq_cfgx = cfg; + cfg->irq = irq; + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); + +#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG + { + /* dump the results */ + struct irq_cfg *cfg; + unsigned long phys; + unsigned long bytes = sizeof(struct irq_cfg); + + printk(KERN_DEBUG "=========================== %d\n", irq); + printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); + for_each_irq_cfg(cfg) { + phys = __pa(cfg); + printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); + } + printk(KERN_DEBUG "===========================\n"); + } +#endif + return cfg; } /* @@ -254,6 +385,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { struct irq_pin_list *entry = irq_2_pin + irq; + irq_cfg_alloc(irq); while (entry->next) entry = irq_2_pin + entry->next; @@ -836,11 +968,13 @@ static int __assign_irq_vector(int irq) { static int current_vector = FIRST_DEVICE_VECTOR, current_offset; int vector, offset; + struct irq_cfg *cfg; BUG_ON((unsigned)irq >= nr_irqs); - if (irq_cfg(irq)->vector > 0) - return irq_cfg(irq)->vector; + cfg = irq_cfg(irq); + if (cfg->vector > 0) + return cfg->vector; vector = current_vector; offset = current_offset; @@ -857,7 +991,7 @@ next: current_vector = vector; current_offset = offset; - irq_cfg(irq)->vector = vector; + cfg->vector = vector; return vector; } @@ -1659,6 +1793,7 @@ static inline void init_IO_APIC_traps(void) { int irq; struct irq_desc *desc; + struct irq_cfg *cfg; /* * NOTE! The local APIC isn't very good at handling @@ -1671,8 +1806,9 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for (irq = 0; irq < nr_irqs ; irq++) { - if (IO_APIC_IRQ(irq) && !irq_cfg(irq)->vector) { + for_each_irq_cfg(cfg) { + irq = cfg->irq; + if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, * so default to an old-fashioned 8259 @@ -2117,14 +2253,18 @@ int create_irq(void) /* Allocate an unused irq */ int irq, new, vector = 0; unsigned long flags; + struct irq_cfg *cfg_new; irq = -ENOSPC; spin_lock_irqsave(&vector_lock, flags); for (new = (nr_irqs - 1); new >= 0; new--) { if (platform_legacy_irq(new)) continue; - if (irq_cfg(new)->vector != 0) + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) continue; + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); vector = __assign_irq_vector(new); if (likely(vector > 0)) irq = new; -- cgit v1.2.3 From 0f978f4505e96227a89b3c9447552aca983c6b57 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:26 -0700 Subject: x86: make 32bit to use irq_2_pin in irq_cfg so it is more like 64 bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 152 +++++++++++++++++++++++++++++++++---------- 1 file changed, 117 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 5a83d7f5b14..59d2e8a273e 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -95,10 +95,11 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); static int disable_timer_pin_1 __initdata; struct irq_cfg; - +struct irq_pin_list; struct irq_cfg { unsigned int irq; struct irq_cfg *next; + struct irq_pin_list *irq_2_pin; u8 vector; }; @@ -275,11 +276,66 @@ int pin_map_size; * between pins and IRQs. */ -static struct irq_pin_list { - int apic, pin, next; -} *irq_2_pin; +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *irq_2_pin_head; +/* fill one page ? */ +static int nr_irq_2_pin = 0x100; +static struct irq_pin_list *irq_2_pin_ptr; +static void __init irq_2_pin_init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_pin_list *pin; + int i; + + pin = *da->name; + + for (i = 1; i < *da->nr; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = &pin[0]; +} +DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); + +static struct irq_pin_list *get_one_free_irq_2_pin(void) +{ + struct irq_pin_list *pin; + int i; + + pin = irq_2_pin_ptr; + + if (pin) { + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); + + if (after_bootmem) + pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, + GFP_ATOMIC); + else + pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * + nr_irq_2_pin, PAGE_SIZE, 0); + + if (!pin) + panic("can not get more irq_2_pin\n"); -DEFINE_DYN_ARRAY(irq_2_pin, sizeof(struct irq_pin_list), pin_map_size, 16, NULL); + for (i = 1; i < nr_irq_2_pin; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = pin->next; + pin->next = NULL; + + return pin; +} struct io_apic { unsigned int index; @@ -383,20 +439,34 @@ static void ioapic_mask_entry(int apic, int pin) */ static void add_pin_to_irq(unsigned int irq, int apic, int pin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { + entry = get_one_free_irq_2_pin(); + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); + return; + } - irq_cfg_alloc(irq); - while (entry->next) - entry = irq_2_pin + entry->next; + while (entry->next) { + /* not again, please */ + if (entry->apic == apic && entry->pin == pin) + return; - if (entry->pin != -1) { - entry->next = first_free_entry; - entry = irq_2_pin + entry->next; - if (++first_free_entry >= pin_map_size) - panic("io_apic.c: whoops"); + entry = entry->next; } + + entry->next = get_one_free_irq_2_pin(); + entry = entry->next; entry->apic = apic; entry->pin = pin; + printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); } /* @@ -406,35 +476,45 @@ static void __init replace_pin_at_irq(unsigned int irq, int oldapic, int oldpin, int newapic, int newpin) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; - while (1) { + while (entry) { if (entry->apic == oldapic && entry->pin == oldpin) { entry->apic = newapic; entry->pin = newpin; - } - if (!entry->next) + replaced = 1; + /* every one is different, right? */ break; - entry = irq_2_pin + entry->next; + } + entry = entry->next; } + + /* why? call replace before add? */ + if (!replaced) + add_pin_to_irq(irq, newapic, newpin); } static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) { - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_cfg *cfg; + struct irq_pin_list *entry; unsigned int pin, reg; + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; for (;;) { - pin = entry->pin; - if (pin == -1) + if (!entry) break; + pin = entry->pin; reg = io_apic_read(entry->apic, 0x10 + pin*2); reg &= ~disable; reg |= enable; io_apic_modify(entry->apic, 0x10 + pin*2, reg); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } } @@ -509,13 +589,18 @@ static void clear_IO_APIC(void) #ifdef CONFIG_SMP static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { + struct irq_cfg *cfg; unsigned long flags; int pin; - struct irq_pin_list *entry = irq_2_pin + irq; + struct irq_pin_list *entry; unsigned int apicid_value; cpumask_t tmp; struct irq_desc *desc; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + cpus_and(tmp, cpumask, cpu_online_map); if (cpus_empty(tmp)) tmp = TARGET_CPUS; @@ -527,13 +612,13 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) apicid_value = apicid_value << 24; spin_lock_irqsave(&ioapic_lock, flags); for (;;) { - pin = entry->pin; - if (pin == -1) + if (!entry) break; + pin = entry->pin; io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } desc = irq_to_desc(irq); desc->affinity = cpumask; @@ -1152,6 +1237,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_02 reg_02; union IO_APIC_reg_03 reg_03; unsigned long flags; + struct irq_cfg *cfg; if (apic_verbosity == APIC_QUIET) return; @@ -1240,16 +1326,16 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for (i = 0; i < nr_irqs; i++) { - struct irq_pin_list *entry = irq_2_pin + i; - if (entry->pin < 0) + for_each_irq_cfg(cfg) { + struct irq_pin_list *entry = cfg->irq_2_pin; + if (!entry) continue; printk(KERN_DEBUG "IRQ%d ", i); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) break; - entry = irq_2_pin + entry->next; + entry = entry->next; } printk("\n"); } @@ -1420,10 +1506,6 @@ static void __init enable_IO_APIC(void) int i, apic; unsigned long flags; - for (i = 0; i < pin_map_size; i++) { - irq_2_pin[i].pin = -1; - irq_2_pin[i].next = 0; - } if (!pirqs_enabled) for (i = 0; i < MAX_PIRQS; i++) pirq_entries[i] = -1; -- cgit v1.2.3 From 199751d715bba5b469ea22adadc68a4166bfa4f5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:27 -0700 Subject: x86: make 32 bit to use sparse_irq but actually irq still needs to be less than NR_IRQS, because interrupt[NR_IRQS] in entry.S. need to enable per_cpu vector... Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/kernel/io_apic_32.c | 63 ++++++++++++++++++++++++++++++-------------- arch/x86/kernel/irq_32.c | 37 +++++++++++++++++++------- arch/x86/kernel/irqinit_32.c | 7 +++++ 4 files changed, 79 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3e0eaaa1a33..b157e94637b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,7 +34,7 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_DYN_ARRAY - select HAVE_SPARSE_IRQ if X86_64 + select HAVE_SPARSE_IRQ config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 59d2e8a273e..66c0a91362a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -595,7 +595,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) struct irq_pin_list *entry; unsigned int apicid_value; cpumask_t tmp; - struct irq_desc *desc; cfg = irq_cfg(irq); @@ -620,8 +619,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = entry->next; } - desc = irq_to_desc(irq); - desc->affinity = cpumask; + irq_to_desc(irq)->affinity = cpumask; spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -1055,8 +1053,6 @@ static int __assign_irq_vector(int irq) int vector, offset; struct irq_cfg *cfg; - BUG_ON((unsigned)irq >= nr_irqs); - cfg = irq_cfg(irq); if (cfg->vector > 0) return cfg->vector; @@ -1103,7 +1099,12 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { struct irq_desc *desc; - desc = irq_to_desc(irq); + /* first time to use this irq_desc */ + if (irq < 16) + desc = irq_to_desc(irq); + else + desc = irq_to_desc_alloc(irq); + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) { desc->status |= IRQ_LEVEL; @@ -2330,16 +2331,19 @@ device_initcall(ioapic_init_sysfs); /* * Dynamic irq allocate and deallocation */ -int create_irq(void) +unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - int irq, new, vector = 0; + unsigned int irq, new, vector = 0; unsigned long flags; struct irq_cfg *cfg_new; - irq = -ENOSPC; + /* only can use bus/dev/fn.. when per_cpu vector is used */ + irq_want = nr_irqs - 1; + + irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = (nr_irqs - 1); new >= 0; new--) { + for (new = (nr_irqs - 1); new > 0; new--) { if (platform_legacy_irq(new)) continue; cfg_new = irq_cfg(new); @@ -2354,13 +2358,18 @@ int create_irq(void) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq >= 0) { + if (irq > 0) { set_intr_gate(vector, interrupt[irq]); dynamic_irq_init(irq); } return irq; } +int create_irq(void) +{ + return create_irq_nr(nr_irqs - 1); +} + void destroy_irq(unsigned int irq) { unsigned long flags; @@ -2415,7 +2424,6 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp; int vector; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2435,8 +2443,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; + irq_to_desc(irq)->affinity = mask; } #endif /* CONFIG_SMP */ @@ -2455,13 +2462,31 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { struct msi_msg msg; int irq, ret; - irq = create_irq(); - if (irq < 0) - return irq; + + unsigned int irq_want; + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + + irq = create_irq_nr(irq_want); + + if (irq == 0) + return -1; ret = msi_compose_msg(dev, irq, &msg); if (ret < 0) { @@ -2510,7 +2535,6 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { unsigned int dest; cpumask_t tmp; - struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2521,8 +2545,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(mask); target_ht_irq(irq, dest); - desc = irq_to_desc(irq); - desc->affinity = mask; + irq_to_desc(irq)->affinity = mask; } #endif diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 576c5df6cad..0a57e39159a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -224,9 +224,10 @@ unsigned int do_IRQ(struct pt_regs *regs) struct pt_regs *old_regs; /* high bit used in ret_from_ code */ int overflow, irq = ~regs->orig_ax; - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; - if (unlikely((unsigned)irq >= nr_irqs)) { + desc = irq_to_desc(irq); + if (unlikely(!desc)) { printk(KERN_EMERG "%s: cannot handle IRQ %d\n", __func__, irq); BUG(); @@ -263,6 +264,24 @@ int show_interrupts(struct seq_file *p, void *v) int i = *(loff_t *) v, j; struct irqaction * action; unsigned long flags; + unsigned int entries; + struct irq_desc *desc; + int tail = 0; + +#ifdef CONFIG_HAVE_SPARSE_IRQ + desc = (struct irq_desc *)v; + entries = -1U; + i = desc->irq; + if (!desc->next) + tail = 1; +#else + entries = nr_irqs - 1; + i = *(loff_t *) v; + if (i == nr_irqs) + tail = 1; + else + desc = irq_to_desc(i); +#endif if (i == 0) { seq_printf(p, " "); @@ -271,9 +290,8 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i < nr_irqs) { + if (i <= entries) { unsigned any_count = 0; - struct irq_desc *desc = irq_to_desc(i); spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP @@ -285,7 +303,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%3d: ",i); + seq_printf(p, "%#x: ",i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else @@ -304,7 +322,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); skip: spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == nr_irqs) { + } + + if (tail) { seq_printf(p, "NMI: "); for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); @@ -396,15 +416,14 @@ void fixup_irqs(cpumask_t map) { unsigned int irq; static int warned; + struct irq_desc *desc; - for (irq = 0; irq < nr_irqs; irq++) { + for_each_irq_desc(irq, desc) { cpumask_t mask; - struct irq_desc *desc; if (irq == 2) continue; - desc = irq_to_desc(irq); cpus_and(mask, desc->affinity, map); if (any_online_cpu(mask) == NR_CPUS) { printk("Breaking affinity for irq %i\n", irq); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 65c1c950770..ded09ac2642 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -69,6 +69,13 @@ void __init init_ISA_irqs (void) * 16 old-style INTA-cycle interrupts: */ for (i = 0; i < 16; i++) { + /* first time call this irq_desc */ + struct irq_desc *desc = irq_to_desc_alloc(i); + + desc->status = IRQ_DISABLED; + desc->action = NULL; + desc->depth = 1; + set_irq_chip_and_handler_name(i, &i8259A_chip, handle_level_irq, "XT"); } -- cgit v1.2.3 From 497c9a195db918d3f035e8cb3021e5d4d035516e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:28 -0700 Subject: x86: make 32bit support per_cpu vector so we can merge io_apic_32.c and io_apic_64.c v2: Use cpu_online_map as target cpus for bigsmp, just like 64-bit is doing. Also remove some unused TARGET_CPUS macro. v3: need to check if desc is null in smp_irq_move_cleanup also migration needs to reset vector too, so copy __target_IO_APIC_irq from 64bit. (the duplication will go away once the two files are unified.) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 2 +- arch/x86/kernel/io_apic_32.c | 719 ++++++++++++++++++++++++++--------------- arch/x86/kernel/irq_32.c | 18 +- arch/x86/kernel/irqinit_32.c | 41 ++- arch/x86/lguest/boot.c | 2 +- arch/x86/mach-generic/bigsmp.c | 4 + arch/x86/mach-generic/es7000.c | 14 + arch/x86/mach-generic/numaq.c | 14 + arch/x86/mach-generic/summit.c | 14 + 9 files changed, 547 insertions(+), 281 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index b21fbfaffe3..4d82171d0f9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -629,7 +629,7 @@ ENTRY(interrupt) ENTRY(irq_entries_start) RING0_INT_FRAME vector=0 -.rept NR_IRQS +.rept NR_VECTORS ALIGN .if vector CFI_ADJUST_CFA_OFFSET -4 diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 66c0a91362a..ea33d3c7497 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -48,6 +48,7 @@ #include #include +#include #include #include @@ -60,7 +61,7 @@ atomic_t irq_mis_count; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -DEFINE_SPINLOCK(vector_lock); +static DEFINE_SPINLOCK(vector_lock); int timer_through_8259 __initdata; @@ -100,28 +101,32 @@ struct irq_cfg { unsigned int irq; struct irq_cfg *next; struct irq_pin_list *irq_2_pin; + cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; u8 vector; + u8 move_in_progress : 1; }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .irq = 0, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .vector = IRQ15_VECTOR, }, + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; static struct irq_cfg irq_cfg_init = { .irq = -1U, }; @@ -263,6 +268,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) return cfg; } +static int assign_irq_vector(int irq, cpumask_t mask); /* * Rough estimation of how many shared IRQs there are, can * be changed anytime. @@ -432,6 +438,65 @@ static void ioapic_mask_entry(int apic, int pin) spin_unlock_irqrestore(&ioapic_lock, flags); } +#ifdef CONFIG_SMP +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + int apic, pin; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; + io_apic_write(apic, 0x11 + pin*2, dest); + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin *2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + + cfg = irq_cfg(irq); + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + irq_to_desc(irq)->affinity = mask; + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#endif /* CONFIG_SMP */ + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. We are super @@ -586,45 +651,6 @@ static void clear_IO_APIC(void) clear_IO_APIC_pin(apic, pin); } -#ifdef CONFIG_SMP -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) -{ - struct irq_cfg *cfg; - unsigned long flags; - int pin; - struct irq_pin_list *entry; - unsigned int apicid_value; - cpumask_t tmp; - - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - - cpus_and(tmp, cpumask, cpu_online_map); - if (cpus_empty(tmp)) - tmp = TARGET_CPUS; - - cpus_and(cpumask, tmp, CPU_MASK_ALL); - - apicid_value = cpu_mask_to_apicid(cpumask); - /* Prepare to do the io_apic_write */ - apicid_value = apicid_value << 24; - spin_lock_irqsave(&ioapic_lock, flags); - for (;;) { - if (!entry) - break; - pin = entry->pin; - io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value); - if (!entry->next) - break; - entry = entry->next; - } - irq_to_desc(irq)->affinity = cpumask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#endif /* CONFIG_SMP */ - #ifndef CONFIG_SMP void send_IPI_self(int vector) { @@ -789,32 +815,6 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - #if defined(CONFIG_EISA) || defined(CONFIG_MCA) /* * EISA Edge/Level control register, ELCR @@ -1046,47 +1046,138 @@ static inline int IO_APIC_irq_trigger(int irq) return 0; } +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} -static int __assign_irq_vector(int irq) +void unlock_vector_lock(void) { - static int current_vector = FIRST_DEVICE_VECTOR, current_offset; - int vector, offset; - struct irq_cfg *cfg; + spin_unlock(&vector_lock); +} - cfg = irq_cfg(irq); - if (cfg->vector > 0) - return cfg->vector; +static int __assign_irq_vector(int irq, cpumask_t mask) +{ + static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + unsigned int old_vector; + int cpu; + struct irq_cfg *cfg; - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (vector == current_vector) - return -ENOSPC; - if (test_and_set_bit(vector, used_vectors)) - goto next; + cfg = irq_cfg(irq); - current_vector = vector; - current_offset = offset; - cfg->vector = vector; + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); - return vector; -} + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; -static int assign_irq_vector(int irq) -{ + old_vector = cfg->vector; + if (old_vector) { + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) + return 0; + } + + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; + int new_cpu; + int vector, offset; + + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); + + vector = current_vector; + offset = current_offset; +next: + vector += 8; + if (vector >= first_system_vector) { + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (unlikely(current_vector == vector)) + continue; + if (vector == SYSCALL_VECTOR) + goto next; + + for_each_cpu_mask_nr(new_cpu, new_mask) + if (per_cpu(vector_irq, new_cpu)[vector] != -1) + goto next; + /* Found one! */ + current_vector = vector; + current_offset = offset; + if (old_vector) { + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; + } + for_each_cpu_mask_nr(new_cpu, new_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + return 0; + } + return -ENOSPC; +} + +static int assign_irq_vector(int irq, cpumask_t mask) +{ + int err; unsigned long flags; - int vector; spin_lock_irqsave(&vector_lock, flags); - vector = __assign_irq_vector(irq); + err = __assign_irq_vector(irq, mask); spin_unlock_irqrestore(&vector_lock, flags); - return vector; + return err; +} + +static void __clear_irq_vector(int irq) +{ + struct irq_cfg *cfg; + cpumask_t mask; + int cpu, vector; + + cfg = irq_cfg(irq); + BUG_ON(!cfg->vector); + + vector = cfg->vector; + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + cfg->vector = 0; + cpus_clear(cfg->domain); +} + +void __setup_vector_irq(int cpu) +{ + /* Initialize vector_irq on a new cpu */ + /* This function must be called with vector_lock held */ + int irq, vector; + struct irq_cfg *cfg; + + /* Mark the inuse vectors */ + for_each_irq_cfg(cfg) { + if (!cpu_isset(cpu, cfg->domain)) + continue; + vector = cfg->vector; + irq = cfg->irq; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq < 0) + continue; + + cfg = irq_cfg(irq); + if (!cpu_isset(cpu, cfg->domain)) + per_cpu(vector_irq, cpu)[vector] = -1; + } } static struct irq_chip ioapic_chip; @@ -1095,7 +1186,7 @@ static struct irq_chip ioapic_chip; #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 -static void ioapic_register_intr(int irq, int vector, unsigned long trigger) +static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; @@ -1115,79 +1206,109 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); } - set_intr_gate(vector, interrupt[irq]); } -static void __init setup_IO_APIC_irqs(void) +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) { + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest.logical.logical_dest = destination; + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + + return 0; +} + +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, + int trigger, int polarity) +{ + struct irq_cfg *cfg; struct IO_APIC_route_entry entry; - int apic, pin, idx, irq, first_notcon = 1, vector; + cpumask_t mask; + + if (!IO_APIC_IRQ(irq)) + return; + + cfg = irq_cfg(irq); + + mask = TARGET_CPUS; + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(mask, cfg->domain, mask); + + apic_printk(APIC_VERBOSE,KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + irq, trigger, polarity); + + + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } + + ioapic_register_intr(irq, trigger); + if (irq < 16) + disable_8259A_irq(irq); + + ioapic_write_entry(apic, pin, entry); +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - /* - * add it to the IO-APIC irq-routing table: - */ - memset(&entry, 0, sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.mask = 0; /* enable IRQ */ - entry.dest.logical.logical_dest = - cpu_mask_to_apicid(TARGET_CPUS); - - idx = find_irq_entry(apic, pin, mp_INT); + idx = find_irq_entry(apic,pin,mp_INT); if (idx == -1) { if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - " IO-APIC (apicid-pin) %d-%d", - mp_ioapics[apic].mp_apicid, - pin); + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); first_notcon = 0; } else - apic_printk(APIC_VERBOSE, ", %d-%d", - mp_ioapics[apic].mp_apicid, pin); + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); continue; } - if (!first_notcon) { apic_printk(APIC_VERBOSE, " not connected.\n"); first_notcon = 1; } - entry.trigger = irq_trigger(idx); - entry.polarity = irq_polarity(idx); - - if (irq_trigger(idx)) { - entry.trigger = 1; - entry.mask = 1; - } - irq = pin_2_irq(idx, apic, pin); - /* - * skip adding the timer int on secondary nodes, which causes - * a small but painful rift in the time-space continuum - */ - if (multi_timer_check(apic, irq)) - continue; - else - add_pin_to_irq(irq, apic, pin); - if (!apic && !IO_APIC_IRQ(irq)) - continue; + if (multi_timer_check(apic, irq)) + continue; - if (IO_APIC_IRQ(irq)) { - vector = assign_irq_vector(irq); - entry.vector = vector; - ioapic_register_intr(irq, vector, IOAPIC_AUTO); + add_pin_to_irq(irq, apic, pin); - if (!apic && (irq < 16)) - disable_8259A_irq(irq); - } - ioapic_write_entry(apic, pin, entry); + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); } } @@ -1221,7 +1342,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, * The timer IRQ doesn't have to know that behind the * scene we may have a 8259A-master in AEOI mode ... */ - ioapic_register_intr(0, vector, IOAPIC_EDGE); + ioapic_register_intr(0, IOAPIC_EDGE); /* * Add it to the IO-APIC irq-routing table: @@ -1805,8 +1926,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } +static void irq_complete_move(unsigned int irq); static void ack_ioapic_irq(unsigned int irq) { + irq_complete_move(irq); move_native_irq(irq); ack_APIC_irq(); } @@ -1816,6 +1939,7 @@ static void ack_ioapic_quirk_irq(unsigned int irq) unsigned long v; int i; + irq_complete_move(irq); move_native_irq(irq); /* * It appears there is an erratum which affects at least version 0x11 @@ -1858,6 +1982,64 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } +#ifdef CONFIG_SMP +asmlinkage void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + ack_APIC_irq(); + irq_enter(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; + cfg->move_cleanup_count--; +unlock: + spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void irq_complete_move(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + unsigned vector, me; + + if (likely(!cfg->move_in_progress)) + return; + + vector = ~get_irq_regs()->orig_ax; + me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } +} +#else +static inline void irq_complete_move(unsigned int irq) {} +#endif + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, @@ -1940,7 +2122,7 @@ static struct irq_chip lapic_chip __read_mostly = { .ack = ack_lapic_irq, }; -static void lapic_register_intr(int irq, int vector) +static void lapic_register_intr(int irq) { struct irq_desc *desc; @@ -1948,7 +2130,6 @@ static void lapic_register_intr(int irq, int vector) desc->status &= ~IRQ_LEVEL; set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); - set_intr_gate(vector, interrupt[irq]); } static void __init setup_nmi(void) @@ -2036,9 +2217,9 @@ static inline void __init unlock_ExtINT_logic(void) */ static inline void __init check_timer(void) { + struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; int no_pin1 = 0; - int vector; unsigned int ver; unsigned long flags; @@ -2051,8 +2232,7 @@ static inline void __init check_timer(void) * get/set the timer IRQ vector: */ disable_8259A_irq(0); - vector = assign_irq_vector(0); - set_intr_gate(vector, interrupt[0]); + assign_irq_vector(0, TARGET_CPUS); /* * As IRQ0 is to be enabled in the 8259A, the virtual @@ -2074,7 +2254,7 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " "apic1=%d pin1=%d apic2=%d pin2=%d\n", - vector, apic1, pin1, apic2, pin2); + cfg->vector, apic1, pin1, apic2, pin2); /* * Some BIOS writers are clueless and report the ExtINTA @@ -2098,7 +2278,7 @@ static inline void __init check_timer(void) */ if (no_pin1) { add_pin_to_irq(0, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, vector); + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } unmask_IO_APIC_irq(0); if (timer_irq_works()) { @@ -2123,7 +2303,7 @@ static inline void __init check_timer(void) * legacy devices should be connected to IO APIC #0 */ replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, vector); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); unmask_IO_APIC_irq(0); enable_8259A_irq(0); if (timer_irq_works()) { @@ -2154,8 +2334,8 @@ static inline void __init check_timer(void) apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); - lapic_register_intr(0, vector); - apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ + lapic_register_intr(0); + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ enable_8259A_irq(0); if (timer_irq_works()) { @@ -2163,7 +2343,7 @@ static inline void __init check_timer(void) goto out; } disable_8259A_irq(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); apic_printk(APIC_QUIET, KERN_INFO @@ -2207,12 +2387,6 @@ out: void __init setup_IO_APIC(void) { - int i; - - /* Reserve all the system vectors. */ - for (i = first_system_vector; i < NR_VECTORS; i++) - set_bit(i, used_vectors); - enable_IO_APIC(); io_apic_irqs = ~PIC_IRQS; @@ -2334,12 +2508,14 @@ device_initcall(ioapic_init_sysfs); unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - unsigned int irq, new, vector = 0; + unsigned int irq, new; unsigned long flags; struct irq_cfg *cfg_new; +#ifndef CONFIG_HAVE_SPARSE_IRQ /* only can use bus/dev/fn.. when per_cpu vector is used */ irq_want = nr_irqs - 1; +#endif irq = 0; spin_lock_irqsave(&vector_lock, flags); @@ -2351,15 +2527,13 @@ unsigned int create_irq_nr(unsigned int irq_want) continue; if (!cfg_new) cfg_new = irq_cfg_alloc(new); - vector = __assign_irq_vector(new); - if (likely(vector > 0)) + if (__assign_irq_vector(new, TARGET_CPUS) == 0) irq = new; break; } spin_unlock_irqrestore(&vector_lock, flags); if (irq > 0) { - set_intr_gate(vector, interrupt[irq]); dynamic_irq_init(irq); } return irq; @@ -2377,8 +2551,7 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); spin_lock_irqsave(&vector_lock, flags); - clear_bit(irq_cfg(irq)->vector, used_vectors); - irq_cfg(irq)->vector = 0; + __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); } @@ -2388,57 +2561,65 @@ void destroy_irq(unsigned int irq) #ifdef CONFIG_PCI_MSI static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) { - int vector; + struct irq_cfg *cfg; + int err; unsigned dest; + cpumask_t tmp; - vector = assign_irq_vector(irq); - if (vector >= 0) { - dest = cpu_mask_to_apicid(TARGET_CPUS); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? -MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (err) + return err; - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? -MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(vector); - } - return vector; + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); + + return err; } #ifdef CONFIG_SMP static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_cfg *cfg; struct msi_msg msg; unsigned int dest; cpumask_t tmp; - int vector; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) - tmp = TARGET_CPUS; + return; - vector = assign_irq_vector(irq); - if (vector < 0) + if (assign_irq_vector(irq, mask)) return; - dest = cpu_mask_to_apicid(mask); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); read_msi_msg(irq, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(vector); + msg.data |= MSI_DATA_VECTOR(cfg->vector); msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; msg.address_lo |= MSI_ADDR_DEST_ID(dest); @@ -2517,15 +2698,15 @@ void arch_teardown_msi_irq(unsigned int irq) #ifdef CONFIG_SMP -static void target_ht_irq(unsigned int irq, unsigned int dest) +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) { struct ht_irq_msg msg; fetch_ht_irq_msg(irq, &msg); - msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); write_ht_irq_msg(irq, &msg); @@ -2533,18 +2714,22 @@ static void target_ht_irq(unsigned int irq, unsigned int dest) static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) { + struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) - tmp = TARGET_CPUS; + return; - cpus_and(mask, tmp, CPU_MASK_ALL); + if (assign_irq_vector(irq, mask)) + return; - dest = cpu_mask_to_apicid(mask); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); - target_ht_irq(irq, dest); + target_ht_irq(irq, dest, cfg->vector); irq_to_desc(irq)->affinity = mask; } #endif @@ -2562,16 +2747,18 @@ static struct irq_chip ht_irq_chip = { int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) { - int vector; + struct irq_cfg *cfg; + int err; + cpumask_t tmp; - vector = assign_irq_vector(irq); - if (vector >= 0) { + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if ( !err) { struct ht_irq_msg msg; unsigned dest; - cpumask_t tmp; - cpus_clear(tmp); - cpu_set(vector >> 8, tmp); + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); @@ -2579,7 +2766,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) msg.address_lo = HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(vector) | + HT_IRQ_LOW_VECTOR(cfg->vector) | ((INT_DEST_MODE == 0) ? HT_IRQ_LOW_DM_PHYSICAL : HT_IRQ_LOW_DM_LOGICAL) | @@ -2594,7 +2781,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); } - return vector; + return err; } #endif /* CONFIG_HT_IRQ */ @@ -2705,50 +2892,21 @@ int __init io_apic_get_redir_entries(int ioapic) } -int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low) +int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity) { - struct IO_APIC_route_entry entry; - if (!IO_APIC_IRQ(irq)) { printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } - /* - * Generate a PCI IRQ routing entry and program the IOAPIC accordingly. - * Note that we mask (disable) IRQs now -- these get enabled when the - * corresponding device driver registers for this IRQ. - */ - - memset(&entry, 0, sizeof(entry)); - - entry.delivery_mode = INT_DELIVERY_MODE; - entry.dest_mode = INT_DEST_MODE; - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.trigger = edge_level; - entry.polarity = active_high_low; - entry.mask = 1; - /* * IRQs < 16 are already in the irq_2_pin[] map */ if (irq >= 16) add_pin_to_irq(irq, ioapic, pin); - entry.vector = assign_irq_vector(irq); - - apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry " - "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic, - mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq, - edge_level, active_high_low); - - ioapic_register_intr(irq, entry.vector, edge_level); - - if (!ioapic && (irq < 16)) - disable_8259A_irq(irq); - - ioapic_write_entry(ioapic, pin, entry); + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); return 0; } @@ -2774,6 +2932,47 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) #endif /* CONFIG_ACPI */ +/* + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online + * so mask in all cases should simply be TARGET_CPUS + */ +#ifdef CONFIG_SMP +void __init setup_ioapic_dest(void) +{ + int pin, ioapic, irq, irq_entry; + struct irq_cfg *cfg; + struct irq_desc *desc; + + if (skip_ioapic_setup == 1) + return; + + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + cfg = irq_cfg(irq); + if (!cfg->vector) + setup_IO_APIC_irq(ioapic, pin, irq, + irq_trigger(irq_entry), + irq_polarity(irq_entry)); + else { + desc = irq_to_desc(irq); + set_ioapic_affinity_irq(irq, TARGET_CPUS); + } + } + + } +} +#endif + static int __init parse_disable_timer_pin_1(char *arg) { disable_timer_pin_1 = 1; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 0a57e39159a..b51ffdcfa31 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -223,21 +223,25 @@ unsigned int do_IRQ(struct pt_regs *regs) { struct pt_regs *old_regs; /* high bit used in ret_from_ code */ - int overflow, irq = ~regs->orig_ax; + int overflow; + unsigned vector = ~regs->orig_ax; struct irq_desc *desc; + unsigned irq; - desc = irq_to_desc(irq); - if (unlikely(!desc)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d\n", - __func__, irq); - BUG(); - } old_regs = set_irq_regs(regs); irq_enter(); + irq = __get_cpu_var(vector_irq)[vector]; overflow = check_stack_overflow(); + desc = irq_to_desc(irq); + if (unlikely(!desc)) { + printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n", + __func__, irq, vector); + BUG(); + } + if (!execute_on_irq_stack(overflow, desc, irq)) { if (unlikely(overflow)) print_stack_overflow(); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index ded09ac2642..9092103a18e 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -90,6 +90,27 @@ static struct irqaction irq2 = { .name = "cascade", }; +DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, + [IRQ1_VECTOR] = 1, + [IRQ2_VECTOR] = 2, + [IRQ3_VECTOR] = 3, + [IRQ4_VECTOR] = 4, + [IRQ5_VECTOR] = 5, + [IRQ6_VECTOR] = 6, + [IRQ7_VECTOR] = 7, + [IRQ8_VECTOR] = 8, + [IRQ9_VECTOR] = 9, + [IRQ10_VECTOR] = 10, + [IRQ11_VECTOR] = 11, + [IRQ12_VECTOR] = 12, + [IRQ13_VECTOR] = 13, + [IRQ14_VECTOR] = 14, + [IRQ15_VECTOR] = 15, + [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 +}; + /* Overridden in paravirt.c */ void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); @@ -105,22 +126,14 @@ void __init native_init_IRQ(void) * us. (some of these will be overridden and become * 'special' SMP interrupts) */ - for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; - if (i >= nr_irqs) - break; + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { /* SYSCALL_VECTOR was reserved in trap_init. */ - if (!test_bit(vector, used_vectors)) - set_intr_gate(vector, interrupt[i]); + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i]); } -#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) - /* - * IRQ0 must be given a fixed assignment and initialized, - * because it's used before the IO-APIC is set up. - */ - set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]); +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) /* * The reschedule interrupt is a CPU-to-CPU reschedule-helper * IPI, driven by wakeup. @@ -135,6 +148,9 @@ void __init native_init_IRQ(void) /* IPI for single call function */ set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); + + /* Low priority IPI to cleanup after moving an irq */ + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); #endif #ifdef CONFIG_X86_LOCAL_APIC @@ -168,3 +184,4 @@ void __init native_init_IRQ(void) irq_ctx_init(smp_processor_id()); } + diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 65f0b8a47be..48ee4f9435f 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -582,7 +582,7 @@ static void __init lguest_init_IRQ(void) for (i = 0; i < LGUEST_IRQS; i++) { int vector = FIRST_EXTERNAL_VECTOR + i; if (vector != SYSCALL_VECTOR) { - set_intr_gate(vector, interrupt[i]); + set_intr_gate(vector, interrupt[vector]); set_irq_chip_and_handler_name(i, &lguest_irq_controller, handle_level_irq, "level"); diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index df37fc9d6a2..3c3b471ea49 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c @@ -41,6 +41,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { { } }; +static cpumask_t vector_allocation_domain(int cpu) +{ + return cpumask_of_cpu(cpu); +} static int probe_bigsmp(void) { diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 6513d41ea21..28459cab3dd 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c @@ -75,4 +75,18 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) } #endif +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 8cf58394975..71a309b122e 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c @@ -38,4 +38,18 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6ad6b67a723..6272b5e69da 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c @@ -23,4 +23,18 @@ static int probe_summit(void) return 0; } +static cpumask_t vector_allocation_domain(int cpu) +{ + /* Careful. Some cpus do not strictly honor the set of cpus + * specified in the interrupt destination when using lowest + * priority interrupt delivery mode. + * + * In particular there was a hyperthreading cpu observed to + * deliver interrupts to the wrong hyperthread when only one + * hyperthread was specified in the interrupt desitination. + */ + cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; + return domain; +} + struct genapic apic_summit = APIC_INIT("summit", probe_summit); -- cgit v1.2.3 From 7a959cff725872ce9c3a534f10724d7bb2cb3c4a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:32 -0700 Subject: x86: add debug info for 32bit sparse_irq so could figure out bugs where we get an interrupt, but vector_irq is not initialized yet. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 8 ++++++-- arch/x86/kernel/io_apic_64.c | 2 ++ arch/x86/kernel/irq_32.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index ea33d3c7497..3001924bdd3 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1114,8 +1114,12 @@ next: cfg->move_in_progress = 1; cfg->old_domain = cfg->domain; } - for_each_cpu_mask_nr(new_cpu, new_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; + printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector); + for_each_cpu_mask_nr(new_cpu, new_mask) { + per_cpu(vector_irq, new_cpu)[vector] = irq; + printk(KERN_CONT " %d ", new_cpu); + } + printk(KERN_CONT "\n"); cfg->vector = vector; cfg->domain = domain; return 0; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b0d4abc55a1..30d2e381131 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -1394,6 +1394,8 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); + printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index b51ffdcfa31..cc929f2f84f 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -237,8 +237,8 @@ unsigned int do_IRQ(struct pt_regs *regs) desc = irq_to_desc(irq); if (unlikely(!desc)) { - printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x\n", - __func__, irq, vector); + printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", + __func__, irq, vector, smp_processor_id()); BUG(); } -- cgit v1.2.3 From d83e94acd95789829804fd9e442bd18975f4dc89 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:33 -0700 Subject: x86, io-apic: remove union about dest for log/phy let user decide the meaning of the bits. This unifies the 32-bit and 64-bit io-apic code a bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 3001924bdd3..353e586822a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1224,7 +1224,7 @@ static int setup_ioapic_entry(int apic, int irq, entry->delivery_mode = INT_DELIVERY_MODE; entry->dest_mode = INT_DEST_MODE; - entry->dest.logical.logical_dest = destination; + entry->dest = destination; entry->mask = 0; /* enable IRQ */ entry->trigger = trigger; @@ -1336,7 +1336,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, */ entry.dest_mode = INT_DEST_MODE; entry.mask = 1; /* mask IRQ now */ - entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.dest = cpu_mask_to_apicid(TARGET_CPUS); entry.delivery_mode = INT_DELIVERY_MODE; entry.polarity = 0; entry.trigger = 0; @@ -1425,19 +1425,15 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" - " Stat Dest Deli Vect: \n"); + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" + " Stat Dmod Deli Vect: \n"); for (i = 0; i <= reg_01.bits.entries; i++) { struct IO_APIC_route_entry entry; entry = ioapic_read_entry(apic, i); - printk(KERN_DEBUG " %02x %03X %02X ", - i, - entry.dest.logical.logical_dest, - entry.dest.physical.physical_dest - ); + printk(KERN_DEBUG " %02x %02X ", i, entry.dest); printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", entry.mask, @@ -1717,7 +1713,7 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest.physical.physical_dest = read_apic_id(); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: @@ -2185,7 +2181,7 @@ static inline void __init unlock_ExtINT_logic(void) entry1.dest_mode = 0; /* physical delivery */ entry1.mask = 0; /* unmask IRQ now */ - entry1.dest.physical.physical_dest = hard_smp_processor_id(); + entry1.dest = hard_smp_processor_id(); entry1.delivery_mode = dest_ExtINT; entry1.polarity = entry0.polarity; entry1.trigger = 0; -- cgit v1.2.3 From 1d02519242c23450b043e5e8a9e3cb84a8666fe3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:34 -0700 Subject: x86: ordering functions in io_apic_32.c prepare for unification: try to make functions be of the same order to io_apic_64.c. v2: add calling setup_msi_irq back to arch_setup_msi_irq Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 178 +++++++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 84 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 353e586822a..9531ef33362 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -1029,23 +1029,6 @@ static int pin_2_irq(int idx, int apic, int pin) return irq; } -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} - void lock_vector_lock(void) { /* Used to the online set of cpus does not change @@ -1190,6 +1173,23 @@ static struct irq_chip ioapic_chip; #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} + static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; @@ -1926,55 +1926,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } -static void irq_complete_move(unsigned int irq); -static void ack_ioapic_irq(unsigned int irq) -{ - irq_complete_move(irq); - move_native_irq(irq); - ack_APIC_irq(); -} - -static void ack_ioapic_quirk_irq(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} - static int ioapic_retrigger_irq(unsigned int irq) { send_IPI_self(irq_cfg(irq)->vector); @@ -2040,13 +1991,61 @@ static void irq_complete_move(unsigned int irq) static inline void irq_complete_move(unsigned int irq) {} #endif +static void ack_apic_edge(unsigned int irq) +{ + irq_complete_move(irq); + move_native_irq(irq); + ack_APIC_irq(); +} + +static void ack_apic_level(unsigned int irq) +{ + unsigned long v; + int i; + + irq_complete_move(irq); + move_native_irq(irq); +/* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, - .ack = ack_ioapic_irq, - .eoi = ack_ioapic_quirk_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, #ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity_irq, #endif @@ -2094,11 +2093,6 @@ static inline void init_IO_APIC_traps(void) * The local APIC irq-chip implementation: */ -static void ack_lapic_irq(unsigned int irq) -{ - ack_APIC_irq(); -} - static void mask_lapic_irq(unsigned int irq) { unsigned long v; @@ -2115,6 +2109,11 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } +static void ack_lapic_irq(unsigned int irq) +{ + ack_APIC_irq(); +} + static struct irq_chip lapic_chip __read_mostly = { .name = "local-APIC", .mask = mask_lapic_irq, @@ -2636,13 +2635,31 @@ static struct irq_chip msi_chip = { .name = "PCI-MSI", .unmask = unmask_msi_irq, .mask = mask_msi_irq, - .ack = ack_ioapic_irq, + .ack = ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = set_msi_irq_affinity, #endif .retrigger = ioapic_retrigger_irq, }; + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) { unsigned int irq; @@ -2657,7 +2674,6 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { - struct msi_msg msg; int irq, ret; unsigned int irq_want; @@ -2669,17 +2685,11 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) if (irq == 0) return -1; - ret = msi_compose_msg(dev, irq, &msg); + ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; - } - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, - "edge"); + } return 0; } @@ -2738,7 +2748,7 @@ static struct irq_chip ht_irq_chip = { .name = "PCI-HT", .mask = mask_ht_irq, .unmask = unmask_ht_irq, - .ack = ack_ioapic_irq, + .ack = ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = set_ht_irq_affinity, #endif -- cgit v1.2.3 From 8ea5371baa82db452a8d93e9977b418d30944e32 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:35 -0700 Subject: x86: ordering functions in io_apic_64.c try to make functions have the same order between 32-bit and 64-bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 67 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 30d2e381131..07e1e45ee02 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -409,40 +409,6 @@ static bool io_apic_level_ack_pending(unsigned int irq) return false; } -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; @@ -627,6 +593,39 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ + \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ + for (;;) { \ + unsigned int reg; \ + if (!entry) \ + break; \ + pin = entry->pin; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + FINAL; \ + if (!entry->next) \ + break; \ + entry = entry->next; \ + } \ +} #define DO_ACTION(name,R,ACTION, FINAL) \ \ -- cgit v1.2.3 From efa2559f65167989f1893cb065e3126d4f13ba60 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:36 -0700 Subject: x86: order variables in io_apic_xx.c move first_system_vector to apic_64.c. also add #ifdef CONFIG_INTR_REMAP to prepare 32 bit to use same file. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 5 ++ arch/x86/kernel/io_apic_32.c | 79 ++++++++++----------- arch/x86/kernel/io_apic_64.c | 160 +++++++++++++++++++++++-------------------- 3 files changed, 127 insertions(+), 117 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 94ddb69ae15..4d7a188025b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,10 @@ int x2apic_preenabled; int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + /* * Debug level, exported for io_apic.c */ diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 9531ef33362..3010bdd3352 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -54,24 +54,22 @@ #define __apicdebuginit(type) static type __init -int (*ioapic_renumber_irq)(int ioapic, int irq); -atomic_t irq_mis_count; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int timer_through_8259 __initdata; - /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes */ int sis_apic_bug = -1; +static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_SPINLOCK(vector_lock); + int first_free_entry; +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +int pin_map_size; + /* * # of IRQ routing registers */ @@ -93,7 +91,15 @@ int mp_bus_id_to_type[MAX_MP_BUSSES]; DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); -static int disable_timer_pin_1 __initdata; +int skip_ioapic_setup; + +static int __init parse_noapic(char *arg) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); struct irq_cfg; struct irq_pin_list; @@ -268,13 +274,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) return cfg; } -static int assign_irq_vector(int irq, cpumask_t mask); -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - /* * This is performance-critical, we want to do it O(1) * @@ -465,6 +464,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) entry = entry->next; } } + +static int assign_irq_vector(int irq, cpumask_t mask); + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_cfg *cfg; @@ -677,7 +679,6 @@ void send_IPI_self(int vector) #define MAX_PIRQS 8 static int pirq_entries [MAX_PIRQS]; static int pirqs_enabled; -int skip_ioapic_setup; static int __init ioapic_pirq_setup(char *str) { @@ -981,6 +982,7 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } +int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; @@ -1621,6 +1623,9 @@ __apicdebuginit(int) print_all_ICs(void) fs_initcall(print_all_ICs); +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + static void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; @@ -1998,6 +2003,7 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } +atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { unsigned long v; @@ -2208,6 +2214,17 @@ static inline void __init unlock_ExtINT_logic(void) ioapic_write_entry(apic, pin, entry0); } +static int disable_timer_pin_1 __initdata; + +static int __init parse_disable_timer_pin_1(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", parse_disable_timer_pin_1); + +int timer_through_8259 __initdata; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2983,28 +3000,6 @@ void __init setup_ioapic_dest(void) } #endif -static int __init parse_disable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", parse_disable_timer_pin_1); - -static int __init parse_enable_timer_pin_1(char *arg) -{ - disable_timer_pin_1 = -1; - return 0; -} -early_param("enable_timer_pin_1", parse_enable_timer_pin_1); - -static int __init parse_noapic(char *arg) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 07e1e45ee02..847aa798764 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -57,6 +57,47 @@ #define __apicdebuginit(type) static type __init +int ioapic_force; + +int sis_apic_bug; /* not actually supported, dummy for compile */ + +static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_SPINLOCK(vector_lock); + +int first_free_entry; +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +int pin_map_size; + +/* + * # of IRQ routing registers + */ +int nr_ioapic_registers[MAX_IO_APICS]; + +/* I/O APIC entries */ +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +int nr_ioapics; + +/* MP IRQ source entries */ +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* # of MP IRQ source entries */ +int mp_irq_entries; + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + +int skip_ioapic_setup; + +static int __init parse_noapic(char *str) +{ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); + + struct irq_cfg; struct irq_pin_list; struct irq_cfg { @@ -228,53 +269,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) return cfg; } -static int assign_irq_vector(int irq, cpumask_t mask); - -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - -int sis_apic_bug; /* not actually supported, dummy for compile */ - -static int no_timer_check; - -static int disable_timer_pin_1 __initdata; - -int timer_through_8259 __initdata; - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* I/O APIC RTE contents at the OS boot up */ -struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - -/* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; - -/* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ - -int pin_map_size; - /* * This is performance-critical, we want to do it O(1) * @@ -481,12 +475,16 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) apic = entry->apic; pin = entry->pin; +#ifdef CONFIG_INTR_REMAP /* * With interrupt-remapping, destination information comes * from interrupt-remapping table entry. */ if (!irq_remapped(irq)) io_apic_write(apic, 0x11 + pin*2, dest); +#else + io_apic_write(apic, 0x11 + pin*2, dest); +#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; @@ -497,6 +495,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) } } +static int assign_irq_vector(int irq, cpumask_t mask); + static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { struct irq_cfg *cfg = irq_cfg(irq); @@ -533,7 +533,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) * shared ISA-space IRQs, so we have to support them. We are super * fast in the common case, and fast for shared ISA-space IRQs. */ -int first_free_entry; static void add_pin_to_irq(unsigned int irq, int apic, int pin) { struct irq_cfg *cfg; @@ -679,6 +678,10 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#ifdef CONFIG_INTR_REMAP +/* I/O APIC RTE contents at the OS boot up */ +static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + /* * Saves and masks all the unmasked IO-APIC RTE's */ @@ -741,25 +744,7 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping) */ restore_IO_APIC_setup(); } - -int skip_ioapic_setup; -int ioapic_force; - -static int __init parse_noapic(char *str) -{ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 1; -} -__setup("disable_timer_pin_1", disable_timer_pin_setup); - +#endif /* * Find the IRQ entry number of a certain pin. @@ -1327,8 +1312,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) return; +#endif memset(&entry, 0, sizeof(entry)); @@ -1601,6 +1588,9 @@ __apicdebuginit(int) print_all_ICs(void) fs_initcall(print_all_ICs); +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; @@ -1695,6 +1685,15 @@ void disable_IO_APIC(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } +static int no_timer_check; + +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + /* * There is a nasty bug in some older SMP boards, their mptable lies * about the timer IRQ. We do the following to work around the situation: @@ -1708,6 +1707,9 @@ static int __init timer_irq_works(void) unsigned long t1 = jiffies; unsigned long flags; + if (no_timer_check) + return 1; + local_save_flags(flags); local_irq_enable(); /* Let ten ticks pass... */ @@ -2239,6 +2241,17 @@ static inline void __init unlock_ExtINT_logic(void) ioapic_write_entry(apic, pin, entry0); } +static int disable_timer_pin_1 __initdata; +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ +static int __init disable_timer_pin_setup(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", disable_timer_pin_setup); + +int timer_through_8259 __initdata; + /* * This code may look a bit paranoid, but it's supposed to cooperate with * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ @@ -2286,8 +2299,10 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("BIOS bug: timer not connected to IO-APIC"); +#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2305,7 +2320,7 @@ static inline void __init check_timer(void) setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); } unmask_IO_APIC_irq(0); - if (!no_timer_check && timer_irq_works()) { + if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); enable_8259A_irq(0); @@ -2314,8 +2329,10 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } +#ifdef CONFIG_INTR_REMAP if (intr_remapping_enabled) panic("timer doesn't work through Interrupt-remapped IO-APIC"); +#endif clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2391,13 +2408,6 @@ out: local_irq_restore(flags); } -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - /* * Traditionally ISA IRQ2 is the cascade IRQ, and is not available * to devices. However there may be an I/O APIC pin available for -- cgit v1.2.3 From d4057bdb6a3bb85dd44f9f39f41eac53696fd637 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:38 -0700 Subject: x86: make headers files the same in io_apic_xx.c also make no_timer_check to be global on 64 bit, because vmi_32 is using that. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 15 ++++++++++++--- arch/x86/kernel/io_apic_64.c | 12 +++++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 3010bdd3352..48184e126f2 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -25,28 +25,37 @@ #include #include #include -#include +#include #include #include #include #include #include -#include #include #include #include #include -#include /* time_after() */ +#include /* time_after() */ +#ifdef CONFIG_ACPI +#include +#endif +#include +#include +#include #include #include #include +#include +#include +#include #include #include #include #include #include #include +#include #include #include diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 847aa798764..3c66e5a8e89 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -27,12 +27,15 @@ #include #include #include +#include #include +#include #include #include #include -#include -#include +#include +#include +#include /* time_after() */ #ifdef CONFIG_ACPI #include #endif @@ -46,14 +49,17 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include +#include #define __apicdebuginit(type) static type __init @@ -1685,7 +1691,7 @@ void disable_IO_APIC(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } -static int no_timer_check; +int no_timer_check __initdata; static int __init notimercheck(char *s) { -- cgit v1.2.3 From f876d213a59c363d2492e399cc6c24edd6f3c368 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:39 -0700 Subject: x86: make 64 handle sis_apic_bug like the 32 bit do we have 64bit system with sis chipset? [ mingo@elte.hu: nope, the problem chipset was 32-bit only. The code symmetry is good nevertheless. ] Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_64.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 3c66e5a8e89..915af9b87aa 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -65,7 +65,11 @@ int ioapic_force; -int sis_apic_bug; /* not actually supported, dummy for compile */ +/* + * Is the SiS APIC rmw bug present ? + * -1 = don't know, 0 = no, 1 = yes + */ +int sis_apic_bug = -1; static DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); @@ -373,9 +377,11 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i * Re-write a value: to be used for read-modify-write * cycles where the read already set up the index register. */ -static inline void io_apic_modify(unsigned int apic, unsigned int value) +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -494,7 +500,7 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; - io_apic_modify(apic, reg); + io_apic_modify(apic, 0x10 + pin*2, reg); if (!entry->next) break; entry = entry->next; @@ -624,7 +630,7 @@ static inline void io_apic_sync(unsigned int apic) pin = entry->pin; \ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ reg ACTION; \ - io_apic_modify(entry->apic, reg); \ + io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ FINAL; \ if (!entry->next) \ break; \ @@ -2450,6 +2456,20 @@ void __init setup_IO_APIC(void) check_timer(); } +/* + * Called after all the initialization is done. If we didnt find any + * APIC bugs then we can allow the modify fast path + */ + +static int __init io_apic_bug_finalize(void) +{ + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; +} + +late_initcall(io_apic_bug_finalize); + struct sysfs_ioapic_data { struct sys_device dev; struct IO_APIC_route_entry entry[0]; -- cgit v1.2.3 From aa45f97b1bb40adae1288669e73350907ffae85e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:40 -0700 Subject: x86: remove ioapic_force no user left. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 1 - arch/x86/kernel/io_apic_64.c | 2 -- 2 files changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4d7a188025b..cd860856a0b 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1813,7 +1813,6 @@ static int __init apic_set_verbosity(char *arg) if (!arg) { #ifdef CONFIG_X86_64 skip_ioapic_setup = 0; - ioapic_force = 1; return 0; #endif return -EINVAL; diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 915af9b87aa..b70fd823218 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -63,8 +63,6 @@ #define __apicdebuginit(type) static type __init -int ioapic_force; - /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes -- cgit v1.2.3 From 047c8fdb8718890e3340073b178d0859d0c7f91f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:41 -0700 Subject: x86: make io_apic_64.c and io_apic_32.c the same all the same except INTR_REMAPPING related and ioapic io resource. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 213 +++++++++++++- arch/x86/kernel/io_apic_64.c | 663 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 829 insertions(+), 47 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 48184e126f2..3ed36041c81 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -123,7 +123,6 @@ struct irq_cfg { u8 move_in_progress : 1; }; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ static struct irq_cfg irq_cfg_legacy[] __initdata = { [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, @@ -391,6 +390,38 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } +#ifdef CONFIG_X86_64 +static bool io_apic_level_ack_pending(unsigned int irq) +{ + struct irq_pin_list *entry; + unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); + + spin_lock_irqsave(&ioapic_lock, flags); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + int pin; + + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + /* Is the remote IRR bit set? */ + if (reg & IO_APIC_REDIR_REMOTE_IRR) { + spin_unlock_irqrestore(&ioapic_lock, flags); + return true; + } + if (!entry->next) + break; + entry = entry->next; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return false; +} +#endif + union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; @@ -483,17 +514,15 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) unsigned int dest; cpumask_t tmp; - cfg = irq_cfg(irq); - cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) return; + cfg = irq_cfg(irq); if (assign_irq_vector(irq, mask)) return; cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); /* * Only the high 8 bits are valid. @@ -572,6 +601,54 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } +#ifdef CONFIG_X86_64 +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ + \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ + for (;;) { \ + unsigned int reg; \ + if (!entry) \ + break; \ + pin = entry->pin; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ + FINAL; \ + if (!entry->next) \ + break; \ + entry = entry->next; \ + } \ +} + +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ + static void name##_IO_APIC_irq (unsigned int irq) \ + __DO_ACTION(R, ACTION, FINAL) + +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + +/* mask = 0 */ +DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) + +#else + static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) { struct irq_cfg *cfg; @@ -620,6 +697,8 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq) IO_APIC_REDIR_MASKED); } +#endif + static void mask_IO_APIC_irq(unsigned int irq) { unsigned long flags; @@ -1055,6 +1134,17 @@ void unlock_vector_lock(void) static int __assign_irq_vector(int irq, cpumask_t mask) { + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; unsigned int old_vector; int cpu; @@ -1095,9 +1185,13 @@ next: } if (unlikely(current_vector == vector)) continue; - if (vector == SYSCALL_VECTOR) +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) goto next; - +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; @@ -1184,6 +1278,7 @@ static struct irq_chip ioapic_chip; #define IOAPIC_EDGE 0 #define IOAPIC_LEVEL 1 +#ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) { int apic, idx, pin; @@ -1200,6 +1295,12 @@ static inline int IO_APIC_irq_trigger(int irq) */ return 0; } +#else +static inline int IO_APIC_irq_trigger(int irq) +{ + return 1; +} +#endif static void ioapic_register_intr(int irq, unsigned long trigger) { @@ -1212,15 +1313,18 @@ static void ioapic_register_intr(int irq, unsigned long trigger) desc = irq_to_desc_alloc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) { + trigger == IOAPIC_LEVEL) desc->status |= IRQ_LEVEL; + else + desc->status &= ~IRQ_LEVEL; + + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); - } else { - desc->status &= ~IRQ_LEVEL; + else set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_edge_irq, "edge"); - } } static int setup_ioapic_entry(int apic, int irq, @@ -1662,7 +1766,6 @@ static void __init enable_IO_APIC(void) struct IO_APIC_route_entry entry; entry = ioapic_read_entry(apic, pin); - /* If the interrupt line is enabled and in ExtInt mode * I have found the pin where the i8259 is connected. */ @@ -2012,6 +2115,60 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } +#ifdef CONFIG_X86_64 +static void ack_apic_level(unsigned int irq) +{ + int do_unmask_irq = 0; + + irq_complete_move(irq); +#ifdef CONFIG_GENERIC_PENDING_IRQ + /* If we are moving the irq we need to mask it */ + if (unlikely(desc->status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_IO_APIC_irq(irq); + } +#endif + + /* + * We must acknowledge the irq before we move it or the acknowledge will + * not propagate properly. + */ + ack_APIC_irq(); + + /* Now we can move and renable the irq */ + if (unlikely(do_unmask_irq)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(irq)) + move_masked_irq(irq, desc); + unmask_IO_APIC_irq(irq); + } +} +#else atomic_t irq_mis_count; static void ack_apic_level(unsigned int irq) { @@ -2053,6 +2210,7 @@ static void ack_apic_level(unsigned int irq) spin_unlock(&ioapic_lock); } } +#endif static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", @@ -2224,7 +2382,7 @@ static inline void __init unlock_ExtINT_logic(void) } static int disable_timer_pin_1 __initdata; - +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ static int __init parse_disable_timer_pin_1(char *arg) { disable_timer_pin_1 = 1; @@ -2244,9 +2402,9 @@ static inline void __init check_timer(void) { struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; - int no_pin1 = 0; - unsigned int ver; unsigned long flags; + unsigned int ver; + int no_pin1 = 0; local_irq_save(flags); @@ -2550,6 +2708,7 @@ unsigned int create_irq_nr(unsigned int irq_want) cfg_new = irq_cfg(new); if (cfg_new && cfg_new->vector != 0) continue; + /* check if need to create one */ if (!cfg_new) cfg_new = irq_cfg_alloc(new); if (__assign_irq_vector(new, TARGET_CPUS) == 0) @@ -2720,6 +2879,32 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) return 0; } +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int irq; + int ret, sub_handle; + struct msi_desc *desc; + unsigned int irq_want; + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } + return 0; + +error: + destroy_irq(irq); + return ret; +} + + void arch_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index b70fd823218..940c4167b32 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c @@ -94,18 +94,22 @@ struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; static int __init parse_noapic(char *str) { + /* disable IO-APIC */ disable_ioapic_setup(); return 0; } early_param("noapic", parse_noapic); - struct irq_cfg; struct irq_pin_list; struct irq_cfg { @@ -374,6 +378,8 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i /* * Re-write a value: to be used for read-modify-write * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index register */ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { @@ -383,6 +389,7 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } +#ifdef CONFIG_X86_64 static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; @@ -412,6 +419,7 @@ static bool io_apic_level_ack_pending(unsigned int irq) return false; } +#endif union entry_union { struct { u32 w1, w2; }; @@ -509,7 +517,7 @@ static int assign_irq_vector(int irq, cpumask_t mask); static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg(irq); + struct irq_cfg *cfg; unsigned long flags; unsigned int dest; cpumask_t tmp; @@ -519,12 +527,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) if (cpus_empty(tmp)) return; + cfg = irq_cfg(irq); if (assign_irq_vector(irq, mask)) return; cpus_and(tmp, cfg->domain, mask); dest = cpu_mask_to_apicid(tmp); - /* * Only the high 8 bits are valid. */ @@ -536,7 +544,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } -#endif +#endif /* CONFIG_SMP */ /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are @@ -602,6 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } +#ifdef CONFIG_X86_64 /* * Synchronize the IO-APIC and the CPU by doing * a dummy read from the IO-APIC @@ -647,6 +656,58 @@ DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) /* mask = 0 */ DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) +#else + +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) +{ + struct irq_cfg *cfg; + struct irq_pin_list *entry; + unsigned int pin, reg; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + reg &= ~disable; + reg |= enable; + io_apic_modify(entry->apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +/* mask = 1 */ +static void __mask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); +} + +/* mask = 0 */ +static void __unmask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); +} + +/* mask = 1, trigger = 0 */ +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER); +} + +/* mask = 0, trigger = 1 */ +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED); +} + +#endif + static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -688,6 +749,64 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) +void send_IPI_self(int vector) +{ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write(APIC_ICR, cfg); +} +#endif /* !CONFIG_SMP && CONFIG_X86_32*/ + +#ifdef CONFIG_X86_32 +/* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. + */ + +#define MAX_PIRQS 8 +static int pirq_entries [MAX_PIRQS]; +static int pirqs_enabled; + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + apic_printk(APIC_VERBOSE, KERN_INFO + "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); +#endif /* CONFIG_X86_32 */ + #ifdef CONFIG_INTR_REMAP /* I/O APIC RTE contents at the OS boot up */ static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; @@ -861,18 +980,51 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) return best_guess; } +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +/* + * EISA Edge/Level control register, ELCR + */ +static int EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + apic_printk(APIC_VERBOSE, KERN_INFO + "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +#endif + /* ISA interrupts are always polarity zero edge triggered, * when listed as conforming in the MP table. */ #define default_ISA_trigger(idx) (0) #define default_ISA_polarity(idx) (0) +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_polarity(idx) default_ISA_polarity(idx) + /* PCI interrupts are always polarity one level triggered, * when listed as conforming in the MP table. */ #define default_PCI_trigger(idx) (1) #define default_PCI_polarity(idx) (1) +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) default_ISA_polarity(idx) + static int MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mp_srcbus; @@ -930,6 +1082,36 @@ static int MPBIOS_trigger(int idx) trigger = default_ISA_trigger(idx); else trigger = default_PCI_trigger(idx); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } +#endif break; case 1: /* edge */ { @@ -967,6 +1149,7 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } +int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { int irq, i; @@ -988,7 +1171,32 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; + /* + * For MPS mode, so far only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } + +#ifdef CONFIG_X86_32 + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. + */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + apic_printk(APIC_VERBOSE, KERN_DEBUG + "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } +#endif + return irq; } @@ -1058,8 +1266,13 @@ next: } if (unlikely(current_vector == vector)) continue; +#ifdef CONFIG_X86_64 if (vector == IA32_SYSCALL_VECTOR) goto next; +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif for_each_cpu_mask_nr(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; @@ -1140,6 +1353,34 @@ static struct irq_chip ioapic_chip; static struct irq_chip ir_ioapic_chip; #endif +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#ifdef CONFIG_X86_32 +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} +#else +static inline int IO_APIC_irq_trigger(int irq) +{ + return 1; +} +#endif + static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; @@ -1150,7 +1391,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger) else desc = irq_to_desc_alloc(irq); - if (trigger) + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) desc->status |= IRQ_LEVEL; else desc->status &= ~IRQ_LEVEL; @@ -1168,7 +1410,8 @@ static void ioapic_register_intr(int irq, unsigned long trigger) return; } #endif - if (trigger) + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, handle_fasteoi_irq, "fasteoi"); @@ -1303,6 +1546,10 @@ static void __init setup_IO_APIC_irqs(void) } irq = pin_2_irq(idx, apic, pin); +#ifdef CONFIG_X86_32 + if (multi_timer_check(apic, irq)) + continue; +#endif add_pin_to_irq(irq, apic, pin); setup_IO_APIC_irq(apic, pin, irq, @@ -1360,6 +1607,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; + union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; @@ -1384,6 +1632,8 @@ __apicdebuginit(void) print_IO_APIC(void) reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); @@ -1399,11 +1649,27 @@ __apicdebuginit(void) print_IO_APIC(void) printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - if (reg_01.bits.version >= 0x10) { + /* + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, + * but the value of reg_02 is read as the previous read register + * value, so ignore it if reg_02 == reg_01. + */ + if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); } + /* + * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 + * or reg_03, but the value of reg_0[23] is read as the previous read + * register value, so ignore it if reg_03 == reg_0[12]. + */ + if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && + reg_03.raw != reg_01.raw) { + printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); + } + printk(KERN_DEBUG ".... IRQ redirection table:\n"); printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" @@ -1475,7 +1741,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base) __apicdebuginit(void) print_local_APIC(void *dummy) { unsigned int v, ver, maxlvt; - unsigned long icr; + u64 icr; if (apic_verbosity == APIC_QUIET) return; @@ -1492,11 +1758,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy) v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); - v = apic_read(APIC_PROCPRI); - printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + v = apic_read(APIC_PROCPRI); + printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); + } v = apic_read(APIC_EOI); printk(KERN_DEBUG "... APIC EOI: %08x\n", v); @@ -1516,8 +1784,13 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC IRR field:\n"); print_APIC_bitfield(APIC_IRR); - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } icr = apic_icr_read(); printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); @@ -1608,6 +1881,13 @@ void __init enable_IO_APIC(void) int apic; unsigned long flags; +#ifdef CONFIG_X86_32 + int i; + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; +#endif + /* * The number of IO-APIC IRQ registers (== #pins): */ @@ -1636,6 +1916,10 @@ void __init enable_IO_APIC(void) } found_i8259: /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); i8259_apic = find_isa_irq_apic(0, mp_ExtINT); /* Trust the MP table if nothing is setup in the hardware */ @@ -1695,6 +1979,122 @@ void disable_IO_APIC(void) disconnect_bsp_APIC(ioapic_i8259.pin != -1); } +#ifdef CONFIG_X86_32 +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc(void) +{ + union IO_APIC_reg_00 reg_00; + physid_mask_t phys_id_present_map; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) + return; + + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + /* + * This is broken; anything with a real cpu count has to + * circumvent this idiocy regardless. + */ + phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mp_apicid; + + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", + apic, mp_ioapics[apic].mp_apicid); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + reg_00.bits.ID); + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + } + + /* + * Sanity check, is the ID really free? Every APIC in a + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(phys_id_present_map, + mp_ioapics[apic].mp_apicid)) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", + apic, mp_ioapics[apic].mp_apicid); + for (i = 0; i < get_physical_broadcast(); i++) + if (!physid_isset(i, phys_id_present_map)) + break; + if (i >= get_physical_broadcast()) + panic("Max APIC ID exceeded!\n"); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + physid_set(i, phys_id_present_map); + mp_ioapics[apic].mp_apicid = i; + } else { + physid_mask_t tmp; + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + apic_printk(APIC_VERBOSE, "Setting %d in the " + "phys_id_present_map\n", + mp_ioapics[apic].mp_apicid); + physids_or(phys_id_present_map, phys_id_present_map, tmp); + } + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mp_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic + = mp_ioapics[apic].mp_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. + */ + apic_printk(APIC_VERBOSE, KERN_INFO + "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mp_apicid); + + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + printk("could not set ID!\n"); + else + apic_printk(APIC_VERBOSE, " ok.\n"); + } +} +#endif + int no_timer_check __initdata; static int __init notimercheck(char *s) @@ -1780,8 +2180,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } +#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { + struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; @@ -1791,6 +2193,14 @@ static int ioapic_retrigger_irq(unsigned int irq) return 1; } +#else +static int ioapic_retrigger_irq(unsigned int irq) +{ + send_IPI_self(irq_cfg(irq)->vector); + + return 1; +} +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1952,7 +2362,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; ack_APIC_irq(); +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); me = smp_processor_id(); @@ -2024,6 +2436,7 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } +#ifdef CONFIG_X86_64 static void ack_apic_level(unsigned int irq) { int do_unmask_irq = 0; @@ -2076,6 +2489,49 @@ static void ack_apic_level(unsigned int irq) unmask_IO_APIC_irq(irq); } } +#else +atomic_t irq_mis_count; +static void ack_apic_level(unsigned int irq) +{ + unsigned long v; + int i; + + irq_complete_move(irq); + move_native_irq(irq); + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} +#endif static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", @@ -2141,20 +2597,24 @@ static inline void init_IO_APIC_traps(void) } } -static void unmask_lapic_irq(unsigned int irq) +/* + * The local APIC irq-chip implementation: + */ + +static void mask_lapic_irq(unsigned int irq) { unsigned long v; v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); } -static void mask_lapic_irq(unsigned int irq) +static void unmask_lapic_irq(unsigned int irq) { unsigned long v; v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } static void ack_lapic_irq (unsigned int irq) @@ -2182,19 +2642,19 @@ static void lapic_register_intr(int irq) static void __init setup_nmi(void) { /* - * Dirty trick to enable the NMI watchdog ... + * Dirty trick to enable the NMI watchdog ... * We put the 8259A master into AEOI mode and * unmask on all local APICs LVT0 as NMI. * * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') * is from Maciej W. Rozycki - so we do not have to EOI from * the NMI handler or the timer interrupt. - */ - printk(KERN_INFO "activating NMI Watchdog ..."); + */ + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); enable_NMI_through_LVT0(); - printk(" done.\n"); + apic_printk(APIC_VERBOSE, " done.\n"); } /* @@ -2211,12 +2671,17 @@ static inline void __init unlock_ExtINT_logic(void) unsigned char save_control, save_freq_select; pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) { + WARN_ON_ONCE(1); + return; + } apic = find_isa_irq_apic(8, mp_INT); - if (pin == -1) + if (apic == -1) { + WARN_ON_ONCE(1); return; + } entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2268,17 +2733,21 @@ int timer_through_8259 __initdata; * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. * - * FIXME: really need to revamp this for modern platforms only. + * FIXME: really need to revamp this for all platforms. */ static inline void __init check_timer(void) { struct irq_cfg *cfg = irq_cfg(0); int apic1, pin1, apic2, pin2; unsigned long flags; + unsigned int ver; int no_pin1 = 0; local_irq_save(flags); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + /* * get/set the timer IRQ vector: */ @@ -2287,10 +2756,18 @@ static inline void __init check_timer(void) /* * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. + * wire has to be disabled in the local APIC. Also + * timer interrupts need to be acknowledged manually in + * the 8259A for the i82489DX when using the NMI + * watchdog as that APIC treats NMIs as level-triggered. + * The AEOI mode will finish them in the 8259A + * automatically. */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); +#ifdef CONFIG_X86_32 + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2382,6 +2859,9 @@ static inline void __init check_timer(void) "through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = NMI_NONE; } +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -2435,19 +2915,29 @@ out: * the I/O APIC in all cases now. No actual device should request * it anyway. --macro */ -#define PIC_IRQS (1<<2) +#define PIC_IRQS (1 << PIC_CASCADE_IR) void __init setup_IO_APIC(void) { +#ifdef CONFIG_X86_32 + enable_IO_APIC(); +#else /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ +#endif io_apic_irqs = ~PIC_IRQS; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - + /* + * Set up IO-APIC IRQ routing. + */ +#ifdef CONFIG_X86_32 + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#endif sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); @@ -3128,7 +3618,93 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) #ifdef CONFIG_ACPI -#define IO_APIC_MAX_ID 0xFE +#ifdef CONFIG_X86_32 +int __init io_apic_get_unique_id(int ioapic, int apic_id) +{ + union IO_APIC_reg_00 reg_00; + static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + physid_mask_t tmp; + unsigned long flags; + int i = 0; + + /* + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only + * supports up to 16 on one shared APIC bus. + * + * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full + * advantage of new APIC bus architecture. + */ + + if (physids_empty(apic_id_map)) + apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + if (apic_id >= get_physical_broadcast()) { + printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " + "%d\n", ioapic, apic_id, reg_00.bits.ID); + apic_id = reg_00.bits.ID; + } + + /* + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(apic_id_map, apic_id)) { + + for (i = 0; i < get_physical_broadcast(); i++) { + if (!check_apicid_used(apic_id_map, i)) + break; + } + + if (i == get_physical_broadcast()) + panic("Max apic_id exceeded!\n"); + + printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " + "trying %d\n", ioapic, apic_id, i); + + apic_id = i; + } + + tmp = apicid_to_cpu_present(apic_id); + physids_or(apic_id_map, apic_id_map, tmp); + + if (reg_00.bits.ID != apic_id) { + reg_00.bits.ID = apic_id; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* Sanity check */ + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } + } + + apic_printk(APIC_VERBOSE, KERN_INFO + "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + + return apic_id; +} + +int __init io_apic_get_version(int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.version; +} +#endif int __init io_apic_get_redir_entries (int ioapic) { @@ -3226,6 +3802,7 @@ void __init setup_ioapic_dest(void) } #endif +#ifdef CONFIG_X86_64 #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; @@ -3261,36 +3838,56 @@ static struct resource * __init ioapic_setup_resources(void) return res; } +#endif void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - struct resource *ioapic_res; int i; +#ifdef CONFIG_X86_64 + struct resource *ioapic_res; ioapic_res = ioapic_setup_resources(); +#endif for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; +#ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } +#endif } else { +#ifdef CONFIG_X86_32 +fake_ioapic_page: +#endif ioapic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %016lx (%016lx)\n", + "mapped IOAPIC to %08lx (%08lx)\n", __fix_to_virt(idx), ioapic_phys); idx++; +#ifdef CONFIG_X86_64 if (ioapic_res != NULL) { ioapic_res->start = ioapic_phys; ioapic_res->end = ioapic_phys + (4 * 1024) - 1; ioapic_res++; } +#endif } } +#ifdef CONFIG_X86_64 static int __init ioapic_insert_resources(void) { int i; @@ -3313,4 +3910,4 @@ static int __init ioapic_insert_resources(void) /* Insert the IO APIC resources after PCI initialization has occured to handle * IO APICS that are mapped in on a BAR in PCI space. */ late_initcall(ioapic_insert_resources); - +#endif -- cgit v1.2.3 From 54168ed7f2a4f3fc2780e645124ae952598da601 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Aug 2008 09:07:45 +0200 Subject: x86: make io_apic_32.c the same as io_apic_64.c Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 1521 ++++++++++++++++++++++++++++++------------ 1 file changed, 1104 insertions(+), 417 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 3ed36041c81..fba6d6ee348 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -35,7 +35,7 @@ #include #include #include -#include /* time_after() */ +#include /* time_after() */ #ifdef CONFIG_ACPI #include #endif @@ -64,8 +64,8 @@ #define __apicdebuginit(type) static type __init /* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes + * Is the SiS APIC rmw bug present ? + * -1 = don't know, 0 = no, 1 = yes */ int sis_apic_bug = -1; @@ -102,7 +102,7 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; -static int __init parse_noapic(char *arg) +static int __init parse_noapic(char *str) { /* disable IO-APIC */ disable_ioapic_setup(); @@ -188,7 +188,7 @@ static void __init init_work(void *data) irq_cfgx[legacy_count - 1].next = NULL; } -#define for_each_irq_cfg(cfg) \ +#define for_each_irq_cfg(cfg) \ for (cfg = irq_cfgx; cfg; cfg = cfg->next) DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); @@ -262,7 +262,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) irq_cfgx = cfg; cfg->irq = irq; printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); - #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG { /* dump the results */ @@ -384,9 +383,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i */ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { - volatile struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); + struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -494,11 +493,20 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) apic = entry->apic; pin = entry->pin; +#ifdef CONFIG_INTR_REMAP + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); +#else io_apic_write(apic, 0x11 + pin*2, dest); +#endif reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; - io_apic_modify(apic, 0x10 + pin *2, reg); + io_apic_modify(apic, 0x10 + pin*2, reg); if (!entry->next) break; entry = entry->next; @@ -513,6 +521,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) unsigned long flags; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -529,12 +538,12 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) */ dest = SET_APIC_LOGICAL_ID(dest); + desc = irq_to_desc(irq); spin_lock_irqsave(&ioapic_lock, flags); __target_IO_APIC_irq(irq, dest, cfg->vector); - irq_to_desc(irq)->affinity = mask; + desc->affinity = mask; spin_unlock_irqrestore(&ioapic_lock, flags); } - #endif /* CONFIG_SMP */ /* @@ -699,7 +708,7 @@ static void __unmask_and_level_IO_APIC_irq(unsigned int irq) #endif -static void mask_IO_APIC_irq(unsigned int irq) +static void mask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -708,7 +717,7 @@ static void mask_IO_APIC_irq(unsigned int irq) spin_unlock_irqrestore(&ioapic_lock, flags); } -static void unmask_IO_APIC_irq(unsigned int irq) +static void unmask_IO_APIC_irq (unsigned int irq) { unsigned long flags; @@ -725,14 +734,13 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) entry = ioapic_read_entry(apic, pin); if (entry.delivery_mode == dest_SMI) return; - /* * Disable it in the IO-APIC irq-routing table: */ ioapic_mask_entry(apic, pin); } -static void clear_IO_APIC(void) +static void clear_IO_APIC (void) { int apic, pin; @@ -741,7 +749,7 @@ static void clear_IO_APIC(void) clear_IO_APIC_pin(apic, pin); } -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) void send_IPI_self(int vector) { unsigned int cfg; @@ -756,9 +764,9 @@ void send_IPI_self(int vector) */ apic_write(APIC_ICR, cfg); } -#endif /* !CONFIG_SMP */ - +#endif /* !CONFIG_SMP && CONFIG_X86_32*/ +#ifdef CONFIG_X86_32 /* * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to * specific CPU-side IRQs. @@ -797,6 +805,75 @@ static int __init ioapic_pirq_setup(char *str) } __setup("pirq=", ioapic_pirq_setup); +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_INTR_REMAP +/* I/O APIC RTE contents at the OS boot up */ +static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} +#endif /* * Find the IRQ entry number of a certain pin. @@ -848,7 +925,7 @@ static int __init find_isa_irq_apic(int irq, int type) } if (i < mp_irq_entries) { int apic; - for (apic = 0; apic < nr_ioapics; apic++) { + for(apic = 0; apic < nr_ioapics; apic++) { if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) return apic; } @@ -867,10 +944,10 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) { int apic, i, best_guess = -1; - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, " - "slot:%d, pin:%d.\n", bus, slot, pin); + apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); if (test_bit(bus, mp_bus_not_pci)) { - printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); return -1; } for (i = 0; i < mp_irq_entries; i++) { @@ -885,7 +962,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) !mp_irqs[i].mp_irqtype && (bus == lbus) && (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq); + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); if (!(apic || IO_APIC_IRQ(irq))) continue; @@ -902,6 +979,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) } return best_guess; } + EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) @@ -918,6 +996,7 @@ static int EISA_ELCR(unsigned int irq) "Broken MPtable reports ISA irq %d\n", irq); return 0; } + #endif /* ISA interrupts are always polarity zero edge triggered, @@ -954,36 +1033,36 @@ static int MPBIOS_polarity(int idx) /* * Determine IRQ line polarity (high active or low active): */ - switch (mp_irqs[idx].mp_irqflag & 3) { - case 0: /* conforms, ie. bus-type dependent polarity */ - { - polarity = test_bit(bus, mp_bus_not_pci)? - default_ISA_polarity(idx): - default_PCI_polarity(idx); - break; - } - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ + switch (mp_irqs[idx].mp_irqflag & 3) { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } + case 0: /* conforms, ie. bus-type dependent polarity */ + if (test_bit(bus, mp_bus_not_pci)) + polarity = default_ISA_polarity(idx); + else + polarity = default_PCI_polarity(idx); + break; + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } } return polarity; } @@ -996,67 +1075,67 @@ static int MPBIOS_trigger(int idx) /* * Determine IRQ trigger mode (edge or level sensitive): */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { - case 0: /* conforms, ie. bus-type dependent */ + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) { - trigger = test_bit(bus, mp_bus_not_pci)? - default_ISA_trigger(idx): - default_PCI_trigger(idx); + case 0: /* conforms, ie. bus-type dependent */ + if (test_bit(bus, mp_bus_not_pci)) + trigger = default_ISA_trigger(idx); + else + trigger = default_PCI_trigger(idx); #if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } +#endif break; - } - case MP_BUS_EISA: /* EISA pin */ + case 1: /* edge */ { - trigger = default_EISA_trigger(idx); + trigger = 0; break; } - case MP_BUS_PCI: /* PCI pin */ + case 2: /* reserved */ { - /* set before the switch */ + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; break; } - case MP_BUS_MCA: /* MCA pin */ + case 3: /* level */ { - trigger = default_MCA_trigger(idx); + trigger = 1; break; } - default: + default: /* invalid */ { printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; + trigger = 0; break; } } -#endif - break; - } - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } return trigger; } @@ -1082,9 +1161,9 @@ static int pin_2_irq(int idx, int apic, int pin) if (mp_irqs[idx].mp_dstirq != pin) printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - if (test_bit(bus, mp_bus_not_pci)) + if (test_bit(bus, mp_bus_not_pci)) { irq = mp_irqs[idx].mp_srcbusirq; - else { + } else { /* * PCI IRQs are mapped in order */ @@ -1092,14 +1171,14 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; - - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); + /* + * For MPS mode, so far only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } +#ifdef CONFIG_X86_32 /* * PCI IRQ command line redirection. Yes, limits are hardcoded. */ @@ -1116,6 +1195,8 @@ static int pin_2_irq(int idx, int apic, int pin) } } } +#endif + return irq; } @@ -1145,74 +1226,70 @@ static int __assign_irq_vector(int irq, cpumask_t mask) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - unsigned int old_vector; - int cpu; - struct irq_cfg *cfg; + static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + unsigned int old_vector; + int cpu; + struct irq_cfg *cfg; - cfg = irq_cfg(irq); + cfg = irq_cfg(irq); - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; - old_vector = cfg->vector; - if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) - return 0; - } + old_vector = cfg->vector; + if (old_vector) { + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) + return 0; + } - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; - int new_cpu; - int vector, offset; + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; + int new_cpu; + int vector, offset; - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); - vector = current_vector; - offset = current_offset; + vector = current_vector; + offset = current_offset; next: - vector += 8; - if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (unlikely(current_vector == vector)) - continue; + vector += 8; + if (vector >= first_system_vector) { + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (unlikely(current_vector == vector)) + continue; #ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; + if (vector == IA32_SYSCALL_VECTOR) + goto next; #else - if (vector == SYSCALL_VECTOR) - goto next; + if (vector == SYSCALL_VECTOR) + goto next; #endif - for_each_cpu_mask_nr(new_cpu, new_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) - goto next; - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; - } - printk(KERN_DEBUG "assign_irq_vector: irq %d vector %#x cpu ", irq, vector); - for_each_cpu_mask_nr(new_cpu, new_mask) { - per_cpu(vector_irq, new_cpu)[vector] = irq; - printk(KERN_CONT " %d ", new_cpu); + for_each_cpu_mask_nr(new_cpu, new_mask) + if (per_cpu(vector_irq, new_cpu)[vector] != -1) + goto next; + /* Found one! */ + current_vector = vector; + current_offset = offset; + if (old_vector) { + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; } - printk(KERN_CONT "\n"); - cfg->vector = vector; - cfg->domain = domain; - return 0; - } - return -ENOSPC; + for_each_cpu_mask_nr(new_cpu, new_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + return 0; + } + return -ENOSPC; } static int assign_irq_vector(int irq, cpumask_t mask) @@ -1223,7 +1300,6 @@ static int assign_irq_vector(int irq, cpumask_t mask) spin_lock_irqsave(&vector_lock, flags); err = __assign_irq_vector(irq, mask); spin_unlock_irqrestore(&vector_lock, flags); - return err; } @@ -1269,36 +1345,39 @@ void __setup_vector_irq(int cpu) cfg = irq_cfg(irq); if (!cpu_isset(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; - } + } } static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) { - int apic, idx, pin; + int apic, idx, pin; - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; } #else static inline int IO_APIC_irq_trigger(int irq) { - return 1; + return 1; } #endif @@ -1318,13 +1397,27 @@ static void ioapic_register_intr(int irq, unsigned long trigger) else desc->status &= ~IRQ_LEVEL; +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + desc->status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, "fasteoi"); + handle_fasteoi_irq, + "fasteoi"); else set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); + handle_edge_irq, "edge"); } static int setup_ioapic_entry(int apic, int irq, @@ -1337,11 +1430,45 @@ static int setup_ioapic_entry(int apic, int irq, */ memset(entry,0,sizeof(*entry)); - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; - entry->dest = destination; +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; + } - entry->mask = 0; /* enable IRQ */ + entry->mask = 0; /* enable IRQ */ entry->trigger = trigger; entry->polarity = polarity; entry->vector = vector; @@ -1351,12 +1478,11 @@ static int setup_ioapic_entry(int apic, int irq, */ if (trigger) entry->mask = 1; - return 0; } static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, - int trigger, int polarity) + int trigger, int polarity) { struct irq_cfg *cfg; struct IO_APIC_route_entry entry; @@ -1420,10 +1546,10 @@ static void __init setup_IO_APIC_irqs(void) } irq = pin_2_irq(idx, apic, pin); - +#ifdef CONFIG_X86_32 if (multi_timer_check(apic, irq)) continue; - +#endif add_pin_to_irq(irq, apic, pin); setup_IO_APIC_irq(apic, pin, irq, @@ -1443,6 +1569,11 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, { struct IO_APIC_route_entry entry; +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + return; +#endif + memset(&entry, 0, sizeof(entry)); /* @@ -1461,7 +1592,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, * The timer IRQ doesn't have to know that behind the * scene we may have a 8259A-master in AEOI mode ... */ - ioapic_register_intr(0, IOAPIC_EDGE); + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); /* * Add it to the IO-APIC irq-routing table: @@ -1501,17 +1632,18 @@ __apicdebuginit(void) print_IO_APIC(void) reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); + printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); @@ -1548,7 +1680,10 @@ __apicdebuginit(void) print_IO_APIC(void) entry = ioapic_read_entry(apic, i); - printk(KERN_DEBUG " %02x %02X ", i, entry.dest); + printk(KERN_DEBUG " %02x %03X ", + i, + entry.dest + ); printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", entry.mask, @@ -1567,7 +1702,7 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_pin_list *entry = cfg->irq_2_pin; if (!entry) continue; - printk(KERN_DEBUG "IRQ%d ", i); + printk(KERN_DEBUG "IRQ%d ", cfg->irq); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -1614,8 +1749,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); - printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, - GET_APIC_ID(v)); + printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id()); v = apic_read(APIC_LVR); printk(KERN_INFO "... APIC VERSION: %08x\n", v); ver = GET_APIC_VERSION(v); @@ -1624,7 +1758,7 @@ __apicdebuginit(void) print_local_APIC(void *dummy) v = apic_read(APIC_TASKPRI); printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (APIC_INTEGRATED(ver)) { /* !82489DX */ v = apic_read(APIC_ARBPRI); printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, v & APIC_ARBPRI_MASK); @@ -1650,9 +1784,10 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC IRR field:\n"); print_APIC_bitfield(APIC_IRR); - if (APIC_INTEGRATED(ver)) { /* !82489DX */ - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + if (APIC_INTEGRATED(ver)) { /* !82489DX */ + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ apic_write(APIC_ESR, 0); + v = apic_read(APIC_ESR); printk(KERN_DEBUG "... APIC ESR: %08x\n", v); } @@ -1710,11 +1845,11 @@ __apicdebuginit(void) print_PIC(void) v = inb(0xa0) << 8 | inb(0x20); printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - outb(0x0b, 0xa0); - outb(0x0b, 0x20); + outb(0x0b,0xa0); + outb(0x0b,0x20); v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a, 0xa0); - outb(0x0a, 0x20); + outb(0x0a,0xa0); + outb(0x0a,0x20); spin_unlock_irqrestore(&i8259A_lock, flags); @@ -1739,16 +1874,19 @@ fs_initcall(print_all_ICs); /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -static void __init enable_IO_APIC(void) +void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; int i8259_apic, i8259_pin; - int i, apic; + int apic; unsigned long flags; +#ifdef CONFIG_X86_32 + int i; if (!pirqs_enabled) for (i = 0; i < MAX_PIRQS; i++) pirq_entries[i] = -1; +#endif /* * The number of IO-APIC IRQ registers (== #pins): @@ -1759,7 +1897,7 @@ static void __init enable_IO_APIC(void) spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } - for (apic = 0; apic < nr_ioapics; apic++) { + for(apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { @@ -1830,16 +1968,18 @@ void disable_IO_APIC(void) entry.dest_mode = 0; /* Physical */ entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; - entry.dest = read_apic_id(); + entry.dest = read_apic_id(); /* * Add it to the IO-APIC irq-routing table: */ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } + disconnect_bsp_APIC(ioapic_i8259.pin != -1); } +#ifdef CONFIG_X86_32 /* * function to set the IO-APIC physical IDs based on the * values stored in the MPC table. @@ -1940,8 +2080,6 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check @@ -1955,6 +2093,7 @@ static void __init setup_ioapic_ids_from_mpc(void) apic_printk(APIC_VERBOSE, " ok.\n"); } } +#endif int no_timer_check __initdata; @@ -1994,9 +2133,10 @@ static int __init timer_irq_works(void) * might have cached one ExtINT interrupt. Finally, at * least one tick may be lost due to delays. */ + + /* jiffies wrap? */ if (time_after(jiffies, t1 + 4)) return 1; - return 0; } @@ -2014,8 +2154,6 @@ static int __init timer_irq_works(void) */ /* - * Startup quirk: - * * Starting up a edge-triggered IO-APIC interrupt is * nasty - we need to make sure that we get the edge. * If it is already asserted for some reason, we need @@ -2023,9 +2161,8 @@ static int __init timer_irq_works(void) * * This is not complete - we should be able to fake * an edge even if it isn't on the 8259A... - * - * (We do this for level-triggered IRQs too - it cannot hurt.) */ + static unsigned int startup_ioapic_irq(unsigned int irq) { int was_pending = 0; @@ -2043,70 +2180,254 @@ static unsigned int startup_ioapic_irq(unsigned int irq) return was_pending; } +#ifdef CONFIG_X86_64 static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_cfg(irq)->vector); + + struct irq_cfg *cfg = irq_cfg(irq); + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); return 1; } - -#ifdef CONFIG_SMP -asmlinkage void smp_irq_move_cleanup_interrupt(void) +#else +static int ioapic_retrigger_irq(unsigned int irq) { - unsigned vector, me; - ack_APIC_irq(); - irq_enter(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; + send_IPI_self(irq_cfg(irq)->vector); - desc = irq_to_desc(irq); - if (!desc) - continue; + return 1; +} +#endif - cfg = irq_cfg(irq); - spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. + */ - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) - goto unlock; +#ifdef CONFIG_SMP - __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; -unlock: - spin_unlock(&desc->lock); - } +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); - irq_exit(); -} +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); -static void irq_complete_move(unsigned int irq) +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) { - struct irq_cfg *cfg = irq_cfg(irq); - unsigned vector, me; + struct irq_cfg *cfg; + struct irq_desc *desc; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte; + unsigned int dest; + unsigned long flags; - if (likely(!cfg->move_in_progress)) + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) return; - vector = ~get_irq_regs()->orig_ax; - me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; + if (get_irte(irq, &irte)) + return; - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc->affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, desc->pending_mask); + + ret = 0; + desc->status &= ~IRQ_MOVE_PENDING; + cpus_clear(desc->pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + unsigned int irq; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, desc->pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + +asmlinkage void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + ack_APIC_irq(); +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; + cfg->move_cleanup_count--; +unlock: + spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void irq_complete_move(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + unsigned vector, me; + + if (likely(!cfg->move_in_progress)) + return; + + vector = ~get_irq_regs()->orig_ax; + me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); cfg->move_in_progress = 0; } } #else static inline void irq_complete_move(unsigned int irq) {} #endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif static void ack_apic_edge(unsigned int irq) { @@ -2118,55 +2439,55 @@ static void ack_apic_edge(unsigned int irq) #ifdef CONFIG_X86_64 static void ack_apic_level(unsigned int irq) { - int do_unmask_irq = 0; + int do_unmask_irq = 0; - irq_complete_move(irq); + irq_complete_move(irq); #ifdef CONFIG_GENERIC_PENDING_IRQ - /* If we are moving the irq we need to mask it */ - if (unlikely(desc->status & IRQ_MOVE_PENDING)) { - do_unmask_irq = 1; - mask_IO_APIC_irq(irq); - } + /* If we are moving the irq we need to mask it */ + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_IO_APIC_irq(irq); + } #endif - /* - * We must acknowledge the irq before we move it or the acknowledge will - * not propagate properly. - */ - ack_APIC_irq(); - - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(irq)) - move_masked_irq(irq, desc); - unmask_IO_APIC_irq(irq); - } + /* + * We must acknowledge the irq before we move it or the acknowledge will + * not propagate properly. + */ + ack_APIC_irq(); + + /* Now we can move and renable the irq */ + if (unlikely(do_unmask_irq)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(irq)) + move_masked_irq(irq); + unmask_IO_APIC_irq(irq); + } } #else atomic_t irq_mis_count; @@ -2177,25 +2498,25 @@ static void ack_apic_level(unsigned int irq) irq_complete_move(irq); move_native_irq(irq); -/* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ i = irq_cfg(irq)->vector; v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); @@ -2225,6 +2546,20 @@ static struct irq_chip ioapic_chip __read_mostly = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif static inline void init_IO_APIC_traps(void) { @@ -2282,7 +2617,7 @@ static void unmask_lapic_irq(unsigned int irq) apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); } -static void ack_lapic_irq(unsigned int irq) +static void ack_lapic_irq (unsigned int irq) { ack_APIC_irq(); } @@ -2383,12 +2718,12 @@ static inline void __init unlock_ExtINT_logic(void) static int disable_timer_pin_1 __initdata; /* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init parse_disable_timer_pin_1(char *arg) +static int __init disable_timer_pin_setup(char *arg) { disable_timer_pin_1 = 1; return 0; } -early_param("disable_timer_pin_1", parse_disable_timer_pin_1); +early_param("disable_timer_pin_1", disable_timer_pin_setup); int timer_through_8259 __initdata; @@ -2397,6 +2732,8 @@ int timer_through_8259 __initdata; * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast * fanatically on his truly buggy board. + * + * FIXME: really need to revamp this for all platforms. */ static inline void __init check_timer(void) { @@ -2408,8 +2745,8 @@ static inline void __init check_timer(void) local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2428,7 +2765,9 @@ static inline void __init check_timer(void) */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); +#ifdef CONFIG_X86_32 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2447,6 +2786,10 @@ static inline void __init check_timer(void) * 8259A. */ if (pin1 == -1) { +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); +#endif pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2473,6 +2816,10 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); +#endif clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " @@ -2512,7 +2859,9 @@ static inline void __init check_timer(void) "through the IO-APIC - disabling NMI Watchdog!\n"); nmi_watchdog = NMI_NONE; } +#ifdef CONFIG_X86_32 timer_ack = 0; +#endif apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -2570,17 +2919,25 @@ out: void __init setup_IO_APIC(void) { + +#ifdef CONFIG_X86_32 enable_IO_APIC(); +#else + /* + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ +#endif io_apic_irqs = ~PIC_IRQS; - printk("ENABLING IO-APIC IRQs\n"); - - /* - * Set up IO-APIC IRQ routing. - */ - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); + apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + /* + * Set up IO-APIC IRQ routing. + */ +#ifdef CONFIG_X86_32 + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#endif sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); @@ -2588,15 +2945,15 @@ void __init setup_IO_APIC(void) } /* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path + * Called after all the initialization is done. If we didnt find any + * APIC bugs then we can allow the modify fast path */ static int __init io_apic_bug_finalize(void) { - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; } late_initcall(io_apic_bug_finalize); @@ -2605,7 +2962,7 @@ struct sysfs_ioapic_data { struct sys_device dev; struct IO_APIC_route_entry entry[0]; }; -static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS]; +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; static int ioapic_suspend(struct sys_device *dev, pm_message_t state) { @@ -2615,8 +2972,8 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state) data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - entry[i] = ioapic_read_entry(dev->id, i); + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) + *entry = ioapic_read_entry(dev->id, i); return 0; } @@ -2653,14 +3010,14 @@ static struct sysdev_class ioapic_sysdev_class = { static int __init ioapic_init_sysfs(void) { - struct sys_device *dev; - int i, size, error = 0; + struct sys_device * dev; + int i, size, error; error = sysdev_class_register(&ioapic_sysdev_class); if (error) return error; - for (i = 0; i < nr_ioapics; i++) { + for (i = 0; i < nr_ioapics; i++ ) { size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); @@ -2691,18 +3048,18 @@ device_initcall(ioapic_init_sysfs); unsigned int create_irq_nr(unsigned int irq_want) { /* Allocate an unused irq */ - unsigned int irq, new; + unsigned int irq; + unsigned int new; unsigned long flags; struct irq_cfg *cfg_new; #ifndef CONFIG_HAVE_SPARSE_IRQ - /* only can use bus/dev/fn.. when per_cpu vector is used */ irq_want = nr_irqs - 1; #endif irq = 0; spin_lock_irqsave(&vector_lock, flags); - for (new = (nr_irqs - 1); new > 0; new--) { + for (new = irq_want; new > 0; new--) { if (platform_legacy_irq(new)) continue; cfg_new = irq_cfg(new); @@ -2725,7 +3082,14 @@ unsigned int create_irq_nr(unsigned int irq_want) int create_irq(void) { - return create_irq_nr(nr_irqs - 1); + int irq; + + irq = create_irq_nr(nr_irqs - 1); + + if (irq == 0) + irq = -1; + + return irq; } void destroy_irq(unsigned int irq) @@ -2734,6 +3098,9 @@ void destroy_irq(unsigned int irq) dynamic_irq_cleanup(irq); +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq); spin_unlock_irqrestore(&vector_lock, flags); @@ -2759,25 +3126,54 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms cpus_and(tmp, cfg->domain, tmp); dest = cpu_mask_to_apicid(tmp); - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); + } return err; } @@ -2788,6 +3184,7 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) struct msi_msg msg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2808,8 +3205,61 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); - irq_to_desc(irq)->affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif #endif /* CONFIG_SMP */ /* @@ -2827,6 +3277,45 @@ static struct irq_chip msi_chip = { .retrigger = ioapic_retrigger_irq, }; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) { @@ -2840,7 +3329,17 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) set_irq_msi(irq, desc); write_msi_msg(irq, &msg); - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_to_desc(irq); + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); return 0; } @@ -2859,59 +3358,164 @@ static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) { - int irq, ret; - + unsigned int irq; + int ret; unsigned int irq_want; irq_want = build_irq_for_pci_dev(dev) + 0x100; irq = create_irq_nr(irq_want); - if (irq == 0) return -1; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif ret = setup_msi_irq(dev, desc, irq); if (ret < 0) { destroy_irq(irq); return ret; - } - + } return 0; + +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif } int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - unsigned int irq; - int ret, sub_handle; - struct msi_desc *desc; - unsigned int irq_want; - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); - if (irq == 0) - return -1; - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) - goto error; - sub_handle++; - } - return 0; + unsigned int irq; + int ret, sub_handle; + struct msi_desc *desc; + unsigned int irq_want; + +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } + return 0; error: - destroy_irq(irq); - return ret; + destroy_irq(irq); + return ret; } - void arch_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); } -#endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + +#endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support */ @@ -2938,6 +3542,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) struct irq_cfg *cfg; unsigned int dest; cpumask_t tmp; + struct irq_desc *desc; cpus_and(tmp, mask, cpu_online_map); if (cpus_empty(tmp)) @@ -2951,7 +3556,8 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(tmp); target_ht_irq(irq, dest, cfg->vector); - irq_to_desc(irq)->affinity = mask; + desc = irq_to_desc(irq); + desc->affinity = mask; } #endif @@ -2974,7 +3580,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) tmp = TARGET_CPUS; err = assign_irq_vector(irq, tmp); - if ( !err) { + if (!err) { struct ht_irq_msg msg; unsigned dest; @@ -3007,11 +3613,12 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) #endif /* CONFIG_HT_IRQ */ /* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration + ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ #ifdef CONFIG_ACPI +#ifdef CONFIG_X86_32 int __init io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; @@ -3086,7 +3693,6 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } - int __init io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; @@ -3098,9 +3704,9 @@ int __init io_apic_get_version(int ioapic) return reg_01.bits.version; } +#endif - -int __init io_apic_get_redir_entries(int ioapic) +int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3113,10 +3719,10 @@ int __init io_apic_get_redir_entries(int ioapic) } -int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int polarity) +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { if (!IO_APIC_IRQ(irq)) { - printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ioapic); return -EINVAL; } @@ -3132,6 +3738,7 @@ int io_apic_set_pci_routing(int ioapic, int pin, int irq, int triggering, int po return 0; } + int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) { int i; @@ -3163,7 +3770,6 @@ void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; struct irq_cfg *cfg; - struct irq_desc *desc; if (skip_ioapic_setup == 1) return; @@ -3184,43 +3790,124 @@ void __init setup_ioapic_dest(void) setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); - else { - desc = irq_to_desc(irq); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + else set_ioapic_affinity_irq(irq, TARGET_CPUS); - } } } } #endif +#ifdef CONFIG_X86_64 +#define IOAPIC_RESOURCE_NAME_SIZE 11 + +static struct resource *ioapic_resources; + +static struct resource * __init ioapic_setup_resources(void) +{ + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} +#endif + void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; +#ifdef CONFIG_X86_64 + struct resource *ioapic_res; + ioapic_res = ioapic_setup_resources(); +#endif for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } +#ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } +#endif } else { +#ifdef CONFIG_X86_32 fake_ioapic_page: +#endif ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, + "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); idx++; + +#ifdef CONFIG_X86_64 + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } +#endif } } +#ifdef CONFIG_X86_64 +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk(KERN_ERR + "IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif -- cgit v1.2.3 From 26d347c2c035b1f4c5b3c5094f3046db9ec920f5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:42 -0700 Subject: rename io_apic_64.c and io_apic_32.c to io_apic.c The two files are now line by line equal. (sans a printk) Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/io_apic.c | 3913 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/io_apic_32.c | 3913 ------------------------------------------ arch/x86/kernel/io_apic_64.c | 3913 ------------------------------------------ 4 files changed, 3914 insertions(+), 7827 deletions(-) create mode 100644 arch/x86/kernel/io_apic.c delete mode 100644 arch/x86/kernel/io_apic_32.c delete mode 100644 arch/x86/kernel/io_apic_64.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0d41f0343dc..7264f9c3af8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -61,7 +61,7 @@ obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o -obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o +obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c new file mode 100644 index 00000000000..fba6d6ee348 --- /dev/null +++ b/arch/x86/kernel/io_apic.c @@ -0,0 +1,3913 @@ +/* + * Intel IO-APIC support for multi-Pentium hosts. + * + * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo + * + * Many thanks to Stig Venaas for trying out countless experimental + * patches and reporting/debugging problems patiently! + * + * (c) 1999, Multiple IO-APIC support, developed by + * Ken-ichi Yaku and + * Hidemi Kishimoto , + * further tested and cleaned up by Zach Brown + * and Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively + * Paul Diefenbaugh : Added full ACPI support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* time_after() */ +#ifdef CONFIG_ACPI +#include +#endif +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define __apicdebuginit(type) static type __init + +/* + * Is the SiS APIC rmw bug present ? + * -1 = don't know, 0 = no, 1 = yes + */ +int sis_apic_bug = -1; + +static DEFINE_SPINLOCK(ioapic_lock); +static DEFINE_SPINLOCK(vector_lock); + +int first_free_entry; +/* + * Rough estimation of how many shared IRQs there are, can + * be changed anytime. + */ +int pin_map_size; + +/* + * # of IRQ routing registers + */ +int nr_ioapic_registers[MAX_IO_APICS]; + +/* I/O APIC entries */ +struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +int nr_ioapics; + +/* MP IRQ source entries */ +struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; + +/* # of MP IRQ source entries */ +int mp_irq_entries; + +#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +int mp_bus_id_to_type[MAX_MP_BUSSES]; +#endif + +DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); + +int skip_ioapic_setup; + +static int __init parse_noapic(char *str) +{ + /* disable IO-APIC */ + disable_ioapic_setup(); + return 0; +} +early_param("noapic", parse_noapic); + +struct irq_cfg; +struct irq_pin_list; +struct irq_cfg { + unsigned int irq; + struct irq_cfg *next; + struct irq_pin_list *irq_2_pin; + cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; + u8 vector; + u8 move_in_progress : 1; +}; + +/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +static struct irq_cfg irq_cfg_legacy[] __initdata = { + [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, + [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, + [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, + [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, + [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, + [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, + [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, + [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, + [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, + [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, + [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, + [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, + [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, + [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, + [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, + [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, +}; + +static struct irq_cfg irq_cfg_init = { .irq = -1U, }; +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; +} + +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +static void init_one_irq_cfg(struct irq_cfg *cfg) +{ + memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); +} + +static struct irq_cfg *irq_cfgx; +static struct irq_cfg *irq_cfgx_free; +static void __init init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_cfg *cfg; + int legacy_count; + int i; + + cfg = *da->name; + + memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); + + legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + for (i = legacy_count; i < *da->nr; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < *da->nr; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = &irq_cfgx[legacy_count]; + irq_cfgx[legacy_count - 1].next = NULL; +} + +#define for_each_irq_cfg(cfg) \ + for (cfg = irq_cfgx; cfg; cfg = cfg->next) + +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); + +static struct irq_cfg *irq_cfg(unsigned int irq) +{ + struct irq_cfg *cfg; + + cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + cfg = cfg->next; + } + + return NULL; +} + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + struct irq_cfg *cfg, *cfg_pri; + int i; + int count = 0; + + cfg_pri = cfg = irq_cfgx; + while (cfg) { + if (cfg->irq == irq) + return cfg; + + cfg_pri = cfg; + cfg = cfg->next; + count++; + } + + if (!irq_cfgx_free) { + unsigned long phys; + unsigned long total_bytes; + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); + + total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; + if (after_bootmem) + cfg = kzalloc(total_bytes, GFP_ATOMIC); + else + cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); + + if (!cfg) + panic("please boot with nr_irq_cfg= %d\n", count * 2); + + phys = __pa(cfg); + printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); + + for (i = 0; i < nr_irq_cfg; i++) + init_one_irq_cfg(&cfg[i]); + + for (i = 1; i < nr_irq_cfg; i++) + cfg[i-1].next = &cfg[i]; + + irq_cfgx_free = cfg; + } + + cfg = irq_cfgx_free; + irq_cfgx_free = irq_cfgx_free->next; + cfg->next = NULL; + if (cfg_pri) + cfg_pri->next = cfg; + else + irq_cfgx = cfg; + cfg->irq = irq; + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); +#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG + { + /* dump the results */ + struct irq_cfg *cfg; + unsigned long phys; + unsigned long bytes = sizeof(struct irq_cfg); + + printk(KERN_DEBUG "=========================== %d\n", irq); + printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); + for_each_irq_cfg(cfg) { + phys = __pa(cfg); + printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); + } + printk(KERN_DEBUG "===========================\n"); + } +#endif + return cfg; +} + +/* + * This is performance-critical, we want to do it O(1) + * + * the indexing order of this array favors 1:1 mappings + * between pins and IRQs. + */ + +struct irq_pin_list { + int apic, pin; + struct irq_pin_list *next; +}; + +static struct irq_pin_list *irq_2_pin_head; +/* fill one page ? */ +static int nr_irq_2_pin = 0x100; +static struct irq_pin_list *irq_2_pin_ptr; +static void __init irq_2_pin_init_work(void *data) +{ + struct dyn_array *da = data; + struct irq_pin_list *pin; + int i; + + pin = *da->name; + + for (i = 1; i < *da->nr; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = &pin[0]; +} +DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); + +static struct irq_pin_list *get_one_free_irq_2_pin(void) +{ + struct irq_pin_list *pin; + int i; + + pin = irq_2_pin_ptr; + + if (pin) { + irq_2_pin_ptr = pin->next; + pin->next = NULL; + return pin; + } + + /* + * we run out of pre-allocate ones, allocate more + */ + printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); + + if (after_bootmem) + pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, + GFP_ATOMIC); + else + pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * + nr_irq_2_pin, PAGE_SIZE, 0); + + if (!pin) + panic("can not get more irq_2_pin\n"); + + for (i = 1; i < nr_irq_2_pin; i++) + pin[i-1].next = &pin[i]; + + irq_2_pin_ptr = pin->next; + pin->next = NULL; + + return pin; +} + +struct io_apic { + unsigned int index; + unsigned int unused[3]; + unsigned int data; +}; + +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) +{ + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) + + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); +} + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + return readl(&io_apic->data); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +} + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. + * + * Older SiS APIC requires we rewrite the index register + */ +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +} + +#ifdef CONFIG_X86_64 +static bool io_apic_level_ack_pending(unsigned int irq) +{ + struct irq_pin_list *entry; + unsigned long flags; + struct irq_cfg *cfg = irq_cfg(irq); + + spin_lock_irqsave(&ioapic_lock, flags); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + int pin; + + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + /* Is the remote IRR bit set? */ + if (reg & IO_APIC_REDIR_REMOTE_IRR) { + spin_unlock_irqrestore(&ioapic_lock, flags); + return true; + } + if (!entry->next) + break; + entry = entry->next; + } + spin_unlock_irqrestore(&ioapic_lock, flags); + + return false; +} +#endif + +union entry_union { + struct { u32 w1, w2; }; + struct IO_APIC_route_entry entry; +}; + +static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +{ + union entry_union eu; + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); + eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + return eu.entry; +} + +/* + * When we write a new IO APIC routing entry, we need to write the high + * word first! If the mask bit in the low word is clear, we will enable + * the interrupt, and we need to make sure the entry is fully populated + * before that happens. + */ +static void +__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + union entry_union eu; + eu.entry = e; + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); +} + +static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +{ + unsigned long flags; + spin_lock_irqsave(&ioapic_lock, flags); + __ioapic_write_entry(apic, pin, e); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +/* + * When we mask an IO APIC routing entry, we need to write the low + * word first, in order to set the mask bit before we change the + * high bits! + */ +static void ioapic_mask_entry(int apic, int pin) +{ + unsigned long flags; + union entry_union eu = { .entry.mask = 1 }; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0x10 + 2*pin, eu.w1); + io_apic_write(apic, 0x11 + 2*pin, eu.w2); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +#ifdef CONFIG_SMP +static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + int apic, pin; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + unsigned int reg; + + if (!entry) + break; + + apic = entry->apic; + pin = entry->pin; +#ifdef CONFIG_INTR_REMAP + /* + * With interrupt-remapping, destination information comes + * from interrupt-remapping table entry. + */ + if (!irq_remapped(irq)) + io_apic_write(apic, 0x11 + pin*2, dest); +#else + io_apic_write(apic, 0x11 + pin*2, dest); +#endif + reg = io_apic_read(apic, 0x10 + pin*2); + reg &= ~IO_APIC_REDIR_VECTOR_MASK; + reg |= vector; + io_apic_modify(apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +static int assign_irq_vector(int irq, cpumask_t mask); + +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + cfg = irq_cfg(irq); + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + desc = irq_to_desc(irq); + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + desc->affinity = mask; + spin_unlock_irqrestore(&ioapic_lock, flags); +} +#endif /* CONFIG_SMP */ + +/* + * The common case is 1:1 IRQ<->pin mappings. Sometimes there are + * shared ISA-space IRQs, so we have to support them. We are super + * fast in the common case, and fast for shared ISA-space IRQs. + */ +static void add_pin_to_irq(unsigned int irq, int apic, int pin) +{ + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + /* first time to refer irq_cfg, so with new */ + cfg = irq_cfg_alloc(irq); + entry = cfg->irq_2_pin; + if (!entry) { + entry = get_one_free_irq_2_pin(); + cfg->irq_2_pin = entry; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); + return; + } + + while (entry->next) { + /* not again, please */ + if (entry->apic == apic && entry->pin == pin) + return; + + entry = entry->next; + } + + entry->next = get_one_free_irq_2_pin(); + entry = entry->next; + entry->apic = apic; + entry->pin = pin; + printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); +} + +/* + * Reroute an IRQ to a different pin. + */ +static void __init replace_pin_at_irq(unsigned int irq, + int oldapic, int oldpin, + int newapic, int newpin) +{ + struct irq_cfg *cfg = irq_cfg(irq); + struct irq_pin_list *entry = cfg->irq_2_pin; + int replaced = 0; + + while (entry) { + if (entry->apic == oldapic && entry->pin == oldpin) { + entry->apic = newapic; + entry->pin = newpin; + replaced = 1; + /* every one is different, right? */ + break; + } + entry = entry->next; + } + + /* why? call replace before add? */ + if (!replaced) + add_pin_to_irq(irq, newapic, newpin); +} + +#ifdef CONFIG_X86_64 +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); +} + +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_cfg *cfg; \ + struct irq_pin_list *entry; \ + \ + cfg = irq_cfg(irq); \ + entry = cfg->irq_2_pin; \ + for (;;) { \ + unsigned int reg; \ + if (!entry) \ + break; \ + pin = entry->pin; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ + FINAL; \ + if (!entry->next) \ + break; \ + entry = entry->next; \ + } \ +} + +#define DO_ACTION(name,R,ACTION, FINAL) \ + \ + static void name##_IO_APIC_irq (unsigned int irq) \ + __DO_ACTION(R, ACTION, FINAL) + +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + +/* mask = 0 */ +DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) + +#else + +static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) +{ + struct irq_cfg *cfg; + struct irq_pin_list *entry; + unsigned int pin, reg; + + cfg = irq_cfg(irq); + entry = cfg->irq_2_pin; + for (;;) { + if (!entry) + break; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin*2); + reg &= ~disable; + reg |= enable; + io_apic_modify(entry->apic, 0x10 + pin*2, reg); + if (!entry->next) + break; + entry = entry->next; + } +} + +/* mask = 1 */ +static void __mask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); +} + +/* mask = 0 */ +static void __unmask_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); +} + +/* mask = 1, trigger = 0 */ +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER); +} + +/* mask = 0, trigger = 1 */ +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +{ + __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED); +} + +#endif + +static void mask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __mask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void unmask_IO_APIC_irq (unsigned int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +{ + struct IO_APIC_route_entry entry; + + /* Check delivery_mode to be sure we're not clearing an SMI pin */ + entry = ioapic_read_entry(apic, pin); + if (entry.delivery_mode == dest_SMI) + return; + /* + * Disable it in the IO-APIC irq-routing table: + */ + ioapic_mask_entry(apic, pin); +} + +static void clear_IO_APIC (void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + clear_IO_APIC_pin(apic, pin); +} + +#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) +void send_IPI_self(int vector) +{ + unsigned int cfg; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; + /* + * Send the IPI. The write to APIC_ICR fires this off. + */ + apic_write(APIC_ICR, cfg); +} +#endif /* !CONFIG_SMP && CONFIG_X86_32*/ + +#ifdef CONFIG_X86_32 +/* + * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to + * specific CPU-side IRQs. + */ + +#define MAX_PIRQS 8 +static int pirq_entries [MAX_PIRQS]; +static int pirqs_enabled; + +static int __init ioapic_pirq_setup(char *str) +{ + int i, max; + int ints[MAX_PIRQS+1]; + + get_options(str, ARRAY_SIZE(ints), ints); + + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; + + pirqs_enabled = 1; + apic_printk(APIC_VERBOSE, KERN_INFO + "PIRQ redirection, working around broken MP-BIOS.\n"); + max = MAX_PIRQS; + if (ints[0] < MAX_PIRQS) + max = ints[0]; + + for (i = 0; i < max; i++) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); + /* + * PIRQs are mapped upside down, usually. + */ + pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; + } + return 1; +} + +__setup("pirq=", ioapic_pirq_setup); +#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_INTR_REMAP +/* I/O APIC RTE contents at the OS boot up */ +static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; + +/* + * Saves and masks all the unmasked IO-APIC RTE's + */ +int save_mask_IO_APIC_setup(void) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + int apic, pin; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + + for (apic = 0; apic < nr_ioapics; apic++) { + early_ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_KERNEL); + if (!early_ioapic_entries[apic]) + return -ENOMEM; + } + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + + entry = early_ioapic_entries[apic][pin] = + ioapic_read_entry(apic, pin); + if (!entry.mask) { + entry.mask = 1; + ioapic_write_entry(apic, pin, entry); + } + } + return 0; +} + +void restore_IO_APIC_setup(void) +{ + int apic, pin; + + for (apic = 0; apic < nr_ioapics; apic++) + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) + ioapic_write_entry(apic, pin, + early_ioapic_entries[apic][pin]); +} + +void reinit_intr_remapped_IO_APIC(int intr_remapping) +{ + /* + * for now plain restore of previous settings. + * TBD: In the case of OS enabling interrupt-remapping, + * IO-APIC RTE's need to be setup to point to interrupt-remapping + * table entries. for now, do a plain restore, and wait for + * the setup_IO_APIC_irqs() to do proper initialization. + */ + restore_IO_APIC_setup(); +} +#endif + +/* + * Find the IRQ entry number of a certain pin. + */ +static int find_irq_entry(int apic, int pin, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_irqtype == type && + (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) && + mp_irqs[i].mp_dstirq == pin) + return i; + + return -1; +} + +/* + * Find the pin to which IRQ[irq] (ISA) is connected + */ +static int __init find_isa_irq_pin(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mp_srcbus; + + if (test_bit(lbus, mp_bus_not_pci) && + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) + + return mp_irqs[i].mp_dstirq; + } + return -1; +} + +static int __init find_isa_irq_apic(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mp_srcbus; + + if (test_bit(lbus, mp_bus_not_pci) && + (mp_irqs[i].mp_irqtype == type) && + (mp_irqs[i].mp_srcbusirq == irq)) + break; + } + if (i < mp_irq_entries) { + int apic; + for(apic = 0; apic < nr_ioapics; apic++) { + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) + return apic; + } + } + + return -1; +} + +/* + * Find a specific PCI IRQ entry. + * Not an __init, possibly needed by modules + */ +static int pin_2_irq(int idx, int apic, int pin); + +int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +{ + int apic, i, best_guess = -1; + + apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", + bus, slot, pin); + if (test_bit(bus, mp_bus_not_pci)) { + apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); + return -1; + } + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mp_srcbus; + + for (apic = 0; apic < nr_ioapics; apic++) + if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || + mp_irqs[i].mp_dstapic == MP_APIC_ALL) + break; + + if (!test_bit(lbus, mp_bus_not_pci) && + !mp_irqs[i].mp_irqtype && + (bus == lbus) && + (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { + int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); + + if (!(apic || IO_APIC_IRQ(irq))) + continue; + + if (pin == (mp_irqs[i].mp_srcbusirq & 3)) + return irq; + /* + * Use the first all-but-pin matching entry as a + * best-guess fuzzy result for broken mptables. + */ + if (best_guess < 0) + best_guess = irq; + } + } + return best_guess; +} + +EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); + +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +/* + * EISA Edge/Level control register, ELCR + */ +static int EISA_ELCR(unsigned int irq) +{ + if (irq < 16) { + unsigned int port = 0x4d0 + (irq >> 3); + return (inb(port) >> (irq & 7)) & 1; + } + apic_printk(APIC_VERBOSE, KERN_INFO + "Broken MPtable reports ISA irq %d\n", irq); + return 0; +} + +#endif + +/* ISA interrupts are always polarity zero edge triggered, + * when listed as conforming in the MP table. */ + +#define default_ISA_trigger(idx) (0) +#define default_ISA_polarity(idx) (0) + +/* EISA interrupts are always polarity zero and can be edge or level + * trigger depending on the ELCR value. If an interrupt is listed as + * EISA conforming in the MP table, that means its trigger type must + * be read in from the ELCR */ + +#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +#define default_EISA_polarity(idx) default_ISA_polarity(idx) + +/* PCI interrupts are always polarity one level triggered, + * when listed as conforming in the MP table. */ + +#define default_PCI_trigger(idx) (1) +#define default_PCI_polarity(idx) (1) + +/* MCA interrupts are always polarity zero level triggered, + * when listed as conforming in the MP table. */ + +#define default_MCA_trigger(idx) (1) +#define default_MCA_polarity(idx) default_ISA_polarity(idx) + +static int MPBIOS_polarity(int idx) +{ + int bus = mp_irqs[idx].mp_srcbus; + int polarity; + + /* + * Determine IRQ line polarity (high active or low active): + */ + switch (mp_irqs[idx].mp_irqflag & 3) + { + case 0: /* conforms, ie. bus-type dependent polarity */ + if (test_bit(bus, mp_bus_not_pci)) + polarity = default_ISA_polarity(idx); + else + polarity = default_PCI_polarity(idx); + break; + case 1: /* high active */ + { + polarity = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + case 3: /* low active */ + { + polarity = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + polarity = 1; + break; + } + } + return polarity; +} + +static int MPBIOS_trigger(int idx) +{ + int bus = mp_irqs[idx].mp_srcbus; + int trigger; + + /* + * Determine IRQ trigger mode (edge or level sensitive): + */ + switch ((mp_irqs[idx].mp_irqflag>>2) & 3) + { + case 0: /* conforms, ie. bus-type dependent */ + if (test_bit(bus, mp_bus_not_pci)) + trigger = default_ISA_trigger(idx); + else + trigger = default_PCI_trigger(idx); +#if defined(CONFIG_EISA) || defined(CONFIG_MCA) + switch (mp_bus_id_to_type[bus]) { + case MP_BUS_ISA: /* ISA pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_EISA: /* EISA pin */ + { + trigger = default_EISA_trigger(idx); + break; + } + case MP_BUS_PCI: /* PCI pin */ + { + /* set before the switch */ + break; + } + case MP_BUS_MCA: /* MCA pin */ + { + trigger = default_MCA_trigger(idx); + break; + } + default: + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + } +#endif + break; + case 1: /* edge */ + { + trigger = 0; + break; + } + case 2: /* reserved */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 1; + break; + } + case 3: /* level */ + { + trigger = 1; + break; + } + default: /* invalid */ + { + printk(KERN_WARNING "broken BIOS!!\n"); + trigger = 0; + break; + } + } + return trigger; +} + +static inline int irq_polarity(int idx) +{ + return MPBIOS_polarity(idx); +} + +static inline int irq_trigger(int idx) +{ + return MPBIOS_trigger(idx); +} + +int (*ioapic_renumber_irq)(int ioapic, int irq); +static int pin_2_irq(int idx, int apic, int pin) +{ + int irq, i; + int bus = mp_irqs[idx].mp_srcbus; + + /* + * Debugging check, we are in big trouble if this message pops up! + */ + if (mp_irqs[idx].mp_dstirq != pin) + printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); + + if (test_bit(bus, mp_bus_not_pci)) { + irq = mp_irqs[idx].mp_srcbusirq; + } else { + /* + * PCI IRQs are mapped in order + */ + i = irq = 0; + while (i < apic) + irq += nr_ioapic_registers[i++]; + irq += pin; + /* + * For MPS mode, so far only needed by ES7000 platform + */ + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); + } + +#ifdef CONFIG_X86_32 + /* + * PCI IRQ command line redirection. Yes, limits are hardcoded. + */ + if ((pin >= 16) && (pin <= 23)) { + if (pirq_entries[pin-16] != -1) { + if (!pirq_entries[pin-16]) { + apic_printk(APIC_VERBOSE, KERN_DEBUG + "disabling PIRQ%d\n", pin-16); + } else { + irq = pirq_entries[pin-16]; + apic_printk(APIC_VERBOSE, KERN_DEBUG + "using PIRQ%d -> IRQ %d\n", + pin-16, irq); + } + } + } +#endif + + return irq; +} + +void lock_vector_lock(void) +{ + /* Used to the online set of cpus does not change + * during assign_irq_vector. + */ + spin_lock(&vector_lock); +} + +void unlock_vector_lock(void) +{ + spin_unlock(&vector_lock); +} + +static int __assign_irq_vector(int irq, cpumask_t mask) +{ + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + unsigned int old_vector; + int cpu; + struct irq_cfg *cfg; + + cfg = irq_cfg(irq); + + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); + + if ((cfg->move_in_progress) || cfg->move_cleanup_count) + return -EBUSY; + + old_vector = cfg->vector; + if (old_vector) { + cpumask_t tmp; + cpus_and(tmp, cfg->domain, mask); + if (!cpus_empty(tmp)) + return 0; + } + + for_each_cpu_mask_nr(cpu, mask) { + cpumask_t domain, new_mask; + int new_cpu; + int vector, offset; + + domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); + + vector = current_vector; + offset = current_offset; +next: + vector += 8; + if (vector >= first_system_vector) { + /* If we run out of vectors on large boxen, must share them. */ + offset = (offset + 1) % 8; + vector = FIRST_DEVICE_VECTOR + offset; + } + if (unlikely(current_vector == vector)) + continue; +#ifdef CONFIG_X86_64 + if (vector == IA32_SYSCALL_VECTOR) + goto next; +#else + if (vector == SYSCALL_VECTOR) + goto next; +#endif + for_each_cpu_mask_nr(new_cpu, new_mask) + if (per_cpu(vector_irq, new_cpu)[vector] != -1) + goto next; + /* Found one! */ + current_vector = vector; + current_offset = offset; + if (old_vector) { + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; + } + for_each_cpu_mask_nr(new_cpu, new_mask) + per_cpu(vector_irq, new_cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + return 0; + } + return -ENOSPC; +} + +static int assign_irq_vector(int irq, cpumask_t mask) +{ + int err; + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + err = __assign_irq_vector(irq, mask); + spin_unlock_irqrestore(&vector_lock, flags); + return err; +} + +static void __clear_irq_vector(int irq) +{ + struct irq_cfg *cfg; + cpumask_t mask; + int cpu, vector; + + cfg = irq_cfg(irq); + BUG_ON(!cfg->vector); + + vector = cfg->vector; + cpus_and(mask, cfg->domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) + per_cpu(vector_irq, cpu)[vector] = -1; + + cfg->vector = 0; + cpus_clear(cfg->domain); +} + +void __setup_vector_irq(int cpu) +{ + /* Initialize vector_irq on a new cpu */ + /* This function must be called with vector_lock held */ + int irq, vector; + struct irq_cfg *cfg; + + /* Mark the inuse vectors */ + for_each_irq_cfg(cfg) { + if (!cpu_isset(cpu, cfg->domain)) + continue; + vector = cfg->vector; + irq = cfg->irq; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq < 0) + continue; + + cfg = irq_cfg(irq); + if (!cpu_isset(cpu, cfg->domain)) + per_cpu(vector_irq, cpu)[vector] = -1; + } +} + +static struct irq_chip ioapic_chip; +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip; +#endif + +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + +#ifdef CONFIG_X86_32 +static inline int IO_APIC_irq_trigger(int irq) +{ + int apic, idx, pin; + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* + * nonexistent IRQs are edge default + */ + return 0; +} +#else +static inline int IO_APIC_irq_trigger(int irq) +{ + return 1; +} +#endif + +static void ioapic_register_intr(int irq, unsigned long trigger) +{ + struct irq_desc *desc; + + /* first time to use this irq_desc */ + if (irq < 16) + desc = irq_to_desc(irq); + else + desc = irq_to_desc_alloc(irq); + + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + desc->status |= IRQ_LEVEL; + else + desc->status &= ~IRQ_LEVEL; + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + desc->status |= IRQ_MOVE_PCNTXT; + if (trigger) + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, + handle_edge_irq, "edge"); + return; + } +#endif + if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || + trigger == IOAPIC_LEVEL) + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, + "fasteoi"); + else + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); +} + +static int setup_ioapic_entry(int apic, int irq, + struct IO_APIC_route_entry *entry, + unsigned int destination, int trigger, + int polarity, int vector) +{ + /* + * add it to the IO-APIC irq-routing table: + */ + memset(entry,0,sizeof(*entry)); + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) { + struct intel_iommu *iommu = map_ioapic_to_ir(apic); + struct irte irte; + struct IR_IO_APIC_route_entry *ir_entry = + (struct IR_IO_APIC_route_entry *) entry; + int index; + + if (!iommu) + panic("No mapping iommu for ioapic %d\n", apic); + + index = alloc_irte(iommu, irq, 1); + if (index < 0) + panic("Failed to allocate IRTE for ioapic %d\n", apic); + + memset(&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = trigger; + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = vector; + irte.dest_id = IRTE_DEST(destination); + + modify_irte(irq, &irte); + + ir_entry->index2 = (index >> 15) & 0x1; + ir_entry->zero = 0; + ir_entry->format = 1; + ir_entry->index = (index & 0x7fff); + } else +#endif + { + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->dest = destination; + } + + entry->mask = 0; /* enable IRQ */ + entry->trigger = trigger; + entry->polarity = polarity; + entry->vector = vector; + + /* Mask level triggered irqs. + * Use IRQ_DELAYED_DISABLE for edge triggered irqs. + */ + if (trigger) + entry->mask = 1; + return 0; +} + +static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, + int trigger, int polarity) +{ + struct irq_cfg *cfg; + struct IO_APIC_route_entry entry; + cpumask_t mask; + + if (!IO_APIC_IRQ(irq)) + return; + + cfg = irq_cfg(irq); + + mask = TARGET_CPUS; + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(mask, cfg->domain, mask); + + apic_printk(APIC_VERBOSE,KERN_DEBUG + "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " + "IRQ %d Mode:%i Active:%i)\n", + apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, + irq, trigger, polarity); + + + if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, + cpu_mask_to_apicid(mask), trigger, polarity, + cfg->vector)) { + printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", + mp_ioapics[apic].mp_apicid, pin); + __clear_irq_vector(irq); + return; + } + + ioapic_register_intr(irq, trigger); + if (irq < 16) + disable_8259A_irq(irq); + + ioapic_write_entry(apic, pin, entry); +} + +static void __init setup_IO_APIC_irqs(void) +{ + int apic, pin, idx, irq, first_notcon = 1; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + idx = find_irq_entry(apic,pin,mp_INT); + if (idx == -1) { + if (first_notcon) { + apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); + first_notcon = 0; + } else + apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); + continue; + } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } + + irq = pin_2_irq(idx, apic, pin); +#ifdef CONFIG_X86_32 + if (multi_timer_check(apic, irq)) + continue; +#endif + add_pin_to_irq(irq, apic, pin); + + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); + } + } + + if (!first_notcon) + apic_printk(APIC_VERBOSE, " not connected.\n"); +} + +/* + * Set up the timer pin, possibly with the 8259A-master behind. + */ +static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, + int vector) +{ + struct IO_APIC_route_entry entry; + +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + return; +#endif + + memset(&entry, 0, sizeof(entry)); + + /* + * We use logical delivery to get the timer IRQ + * to the first CPU. + */ + entry.dest_mode = INT_DEST_MODE; + entry.mask = 1; /* mask IRQ now */ + entry.dest = cpu_mask_to_apicid(TARGET_CPUS); + entry.delivery_mode = INT_DELIVERY_MODE; + entry.polarity = 0; + entry.trigger = 0; + entry.vector = vector; + + /* + * The timer IRQ doesn't have to know that behind the + * scene we may have a 8259A-master in AEOI mode ... + */ + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); + + /* + * Add it to the IO-APIC irq-routing table: + */ + ioapic_write_entry(apic, pin, entry); +} + + +__apicdebuginit(void) print_IO_APIC(void) +{ + int apic, i; + union IO_APIC_reg_00 reg_00; + union IO_APIC_reg_01 reg_01; + union IO_APIC_reg_02 reg_02; + union IO_APIC_reg_03 reg_03; + unsigned long flags; + struct irq_cfg *cfg; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); + for (i = 0; i < nr_ioapics; i++) + printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", + mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); + + /* + * We are a bit conservative about what we expect. We have to + * know about every hardware change ASAP. + */ + printk(KERN_INFO "testing the IO APIC.......................\n"); + + for (apic = 0; apic < nr_ioapics; apic++) { + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + reg_01.raw = io_apic_read(apic, 1); + if (reg_01.bits.version >= 0x10) + reg_02.raw = io_apic_read(apic, 2); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); + spin_unlock_irqrestore(&ioapic_lock, flags); + + printk("\n"); + printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); + printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); + printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); + printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); + printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); + + printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); + printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); + + printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); + printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); + + /* + * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, + * but the value of reg_02 is read as the previous read register + * value, so ignore it if reg_02 == reg_01. + */ + if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { + printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); + printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); + } + + /* + * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 + * or reg_03, but the value of reg_0[23] is read as the previous read + * register value, so ignore it if reg_03 == reg_0[12]. + */ + if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && + reg_03.raw != reg_01.raw) { + printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); + printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); + } + + printk(KERN_DEBUG ".... IRQ redirection table:\n"); + + printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" + " Stat Dmod Deli Vect: \n"); + + for (i = 0; i <= reg_01.bits.entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + printk(KERN_DEBUG " %02x %03X ", + i, + entry.dest + ); + + printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector + ); + } + } + printk(KERN_DEBUG "IRQ to pin mappings:\n"); + for_each_irq_cfg(cfg) { + struct irq_pin_list *entry = cfg->irq_2_pin; + if (!entry) + continue; + printk(KERN_DEBUG "IRQ%d ", cfg->irq); + for (;;) { + printk("-> %d:%d", entry->apic, entry->pin); + if (!entry->next) + break; + entry = entry->next; + } + printk("\n"); + } + + printk(KERN_INFO ".................................... done.\n"); + + return; +} + +__apicdebuginit(void) print_APIC_bitfield(int base) +{ + unsigned int v; + int i, j; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); + for (i = 0; i < 8; i++) { + v = apic_read(base + i*0x10); + for (j = 0; j < 32; j++) { + if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + + v = apic_read(APIC_ESR); + printk(KERN_DEBUG "... APIC ESR: %08x\n", v); + } + + icr = apic_icr_read(); + printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + + v = apic_read(APIC_LVTT); + printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); + + if (maxlvt > 3) { /* PC is LVT#4. */ + v = apic_read(APIC_LVTPC); + printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); + } + v = apic_read(APIC_LVT0); + printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); + v = apic_read(APIC_LVT1); + printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); + + if (maxlvt > 2) { /* ERR is LVT#3. */ + v = apic_read(APIC_LVTERR); + printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); + } + + v = apic_read(APIC_TMICT); + printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); + v = apic_read(APIC_TMCCT); + printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); + v = apic_read(APIC_TDCR); + printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); + printk("\n"); +} + +__apicdebuginit(void) print_all_local_APICs(void) +{ + on_each_cpu(print_local_APIC, NULL, 1); +} + +__apicdebuginit(void) print_PIC(void) +{ + unsigned int v; + unsigned long flags; + + if (apic_verbosity == APIC_QUIET) + return; + + printk(KERN_DEBUG "\nprinting PIC contents\n"); + + spin_lock_irqsave(&i8259A_lock, flags); + + v = inb(0xa1) << 8 | inb(0x21); + printk(KERN_DEBUG "... PIC IMR: %04x\n", v); + + v = inb(0xa0) << 8 | inb(0x20); + printk(KERN_DEBUG "... PIC IRR: %04x\n", v); + + outb(0x0b,0xa0); + outb(0x0b,0x20); + v = inb(0xa0) << 8 | inb(0x20); + outb(0x0a,0xa0); + outb(0x0a,0x20); + + spin_unlock_irqrestore(&i8259A_lock, flags); + + printk(KERN_DEBUG "... PIC ISR: %04x\n", v); + + v = inb(0x4d1) << 8 | inb(0x4d0); + printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); +} + +__apicdebuginit(int) print_all_ICs(void) +{ + print_PIC(); + print_all_local_APICs(); + print_IO_APIC(); + + return 0; +} + +fs_initcall(print_all_ICs); + + +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + +void __init enable_IO_APIC(void) +{ + union IO_APIC_reg_01 reg_01; + int i8259_apic, i8259_pin; + int apic; + unsigned long flags; + +#ifdef CONFIG_X86_32 + int i; + if (!pirqs_enabled) + for (i = 0; i < MAX_PIRQS; i++) + pirq_entries[i] = -1; +#endif + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + for (apic = 0; apic < nr_ioapics; apic++) { + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(apic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + for(apic = 0; apic < nr_ioapics; apic++) { + int pin; + /* See if any of the pins is in ExtINT mode */ + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + struct IO_APIC_route_entry entry; + entry = ioapic_read_entry(apic, pin); + + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ + if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + ioapic_i8259.apic = apic; + ioapic_i8259.pin = pin; + goto found_i8259; + } + } + } + found_i8259: + /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ + i8259_pin = find_isa_irq_pin(0, mp_ExtINT); + i8259_apic = find_isa_irq_apic(0, mp_ExtINT); + /* Trust the MP table if nothing is setup in the hardware */ + if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { + printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); + ioapic_i8259.pin = i8259_pin; + ioapic_i8259.apic = i8259_apic; + } + /* Complain if the MP table and the hardware disagree */ + if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && + (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) + { + printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); + } + + /* + * Do not trust the IO-APIC being empty at bootup + */ + clear_IO_APIC(); +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ + /* + * Clear the IO-APIC before rebooting: + */ + clear_IO_APIC(); + + /* + * If the i8259 is routed through an IOAPIC + * Put that IOAPIC in virtual wire mode + * so legacy interrupts can be delivered. + */ + if (ioapic_i8259.pin != -1) { + struct IO_APIC_route_entry entry; + + memset(&entry, 0, sizeof(entry)); + entry.mask = 0; /* Enabled */ + entry.trigger = 0; /* Edge */ + entry.irr = 0; + entry.polarity = 0; /* High */ + entry.delivery_status = 0; + entry.dest_mode = 0; /* Physical */ + entry.delivery_mode = dest_ExtINT; /* ExtInt */ + entry.vector = 0; + entry.dest = read_apic_id(); + + /* + * Add it to the IO-APIC irq-routing table: + */ + ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); + } + + disconnect_bsp_APIC(ioapic_i8259.pin != -1); +} + +#ifdef CONFIG_X86_32 +/* + * function to set the IO-APIC physical IDs based on the + * values stored in the MPC table. + * + * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 + */ + +static void __init setup_ioapic_ids_from_mpc(void) +{ + union IO_APIC_reg_00 reg_00; + physid_mask_t phys_id_present_map; + int apic; + int i; + unsigned char old_id; + unsigned long flags; + + if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) + return; + + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + /* + * This is broken; anything with a real cpu count has to + * circumvent this idiocy regardless. + */ + phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); + + /* + * Set the IOAPIC ID to the value stored in the MPC table. + */ + for (apic = 0; apic < nr_ioapics; apic++) { + + /* Read the register 0 value */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + old_id = mp_ioapics[apic].mp_apicid; + + if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", + apic, mp_ioapics[apic].mp_apicid); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + reg_00.bits.ID); + mp_ioapics[apic].mp_apicid = reg_00.bits.ID; + } + + /* + * Sanity check, is the ID really free? Every APIC in a + * system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(phys_id_present_map, + mp_ioapics[apic].mp_apicid)) { + printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", + apic, mp_ioapics[apic].mp_apicid); + for (i = 0; i < get_physical_broadcast(); i++) + if (!physid_isset(i, phys_id_present_map)) + break; + if (i >= get_physical_broadcast()) + panic("Max APIC ID exceeded!\n"); + printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", + i); + physid_set(i, phys_id_present_map); + mp_ioapics[apic].mp_apicid = i; + } else { + physid_mask_t tmp; + tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); + apic_printk(APIC_VERBOSE, "Setting %d in the " + "phys_id_present_map\n", + mp_ioapics[apic].mp_apicid); + physids_or(phys_id_present_map, phys_id_present_map, tmp); + } + + + /* + * We need to adjust the IRQ routing table + * if the ID changed. + */ + if (old_id != mp_ioapics[apic].mp_apicid) + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_dstapic == old_id) + mp_irqs[i].mp_dstapic + = mp_ioapics[apic].mp_apicid; + + /* + * Read the right value from the MPC table and + * write it into the ID register. + */ + apic_printk(APIC_VERBOSE, KERN_INFO + "...changing IO-APIC physical APIC ID to %d ...", + mp_ioapics[apic].mp_apicid); + + reg_00.bits.ID = mp_ioapics[apic].mp_apicid; + spin_lock_irqsave(&ioapic_lock, flags); + + /* + * Sanity check + */ + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(apic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) + printk("could not set ID!\n"); + else + apic_printk(APIC_VERBOSE, " ok.\n"); + } +} +#endif + +int no_timer_check __initdata; + +static int __init notimercheck(char *s) +{ + no_timer_check = 1; + return 1; +} +__setup("no_timer_check", notimercheck); + +/* + * There is a nasty bug in some older SMP boards, their mptable lies + * about the timer IRQ. We do the following to work around the situation: + * + * - timer IRQ defaults to IO-APIC IRQ + * - if this function detects that timer IRQs are defunct, then we fall + * back to ISA timer IRQs + */ +static int __init timer_irq_works(void) +{ + unsigned long t1 = jiffies; + unsigned long flags; + + if (no_timer_check) + return 1; + + local_save_flags(flags); + local_irq_enable(); + /* Let ten ticks pass... */ + mdelay((10 * 1000) / HZ); + local_irq_restore(flags); + + /* + * Expect a few ticks at least, to be sure some possible + * glue logic does not lock up after one or two first + * ticks in a non-ExtINT mode. Also the local APIC + * might have cached one ExtINT interrupt. Finally, at + * least one tick may be lost due to delays. + */ + + /* jiffies wrap? */ + if (time_after(jiffies, t1 + 4)) + return 1; + return 0; +} + +/* + * In the SMP+IOAPIC case it might happen that there are an unspecified + * number of pending IRQ events unhandled. These cases are very rare, + * so we 'resend' these IRQs via IPIs, to the same CPU. It's much + * better to do it this way as thus we do not have to be aware of + * 'pending' interrupts in the IRQ path, except at this point. + */ +/* + * Edge triggered needs to resend any interrupt + * that was delayed but this is now handled in the device + * independent code. + */ + +/* + * Starting up a edge-triggered IO-APIC interrupt is + * nasty - we need to make sure that we get the edge. + * If it is already asserted for some reason, we need + * return 1 to indicate that is was pending. + * + * This is not complete - we should be able to fake + * an edge even if it isn't on the 8259A... + */ + +static unsigned int startup_ioapic_irq(unsigned int irq) +{ + int was_pending = 0; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + if (irq < 16) { + disable_8259A_irq(irq); + if (i8259A_irq_pending(irq)) + was_pending = 1; + } + __unmask_IO_APIC_irq(irq); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return was_pending; +} + +#ifdef CONFIG_X86_64 +static int ioapic_retrigger_irq(unsigned int irq) +{ + + struct irq_cfg *cfg = irq_cfg(irq); + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); + spin_unlock_irqrestore(&vector_lock, flags); + + return 1; +} +#else +static int ioapic_retrigger_irq(unsigned int irq) +{ + send_IPI_self(irq_cfg(irq)->vector); + + return 1; +} +#endif + +/* + * Level and edge triggered IO-APIC interrupts need different handling, + * so we use two separate IRQ descriptors. Edge triggered IRQs can be + * handled with the level-triggered descriptor, but that one has slightly + * more overhead. Level-triggered interrupts cannot be handled with the + * edge-triggered handler, without risking IRQ storms and other ugly + * races. + */ + +#ifdef CONFIG_SMP + +#ifdef CONFIG_INTR_REMAP +static void ir_irq_migration(struct work_struct *work); + +static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); + +/* + * Migrate the IO-APIC irq in the presence of intr-remapping. + * + * For edge triggered, irq migration is a simple atomic update(of vector + * and cpu destination) of IRTE and flush the hardware cache. + * + * For level triggered, we need to modify the io-apic RTE aswell with the update + * vector information, along with modifying IRTE with vector and destination. + * So irq migration for level triggered is little bit more complex compared to + * edge triggered migration. But the good news is, we use the same algorithm + * for level triggered migration as we have today, only difference being, + * we now initiate the irq migration from process context instead of the + * interrupt context. + * + * In future, when we do a directed EOI (combined with cpu EOI broadcast + * suppression) to the IO-APIC, level triggered irq migration will also be + * as simple as edge triggered migration and we can do the irq migration + * with a simple atomic update to IO-APIC RTE. + */ +static void migrate_ioapic_irq(int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + cpumask_t tmp, cleanup_mask; + struct irte irte; + int modify_ioapic_rte; + unsigned int dest; + unsigned long flags; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + desc = irq_to_desc(irq); + modify_ioapic_rte = desc->status & IRQ_LEVEL; + if (modify_ioapic_rte) { + spin_lock_irqsave(&ioapic_lock, flags); + __target_IO_APIC_irq(irq, dest, cfg->vector); + spin_unlock_irqrestore(&ioapic_lock, flags); + } + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * Modified the IRTE and flushes the Interrupt entry cache. + */ + modify_irte(irq, &irte); + + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc->affinity = mask; +} + +static int migrate_irq_remapped_level(int irq) +{ + int ret = -1; + struct irq_desc *desc = irq_to_desc(irq); + + mask_IO_APIC_irq(irq); + + if (io_apic_level_ack_pending(irq)) { + /* + * Interrupt in progress. Migrating irq now will change the + * vector information in the IO-APIC RTE and that will confuse + * the EOI broadcast performed by cpu. + * So, delay the irq migration to the next instance. + */ + schedule_delayed_work(&ir_migration_work, 1); + goto unmask; + } + + /* everthing is clear. we have right of way */ + migrate_ioapic_irq(irq, desc->pending_mask); + + ret = 0; + desc->status &= ~IRQ_MOVE_PENDING; + cpus_clear(desc->pending_mask); + +unmask: + unmask_IO_APIC_irq(irq); + return ret; +} + +static void ir_irq_migration(struct work_struct *work) +{ + unsigned int irq; + struct irq_desc *desc; + + for_each_irq_desc(irq, desc) { + if (desc->status & IRQ_MOVE_PENDING) { + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + if (!desc->chip->set_affinity || + !(desc->status & IRQ_MOVE_PENDING)) { + desc->status &= ~IRQ_MOVE_PENDING; + spin_unlock_irqrestore(&desc->lock, flags); + continue; + } + + desc->chip->set_affinity(irq, desc->pending_mask); + spin_unlock_irqrestore(&desc->lock, flags); + } + } +} + +/* + * Migrates the IRQ destination in the process context. + */ +static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->status & IRQ_LEVEL) { + desc->status |= IRQ_MOVE_PENDING; + desc->pending_mask = mask; + migrate_irq_remapped_level(irq); + return; + } + + migrate_ioapic_irq(irq, mask); +} +#endif + +asmlinkage void smp_irq_move_cleanup_interrupt(void) +{ + unsigned vector, me; + ack_APIC_irq(); +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + + me = smp_processor_id(); + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { + unsigned int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + + desc = irq_to_desc(irq); + if (!desc) + continue; + + cfg = irq_cfg(irq); + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) + goto unlock; + + __get_cpu_var(vector_irq)[vector] = -1; + cfg->move_cleanup_count--; +unlock: + spin_unlock(&desc->lock); + } + + irq_exit(); +} + +static void irq_complete_move(unsigned int irq) +{ + struct irq_cfg *cfg = irq_cfg(irq); + unsigned vector, me; + + if (likely(!cfg->move_in_progress)) + return; + + vector = ~get_irq_regs()->orig_ax; + me = smp_processor_id(); + if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { + cpumask_t cleanup_mask; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } +} +#else +static inline void irq_complete_move(unsigned int irq) {} +#endif +#ifdef CONFIG_INTR_REMAP +static void ack_x2apic_level(unsigned int irq) +{ + ack_x2APIC_irq(); +} + +static void ack_x2apic_edge(unsigned int irq) +{ + ack_x2APIC_irq(); +} +#endif + +static void ack_apic_edge(unsigned int irq) +{ + irq_complete_move(irq); + move_native_irq(irq); + ack_APIC_irq(); +} + +#ifdef CONFIG_X86_64 +static void ack_apic_level(unsigned int irq) +{ + int do_unmask_irq = 0; + + irq_complete_move(irq); +#ifdef CONFIG_GENERIC_PENDING_IRQ + /* If we are moving the irq we need to mask it */ + if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + do_unmask_irq = 1; + mask_IO_APIC_irq(irq); + } +#endif + + /* + * We must acknowledge the irq before we move it or the acknowledge will + * not propagate properly. + */ + ack_APIC_irq(); + + /* Now we can move and renable the irq */ + if (unlikely(do_unmask_irq)) { + /* Only migrate the irq if the ack has been received. + * + * On rare occasions the broadcast level triggered ack gets + * delayed going to ioapics, and if we reprogram the + * vector while Remote IRR is still set the irq will never + * fire again. + * + * To prevent this scenario we read the Remote IRR bit + * of the ioapic. This has two effects. + * - On any sane system the read of the ioapic will + * flush writes (and acks) going to the ioapic from + * this cpu. + * - We get to see if the ACK has actually been delivered. + * + * Based on failed experiments of reprogramming the + * ioapic entry from outside of irq context starting + * with masking the ioapic entry and then polling until + * Remote IRR was clear before reprogramming the + * ioapic I don't trust the Remote IRR bit to be + * completey accurate. + * + * However there appears to be no other way to plug + * this race, so if the Remote IRR bit is not + * accurate and is causing problems then it is a hardware bug + * and you can go talk to the chipset vendor about it. + */ + if (!io_apic_level_ack_pending(irq)) + move_masked_irq(irq); + unmask_IO_APIC_irq(irq); + } +} +#else +atomic_t irq_mis_count; +static void ack_apic_level(unsigned int irq) +{ + unsigned long v; + int i; + + irq_complete_move(irq); + move_native_irq(irq); + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); + + ack_APIC_irq(); + + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(irq); + __unmask_and_level_IO_APIC_irq(irq); + spin_unlock(&ioapic_lock); + } +} +#endif + +static struct irq_chip ioapic_chip __read_mostly = { + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +#ifdef CONFIG_INTR_REMAP +static struct irq_chip ir_ioapic_chip __read_mostly = { + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, +#ifdef CONFIG_SMP + .set_affinity = set_ir_ioapic_affinity_irq, +#endif + .retrigger = ioapic_retrigger_irq, +}; +#endif + +static inline void init_IO_APIC_traps(void) +{ + int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + + /* + * NOTE! The local APIC isn't very good at handling + * multiple interrupts at the same interrupt level. + * As the interrupt level is determined by taking the + * vector number and shifting that right by 4, we + * want to spread these out a bit so that they don't + * all fall in the same interrupt level. + * + * Also, we've got to be careful not to trash gate + * 0x80, because int 0x80 is hm, kind of importantish. ;) + */ + for_each_irq_cfg(cfg) { + irq = cfg->irq; + if (IO_APIC_IRQ(irq) && !cfg->vector) { + /* + * Hmm.. We don't have an entry for this, + * so default to an old-fashioned 8259 + * interrupt if we can.. + */ + if (irq < 16) + make_8259A_irq(irq); + else { + desc = irq_to_desc(irq); + /* Strange. Oh, well.. */ + desc->chip = &no_irq_chip; + } + } + } +} + +/* + * The local APIC irq-chip implementation: + */ + +static void mask_lapic_irq(unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); +} + +static void unmask_lapic_irq(unsigned int irq) +{ + unsigned long v; + + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); +} + +static void ack_lapic_irq (unsigned int irq) +{ + ack_APIC_irq(); +} + +static struct irq_chip lapic_chip __read_mostly = { + .name = "local-APIC", + .mask = mask_lapic_irq, + .unmask = unmask_lapic_irq, + .ack = ack_lapic_irq, +}; + +static void lapic_register_intr(int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + desc->status &= ~IRQ_LEVEL; + set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, + "edge"); +} + +static void __init setup_nmi(void) +{ + /* + * Dirty trick to enable the NMI watchdog ... + * We put the 8259A master into AEOI mode and + * unmask on all local APICs LVT0 as NMI. + * + * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') + * is from Maciej W. Rozycki - so we do not have to EOI from + * the NMI handler or the timer interrupt. + */ + apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); + + enable_NMI_through_LVT0(); + + apic_printk(APIC_VERBOSE, " done.\n"); +} + +/* + * This looks a bit hackish but it's about the only one way of sending + * a few INTA cycles to 8259As and any associated glue logic. ICR does + * not support the ExtINT mode, unfortunately. We need to send these + * cycles as some i82489DX-based boards have glue logic that keeps the + * 8259A interrupt line asserted until INTA. --macro + */ +static inline void __init unlock_ExtINT_logic(void) +{ + int apic, pin, i; + struct IO_APIC_route_entry entry0, entry1; + unsigned char save_control, save_freq_select; + + pin = find_isa_irq_pin(8, mp_INT); + if (pin == -1) { + WARN_ON_ONCE(1); + return; + } + apic = find_isa_irq_apic(8, mp_INT); + if (apic == -1) { + WARN_ON_ONCE(1); + return; + } + + entry0 = ioapic_read_entry(apic, pin); + clear_IO_APIC_pin(apic, pin); + + memset(&entry1, 0, sizeof(entry1)); + + entry1.dest_mode = 0; /* physical delivery */ + entry1.mask = 0; /* unmask IRQ now */ + entry1.dest = hard_smp_processor_id(); + entry1.delivery_mode = dest_ExtINT; + entry1.polarity = entry0.polarity; + entry1.trigger = 0; + entry1.vector = 0; + + ioapic_write_entry(apic, pin, entry1); + + save_control = CMOS_READ(RTC_CONTROL); + save_freq_select = CMOS_READ(RTC_FREQ_SELECT); + CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, + RTC_FREQ_SELECT); + CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); + + i = 100; + while (i-- > 0) { + mdelay(10); + if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) + i -= 10; + } + + CMOS_WRITE(save_control, RTC_CONTROL); + CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); + clear_IO_APIC_pin(apic, pin); + + ioapic_write_entry(apic, pin, entry0); +} + +static int disable_timer_pin_1 __initdata; +/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ +static int __init disable_timer_pin_setup(char *arg) +{ + disable_timer_pin_1 = 1; + return 0; +} +early_param("disable_timer_pin_1", disable_timer_pin_setup); + +int timer_through_8259 __initdata; + +/* + * This code may look a bit paranoid, but it's supposed to cooperate with + * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ + * is so screwy. Thanks to Brian Perkins for testing/hacking this beast + * fanatically on his truly buggy board. + * + * FIXME: really need to revamp this for all platforms. + */ +static inline void __init check_timer(void) +{ + struct irq_cfg *cfg = irq_cfg(0); + int apic1, pin1, apic2, pin2; + unsigned long flags; + unsigned int ver; + int no_pin1 = 0; + + local_irq_save(flags); + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); + + /* + * get/set the timer IRQ vector: + */ + disable_8259A_irq(0); + assign_irq_vector(0, TARGET_CPUS); + + /* + * As IRQ0 is to be enabled in the 8259A, the virtual + * wire has to be disabled in the local APIC. Also + * timer interrupts need to be acknowledged manually in + * the 8259A for the i82489DX when using the NMI + * watchdog as that APIC treats NMIs as level-triggered. + * The AEOI mode will finish them in the 8259A + * automatically. + */ + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); + init_8259A(1); +#ifdef CONFIG_X86_32 + timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +#endif + + pin1 = find_isa_irq_pin(0, mp_INT); + apic1 = find_isa_irq_apic(0, mp_INT); + pin2 = ioapic_i8259.pin; + apic2 = ioapic_i8259.apic; + + apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " + "apic1=%d pin1=%d apic2=%d pin2=%d\n", + cfg->vector, apic1, pin1, apic2, pin2); + + /* + * Some BIOS writers are clueless and report the ExtINTA + * I/O APIC input from the cascaded 8259A as the timer + * interrupt input. So just in case, if only one pin + * was found above, try it both directly and through the + * 8259A. + */ + if (pin1 == -1) { +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("BIOS bug: timer not connected to IO-APIC"); +#endif + pin1 = pin2; + apic1 = apic2; + no_pin1 = 1; + } else if (pin2 == -1) { + pin2 = pin1; + apic2 = apic1; + } + + if (pin1 != -1) { + /* + * Ok, does IRQ0 through the IOAPIC work? + */ + if (no_pin1) { + add_pin_to_irq(0, apic1, pin1); + setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); + } + unmask_IO_APIC_irq(0); + if (timer_irq_works()) { + if (nmi_watchdog == NMI_IO_APIC) { + setup_nmi(); + enable_8259A_irq(0); + } + if (disable_timer_pin_1 > 0) + clear_IO_APIC_pin(0, pin1); + goto out; + } +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + panic("timer doesn't work through Interrupt-remapped IO-APIC"); +#endif + clear_IO_APIC_pin(apic1, pin1); + if (!no_pin1) + apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " + "8254 timer not connected to IO-APIC\n"); + + apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " + "(IRQ0) through the 8259A ...\n"); + apic_printk(APIC_QUIET, KERN_INFO + "..... (found apic %d pin %d) ...\n", apic2, pin2); + /* + * legacy devices should be connected to IO APIC #0 + */ + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); + setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); + unmask_IO_APIC_irq(0); + enable_8259A_irq(0); + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); + timer_through_8259 = 1; + if (nmi_watchdog == NMI_IO_APIC) { + disable_8259A_irq(0); + setup_nmi(); + enable_8259A_irq(0); + } + goto out; + } + /* + * Cleanup, just in case ... + */ + disable_8259A_irq(0); + clear_IO_APIC_pin(apic2, pin2); + apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); + } + + if (nmi_watchdog == NMI_IO_APIC) { + apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " + "through the IO-APIC - disabling NMI Watchdog!\n"); + nmi_watchdog = NMI_NONE; + } +#ifdef CONFIG_X86_32 + timer_ack = 0; +#endif + + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as Virtual Wire IRQ...\n"); + + lapic_register_intr(0); + apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ + enable_8259A_irq(0); + + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + goto out; + } + disable_8259A_irq(0); + apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); + apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); + + apic_printk(APIC_QUIET, KERN_INFO + "...trying to set up timer as ExtINT IRQ...\n"); + + init_8259A(0); + make_8259A_irq(0); + apic_write(APIC_LVT0, APIC_DM_EXTINT); + + unlock_ExtINT_logic(); + + if (timer_irq_works()) { + apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); + goto out; + } + apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); + panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " + "report. Then try booting with the 'noapic' option.\n"); +out: + local_irq_restore(flags); +} + +/* + * Traditionally ISA IRQ2 is the cascade IRQ, and is not available + * to devices. However there may be an I/O APIC pin available for + * this interrupt regardless. The pin may be left unconnected, but + * typically it will be reused as an ExtINT cascade interrupt for + * the master 8259A. In the MPS case such a pin will normally be + * reported as an ExtINT interrupt in the MP table. With ACPI + * there is no provision for ExtINT interrupts, and in the absence + * of an override it would be treated as an ordinary ISA I/O APIC + * interrupt, that is edge-triggered and unmasked by default. We + * used to do this, but it caused problems on some systems because + * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using + * the same ExtINT cascade interrupt to drive the local APIC of the + * bootstrap processor. Therefore we refrain from routing IRQ2 to + * the I/O APIC in all cases now. No actual device should request + * it anyway. --macro + */ +#define PIC_IRQS (1 << PIC_CASCADE_IR) + +void __init setup_IO_APIC(void) +{ + +#ifdef CONFIG_X86_32 + enable_IO_APIC(); +#else + /* + * calling enable_IO_APIC() is moved to setup_local_APIC for BP + */ +#endif + + io_apic_irqs = ~PIC_IRQS; + + apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); + /* + * Set up IO-APIC IRQ routing. + */ +#ifdef CONFIG_X86_32 + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); +#endif + sync_Arb_IDs(); + setup_IO_APIC_irqs(); + init_IO_APIC_traps(); + check_timer(); +} + +/* + * Called after all the initialization is done. If we didnt find any + * APIC bugs then we can allow the modify fast path + */ + +static int __init io_apic_bug_finalize(void) +{ + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; +} + +late_initcall(io_apic_bug_finalize); + +struct sysfs_ioapic_data { + struct sys_device dev; + struct IO_APIC_route_entry entry[0]; +}; +static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; + +static int ioapic_suspend(struct sys_device *dev, pm_message_t state) +{ + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; + for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) + *entry = ioapic_read_entry(dev->id, i); + + return 0; +} + +static int ioapic_resume(struct sys_device *dev) +{ + struct IO_APIC_route_entry *entry; + struct sysfs_ioapic_data *data; + unsigned long flags; + union IO_APIC_reg_00 reg_00; + int i; + + data = container_of(dev, struct sysfs_ioapic_data, dev); + entry = data->entry; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(dev->id, 0); + if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { + reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; + io_apic_write(dev->id, 0, reg_00.raw); + } + spin_unlock_irqrestore(&ioapic_lock, flags); + for (i = 0; i < nr_ioapic_registers[dev->id]; i++) + ioapic_write_entry(dev->id, i, entry[i]); + + return 0; +} + +static struct sysdev_class ioapic_sysdev_class = { + .name = "ioapic", + .suspend = ioapic_suspend, + .resume = ioapic_resume, +}; + +static int __init ioapic_init_sysfs(void) +{ + struct sys_device * dev; + int i, size, error; + + error = sysdev_class_register(&ioapic_sysdev_class); + if (error) + return error; + + for (i = 0; i < nr_ioapics; i++ ) { + size = sizeof(struct sys_device) + nr_ioapic_registers[i] + * sizeof(struct IO_APIC_route_entry); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); + if (!mp_ioapic_data[i]) { + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); + continue; + } + dev = &mp_ioapic_data[i]->dev; + dev->id = i; + dev->cls = &ioapic_sysdev_class; + error = sysdev_register(dev); + if (error) { + kfree(mp_ioapic_data[i]); + mp_ioapic_data[i] = NULL; + printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); + continue; + } + } + + return 0; +} + +device_initcall(ioapic_init_sysfs); + +/* + * Dynamic irq allocate and deallocation + */ +unsigned int create_irq_nr(unsigned int irq_want) +{ + /* Allocate an unused irq */ + unsigned int irq; + unsigned int new; + unsigned long flags; + struct irq_cfg *cfg_new; + +#ifndef CONFIG_HAVE_SPARSE_IRQ + irq_want = nr_irqs - 1; +#endif + + irq = 0; + spin_lock_irqsave(&vector_lock, flags); + for (new = irq_want; new > 0; new--) { + if (platform_legacy_irq(new)) + continue; + cfg_new = irq_cfg(new); + if (cfg_new && cfg_new->vector != 0) + continue; + /* check if need to create one */ + if (!cfg_new) + cfg_new = irq_cfg_alloc(new); + if (__assign_irq_vector(new, TARGET_CPUS) == 0) + irq = new; + break; + } + spin_unlock_irqrestore(&vector_lock, flags); + + if (irq > 0) { + dynamic_irq_init(irq); + } + return irq; +} + +int create_irq(void) +{ + int irq; + + irq = create_irq_nr(nr_irqs - 1); + + if (irq == 0) + irq = -1; + + return irq; +} + +void destroy_irq(unsigned int irq) +{ + unsigned long flags; + + dynamic_irq_cleanup(irq); + +#ifdef CONFIG_INTR_REMAP + free_irte(irq); +#endif + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); +} + +/* + * MSI message composition + */ +#ifdef CONFIG_PCI_MSI +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + struct irq_cfg *cfg; + int err; + unsigned dest; + cpumask_t tmp; + + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (err) + return err; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irte irte; + int ir_index; + u16 sub_handle; + + ir_index = map_irq_to_irte_handle(irq, &sub_handle); + BUG_ON(ir_index == -1); + + memset (&irte, 0, sizeof(irte)); + + irte.present = 1; + irte.dst_mode = INT_DEST_MODE; + irte.trigger_mode = 0; /* edge */ + irte.dlvry_mode = INT_DELIVERY_MODE; + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + modify_irte(irq, &irte); + + msg->address_hi = MSI_ADDR_BASE_HI; + msg->data = sub_handle; + msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | + MSI_ADDR_IR_SHV | + MSI_ADDR_IR_INDEX1(ir_index) | + MSI_ADDR_IR_INDEX2(ir_index); + } else +#endif + { + msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_lo = + MSI_ADDR_BASE_LO | + ((INT_DEST_MODE == 0) ? + MSI_ADDR_DEST_MODE_PHYSICAL: + MSI_ADDR_DEST_MODE_LOGICAL) | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_ADDR_REDIRECTION_CPU: + MSI_ADDR_REDIRECTION_LOWPRI) | + MSI_ADDR_DEST_ID(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + MSI_DATA_DELIVERY_FIXED: + MSI_DATA_DELIVERY_LOWPRI) | + MSI_DATA_VECTOR(cfg->vector); + } + return err; +} + +#ifdef CONFIG_SMP +static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + read_msi_msg(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + write_msi_msg(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} + +#ifdef CONFIG_INTR_REMAP +/* + * Migrate the MSI irq to another cpumask. This migration is + * done in the process context using interrupt-remapping hardware. + */ +static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned int dest; + cpumask_t tmp, cleanup_mask; + struct irte irte; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (get_irte(irq, &irte)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + irte.vector = cfg->vector; + irte.dest_id = IRTE_DEST(dest); + + /* + * atomically update the IRTE with the new destination and vector. + */ + modify_irte(irq, &irte); + + /* + * After this point, all the interrupts will start arriving + * at the new destination. So, time to cleanup the previous + * vector allocation. + */ + if (cfg->move_in_progress) { + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); + cfg->move_in_progress = 0; + } + + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif +#endif /* CONFIG_SMP */ + +/* + * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, + * which implement the MSI or MSI-X Capability Structure. + */ +static struct irq_chip msi_chip = { + .name = "PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +#ifdef CONFIG_INTR_REMAP +static struct irq_chip msi_ir_chip = { + .name = "IR-PCI-MSI", + .unmask = unmask_msi_irq, + .mask = mask_msi_irq, + .ack = ack_x2apic_edge, +#ifdef CONFIG_SMP + .set_affinity = ir_set_msi_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +/* + * Map the PCI dev to the corresponding remapping hardware unit + * and allocate 'nvec' consecutive interrupt-remapping table entries + * in it. + */ +static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +{ + struct intel_iommu *iommu; + int index; + + iommu = map_dev_to_ir(dev); + if (!iommu) { + printk(KERN_ERR + "Unable to map PCI %s to iommu\n", pci_name(dev)); + return -ENOENT; + } + + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", nvec, + pci_name(dev)); + return -ENOSPC; + } + return index; +} +#endif + +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(dev, irq, &msg); + if (ret < 0) + return ret; + + set_irq_msi(irq, desc); + write_msi_msg(irq, &msg); + +#ifdef CONFIG_INTR_REMAP + if (irq_remapped(irq)) { + struct irq_desc *desc = irq_to_desc(irq); + /* + * irq migration in process context + */ + desc->status |= IRQ_MOVE_PCNTXT; + set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); + } else +#endif + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + + return 0; +} + +static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) +{ + unsigned int irq; + + irq = dev->bus->number; + irq <<= 8; + irq |= dev->devfn; + irq <<= 12; + + return irq; +} + +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) +{ + unsigned int irq; + int ret; + unsigned int irq_want; + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + + irq = create_irq_nr(irq_want); + if (irq == 0) + return -1; + +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + ret = msi_alloc_irte(dev, irq, 1); + if (ret < 0) + goto error; +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) { + destroy_irq(irq); + return ret; + } + return 0; + +#ifdef CONFIG_INTR_REMAP +error: + destroy_irq(irq); + return ret; +#endif +} + +int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + unsigned int irq; + int ret, sub_handle; + struct msi_desc *desc; + unsigned int irq_want; + +#ifdef CONFIG_INTR_REMAP + struct intel_iommu *iommu = 0; + int index = 0; +#endif + + irq_want = build_irq_for_pci_dev(dev) + 0x100; + sub_handle = 0; + list_for_each_entry(desc, &dev->msi_list, list) { + irq = create_irq_nr(irq_want--); + if (irq == 0) + return -1; +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) + goto no_ir; + + if (!sub_handle) { + /* + * allocate the consecutive block of IRTE's + * for 'nvec' + */ + index = msi_alloc_irte(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + iommu = map_dev_to_ir(dev); + if (!iommu) { + ret = -ENOENT; + goto error; + } + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + } +no_ir: +#endif + ret = setup_msi_irq(dev, desc, irq); + if (ret < 0) + goto error; + sub_handle++; + } + return 0; + +error: + destroy_irq(irq); + return ret; +} + +void arch_teardown_msi_irq(unsigned int irq) +{ + destroy_irq(irq); +} + +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + +#endif /* CONFIG_PCI_MSI */ +/* + * Hypertransport interrupt support + */ +#ifdef CONFIG_HT_IRQ + +#ifdef CONFIG_SMP + +static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) +{ + struct ht_irq_msg msg; + fetch_ht_irq_msg(irq, &msg); + + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); + + write_ht_irq_msg(irq, &msg); +} + +static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + unsigned int dest; + cpumask_t tmp; + struct irq_desc *desc; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + target_ht_irq(irq, dest, cfg->vector); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif + +static struct irq_chip ht_irq_chip = { + .name = "PCI-HT", + .mask = mask_ht_irq, + .unmask = unmask_ht_irq, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = set_ht_irq_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +{ + struct irq_cfg *cfg; + int err; + cpumask_t tmp; + + tmp = TARGET_CPUS; + err = assign_irq_vector(irq, tmp); + if (!err) { + struct ht_irq_msg msg; + unsigned dest; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, tmp); + dest = cpu_mask_to_apicid(tmp); + + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); + + msg.address_lo = + HT_IRQ_LOW_BASE | + HT_IRQ_LOW_DEST_ID(dest) | + HT_IRQ_LOW_VECTOR(cfg->vector) | + ((INT_DEST_MODE == 0) ? + HT_IRQ_LOW_DM_PHYSICAL : + HT_IRQ_LOW_DM_LOGICAL) | + HT_IRQ_LOW_RQEOI_EDGE | + ((INT_DELIVERY_MODE != dest_LowestPrio) ? + HT_IRQ_LOW_MT_FIXED : + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; + + write_ht_irq_msg(irq, &msg); + + set_irq_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); + } + return err; +} +#endif /* CONFIG_HT_IRQ */ + +/* -------------------------------------------------------------------------- + ACPI-based IOAPIC Configuration + -------------------------------------------------------------------------- */ + +#ifdef CONFIG_ACPI + +#ifdef CONFIG_X86_32 +int __init io_apic_get_unique_id(int ioapic, int apic_id) +{ + union IO_APIC_reg_00 reg_00; + static physid_mask_t apic_id_map = PHYSID_MASK_NONE; + physid_mask_t tmp; + unsigned long flags; + int i = 0; + + /* + * The P4 platform supports up to 256 APIC IDs on two separate APIC + * buses (one for LAPICs, one for IOAPICs), where predecessors only + * supports up to 16 on one shared APIC bus. + * + * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full + * advantage of new APIC bus architecture. + */ + + if (physids_empty(apic_id_map)) + apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); + + spin_lock_irqsave(&ioapic_lock, flags); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + if (apic_id >= get_physical_broadcast()) { + printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " + "%d\n", ioapic, apic_id, reg_00.bits.ID); + apic_id = reg_00.bits.ID; + } + + /* + * Every APIC in a system must have a unique ID or we get lots of nice + * 'stuck on smp_invalidate_needed IPI wait' messages. + */ + if (check_apicid_used(apic_id_map, apic_id)) { + + for (i = 0; i < get_physical_broadcast(); i++) { + if (!check_apicid_used(apic_id_map, i)) + break; + } + + if (i == get_physical_broadcast()) + panic("Max apic_id exceeded!\n"); + + printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " + "trying %d\n", ioapic, apic_id, i); + + apic_id = i; + } + + tmp = apicid_to_cpu_present(apic_id); + physids_or(apic_id_map, apic_id_map, tmp); + + if (reg_00.bits.ID != apic_id) { + reg_00.bits.ID = apic_id; + + spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(ioapic, 0, reg_00.raw); + reg_00.raw = io_apic_read(ioapic, 0); + spin_unlock_irqrestore(&ioapic_lock, flags); + + /* Sanity check */ + if (reg_00.bits.ID != apic_id) { + printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); + return -1; + } + } + + apic_printk(APIC_VERBOSE, KERN_INFO + "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); + + return apic_id; +} + +int __init io_apic_get_version(int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.version; +} +#endif + +int __init io_apic_get_redir_entries (int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.entries; +} + + +int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) +{ + if (!IO_APIC_IRQ(irq)) { + apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", + ioapic); + return -EINVAL; + } + + /* + * IRQs < 16 are already in the irq_2_pin[] map + */ + if (irq >= 16) + add_pin_to_irq(irq, ioapic, pin); + + setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); + + return 0; +} + + +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +{ + int i; + + if (skip_ioapic_setup) + return -1; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mp_irqtype == mp_INT && + mp_irqs[i].mp_srcbusirq == bus_irq) + break; + if (i >= mp_irq_entries) + return -1; + + *trigger = irq_trigger(i); + *polarity = irq_polarity(i); + return 0; +} + +#endif /* CONFIG_ACPI */ + +/* + * This function currently is only a helper for the i386 smp boot process where + * we need to reprogram the ioredtbls to cater for the cpus which have come online + * so mask in all cases should simply be TARGET_CPUS + */ +#ifdef CONFIG_SMP +void __init setup_ioapic_dest(void) +{ + int pin, ioapic, irq, irq_entry; + struct irq_cfg *cfg; + + if (skip_ioapic_setup == 1) + return; + + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { + for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { + irq_entry = find_irq_entry(ioapic, pin, mp_INT); + if (irq_entry == -1) + continue; + irq = pin_2_irq(irq_entry, ioapic, pin); + + /* setup_IO_APIC_irqs could fail to get vector for some device + * when you have too many devices, because at that time only boot + * cpu is online. + */ + cfg = irq_cfg(irq); + if (!cfg->vector) + setup_IO_APIC_irq(ioapic, pin, irq, + irq_trigger(irq_entry), + irq_polarity(irq_entry)); +#ifdef CONFIG_INTR_REMAP + else if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + else + set_ioapic_affinity_irq(irq, TARGET_CPUS); + } + + } +} +#endif + +#ifdef CONFIG_X86_64 +#define IOAPIC_RESOURCE_NAME_SIZE 11 + +static struct resource *ioapic_resources; + +static struct resource * __init ioapic_setup_resources(void) +{ + unsigned long n; + struct resource *res; + char *mem; + int i; + + if (nr_ioapics <= 0) + return NULL; + + n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); + n *= nr_ioapics; + + mem = alloc_bootmem(n); + res = (void *)mem; + + if (mem != NULL) { + mem += sizeof(struct resource) * nr_ioapics; + + for (i = 0; i < nr_ioapics; i++) { + res[i].name = mem; + res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; + sprintf(mem, "IOAPIC %u", i); + mem += IOAPIC_RESOURCE_NAME_SIZE; + } + } + + ioapic_resources = res; + + return res; +} +#endif + +void __init ioapic_init_mappings(void) +{ + unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; + int i; +#ifdef CONFIG_X86_64 + struct resource *ioapic_res; + + ioapic_res = ioapic_setup_resources(); +#endif + for (i = 0; i < nr_ioapics; i++) { + if (smp_found_config) { + ioapic_phys = mp_ioapics[i].mp_apicaddr; +#ifdef CONFIG_X86_32 + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } +#endif + } else { +#ifdef CONFIG_X86_32 +fake_ioapic_page: +#endif + ioapic_phys = (unsigned long) + alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = __pa(ioapic_phys); + } + set_fixmap_nocache(idx, ioapic_phys); + apic_printk(APIC_VERBOSE, + "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx), ioapic_phys); + idx++; + +#ifdef CONFIG_X86_64 + if (ioapic_res != NULL) { + ioapic_res->start = ioapic_phys; + ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res++; + } +#endif + } +} + +#ifdef CONFIG_X86_64 +static int __init ioapic_insert_resources(void) +{ + int i; + struct resource *r = ioapic_resources; + + if (!r) { + printk(KERN_ERR + "IO APIC resources could be not be allocated.\n"); + return -1; + } + + for (i = 0; i < nr_ioapics; i++) { + insert_resource(&iomem_resource, r); + r++; + } + + return 0; +} + +/* Insert the IO APIC resources after PCI initialization has occured to handle + * IO APICS that are mapped in on a BAR in PCI space. */ +late_initcall(ioapic_insert_resources); +#endif diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c deleted file mode 100644 index fba6d6ee348..00000000000 --- a/arch/x86/kernel/io_apic_32.c +++ /dev/null @@ -1,3913 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku and - * Hidemi Kishimoto , - * further tested and cleaned up by Zach Brown - * and Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* time_after() */ -#ifdef CONFIG_ACPI -#include -#endif -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define __apicdebuginit(type) static type __init - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int first_free_entry; -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; - -/* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -int skip_ioapic_setup; - -static int __init parse_noapic(char *str) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -struct irq_cfg; -struct irq_pin_list; -struct irq_cfg { - unsigned int irq; - struct irq_cfg *next; - struct irq_pin_list *irq_2_pin; - cpumask_t domain; - cpumask_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, -}; - -static struct irq_cfg irq_cfg_init = { .irq = -1U, }; -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); - -static void init_one_irq_cfg(struct irq_cfg *cfg) -{ - memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); -} - -static struct irq_cfg *irq_cfgx; -static struct irq_cfg *irq_cfgx_free; -static void __init init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; - - cfg = *da->name; - - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); - for (i = legacy_count; i < *da->nr; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < *da->nr; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = &irq_cfgx[legacy_count]; - irq_cfgx[legacy_count - 1].next = NULL; -} - -#define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg; cfg = cfg->next) - -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); - -static struct irq_cfg *irq_cfg(unsigned int irq) -{ - struct irq_cfg *cfg; - - cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg = cfg->next; - } - - return NULL; -} - -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - struct irq_cfg *cfg, *cfg_pri; - int i; - int count = 0; - - cfg_pri = cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg_pri = cfg; - cfg = cfg->next; - count++; - } - - if (!irq_cfgx_free) { - unsigned long phys; - unsigned long total_bytes; - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - - total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; - if (after_bootmem) - cfg = kzalloc(total_bytes, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); - - phys = __pa(cfg); - printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = cfg; - } - - cfg = irq_cfgx_free; - irq_cfgx_free = irq_cfgx_free->next; - cfg->next = NULL; - if (cfg_pri) - cfg_pri->next = cfg; - else - irq_cfgx = cfg; - cfg->irq = irq; - printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); -#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG - { - /* dump the results */ - struct irq_cfg *cfg; - unsigned long phys; - unsigned long bytes = sizeof(struct irq_cfg); - - printk(KERN_DEBUG "=========================== %d\n", irq); - printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); - for_each_irq_cfg(cfg) { - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); - } - printk(KERN_DEBUG "===========================\n"); - } -#endif - return cfg; -} - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; - -static struct irq_pin_list *irq_2_pin_head; -/* fill one page ? */ -static int nr_irq_2_pin = 0x100; -static struct irq_pin_list *irq_2_pin_ptr; -static void __init irq_2_pin_init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_pin_list *pin; - int i; - - pin = *da->name; - - for (i = 1; i < *da->nr; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = &pin[0]; -} -DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); - -static struct irq_pin_list *get_one_free_irq_2_pin(void) -{ - struct irq_pin_list *pin; - int i; - - pin = irq_2_pin_ptr; - - if (pin) { - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; - } - - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); - - if (after_bootmem) - pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, - GFP_ATOMIC); - else - pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * - nr_irq_2_pin, PAGE_SIZE, 0); - - if (!pin) - panic("can not get more irq_2_pin\n"); - - for (i = 1; i < nr_irq_2_pin; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = pin->next; - pin->next = NULL; - - return pin; -} - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -#ifdef CONFIG_X86_64 -static bool io_apic_level_ack_pending(unsigned int irq) -{ - struct irq_pin_list *entry; - unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); - - spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - int pin; - - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - /* Is the remote IRR bit set? */ - if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); - return true; - } - if (!entry->next) - break; - entry = entry->next; - } - spin_unlock_irqrestore(&ioapic_lock, flags); - - return false; -} -#endif - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. - */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu; - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#ifdef CONFIG_SMP -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - int apic, pin; - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; -#ifdef CONFIG_INTR_REMAP - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); -#else - io_apic_write(apic, 0x11 + pin*2, dest); -#endif - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int assign_irq_vector(int irq, cpumask_t mask); - -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); - - desc = irq_to_desc(irq); - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - desc->affinity = mask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} -#endif /* CONFIG_SMP */ - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(); - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); - return; - } - - while (entry->next) { - /* not again, please */ - if (entry->apic == apic && entry->pin == pin) - return; - - entry = entry->next; - } - - entry->next = get_one_free_irq_2_pin(); - entry = entry->next; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_cfg *cfg = irq_cfg(irq); - struct irq_pin_list *entry = cfg->irq_2_pin; - int replaced = 0; - - while (entry) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - replaced = 1; - /* every one is different, right? */ - break; - } - entry = entry->next; - } - - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq(irq, newapic, newpin); -} - -#ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - -#define DO_ACTION(name,R,ACTION, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) - -/* mask = 0 */ -DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) - -#else - -static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - unsigned int pin, reg; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -/* mask = 1 */ -static void __mask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); -} - -/* mask = 0 */ -static void __unmask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); -} - -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED); -} - -#endif - -static void mask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) - return; - /* - * Disable it in the IO-APIC irq-routing table: - */ - ioapic_mask_entry(apic, pin); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP && CONFIG_X86_32*/ - -#ifdef CONFIG_X86_32 -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_INTR_REMAP -/* I/O APIC RTE contents at the OS boot up */ -static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - -/* - * Saves and masks all the unmasked IO-APIC RTE's - */ -int save_mask_IO_APIC_setup(void) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - int apic, pin; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - - for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - return -ENOMEM; - } - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - - entry = early_ioapic_entries[apic][pin] = - ioapic_read_entry(apic, pin); - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } - return 0; -} - -void restore_IO_APIC_setup(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); -} - -void reinit_intr_remapped_IO_APIC(int intr_remapping) -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(); -} -#endif - -/* - * Find the IRQ entry number of a certain pin. - */ -static int find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - - return mp_irqs[i].mp_dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - break; - } - if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) - return apic; - } - } - - return -1; -} - -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -#endif - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) -#define default_EISA_polarity(idx) default_ISA_polarity(idx) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) default_ISA_polarity(idx) - -static int MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mp_irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - polarity = default_ISA_polarity(idx); - else - polarity = default_PCI_polarity(idx); - break; - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif - break; - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -int (*ioapic_renumber_irq)(int ioapic, int irq); -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mp_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mp_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; - } else { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); - } - -#ifdef CONFIG_X86_32 - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. - */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } -#endif - - return irq; -} - -void lock_vector_lock(void) -{ - /* Used to the online set of cpus does not change - * during assign_irq_vector. - */ - spin_lock(&vector_lock); -} - -void unlock_vector_lock(void) -{ - spin_unlock(&vector_lock); -} - -static int __assign_irq_vector(int irq, cpumask_t mask) -{ - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - unsigned int old_vector; - int cpu; - struct irq_cfg *cfg; - - cfg = irq_cfg(irq); - - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); - - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - - old_vector = cfg->vector; - if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) - return 0; - } - - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; - int new_cpu; - int vector, offset; - - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (unlikely(current_vector == vector)) - continue; -#ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; -#else - if (vector == SYSCALL_VECTOR) - goto next; -#endif - for_each_cpu_mask_nr(new_cpu, new_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) - goto next; - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; - } - for_each_cpu_mask_nr(new_cpu, new_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cfg->domain = domain; - return 0; - } - return -ENOSPC; -} - -static int assign_irq_vector(int irq, cpumask_t mask) -{ - int err; - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); - spin_unlock_irqrestore(&vector_lock, flags); - return err; -} - -static void __clear_irq_vector(int irq) -{ - struct irq_cfg *cfg; - cpumask_t mask; - int cpu, vector; - - cfg = irq_cfg(irq); - BUG_ON(!cfg->vector); - - vector = cfg->vector; - cpus_and(mask, cfg->domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) - per_cpu(vector_irq, cpu)[vector] = -1; - - cfg->vector = 0; - cpus_clear(cfg->domain); -} - -void __setup_vector_irq(int cpu) -{ - /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ - int irq, vector; - struct irq_cfg *cfg; - - /* Mark the inuse vectors */ - for_each_irq_cfg(cfg) { - if (!cpu_isset(cpu, cfg->domain)) - continue; - vector = cfg->vector; - irq = cfg->irq; - per_cpu(vector_irq, cpu)[vector] = irq; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq < 0) - continue; - - cfg = irq_cfg(irq); - if (!cpu_isset(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = -1; - } -} - -static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip; -#endif - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -#ifdef CONFIG_X86_32 -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} -#else -static inline int IO_APIC_irq_trigger(int irq) -{ - return 1; -} -#endif - -static void ioapic_register_intr(int irq, unsigned long trigger) -{ - struct irq_desc *desc; - - /* first time to use this irq_desc */ - if (irq < 16) - desc = irq_to_desc(irq); - else - desc = irq_to_desc_alloc(irq); - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - desc->status |= IRQ_LEVEL; - else - desc->status &= ~IRQ_LEVEL; - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - desc->status |= IRQ_MOVE_PCNTXT; - if (trigger) - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_edge_irq, "edge"); - return; - } -#endif - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); -} - -static int setup_ioapic_entry(int apic, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) -{ - /* - * add it to the IO-APIC irq-routing table: - */ - memset(entry,0,sizeof(*entry)); - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); - struct irte irte; - struct IR_IO_APIC_route_entry *ir_entry = - (struct IR_IO_APIC_route_entry *) entry; - int index; - - if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); - - memset(&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = vector; - irte.dest_id = IRTE_DEST(destination); - - modify_irte(irq, &irte); - - ir_entry->index2 = (index >> 15) & 0x1; - ir_entry->zero = 0; - ir_entry->format = 1; - ir_entry->index = (index & 0x7fff); - } else -#endif - { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; - entry->dest = destination; - } - - entry->mask = 0; /* enable IRQ */ - entry->trigger = trigger; - entry->polarity = polarity; - entry->vector = vector; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry->mask = 1; - return 0; -} - -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, - int trigger, int polarity) -{ - struct irq_cfg *cfg; - struct IO_APIC_route_entry entry; - cpumask_t mask; - - if (!IO_APIC_IRQ(irq)) - return; - - cfg = irq_cfg(irq); - - mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(mask, cfg->domain, mask); - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, - irq, trigger, polarity); - - - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, - cfg->vector)) { - printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); - return; - } - - ioapic_register_intr(irq, trigger); - if (irq < 16) - disable_8259A_irq(irq); - - ioapic_write_entry(apic, pin, entry); -} - -static void __init setup_IO_APIC_irqs(void) -{ - int apic, pin, idx, irq, first_notcon = 1; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); - continue; - } - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } - - irq = pin_2_irq(idx, apic, pin); -#ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) - continue; -#endif - add_pin_to_irq(irq, apic, pin); - - setup_IO_APIC_irq(apic, pin, irq, - irq_trigger(idx), irq_polarity(idx)); - } - } - - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); -} - -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, - int vector) -{ - struct IO_APIC_route_entry entry; - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - return; -#endif - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = INT_DEST_MODE; - entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(apic, pin, entry); -} - - -__apicdebuginit(void) print_IO_APIC(void) -{ - int apic, i; - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - union IO_APIC_reg_03 reg_03; - unsigned long flags; - struct irq_cfg *cfg; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect: \n"); - - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - printk(KERN_DEBUG " %02x %03X ", - i, - entry.dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; - if (!entry) - continue; - printk(KERN_DEBUG "IRQ%d ", cfg->irq); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); - - return; -} - -__apicdebuginit(void) print_APIC_bitfield(int base) -{ - unsigned int v; - int i, j; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -__apicdebuginit(void) print_all_local_APICs(void) -{ - on_each_cpu(print_local_APIC, NULL, 1); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -__apicdebuginit(int) print_all_ICs(void) -{ - print_PIC(); - print_all_local_APICs(); - print_IO_APIC(); - - return 0; -} - -fs_initcall(print_all_ICs); - - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -void __init enable_IO_APIC(void) -{ - union IO_APIC_reg_01 reg_01; - int i8259_apic, i8259_pin; - int apic; - unsigned long flags; - -#ifdef CONFIG_X86_32 - int i; - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; -#endif - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; - /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); - - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. - */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } - } - } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic - * the i8259 probably is not connected the ioapic but give the - * mptable a chance anyway. - */ - i8259_pin = find_isa_irq_pin(0, mp_ExtINT); - i8259_apic = find_isa_irq_apic(0, mp_ExtINT); - /* Trust the MP table if nothing is setup in the hardware */ - if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); - ioapic_i8259.pin = i8259_pin; - ioapic_i8259.apic = i8259_apic; - } - /* Complain if the MP table and the hardware disagree */ - if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. - */ - if (ioapic_i8259.pin != -1) { - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); - } - - disconnect_bsp_APIC(ioapic_i8259.pin != -1); -} - -#ifdef CONFIG_X86_32 -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 - */ - -static void __init setup_ioapic_ids_from_mpc(void) -{ - union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int apic; - int i; - unsigned char old_id; - unsigned long flags; - - if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) - return; - - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. - */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); - - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (apic = 0; apic < nr_ioapics; apic++) { - - /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mp_apicid; - - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= get_physical_broadcast()) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; - } else { - physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); - physids_or(phys_id_present_map, phys_id_present_map, tmp); - } - - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mp_ioapics[apic].mp_apicid) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; - - /* - * Read the right value from the MPC table and - * write it into the ID register. - */ - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); - - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; - spin_lock_irqsave(&ioapic_lock, flags); - - /* - * Sanity check - */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) - printk("could not set ID!\n"); - else - apic_printk(APIC_VERBOSE, " ok.\n"); - } -} -#endif - -int no_timer_check __initdata; - -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - unsigned long flags; - - if (no_timer_check) - return 1; - - local_save_flags(flags); - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - local_irq_restore(flags); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - - /* jiffies wrap? */ - if (time_after(jiffies, t1 + 4)) - return 1; - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - */ - -static unsigned int startup_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -#ifdef CONFIG_X86_64 -static int ioapic_retrigger_irq(unsigned int irq) -{ - - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - send_IPI_self(irq_cfg(irq)->vector); - - return 1; -} -#endif - -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - -#ifdef CONFIG_SMP - -#ifdef CONFIG_INTR_REMAP -static void ir_irq_migration(struct work_struct *work); - -static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For edge triggered, irq migration is a simple atomic update(of vector - * and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we need to modify the io-apic RTE aswell with the update - * vector information, along with modifying IRTE with vector and destination. - * So irq migration for level triggered is little bit more complex compared to - * edge triggered migration. But the good news is, we use the same algorithm - * for level triggered migration as we have today, only difference being, - * we now initiate the irq migration from process context instead of the - * interrupt context. - * - * In future, when we do a directed EOI (combined with cpu EOI broadcast - * suppression) to the IO-APIC, level triggered irq migration will also be - * as simple as edge triggered migration and we can do the irq migration - * with a simple atomic update to IO-APIC RTE. - */ -static void migrate_ioapic_irq(int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct irq_desc *desc; - cpumask_t tmp, cleanup_mask; - struct irte irte; - int modify_ioapic_rte; - unsigned int dest; - unsigned long flags; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - desc = irq_to_desc(irq); - modify_ioapic_rte = desc->status & IRQ_LEVEL; - if (modify_ioapic_rte) { - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Modified the IRTE and flushes the Interrupt entry cache. - */ - modify_irte(irq, &irte); - - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc->affinity = mask; -} - -static int migrate_irq_remapped_level(int irq) -{ - int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); - - mask_IO_APIC_irq(irq); - - if (io_apic_level_ack_pending(irq)) { - /* - * Interrupt in progress. Migrating irq now will change the - * vector information in the IO-APIC RTE and that will confuse - * the EOI broadcast performed by cpu. - * So, delay the irq migration to the next instance. - */ - schedule_delayed_work(&ir_migration_work, 1); - goto unmask; - } - - /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); - - ret = 0; - desc->status &= ~IRQ_MOVE_PENDING; - cpus_clear(desc->pending_mask); - -unmask: - unmask_IO_APIC_irq(irq); - return ret; -} - -static void ir_irq_migration(struct work_struct *work) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - if (desc->status & IRQ_MOVE_PENDING) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->chip->set_affinity || - !(desc->status & IRQ_MOVE_PENDING)) { - desc->status &= ~IRQ_MOVE_PENDING; - spin_unlock_irqrestore(&desc->lock, flags); - continue; - } - - desc->chip->set_affinity(irq, desc->pending_mask); - spin_unlock_irqrestore(&desc->lock, flags); - } - } -} - -/* - * Migrates the IRQ destination in the process context. - */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc->status & IRQ_LEVEL) { - desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = mask; - migrate_irq_remapped_level(irq); - return; - } - - migrate_ioapic_irq(irq, mask); -} -#endif - -asmlinkage void smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - ack_APIC_irq(); -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; - - desc = irq_to_desc(irq); - if (!desc) - continue; - - cfg = irq_cfg(irq); - spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; - - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) - goto unlock; - - __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; -unlock: - spin_unlock(&desc->lock); - } - - irq_exit(); -} - -static void irq_complete_move(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - unsigned vector, me; - - if (likely(!cfg->move_in_progress)) - return; - - vector = ~get_irq_regs()->orig_ax; - me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; - - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } -} -#else -static inline void irq_complete_move(unsigned int irq) {} -#endif -#ifdef CONFIG_INTR_REMAP -static void ack_x2apic_level(unsigned int irq) -{ - ack_x2APIC_irq(); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif - -static void ack_apic_edge(unsigned int irq) -{ - irq_complete_move(irq); - move_native_irq(irq); - ack_APIC_irq(); -} - -#ifdef CONFIG_X86_64 -static void ack_apic_level(unsigned int irq) -{ - int do_unmask_irq = 0; - - irq_complete_move(irq); -#ifdef CONFIG_GENERIC_PENDING_IRQ - /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { - do_unmask_irq = 1; - mask_IO_APIC_irq(irq); - } -#endif - - /* - * We must acknowledge the irq before we move it or the acknowledge will - * not propagate properly. - */ - ack_APIC_irq(); - - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(irq)) - move_masked_irq(irq); - unmask_IO_APIC_irq(irq); - } -} -#else -atomic_t irq_mis_count; -static void ack_apic_level(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); - /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} -#endif - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_apic_edge, - .eoi = ack_apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; -#endif - -static inline void init_IO_APIC_traps(void) -{ - int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for_each_irq_cfg(cfg) { - irq = cfg->irq; - if (IO_APIC_IRQ(irq) && !cfg->vector) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); - /* Strange. Oh, well.. */ - desc->chip = &no_irq_chip; - } - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void mask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static void ack_lapic_irq (unsigned int irq) -{ - ack_APIC_irq(); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC", - .mask = mask_lapic_irq, - .unmask = unmask_lapic_irq, - .ack = ack_lapic_irq, -}; - -static void lapic_register_intr(int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->status &= ~IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, - "edge"); -} - -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. --macro - */ -static inline void __init unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -static int disable_timer_pin_1 __initdata; -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", disable_timer_pin_setup); - -int timer_through_8259 __initdata; - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for all platforms. - */ -static inline void __init check_timer(void) -{ - struct irq_cfg *cfg = irq_cfg(0); - int apic1, pin1, apic2, pin2; - unsigned long flags; - unsigned int ver; - int no_pin1 = 0; - - local_irq_save(flags); - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); - - /* - * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. Also - * timer interrupts need to be acknowledged manually in - * the 8259A for the i82489DX when using the NMI - * watchdog as that APIC treats NMIs as level-triggered. - * The AEOI mode will finish them in the 8259A - * automatically. - */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); -#ifdef CONFIG_X86_32 - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); -#endif - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " - "apic1=%d pin1=%d apic2=%d pin2=%d\n", - cfg->vector, apic1, pin1, apic2, pin2); - - /* - * Some BIOS writers are clueless and report the ExtINTA - * I/O APIC input from the cascaded 8259A as the timer - * interrupt input. So just in case, if only one pin - * was found above, try it both directly and through the - * 8259A. - */ - if (pin1 == -1) { -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); -#endif - pin1 = pin2; - apic1 = apic2; - no_pin1 = 1; - } else if (pin2 == -1) { - pin2 = pin1; - apic2 = apic1; - } - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); - } - unmask_IO_APIC_irq(0); - if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - goto out; - } -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); -#endif - clear_IO_APIC_pin(apic1, pin1); - if (!no_pin1) - apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); - - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " - "(IRQ0) through the 8259A ...\n"); - apic_printk(APIC_QUIET, KERN_INFO - "..... (found apic %d pin %d) ...\n", apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); - enable_8259A_irq(0); - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); - timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - goto out; - } - /* - * Cleanup, just in case ... - */ - disable_8259A_irq(0); - clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); - } - - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as Virtual Wire IRQ...\n"); - - lapic_register_intr(0); - apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - disable_8259A_irq(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); - apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as ExtINT IRQ...\n"); - - init_8259A(0); - make_8259A_irq(0); - apic_write(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option.\n"); -out: - local_irq_restore(flags); -} - -/* - * Traditionally ISA IRQ2 is the cascade IRQ, and is not available - * to devices. However there may be an I/O APIC pin available for - * this interrupt regardless. The pin may be left unconnected, but - * typically it will be reused as an ExtINT cascade interrupt for - * the master 8259A. In the MPS case such a pin will normally be - * reported as an ExtINT interrupt in the MP table. With ACPI - * there is no provision for ExtINT interrupts, and in the absence - * of an override it would be treated as an ordinary ISA I/O APIC - * interrupt, that is edge-triggered and unmasked by default. We - * used to do this, but it caused problems on some systems because - * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using - * the same ExtINT cascade interrupt to drive the local APIC of the - * bootstrap processor. Therefore we refrain from routing IRQ2 to - * the I/O APIC in all cases now. No actual device should request - * it anyway. --macro - */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - -#ifdef CONFIG_X86_32 - enable_IO_APIC(); -#else - /* - * calling enable_IO_APIC() is moved to setup_local_APIC for BP - */ -#endif - - io_apic_irqs = ~PIC_IRQS; - - apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - /* - * Set up IO-APIC IRQ routing. - */ -#ifdef CONFIG_X86_32 - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); -#endif - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); -} - -/* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) - *entry = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - unsigned long flags; - union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; - io_apic_write(dev->id, 0, reg_00.raw); - } - spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - ioapic_write_entry(dev->id, i, entry[i]); - - return 0; -} - -static struct sysdev_class ioapic_sysdev_class = { - .name = "ioapic", - .suspend = ioapic_suspend, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_sysfs(void) -{ - struct sys_device * dev; - int i, size, error; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } - - return 0; -} - -device_initcall(ioapic_init_sysfs); - -/* - * Dynamic irq allocate and deallocation - */ -unsigned int create_irq_nr(unsigned int irq_want) -{ - /* Allocate an unused irq */ - unsigned int irq; - unsigned int new; - unsigned long flags; - struct irq_cfg *cfg_new; - -#ifndef CONFIG_HAVE_SPARSE_IRQ - irq_want = nr_irqs - 1; -#endif - - irq = 0; - spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { - if (platform_legacy_irq(new)) - continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) - continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); - if (__assign_irq_vector(new, TARGET_CPUS) == 0) - irq = new; - break; - } - spin_unlock_irqrestore(&vector_lock, flags); - - if (irq > 0) { - dynamic_irq_init(irq); - } - return irq; -} - -int create_irq(void) -{ - int irq; - - irq = create_irq_nr(nr_irqs - 1); - - if (irq == 0) - irq = -1; - - return irq; -} - -void destroy_irq(unsigned int irq) -{ - unsigned long flags; - - dynamic_irq_cleanup(irq); - -#ifdef CONFIG_INTR_REMAP - free_irte(irq); -#endif - spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * MSI message composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (err) - return err; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - memset (&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - modify_irte(irq, &irte); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } - return err; -} - -#ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - read_msi_msg(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} - -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp, cleanup_mask; - struct irte irte; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_x2apic_edge, -#ifdef CONFIG_SMP - .set_affinity = ir_set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} -#endif - -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) - return ret; - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irq_desc *desc = irq_to_desc(irq); - /* - * irq migration in process context - */ - desc->status |= IRQ_MOVE_PCNTXT; - set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); - } else -#endif - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - - return 0; -} - -static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) -{ - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; -} - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - unsigned int irq; - int ret, sub_handle; - struct msi_desc *desc; - unsigned int irq_want; - -#ifdef CONFIG_INTR_REMAP - struct intel_iommu *iommu = 0; - int index = 0; -#endif - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); - if (irq == 0) - return -1; -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_irte(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; - goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); - } -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) - goto error; - sub_handle++; - } - return 0; - -error: - destroy_irq(irq); - return ret; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#ifdef CONFIG_DMAR -#ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif /* CONFIG_SMP */ - -struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .unmask = dmar_msi_unmask, - .mask = dmar_msi_mask, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = dmar_msi_set_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_dmar_msi(unsigned int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} -#endif - -#endif /* CONFIG_PCI_MSI */ -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .mask = mask_ht_irq, - .unmask = unmask_ht_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_ht_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - struct irq_cfg *cfg; - int err; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((INT_DEST_MODE == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - } - return err; -} -#endif /* CONFIG_HT_IRQ */ - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -#ifdef CONFIG_X86_32 -int __init io_apic_get_unique_id(int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - tmp = apicid_to_cpu_present(apic_id); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - -int __init io_apic_get_version(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} -#endif - -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) -{ - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); - - return 0; -} - - -int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) -{ - int i; - - if (skip_ioapic_setup) - return -1; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) - break; - if (i >= mp_irq_entries) - return -1; - - *trigger = irq_trigger(i); - *polarity = irq_polarity(i); - return 0; -} - -#endif /* CONFIG_ACPI */ - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - struct irq_cfg *cfg; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - /* setup_IO_APIC_irqs could fail to get vector for some device - * when you have too many devices, because at that time only boot - * cpu is online. - */ - cfg = irq_cfg(irq); - if (!cfg->vector) - setup_IO_APIC_irq(ioapic, pin, irq, - irq_trigger(irq_entry), - irq_polarity(irq_entry)); -#ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif - else - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - -#ifdef CONFIG_X86_64 -#define IOAPIC_RESOURCE_NAME_SIZE 11 - -static struct resource *ioapic_resources; - -static struct resource * __init ioapic_setup_resources(void) -{ - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - } - - ioapic_resources = res; - - return res; -} -#endif - -void __init ioapic_init_mappings(void) -{ - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; -#ifdef CONFIG_X86_64 - struct resource *ioapic_res; - - ioapic_res = ioapic_setup_resources(); -#endif - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; -#ifdef CONFIG_X86_32 - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } -#endif - } else { -#ifdef CONFIG_X86_32 -fake_ioapic_page: -#endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - -#ifdef CONFIG_X86_64 - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } -#endif - } -} - -#ifdef CONFIG_X86_64 -static int __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - printk(KERN_ERR - "IO APIC resources could be not be allocated.\n"); - return -1; - } - - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; - } - - return 0; -} - -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); -#endif diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c deleted file mode 100644 index 940c4167b32..00000000000 --- a/arch/x86/kernel/io_apic_64.c +++ /dev/null @@ -1,3913 +0,0 @@ -/* - * Intel IO-APIC support for multi-Pentium hosts. - * - * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo - * - * Many thanks to Stig Venaas for trying out countless experimental - * patches and reporting/debugging problems patiently! - * - * (c) 1999, Multiple IO-APIC support, developed by - * Ken-ichi Yaku and - * Hidemi Kishimoto , - * further tested and cleaned up by Zach Brown - * and Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively - * Paul Diefenbaugh : Added full ACPI support - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* time_after() */ -#ifdef CONFIG_ACPI -#include -#endif -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define __apicdebuginit(type) static type __init - -/* - * Is the SiS APIC rmw bug present ? - * -1 = don't know, 0 = no, 1 = yes - */ -int sis_apic_bug = -1; - -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); - -int first_free_entry; -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - -/* - * # of IRQ routing registers - */ -int nr_ioapic_registers[MAX_IO_APICS]; - -/* I/O APIC entries */ -struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; -int nr_ioapics; - -/* MP IRQ source entries */ -struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; - -/* # of MP IRQ source entries */ -int mp_irq_entries; - -#if defined (CONFIG_MCA) || defined (CONFIG_EISA) -int mp_bus_id_to_type[MAX_MP_BUSSES]; -#endif - -DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); - -int skip_ioapic_setup; - -static int __init parse_noapic(char *str) -{ - /* disable IO-APIC */ - disable_ioapic_setup(); - return 0; -} -early_param("noapic", parse_noapic); - -struct irq_cfg; -struct irq_pin_list; -struct irq_cfg { - unsigned int irq; - struct irq_cfg *next; - struct irq_pin_list *irq_2_pin; - cpumask_t domain; - cpumask_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - -/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg_legacy[] __initdata = { - [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, - [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, - [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, - [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, - [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, - [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, - [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, - [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, - [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, - [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, - [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, - [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, - [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, - [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, - [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, - [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, -}; - -static struct irq_cfg irq_cfg_init = { .irq = -1U, }; -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); - -static void init_one_irq_cfg(struct irq_cfg *cfg) -{ - memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); -} - -static struct irq_cfg *irq_cfgx; -static struct irq_cfg *irq_cfgx_free; -static void __init init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; - - cfg = *da->name; - - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); - for (i = legacy_count; i < *da->nr; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < *da->nr; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = &irq_cfgx[legacy_count]; - irq_cfgx[legacy_count - 1].next = NULL; -} - -#define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg; cfg = cfg->next) - -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); - -static struct irq_cfg *irq_cfg(unsigned int irq) -{ - struct irq_cfg *cfg; - - cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg = cfg->next; - } - - return NULL; -} - -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - struct irq_cfg *cfg, *cfg_pri; - int i; - int count = 0; - - cfg_pri = cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg_pri = cfg; - cfg = cfg->next; - count++; - } - - if (!irq_cfgx_free) { - unsigned long phys; - unsigned long total_bytes; - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - - total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; - if (after_bootmem) - cfg = kzalloc(total_bytes, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); - - phys = __pa(cfg); - printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = cfg; - } - - cfg = irq_cfgx_free; - irq_cfgx_free = irq_cfgx_free->next; - cfg->next = NULL; - if (cfg_pri) - cfg_pri->next = cfg; - else - irq_cfgx = cfg; - cfg->irq = irq; - printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); -#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG - { - /* dump the results */ - struct irq_cfg *cfg; - unsigned long phys; - unsigned long bytes = sizeof(struct irq_cfg); - - printk(KERN_DEBUG "=========================== %d\n", irq); - printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); - for_each_irq_cfg(cfg) { - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); - } - printk(KERN_DEBUG "===========================\n"); - } -#endif - return cfg; -} - -/* - * This is performance-critical, we want to do it O(1) - * - * the indexing order of this array favors 1:1 mappings - * between pins and IRQs. - */ - -struct irq_pin_list { - int apic, pin; - struct irq_pin_list *next; -}; - -static struct irq_pin_list *irq_2_pin_head; -/* fill one page ? */ -static int nr_irq_2_pin = 0x100; -static struct irq_pin_list *irq_2_pin_ptr; -static void __init irq_2_pin_init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_pin_list *pin; - int i; - - pin = *da->name; - - for (i = 1; i < *da->nr; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = &pin[0]; -} -DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); - -static struct irq_pin_list *get_one_free_irq_2_pin(void) -{ - struct irq_pin_list *pin; - int i; - - pin = irq_2_pin_ptr; - - if (pin) { - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; - } - - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); - - if (after_bootmem) - pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, - GFP_ATOMIC); - else - pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * - nr_irq_2_pin, PAGE_SIZE, 0); - - if (!pin) - panic("can not get more irq_2_pin\n"); - - for (i = 1; i < nr_irq_2_pin; i++) - pin[i-1].next = &pin[i]; - - irq_2_pin_ptr = pin->next; - pin->next = NULL; - - return pin; -} - -struct io_apic { - unsigned int index; - unsigned int unused[3]; - unsigned int data; -}; - -static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) -{ - return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) - + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); -} - -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - return readl(&io_apic->data); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index register - */ -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); - writel(value, &io_apic->data); -} - -#ifdef CONFIG_X86_64 -static bool io_apic_level_ack_pending(unsigned int irq) -{ - struct irq_pin_list *entry; - unsigned long flags; - struct irq_cfg *cfg = irq_cfg(irq); - - spin_lock_irqsave(&ioapic_lock, flags); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - int pin; - - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - /* Is the remote IRR bit set? */ - if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); - return true; - } - if (!entry->next) - break; - entry = entry->next; - } - spin_unlock_irqrestore(&ioapic_lock, flags); - - return false; -} -#endif - -union entry_union { - struct { u32 w1, w2; }; - struct IO_APIC_route_entry entry; -}; - -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) -{ - union entry_union eu; - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); - return eu.entry; -} - -/* - * When we write a new IO APIC routing entry, we need to write the high - * word first! If the mask bit in the low word is clear, we will enable - * the interrupt, and we need to make sure the entry is fully populated - * before that happens. - */ -static void -__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - union entry_union eu; - eu.entry = e; - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); -} - -static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) -{ - unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); - __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -/* - * When we mask an IO APIC routing entry, we need to write the low - * word first, in order to set the mask bit before we change the - * high bits! - */ -static void ioapic_mask_entry(int apic, int pin) -{ - unsigned long flags; - union entry_union eu = { .entry.mask = 1 }; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(apic, 0x10 + 2*pin, eu.w1); - io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -#ifdef CONFIG_SMP -static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - int apic, pin; - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - unsigned int reg; - - if (!entry) - break; - - apic = entry->apic; - pin = entry->pin; -#ifdef CONFIG_INTR_REMAP - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(irq)) - io_apic_write(apic, 0x11 + pin*2, dest); -#else - io_apic_write(apic, 0x11 + pin*2, dest); -#endif - reg = io_apic_read(apic, 0x10 + pin*2); - reg &= ~IO_APIC_REDIR_VECTOR_MASK; - reg |= vector; - io_apic_modify(apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -static int assign_irq_vector(int irq, cpumask_t mask); - -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - cfg = irq_cfg(irq); - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - /* - * Only the high 8 bits are valid. - */ - dest = SET_APIC_LOGICAL_ID(dest); - - desc = irq_to_desc(irq); - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - desc->affinity = mask; - spin_unlock_irqrestore(&ioapic_lock, flags); -} -#endif /* CONFIG_SMP */ - -/* - * The common case is 1:1 IRQ<->pin mappings. Sometimes there are - * shared ISA-space IRQs, so we have to support them. We are super - * fast in the common case, and fast for shared ISA-space IRQs. - */ -static void add_pin_to_irq(unsigned int irq, int apic, int pin) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - - /* first time to refer irq_cfg, so with new */ - cfg = irq_cfg_alloc(irq); - entry = cfg->irq_2_pin; - if (!entry) { - entry = get_one_free_irq_2_pin(); - cfg->irq_2_pin = entry; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); - return; - } - - while (entry->next) { - /* not again, please */ - if (entry->apic == apic && entry->pin == pin) - return; - - entry = entry->next; - } - - entry->next = get_one_free_irq_2_pin(); - entry = entry->next; - entry->apic = apic; - entry->pin = pin; - printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); -} - -/* - * Reroute an IRQ to a different pin. - */ -static void __init replace_pin_at_irq(unsigned int irq, - int oldapic, int oldpin, - int newapic, int newpin) -{ - struct irq_cfg *cfg = irq_cfg(irq); - struct irq_pin_list *entry = cfg->irq_2_pin; - int replaced = 0; - - while (entry) { - if (entry->apic == oldapic && entry->pin == oldpin) { - entry->apic = newapic; - entry->pin = newpin; - replaced = 1; - /* every one is different, right? */ - break; - } - entry = entry->next; - } - - /* why? call replace before add? */ - if (!replaced) - add_pin_to_irq(irq, newapic, newpin); -} - -#ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - -#define DO_ACTION(name,R,ACTION, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) - -/* mask = 0 */ -DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) - -#else - -static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) -{ - struct irq_cfg *cfg; - struct irq_pin_list *entry; - unsigned int pin, reg; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } -} - -/* mask = 1 */ -static void __mask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); -} - -/* mask = 0 */ -static void __unmask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); -} - -/* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER); -} - -/* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED); -} - -#endif - -static void mask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __mask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void unmask_IO_APIC_irq (unsigned int irq) -{ - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - -static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) -{ - struct IO_APIC_route_entry entry; - - /* Check delivery_mode to be sure we're not clearing an SMI pin */ - entry = ioapic_read_entry(apic, pin); - if (entry.delivery_mode == dest_SMI) - return; - /* - * Disable it in the IO-APIC irq-routing table: - */ - ioapic_mask_entry(apic, pin); -} - -static void clear_IO_APIC (void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - clear_IO_APIC_pin(apic, pin); -} - -#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) -void send_IPI_self(int vector) -{ - unsigned int cfg; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; - /* - * Send the IPI. The write to APIC_ICR fires this off. - */ - apic_write(APIC_ICR, cfg); -} -#endif /* !CONFIG_SMP && CONFIG_X86_32*/ - -#ifdef CONFIG_X86_32 -/* - * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to - * specific CPU-side IRQs. - */ - -#define MAX_PIRQS 8 -static int pirq_entries [MAX_PIRQS]; -static int pirqs_enabled; - -static int __init ioapic_pirq_setup(char *str) -{ - int i, max; - int ints[MAX_PIRQS+1]; - - get_options(str, ARRAY_SIZE(ints), ints); - - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; - - pirqs_enabled = 1; - apic_printk(APIC_VERBOSE, KERN_INFO - "PIRQ redirection, working around broken MP-BIOS.\n"); - max = MAX_PIRQS; - if (ints[0] < MAX_PIRQS) - max = ints[0]; - - for (i = 0; i < max; i++) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); - /* - * PIRQs are mapped upside down, usually. - */ - pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; - } - return 1; -} - -__setup("pirq=", ioapic_pirq_setup); -#endif /* CONFIG_X86_32 */ - -#ifdef CONFIG_INTR_REMAP -/* I/O APIC RTE contents at the OS boot up */ -static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; - -/* - * Saves and masks all the unmasked IO-APIC RTE's - */ -int save_mask_IO_APIC_setup(void) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - int apic, pin; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - - for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - return -ENOMEM; - } - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - - entry = early_ioapic_entries[apic][pin] = - ioapic_read_entry(apic, pin); - if (!entry.mask) { - entry.mask = 1; - ioapic_write_entry(apic, pin, entry); - } - } - return 0; -} - -void restore_IO_APIC_setup(void) -{ - int apic, pin; - - for (apic = 0; apic < nr_ioapics; apic++) - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); -} - -void reinit_intr_remapped_IO_APIC(int intr_remapping) -{ - /* - * for now plain restore of previous settings. - * TBD: In the case of OS enabling interrupt-remapping, - * IO-APIC RTE's need to be setup to point to interrupt-remapping - * table entries. for now, do a plain restore, and wait for - * the setup_IO_APIC_irqs() to do proper initialization. - */ - restore_IO_APIC_setup(); -} -#endif - -/* - * Find the IRQ entry number of a certain pin. - */ -static int find_irq_entry(int apic, int pin, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == type && - (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) && - mp_irqs[i].mp_dstirq == pin) - return i; - - return -1; -} - -/* - * Find the pin to which IRQ[irq] (ISA) is connected - */ -static int __init find_isa_irq_pin(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - - return mp_irqs[i].mp_dstirq; - } - return -1; -} - -static int __init find_isa_irq_apic(int irq, int type) -{ - int i; - - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - if (test_bit(lbus, mp_bus_not_pci) && - (mp_irqs[i].mp_irqtype == type) && - (mp_irqs[i].mp_srcbusirq == irq)) - break; - } - if (i < mp_irq_entries) { - int apic; - for(apic = 0; apic < nr_ioapics; apic++) { - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) - return apic; - } - } - - return -1; -} - -/* - * Find a specific PCI IRQ entry. - * Not an __init, possibly needed by modules - */ -static int pin_2_irq(int idx, int apic, int pin); - -int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) -{ - int apic, i, best_guess = -1; - - apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", - bus, slot, pin); - if (test_bit(bus, mp_bus_not_pci)) { - apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); - return -1; - } - for (i = 0; i < mp_irq_entries; i++) { - int lbus = mp_irqs[i].mp_srcbus; - - for (apic = 0; apic < nr_ioapics; apic++) - if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || - mp_irqs[i].mp_dstapic == MP_APIC_ALL) - break; - - if (!test_bit(lbus, mp_bus_not_pci) && - !mp_irqs[i].mp_irqtype && - (bus == lbus) && - (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { - int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); - - if (!(apic || IO_APIC_IRQ(irq))) - continue; - - if (pin == (mp_irqs[i].mp_srcbusirq & 3)) - return irq; - /* - * Use the first all-but-pin matching entry as a - * best-guess fuzzy result for broken mptables. - */ - if (best_guess < 0) - best_guess = irq; - } - } - return best_guess; -} - -EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); - -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) -/* - * EISA Edge/Level control register, ELCR - */ -static int EISA_ELCR(unsigned int irq) -{ - if (irq < 16) { - unsigned int port = 0x4d0 + (irq >> 3); - return (inb(port) >> (irq & 7)) & 1; - } - apic_printk(APIC_VERBOSE, KERN_INFO - "Broken MPtable reports ISA irq %d\n", irq); - return 0; -} - -#endif - -/* ISA interrupts are always polarity zero edge triggered, - * when listed as conforming in the MP table. */ - -#define default_ISA_trigger(idx) (0) -#define default_ISA_polarity(idx) (0) - -/* EISA interrupts are always polarity zero and can be edge or level - * trigger depending on the ELCR value. If an interrupt is listed as - * EISA conforming in the MP table, that means its trigger type must - * be read in from the ELCR */ - -#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) -#define default_EISA_polarity(idx) default_ISA_polarity(idx) - -/* PCI interrupts are always polarity one level triggered, - * when listed as conforming in the MP table. */ - -#define default_PCI_trigger(idx) (1) -#define default_PCI_polarity(idx) (1) - -/* MCA interrupts are always polarity zero level triggered, - * when listed as conforming in the MP table. */ - -#define default_MCA_trigger(idx) (1) -#define default_MCA_polarity(idx) default_ISA_polarity(idx) - -static int MPBIOS_polarity(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int polarity; - - /* - * Determine IRQ line polarity (high active or low active): - */ - switch (mp_irqs[idx].mp_irqflag & 3) - { - case 0: /* conforms, ie. bus-type dependent polarity */ - if (test_bit(bus, mp_bus_not_pci)) - polarity = default_ISA_polarity(idx); - else - polarity = default_PCI_polarity(idx); - break; - case 1: /* high active */ - { - polarity = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - case 3: /* low active */ - { - polarity = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - polarity = 1; - break; - } - } - return polarity; -} - -static int MPBIOS_trigger(int idx) -{ - int bus = mp_irqs[idx].mp_srcbus; - int trigger; - - /* - * Determine IRQ trigger mode (edge or level sensitive): - */ - switch ((mp_irqs[idx].mp_irqflag>>2) & 3) - { - case 0: /* conforms, ie. bus-type dependent */ - if (test_bit(bus, mp_bus_not_pci)) - trigger = default_ISA_trigger(idx); - else - trigger = default_PCI_trigger(idx); -#if defined(CONFIG_EISA) || defined(CONFIG_MCA) - switch (mp_bus_id_to_type[bus]) { - case MP_BUS_ISA: /* ISA pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_EISA: /* EISA pin */ - { - trigger = default_EISA_trigger(idx); - break; - } - case MP_BUS_PCI: /* PCI pin */ - { - /* set before the switch */ - break; - } - case MP_BUS_MCA: /* MCA pin */ - { - trigger = default_MCA_trigger(idx); - break; - } - default: - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - } -#endif - break; - case 1: /* edge */ - { - trigger = 0; - break; - } - case 2: /* reserved */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 1; - break; - } - case 3: /* level */ - { - trigger = 1; - break; - } - default: /* invalid */ - { - printk(KERN_WARNING "broken BIOS!!\n"); - trigger = 0; - break; - } - } - return trigger; -} - -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - -int (*ioapic_renumber_irq)(int ioapic, int irq); -static int pin_2_irq(int idx, int apic, int pin) -{ - int irq, i; - int bus = mp_irqs[idx].mp_srcbus; - - /* - * Debugging check, we are in big trouble if this message pops up! - */ - if (mp_irqs[idx].mp_dstirq != pin) - printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); - - if (test_bit(bus, mp_bus_not_pci)) { - irq = mp_irqs[idx].mp_srcbusirq; - } else { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); - } - -#ifdef CONFIG_X86_32 - /* - * PCI IRQ command line redirection. Yes, limits are hardcoded. - */ - if ((pin >= 16) && (pin <= 23)) { - if (pirq_entries[pin-16] != -1) { - if (!pirq_entries[pin-16]) { - apic_printk(APIC_VERBOSE, KERN_DEBUG - "disabling PIRQ%d\n", pin-16); - } else { - irq = pirq_entries[pin-16]; - apic_printk(APIC_VERBOSE, KERN_DEBUG - "using PIRQ%d -> IRQ %d\n", - pin-16, irq); - } - } - } -#endif - - return irq; -} - -void lock_vector_lock(void) -{ - /* Used to the online set of cpus does not change - * during assign_irq_vector. - */ - spin_lock(&vector_lock); -} - -void unlock_vector_lock(void) -{ - spin_unlock(&vector_lock); -} - -static int __assign_irq_vector(int irq, cpumask_t mask) -{ - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; - unsigned int old_vector; - int cpu; - struct irq_cfg *cfg; - - cfg = irq_cfg(irq); - - /* Only try and allocate irqs on cpus that are present */ - cpus_and(mask, mask, cpu_online_map); - - if ((cfg->move_in_progress) || cfg->move_cleanup_count) - return -EBUSY; - - old_vector = cfg->vector; - if (old_vector) { - cpumask_t tmp; - cpus_and(tmp, cfg->domain, mask); - if (!cpus_empty(tmp)) - return 0; - } - - for_each_cpu_mask_nr(cpu, mask) { - cpumask_t domain, new_mask; - int new_cpu; - int vector, offset; - - domain = vector_allocation_domain(cpu); - cpus_and(new_mask, domain, cpu_online_map); - - vector = current_vector; - offset = current_offset; -next: - vector += 8; - if (vector >= first_system_vector) { - /* If we run out of vectors on large boxen, must share them. */ - offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; - } - if (unlikely(current_vector == vector)) - continue; -#ifdef CONFIG_X86_64 - if (vector == IA32_SYSCALL_VECTOR) - goto next; -#else - if (vector == SYSCALL_VECTOR) - goto next; -#endif - for_each_cpu_mask_nr(new_cpu, new_mask) - if (per_cpu(vector_irq, new_cpu)[vector] != -1) - goto next; - /* Found one! */ - current_vector = vector; - current_offset = offset; - if (old_vector) { - cfg->move_in_progress = 1; - cfg->old_domain = cfg->domain; - } - for_each_cpu_mask_nr(new_cpu, new_mask) - per_cpu(vector_irq, new_cpu)[vector] = irq; - cfg->vector = vector; - cfg->domain = domain; - return 0; - } - return -ENOSPC; -} - -static int assign_irq_vector(int irq, cpumask_t mask) -{ - int err; - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, mask); - spin_unlock_irqrestore(&vector_lock, flags); - return err; -} - -static void __clear_irq_vector(int irq) -{ - struct irq_cfg *cfg; - cpumask_t mask; - int cpu, vector; - - cfg = irq_cfg(irq); - BUG_ON(!cfg->vector); - - vector = cfg->vector; - cpus_and(mask, cfg->domain, cpu_online_map); - for_each_cpu_mask_nr(cpu, mask) - per_cpu(vector_irq, cpu)[vector] = -1; - - cfg->vector = 0; - cpus_clear(cfg->domain); -} - -void __setup_vector_irq(int cpu) -{ - /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ - int irq, vector; - struct irq_cfg *cfg; - - /* Mark the inuse vectors */ - for_each_irq_cfg(cfg) { - if (!cpu_isset(cpu, cfg->domain)) - continue; - vector = cfg->vector; - irq = cfg->irq; - per_cpu(vector_irq, cpu)[vector] = irq; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - irq = per_cpu(vector_irq, cpu)[vector]; - if (irq < 0) - continue; - - cfg = irq_cfg(irq); - if (!cpu_isset(cpu, cfg->domain)) - per_cpu(vector_irq, cpu)[vector] = -1; - } -} - -static struct irq_chip ioapic_chip; -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip; -#endif - -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - -#ifdef CONFIG_X86_32 -static inline int IO_APIC_irq_trigger(int irq) -{ - int apic, idx, pin; - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* - * nonexistent IRQs are edge default - */ - return 0; -} -#else -static inline int IO_APIC_irq_trigger(int irq) -{ - return 1; -} -#endif - -static void ioapic_register_intr(int irq, unsigned long trigger) -{ - struct irq_desc *desc; - - /* first time to use this irq_desc */ - if (irq < 16) - desc = irq_to_desc(irq); - else - desc = irq_to_desc_alloc(irq); - - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - desc->status |= IRQ_LEVEL; - else - desc->status &= ~IRQ_LEVEL; - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - desc->status |= IRQ_MOVE_PCNTXT; - if (trigger) - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_edge_irq, "edge"); - return; - } -#endif - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); -} - -static int setup_ioapic_entry(int apic, int irq, - struct IO_APIC_route_entry *entry, - unsigned int destination, int trigger, - int polarity, int vector) -{ - /* - * add it to the IO-APIC irq-routing table: - */ - memset(entry,0,sizeof(*entry)); - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) { - struct intel_iommu *iommu = map_ioapic_to_ir(apic); - struct irte irte; - struct IR_IO_APIC_route_entry *ir_entry = - (struct IR_IO_APIC_route_entry *) entry; - int index; - - if (!iommu) - panic("No mapping iommu for ioapic %d\n", apic); - - index = alloc_irte(iommu, irq, 1); - if (index < 0) - panic("Failed to allocate IRTE for ioapic %d\n", apic); - - memset(&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = trigger; - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = vector; - irte.dest_id = IRTE_DEST(destination); - - modify_irte(irq, &irte); - - ir_entry->index2 = (index >> 15) & 0x1; - ir_entry->zero = 0; - ir_entry->format = 1; - ir_entry->index = (index & 0x7fff); - } else -#endif - { - entry->delivery_mode = INT_DELIVERY_MODE; - entry->dest_mode = INT_DEST_MODE; - entry->dest = destination; - } - - entry->mask = 0; /* enable IRQ */ - entry->trigger = trigger; - entry->polarity = polarity; - entry->vector = vector; - - /* Mask level triggered irqs. - * Use IRQ_DELAYED_DISABLE for edge triggered irqs. - */ - if (trigger) - entry->mask = 1; - return 0; -} - -static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, - int trigger, int polarity) -{ - struct irq_cfg *cfg; - struct IO_APIC_route_entry entry; - cpumask_t mask; - - if (!IO_APIC_IRQ(irq)) - return; - - cfg = irq_cfg(irq); - - mask = TARGET_CPUS; - if (assign_irq_vector(irq, mask)) - return; - - cpus_and(mask, cfg->domain, mask); - - apic_printk(APIC_VERBOSE,KERN_DEBUG - "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " - "IRQ %d Mode:%i Active:%i)\n", - apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, - irq, trigger, polarity); - - - if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, - cpu_mask_to_apicid(mask), trigger, polarity, - cfg->vector)) { - printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", - mp_ioapics[apic].mp_apicid, pin); - __clear_irq_vector(irq); - return; - } - - ioapic_register_intr(irq, trigger); - if (irq < 16) - disable_8259A_irq(irq); - - ioapic_write_entry(apic, pin, entry); -} - -static void __init setup_IO_APIC_irqs(void) -{ - int apic, pin, idx, irq, first_notcon = 1; - - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); - continue; - } - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } - - irq = pin_2_irq(idx, apic, pin); -#ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) - continue; -#endif - add_pin_to_irq(irq, apic, pin); - - setup_IO_APIC_irq(apic, pin, irq, - irq_trigger(idx), irq_polarity(idx)); - } - } - - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); -} - -/* - * Set up the timer pin, possibly with the 8259A-master behind. - */ -static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, - int vector) -{ - struct IO_APIC_route_entry entry; - -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - return; -#endif - - memset(&entry, 0, sizeof(entry)); - - /* - * We use logical delivery to get the timer IRQ - * to the first CPU. - */ - entry.dest_mode = INT_DEST_MODE; - entry.mask = 1; /* mask IRQ now */ - entry.dest = cpu_mask_to_apicid(TARGET_CPUS); - entry.delivery_mode = INT_DELIVERY_MODE; - entry.polarity = 0; - entry.trigger = 0; - entry.vector = vector; - - /* - * The timer IRQ doesn't have to know that behind the - * scene we may have a 8259A-master in AEOI mode ... - */ - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(apic, pin, entry); -} - - -__apicdebuginit(void) print_IO_APIC(void) -{ - int apic, i; - union IO_APIC_reg_00 reg_00; - union IO_APIC_reg_01 reg_01; - union IO_APIC_reg_02 reg_02; - union IO_APIC_reg_03 reg_03; - unsigned long flags; - struct irq_cfg *cfg; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); - for (i = 0; i < nr_ioapics; i++) - printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", - mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); - - /* - * We are a bit conservative about what we expect. We have to - * know about every hardware change ASAP. - */ - printk(KERN_INFO "testing the IO APIC.......................\n"); - - for (apic = 0; apic < nr_ioapics; apic++) { - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - reg_01.raw = io_apic_read(apic, 1); - if (reg_01.bits.version >= 0x10) - reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); - - printk("\n"); - printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); - printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); - printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); - printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); - printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); - - printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); - printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); - - printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); - printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); - - /* - * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, - * but the value of reg_02 is read as the previous read register - * value, so ignore it if reg_02 == reg_01. - */ - if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); - printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); - } - - /* - * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 - * or reg_03, but the value of reg_0[23] is read as the previous read - * register value, so ignore it if reg_03 == reg_0[12]. - */ - if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && - reg_03.raw != reg_01.raw) { - printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); - printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); - } - - printk(KERN_DEBUG ".... IRQ redirection table:\n"); - - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect: \n"); - - for (i = 0; i <= reg_01.bits.entries; i++) { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(apic, i); - - printk(KERN_DEBUG " %02x %03X ", - i, - entry.dest - ); - - printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } - printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(cfg) { - struct irq_pin_list *entry = cfg->irq_2_pin; - if (!entry) - continue; - printk(KERN_DEBUG "IRQ%d ", cfg->irq); - for (;;) { - printk("-> %d:%d", entry->apic, entry->pin); - if (!entry->next) - break; - entry = entry->next; - } - printk("\n"); - } - - printk(KERN_INFO ".................................... done.\n"); - - return; -} - -__apicdebuginit(void) print_APIC_bitfield(int base) -{ - unsigned int v; - int i, j; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); - for (i = 0; i < 8; i++) { - v = apic_read(base + i*0x10); - for (j = 0; j < 32; j++) { - if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - - v = apic_read(APIC_ESR); - printk(KERN_DEBUG "... APIC ESR: %08x\n", v); - } - - icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); - - v = apic_read(APIC_LVTT); - printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); - - if (maxlvt > 3) { /* PC is LVT#4. */ - v = apic_read(APIC_LVTPC); - printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); - } - v = apic_read(APIC_LVT0); - printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); - v = apic_read(APIC_LVT1); - printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); - - if (maxlvt > 2) { /* ERR is LVT#3. */ - v = apic_read(APIC_LVTERR); - printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); - } - - v = apic_read(APIC_TMICT); - printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); - v = apic_read(APIC_TMCCT); - printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); - v = apic_read(APIC_TDCR); - printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); - printk("\n"); -} - -__apicdebuginit(void) print_all_local_APICs(void) -{ - on_each_cpu(print_local_APIC, NULL, 1); -} - -__apicdebuginit(void) print_PIC(void) -{ - unsigned int v; - unsigned long flags; - - if (apic_verbosity == APIC_QUIET) - return; - - printk(KERN_DEBUG "\nprinting PIC contents\n"); - - spin_lock_irqsave(&i8259A_lock, flags); - - v = inb(0xa1) << 8 | inb(0x21); - printk(KERN_DEBUG "... PIC IMR: %04x\n", v); - - v = inb(0xa0) << 8 | inb(0x20); - printk(KERN_DEBUG "... PIC IRR: %04x\n", v); - - outb(0x0b,0xa0); - outb(0x0b,0x20); - v = inb(0xa0) << 8 | inb(0x20); - outb(0x0a,0xa0); - outb(0x0a,0x20); - - spin_unlock_irqrestore(&i8259A_lock, flags); - - printk(KERN_DEBUG "... PIC ISR: %04x\n", v); - - v = inb(0x4d1) << 8 | inb(0x4d0); - printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); -} - -__apicdebuginit(int) print_all_ICs(void) -{ - print_PIC(); - print_all_local_APICs(); - print_IO_APIC(); - - return 0; -} - -fs_initcall(print_all_ICs); - - -/* Where if anywhere is the i8259 connect in external int mode */ -static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; - -void __init enable_IO_APIC(void) -{ - union IO_APIC_reg_01 reg_01; - int i8259_apic, i8259_pin; - int apic; - unsigned long flags; - -#ifdef CONFIG_X86_32 - int i; - if (!pirqs_enabled) - for (i = 0; i < MAX_PIRQS; i++) - pirq_entries[i] = -1; -#endif - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } - for(apic = 0; apic < nr_ioapics; apic++) { - int pin; - /* See if any of the pins is in ExtINT mode */ - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - struct IO_APIC_route_entry entry; - entry = ioapic_read_entry(apic, pin); - - /* If the interrupt line is enabled and in ExtInt mode - * I have found the pin where the i8259 is connected. - */ - if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { - ioapic_i8259.apic = apic; - ioapic_i8259.pin = pin; - goto found_i8259; - } - } - } - found_i8259: - /* Look to see what if the MP table has reported the ExtINT */ - /* If we could not find the appropriate pin by looking at the ioapic - * the i8259 probably is not connected the ioapic but give the - * mptable a chance anyway. - */ - i8259_pin = find_isa_irq_pin(0, mp_ExtINT); - i8259_apic = find_isa_irq_apic(0, mp_ExtINT); - /* Trust the MP table if nothing is setup in the hardware */ - if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { - printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); - ioapic_i8259.pin = i8259_pin; - ioapic_i8259.apic = i8259_apic; - } - /* Complain if the MP table and the hardware disagree */ - if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && - (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) - { - printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); - } - - /* - * Do not trust the IO-APIC being empty at bootup - */ - clear_IO_APIC(); -} - -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) -{ - /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - /* - * If the i8259 is routed through an IOAPIC - * Put that IOAPIC in virtual wire mode - * so legacy interrupts can be delivered. - */ - if (ioapic_i8259.pin != -1) { - struct IO_APIC_route_entry entry; - - memset(&entry, 0, sizeof(entry)); - entry.mask = 0; /* Enabled */ - entry.trigger = 0; /* Edge */ - entry.irr = 0; - entry.polarity = 0; /* High */ - entry.delivery_status = 0; - entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = dest_ExtINT; /* ExtInt */ - entry.vector = 0; - entry.dest = read_apic_id(); - - /* - * Add it to the IO-APIC irq-routing table: - */ - ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); - } - - disconnect_bsp_APIC(ioapic_i8259.pin != -1); -} - -#ifdef CONFIG_X86_32 -/* - * function to set the IO-APIC physical IDs based on the - * values stored in the MPC table. - * - * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 - */ - -static void __init setup_ioapic_ids_from_mpc(void) -{ - union IO_APIC_reg_00 reg_00; - physid_mask_t phys_id_present_map; - int apic; - int i; - unsigned char old_id; - unsigned long flags; - - if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) - return; - - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; - /* - * This is broken; anything with a real cpu count has to - * circumvent this idiocy regardless. - */ - phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); - - /* - * Set the IOAPIC ID to the value stored in the MPC table. - */ - for (apic = 0; apic < nr_ioapics; apic++) { - - /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - old_id = mp_ioapics[apic].mp_apicid; - - if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", - apic, mp_ioapics[apic].mp_apicid); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - reg_00.bits.ID); - mp_ioapics[apic].mp_apicid = reg_00.bits.ID; - } - - /* - * Sanity check, is the ID really free? Every APIC in a - * system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(phys_id_present_map, - mp_ioapics[apic].mp_apicid)) { - printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", - apic, mp_ioapics[apic].mp_apicid); - for (i = 0; i < get_physical_broadcast(); i++) - if (!physid_isset(i, phys_id_present_map)) - break; - if (i >= get_physical_broadcast()) - panic("Max APIC ID exceeded!\n"); - printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", - i); - physid_set(i, phys_id_present_map); - mp_ioapics[apic].mp_apicid = i; - } else { - physid_mask_t tmp; - tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); - apic_printk(APIC_VERBOSE, "Setting %d in the " - "phys_id_present_map\n", - mp_ioapics[apic].mp_apicid); - physids_or(phys_id_present_map, phys_id_present_map, tmp); - } - - - /* - * We need to adjust the IRQ routing table - * if the ID changed. - */ - if (old_id != mp_ioapics[apic].mp_apicid) - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_dstapic == old_id) - mp_irqs[i].mp_dstapic - = mp_ioapics[apic].mp_apicid; - - /* - * Read the right value from the MPC table and - * write it into the ID register. - */ - apic_printk(APIC_VERBOSE, KERN_INFO - "...changing IO-APIC physical APIC ID to %d ...", - mp_ioapics[apic].mp_apicid); - - reg_00.bits.ID = mp_ioapics[apic].mp_apicid; - spin_lock_irqsave(&ioapic_lock, flags); - - /* - * Sanity check - */ - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(apic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) - printk("could not set ID!\n"); - else - apic_printk(APIC_VERBOSE, " ok.\n"); - } -} -#endif - -int no_timer_check __initdata; - -static int __init notimercheck(char *s) -{ - no_timer_check = 1; - return 1; -} -__setup("no_timer_check", notimercheck); - -/* - * There is a nasty bug in some older SMP boards, their mptable lies - * about the timer IRQ. We do the following to work around the situation: - * - * - timer IRQ defaults to IO-APIC IRQ - * - if this function detects that timer IRQs are defunct, then we fall - * back to ISA timer IRQs - */ -static int __init timer_irq_works(void) -{ - unsigned long t1 = jiffies; - unsigned long flags; - - if (no_timer_check) - return 1; - - local_save_flags(flags); - local_irq_enable(); - /* Let ten ticks pass... */ - mdelay((10 * 1000) / HZ); - local_irq_restore(flags); - - /* - * Expect a few ticks at least, to be sure some possible - * glue logic does not lock up after one or two first - * ticks in a non-ExtINT mode. Also the local APIC - * might have cached one ExtINT interrupt. Finally, at - * least one tick may be lost due to delays. - */ - - /* jiffies wrap? */ - if (time_after(jiffies, t1 + 4)) - return 1; - return 0; -} - -/* - * In the SMP+IOAPIC case it might happen that there are an unspecified - * number of pending IRQ events unhandled. These cases are very rare, - * so we 'resend' these IRQs via IPIs, to the same CPU. It's much - * better to do it this way as thus we do not have to be aware of - * 'pending' interrupts in the IRQ path, except at this point. - */ -/* - * Edge triggered needs to resend any interrupt - * that was delayed but this is now handled in the device - * independent code. - */ - -/* - * Starting up a edge-triggered IO-APIC interrupt is - * nasty - we need to make sure that we get the edge. - * If it is already asserted for some reason, we need - * return 1 to indicate that is was pending. - * - * This is not complete - we should be able to fake - * an edge even if it isn't on the 8259A... - */ - -static unsigned int startup_ioapic_irq(unsigned int irq) -{ - int was_pending = 0; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < 16) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) - was_pending = 1; - } - __unmask_IO_APIC_irq(irq); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return was_pending; -} - -#ifdef CONFIG_X86_64 -static int ioapic_retrigger_irq(unsigned int irq) -{ - - struct irq_cfg *cfg = irq_cfg(irq); - unsigned long flags; - - spin_lock_irqsave(&vector_lock, flags); - send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); - - return 1; -} -#else -static int ioapic_retrigger_irq(unsigned int irq) -{ - send_IPI_self(irq_cfg(irq)->vector); - - return 1; -} -#endif - -/* - * Level and edge triggered IO-APIC interrupts need different handling, - * so we use two separate IRQ descriptors. Edge triggered IRQs can be - * handled with the level-triggered descriptor, but that one has slightly - * more overhead. Level-triggered interrupts cannot be handled with the - * edge-triggered handler, without risking IRQ storms and other ugly - * races. - */ - -#ifdef CONFIG_SMP - -#ifdef CONFIG_INTR_REMAP -static void ir_irq_migration(struct work_struct *work); - -static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); - -/* - * Migrate the IO-APIC irq in the presence of intr-remapping. - * - * For edge triggered, irq migration is a simple atomic update(of vector - * and cpu destination) of IRTE and flush the hardware cache. - * - * For level triggered, we need to modify the io-apic RTE aswell with the update - * vector information, along with modifying IRTE with vector and destination. - * So irq migration for level triggered is little bit more complex compared to - * edge triggered migration. But the good news is, we use the same algorithm - * for level triggered migration as we have today, only difference being, - * we now initiate the irq migration from process context instead of the - * interrupt context. - * - * In future, when we do a directed EOI (combined with cpu EOI broadcast - * suppression) to the IO-APIC, level triggered irq migration will also be - * as simple as edge triggered migration and we can do the irq migration - * with a simple atomic update to IO-APIC RTE. - */ -static void migrate_ioapic_irq(int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct irq_desc *desc; - cpumask_t tmp, cleanup_mask; - struct irte irte; - int modify_ioapic_rte; - unsigned int dest; - unsigned long flags; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - desc = irq_to_desc(irq); - modify_ioapic_rte = desc->status & IRQ_LEVEL; - if (modify_ioapic_rte) { - spin_lock_irqsave(&ioapic_lock, flags); - __target_IO_APIC_irq(irq, dest, cfg->vector); - spin_unlock_irqrestore(&ioapic_lock, flags); - } - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * Modified the IRTE and flushes the Interrupt entry cache. - */ - modify_irte(irq, &irte); - - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc->affinity = mask; -} - -static int migrate_irq_remapped_level(int irq) -{ - int ret = -1; - struct irq_desc *desc = irq_to_desc(irq); - - mask_IO_APIC_irq(irq); - - if (io_apic_level_ack_pending(irq)) { - /* - * Interrupt in progress. Migrating irq now will change the - * vector information in the IO-APIC RTE and that will confuse - * the EOI broadcast performed by cpu. - * So, delay the irq migration to the next instance. - */ - schedule_delayed_work(&ir_migration_work, 1); - goto unmask; - } - - /* everthing is clear. we have right of way */ - migrate_ioapic_irq(irq, desc->pending_mask); - - ret = 0; - desc->status &= ~IRQ_MOVE_PENDING; - cpus_clear(desc->pending_mask); - -unmask: - unmask_IO_APIC_irq(irq); - return ret; -} - -static void ir_irq_migration(struct work_struct *work) -{ - unsigned int irq; - struct irq_desc *desc; - - for_each_irq_desc(irq, desc) { - if (desc->status & IRQ_MOVE_PENDING) { - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - if (!desc->chip->set_affinity || - !(desc->status & IRQ_MOVE_PENDING)) { - desc->status &= ~IRQ_MOVE_PENDING; - spin_unlock_irqrestore(&desc->lock, flags); - continue; - } - - desc->chip->set_affinity(irq, desc->pending_mask); - spin_unlock_irqrestore(&desc->lock, flags); - } - } -} - -/* - * Migrates the IRQ destination in the process context. - */ -static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - - if (desc->status & IRQ_LEVEL) { - desc->status |= IRQ_MOVE_PENDING; - desc->pending_mask = mask; - migrate_irq_remapped_level(irq); - return; - } - - migrate_ioapic_irq(irq, mask); -} -#endif - -asmlinkage void smp_irq_move_cleanup_interrupt(void) -{ - unsigned vector, me; - ack_APIC_irq(); -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - unsigned int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; - - desc = irq_to_desc(irq); - if (!desc) - continue; - - cfg = irq_cfg(irq); - spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; - - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) - goto unlock; - - __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; -unlock: - spin_unlock(&desc->lock); - } - - irq_exit(); -} - -static void irq_complete_move(unsigned int irq) -{ - struct irq_cfg *cfg = irq_cfg(irq); - unsigned vector, me; - - if (likely(!cfg->move_in_progress)) - return; - - vector = ~get_irq_regs()->orig_ax; - me = smp_processor_id(); - if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { - cpumask_t cleanup_mask; - - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } -} -#else -static inline void irq_complete_move(unsigned int irq) {} -#endif -#ifdef CONFIG_INTR_REMAP -static void ack_x2apic_level(unsigned int irq) -{ - ack_x2APIC_irq(); -} - -static void ack_x2apic_edge(unsigned int irq) -{ - ack_x2APIC_irq(); -} -#endif - -static void ack_apic_edge(unsigned int irq) -{ - irq_complete_move(irq); - move_native_irq(irq); - ack_APIC_irq(); -} - -#ifdef CONFIG_X86_64 -static void ack_apic_level(unsigned int irq) -{ - int do_unmask_irq = 0; - - irq_complete_move(irq); -#ifdef CONFIG_GENERIC_PENDING_IRQ - /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { - do_unmask_irq = 1; - mask_IO_APIC_irq(irq); - } -#endif - - /* - * We must acknowledge the irq before we move it or the acknowledge will - * not propagate properly. - */ - ack_APIC_irq(); - - /* Now we can move and renable the irq */ - if (unlikely(do_unmask_irq)) { - /* Only migrate the irq if the ack has been received. - * - * On rare occasions the broadcast level triggered ack gets - * delayed going to ioapics, and if we reprogram the - * vector while Remote IRR is still set the irq will never - * fire again. - * - * To prevent this scenario we read the Remote IRR bit - * of the ioapic. This has two effects. - * - On any sane system the read of the ioapic will - * flush writes (and acks) going to the ioapic from - * this cpu. - * - We get to see if the ACK has actually been delivered. - * - * Based on failed experiments of reprogramming the - * ioapic entry from outside of irq context starting - * with masking the ioapic entry and then polling until - * Remote IRR was clear before reprogramming the - * ioapic I don't trust the Remote IRR bit to be - * completey accurate. - * - * However there appears to be no other way to plug - * this race, so if the Remote IRR bit is not - * accurate and is causing problems then it is a hardware bug - * and you can go talk to the chipset vendor about it. - */ - if (!io_apic_level_ack_pending(irq)) - move_masked_irq(irq); - unmask_IO_APIC_irq(irq); - } -} -#else -atomic_t irq_mis_count; -static void ack_apic_level(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); - /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); - - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(irq); - __unmask_and_level_IO_APIC_irq(irq); - spin_unlock(&ioapic_lock); - } -} -#endif - -static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_apic_edge, - .eoi = ack_apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, -#ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, -#endif - .retrigger = ioapic_retrigger_irq, -}; -#endif - -static inline void init_IO_APIC_traps(void) -{ - int irq; - struct irq_desc *desc; - struct irq_cfg *cfg; - - /* - * NOTE! The local APIC isn't very good at handling - * multiple interrupts at the same interrupt level. - * As the interrupt level is determined by taking the - * vector number and shifting that right by 4, we - * want to spread these out a bit so that they don't - * all fall in the same interrupt level. - * - * Also, we've got to be careful not to trash gate - * 0x80, because int 0x80 is hm, kind of importantish. ;) - */ - for_each_irq_cfg(cfg) { - irq = cfg->irq; - if (IO_APIC_IRQ(irq) && !cfg->vector) { - /* - * Hmm.. We don't have an entry for this, - * so default to an old-fashioned 8259 - * interrupt if we can.. - */ - if (irq < 16) - make_8259A_irq(irq); - else { - desc = irq_to_desc(irq); - /* Strange. Oh, well.. */ - desc->chip = &no_irq_chip; - } - } - } -} - -/* - * The local APIC irq-chip implementation: - */ - -static void mask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); -} - -static void unmask_lapic_irq(unsigned int irq) -{ - unsigned long v; - - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); -} - -static void ack_lapic_irq (unsigned int irq) -{ - ack_APIC_irq(); -} - -static struct irq_chip lapic_chip __read_mostly = { - .name = "local-APIC", - .mask = mask_lapic_irq, - .unmask = unmask_lapic_irq, - .ack = ack_lapic_irq, -}; - -static void lapic_register_intr(int irq) -{ - struct irq_desc *desc; - - desc = irq_to_desc(irq); - desc->status &= ~IRQ_LEVEL; - set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, - "edge"); -} - -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - -/* - * This looks a bit hackish but it's about the only one way of sending - * a few INTA cycles to 8259As and any associated glue logic. ICR does - * not support the ExtINT mode, unfortunately. We need to send these - * cycles as some i82489DX-based boards have glue logic that keeps the - * 8259A interrupt line asserted until INTA. --macro - */ -static inline void __init unlock_ExtINT_logic(void) -{ - int apic, pin, i; - struct IO_APIC_route_entry entry0, entry1; - unsigned char save_control, save_freq_select; - - pin = find_isa_irq_pin(8, mp_INT); - if (pin == -1) { - WARN_ON_ONCE(1); - return; - } - apic = find_isa_irq_apic(8, mp_INT); - if (apic == -1) { - WARN_ON_ONCE(1); - return; - } - - entry0 = ioapic_read_entry(apic, pin); - clear_IO_APIC_pin(apic, pin); - - memset(&entry1, 0, sizeof(entry1)); - - entry1.dest_mode = 0; /* physical delivery */ - entry1.mask = 0; /* unmask IRQ now */ - entry1.dest = hard_smp_processor_id(); - entry1.delivery_mode = dest_ExtINT; - entry1.polarity = entry0.polarity; - entry1.trigger = 0; - entry1.vector = 0; - - ioapic_write_entry(apic, pin, entry1); - - save_control = CMOS_READ(RTC_CONTROL); - save_freq_select = CMOS_READ(RTC_FREQ_SELECT); - CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, - RTC_FREQ_SELECT); - CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); - - i = 100; - while (i-- > 0) { - mdelay(10); - if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) - i -= 10; - } - - CMOS_WRITE(save_control, RTC_CONTROL); - CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(apic, pin); - - ioapic_write_entry(apic, pin, entry0); -} - -static int disable_timer_pin_1 __initdata; -/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ -static int __init disable_timer_pin_setup(char *arg) -{ - disable_timer_pin_1 = 1; - return 0; -} -early_param("disable_timer_pin_1", disable_timer_pin_setup); - -int timer_through_8259 __initdata; - -/* - * This code may look a bit paranoid, but it's supposed to cooperate with - * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ - * is so screwy. Thanks to Brian Perkins for testing/hacking this beast - * fanatically on his truly buggy board. - * - * FIXME: really need to revamp this for all platforms. - */ -static inline void __init check_timer(void) -{ - struct irq_cfg *cfg = irq_cfg(0); - int apic1, pin1, apic2, pin2; - unsigned long flags; - unsigned int ver; - int no_pin1 = 0; - - local_irq_save(flags); - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - - /* - * get/set the timer IRQ vector: - */ - disable_8259A_irq(0); - assign_irq_vector(0, TARGET_CPUS); - - /* - * As IRQ0 is to be enabled in the 8259A, the virtual - * wire has to be disabled in the local APIC. Also - * timer interrupts need to be acknowledged manually in - * the 8259A for the i82489DX when using the NMI - * watchdog as that APIC treats NMIs as level-triggered. - * The AEOI mode will finish them in the 8259A - * automatically. - */ - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); -#ifdef CONFIG_X86_32 - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); -#endif - - pin1 = find_isa_irq_pin(0, mp_INT); - apic1 = find_isa_irq_apic(0, mp_INT); - pin2 = ioapic_i8259.pin; - apic2 = ioapic_i8259.apic; - - apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " - "apic1=%d pin1=%d apic2=%d pin2=%d\n", - cfg->vector, apic1, pin1, apic2, pin2); - - /* - * Some BIOS writers are clueless and report the ExtINTA - * I/O APIC input from the cascaded 8259A as the timer - * interrupt input. So just in case, if only one pin - * was found above, try it both directly and through the - * 8259A. - */ - if (pin1 == -1) { -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); -#endif - pin1 = pin2; - apic1 = apic2; - no_pin1 = 1; - } else if (pin2 == -1) { - pin2 = pin1; - apic2 = apic1; - } - - if (pin1 != -1) { - /* - * Ok, does IRQ0 through the IOAPIC work? - */ - if (no_pin1) { - add_pin_to_irq(0, apic1, pin1); - setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); - } - unmask_IO_APIC_irq(0); - if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - enable_8259A_irq(0); - } - if (disable_timer_pin_1 > 0) - clear_IO_APIC_pin(0, pin1); - goto out; - } -#ifdef CONFIG_INTR_REMAP - if (intr_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); -#endif - clear_IO_APIC_pin(apic1, pin1); - if (!no_pin1) - apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " - "8254 timer not connected to IO-APIC\n"); - - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " - "(IRQ0) through the 8259A ...\n"); - apic_printk(APIC_QUIET, KERN_INFO - "..... (found apic %d pin %d) ...\n", apic2, pin2); - /* - * legacy devices should be connected to IO APIC #0 - */ - replace_pin_at_irq(0, apic1, pin1, apic2, pin2); - setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - unmask_IO_APIC_irq(0); - enable_8259A_irq(0); - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); - timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); - setup_nmi(); - enable_8259A_irq(0); - } - goto out; - } - /* - * Cleanup, just in case ... - */ - disable_8259A_irq(0); - clear_IO_APIC_pin(apic2, pin2); - apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); - } - - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as Virtual Wire IRQ...\n"); - - lapic_register_intr(0); - apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ - enable_8259A_irq(0); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - disable_8259A_irq(0); - apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); - apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); - - apic_printk(APIC_QUIET, KERN_INFO - "...trying to set up timer as ExtINT IRQ...\n"); - - init_8259A(0); - make_8259A_irq(0); - apic_write(APIC_LVT0, APIC_DM_EXTINT); - - unlock_ExtINT_logic(); - - if (timer_irq_works()) { - apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); - goto out; - } - apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); - panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " - "report. Then try booting with the 'noapic' option.\n"); -out: - local_irq_restore(flags); -} - -/* - * Traditionally ISA IRQ2 is the cascade IRQ, and is not available - * to devices. However there may be an I/O APIC pin available for - * this interrupt regardless. The pin may be left unconnected, but - * typically it will be reused as an ExtINT cascade interrupt for - * the master 8259A. In the MPS case such a pin will normally be - * reported as an ExtINT interrupt in the MP table. With ACPI - * there is no provision for ExtINT interrupts, and in the absence - * of an override it would be treated as an ordinary ISA I/O APIC - * interrupt, that is edge-triggered and unmasked by default. We - * used to do this, but it caused problems on some systems because - * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using - * the same ExtINT cascade interrupt to drive the local APIC of the - * bootstrap processor. Therefore we refrain from routing IRQ2 to - * the I/O APIC in all cases now. No actual device should request - * it anyway. --macro - */ -#define PIC_IRQS (1 << PIC_CASCADE_IR) - -void __init setup_IO_APIC(void) -{ - -#ifdef CONFIG_X86_32 - enable_IO_APIC(); -#else - /* - * calling enable_IO_APIC() is moved to setup_local_APIC for BP - */ -#endif - - io_apic_irqs = ~PIC_IRQS; - - apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - /* - * Set up IO-APIC IRQ routing. - */ -#ifdef CONFIG_X86_32 - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); -#endif - sync_Arb_IDs(); - setup_IO_APIC_irqs(); - init_IO_APIC_traps(); - check_timer(); -} - -/* - * Called after all the initialization is done. If we didnt find any - * APIC bugs then we can allow the modify fast path - */ - -static int __init io_apic_bug_finalize(void) -{ - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; -} - -late_initcall(io_apic_bug_finalize); - -struct sysfs_ioapic_data { - struct sys_device dev; - struct IO_APIC_route_entry entry[0]; -}; -static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; - -static int ioapic_suspend(struct sys_device *dev, pm_message_t state) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) - *entry = ioapic_read_entry(dev->id, i); - - return 0; -} - -static int ioapic_resume(struct sys_device *dev) -{ - struct IO_APIC_route_entry *entry; - struct sysfs_ioapic_data *data; - unsigned long flags; - union IO_APIC_reg_00 reg_00; - int i; - - data = container_of(dev, struct sysfs_ioapic_data, dev); - entry = data->entry; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(dev->id, 0); - if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { - reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; - io_apic_write(dev->id, 0, reg_00.raw); - } - spin_unlock_irqrestore(&ioapic_lock, flags); - for (i = 0; i < nr_ioapic_registers[dev->id]; i++) - ioapic_write_entry(dev->id, i, entry[i]); - - return 0; -} - -static struct sysdev_class ioapic_sysdev_class = { - .name = "ioapic", - .suspend = ioapic_suspend, - .resume = ioapic_resume, -}; - -static int __init ioapic_init_sysfs(void) -{ - struct sys_device * dev; - int i, size, error; - - error = sysdev_class_register(&ioapic_sysdev_class); - if (error) - return error; - - for (i = 0; i < nr_ioapics; i++ ) { - size = sizeof(struct sys_device) + nr_ioapic_registers[i] - * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); - if (!mp_ioapic_data[i]) { - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - dev = &mp_ioapic_data[i]->dev; - dev->id = i; - dev->cls = &ioapic_sysdev_class; - error = sysdev_register(dev); - if (error) { - kfree(mp_ioapic_data[i]); - mp_ioapic_data[i] = NULL; - printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); - continue; - } - } - - return 0; -} - -device_initcall(ioapic_init_sysfs); - -/* - * Dynamic irq allocate and deallocation - */ -unsigned int create_irq_nr(unsigned int irq_want) -{ - /* Allocate an unused irq */ - unsigned int irq; - unsigned int new; - unsigned long flags; - struct irq_cfg *cfg_new; - -#ifndef CONFIG_HAVE_SPARSE_IRQ - irq_want = nr_irqs - 1; -#endif - - irq = 0; - spin_lock_irqsave(&vector_lock, flags); - for (new = irq_want; new > 0; new--) { - if (platform_legacy_irq(new)) - continue; - cfg_new = irq_cfg(new); - if (cfg_new && cfg_new->vector != 0) - continue; - /* check if need to create one */ - if (!cfg_new) - cfg_new = irq_cfg_alloc(new); - if (__assign_irq_vector(new, TARGET_CPUS) == 0) - irq = new; - break; - } - spin_unlock_irqrestore(&vector_lock, flags); - - if (irq > 0) { - dynamic_irq_init(irq); - } - return irq; -} - -int create_irq(void) -{ - int irq; - - irq = create_irq_nr(nr_irqs - 1); - - if (irq == 0) - irq = -1; - - return irq; -} - -void destroy_irq(unsigned int irq) -{ - unsigned long flags; - - dynamic_irq_cleanup(irq); - -#ifdef CONFIG_INTR_REMAP - free_irte(irq); -#endif - spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq); - spin_unlock_irqrestore(&vector_lock, flags); -} - -/* - * MSI message composition - */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) -{ - struct irq_cfg *cfg; - int err; - unsigned dest; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (err) - return err; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irte irte; - int ir_index; - u16 sub_handle; - - ir_index = map_irq_to_irte_handle(irq, &sub_handle); - BUG_ON(ir_index == -1); - - memset (&irte, 0, sizeof(irte)); - - irte.present = 1; - irte.dst_mode = INT_DEST_MODE; - irte.trigger_mode = 0; /* edge */ - irte.dlvry_mode = INT_DELIVERY_MODE; - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - modify_irte(irq, &irte); - - msg->address_hi = MSI_ADDR_BASE_HI; - msg->data = sub_handle; - msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | - MSI_ADDR_IR_SHV | - MSI_ADDR_IR_INDEX1(ir_index) | - MSI_ADDR_IR_INDEX2(ir_index); - } else -#endif - { - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = - MSI_ADDR_BASE_LO | - ((INT_DEST_MODE == 0) ? - MSI_ADDR_DEST_MODE_PHYSICAL: - MSI_ADDR_DEST_MODE_LOGICAL) | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_ADDR_REDIRECTION_CPU: - MSI_ADDR_REDIRECTION_LOWPRI) | - MSI_ADDR_DEST_ID(dest); - - msg->data = - MSI_DATA_TRIGGER_EDGE | - MSI_DATA_LEVEL_ASSERT | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - MSI_DATA_DELIVERY_FIXED: - MSI_DATA_DELIVERY_LOWPRI) | - MSI_DATA_VECTOR(cfg->vector); - } - return err; -} - -#ifdef CONFIG_SMP -static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - read_msi_msg(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - write_msi_msg(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} - -#ifdef CONFIG_INTR_REMAP -/* - * Migrate the MSI irq to another cpumask. This migration is - * done in the process context using interrupt-remapping hardware. - */ -static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp, cleanup_mask; - struct irte irte; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (get_irte(irq, &irte)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - irte.vector = cfg->vector; - irte.dest_id = IRTE_DEST(dest); - - /* - * atomically update the IRTE with the new destination and vector. - */ - modify_irte(irq, &irte); - - /* - * After this point, all the interrupts will start arriving - * at the new destination. So, time to cleanup the previous - * vector allocation. - */ - if (cfg->move_in_progress) { - cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); - cfg->move_cleanup_count = cpus_weight(cleanup_mask); - send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); - cfg->move_in_progress = 0; - } - - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif -#endif /* CONFIG_SMP */ - -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. - */ -static struct irq_chip msi_chip = { - .name = "PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -#ifdef CONFIG_INTR_REMAP -static struct irq_chip msi_ir_chip = { - .name = "IR-PCI-MSI", - .unmask = unmask_msi_irq, - .mask = mask_msi_irq, - .ack = ack_x2apic_edge, -#ifdef CONFIG_SMP - .set_affinity = ir_set_msi_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -/* - * Map the PCI dev to the corresponding remapping hardware unit - * and allocate 'nvec' consecutive interrupt-remapping table entries - * in it. - */ -static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) -{ - struct intel_iommu *iommu; - int index; - - iommu = map_dev_to_ir(dev); - if (!iommu) { - printk(KERN_ERR - "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; - } - - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } - return index; -} -#endif - -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(dev, irq, &msg); - if (ret < 0) - return ret; - - set_irq_msi(irq, desc); - write_msi_msg(irq, &msg); - -#ifdef CONFIG_INTR_REMAP - if (irq_remapped(irq)) { - struct irq_desc *desc = irq_to_desc(irq); - /* - * irq migration in process context - */ - desc->status |= IRQ_MOVE_PCNTXT; - set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); - } else -#endif - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); - - return 0; -} - -static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) -{ - unsigned int irq; - - irq = dev->bus->number; - irq <<= 8; - irq |= dev->devfn; - irq <<= 12; - - return irq; -} - -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) -{ - unsigned int irq; - int ret; - unsigned int irq_want; - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - - irq = create_irq_nr(irq_want); - if (irq == 0) - return -1; - -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - ret = msi_alloc_irte(dev, irq, 1); - if (ret < 0) - goto error; -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) { - destroy_irq(irq); - return ret; - } - return 0; - -#ifdef CONFIG_INTR_REMAP -error: - destroy_irq(irq); - return ret; -#endif -} - -int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - unsigned int irq; - int ret, sub_handle; - struct msi_desc *desc; - unsigned int irq_want; - -#ifdef CONFIG_INTR_REMAP - struct intel_iommu *iommu = 0; - int index = 0; -#endif - - irq_want = build_irq_for_pci_dev(dev) + 0x100; - sub_handle = 0; - list_for_each_entry(desc, &dev->msi_list, list) { - irq = create_irq_nr(irq_want--); - if (irq == 0) - return -1; -#ifdef CONFIG_INTR_REMAP - if (!intr_remapping_enabled) - goto no_ir; - - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_irte(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - iommu = map_dev_to_ir(dev); - if (!iommu) { - ret = -ENOENT; - goto error; - } - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); - } -no_ir: -#endif - ret = setup_msi_irq(dev, desc, irq); - if (ret < 0) - goto error; - sub_handle++; - } - return 0; - -error: - destroy_irq(irq); - return ret; -} - -void arch_teardown_msi_irq(unsigned int irq) -{ - destroy_irq(irq); -} - -#ifdef CONFIG_DMAR -#ifdef CONFIG_SMP -static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - struct msi_msg msg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - dmar_msi_read(irq, &msg); - - msg.data &= ~MSI_DATA_VECTOR_MASK; - msg.data |= MSI_DATA_VECTOR(cfg->vector); - msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; - msg.address_lo |= MSI_ADDR_DEST_ID(dest); - - dmar_msi_write(irq, &msg); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif /* CONFIG_SMP */ - -struct irq_chip dmar_msi_type = { - .name = "DMAR_MSI", - .unmask = dmar_msi_unmask, - .mask = dmar_msi_mask, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = dmar_msi_set_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_dmar_msi(unsigned int irq) -{ - int ret; - struct msi_msg msg; - - ret = msi_compose_msg(NULL, irq, &msg); - if (ret < 0) - return ret; - dmar_msi_write(irq, &msg); - set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); - return 0; -} -#endif - -#endif /* CONFIG_PCI_MSI */ -/* - * Hypertransport interrupt support - */ -#ifdef CONFIG_HT_IRQ - -#ifdef CONFIG_SMP - -static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) -{ - struct ht_irq_msg msg; - fetch_ht_irq_msg(irq, &msg); - - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - - msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - - write_ht_irq_msg(irq, &msg); -} - -static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) -{ - struct irq_cfg *cfg; - unsigned int dest; - cpumask_t tmp; - struct irq_desc *desc; - - cpus_and(tmp, mask, cpu_online_map); - if (cpus_empty(tmp)) - return; - - if (assign_irq_vector(irq, mask)) - return; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, mask); - dest = cpu_mask_to_apicid(tmp); - - target_ht_irq(irq, dest, cfg->vector); - desc = irq_to_desc(irq); - desc->affinity = mask; -} -#endif - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .mask = mask_ht_irq, - .unmask = unmask_ht_irq, - .ack = ack_apic_edge, -#ifdef CONFIG_SMP - .set_affinity = set_ht_irq_affinity, -#endif - .retrigger = ioapic_retrigger_irq, -}; - -int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) -{ - struct irq_cfg *cfg; - int err; - cpumask_t tmp; - - tmp = TARGET_CPUS; - err = assign_irq_vector(irq, tmp); - if (!err) { - struct ht_irq_msg msg; - unsigned dest; - - cfg = irq_cfg(irq); - cpus_and(tmp, cfg->domain, tmp); - dest = cpu_mask_to_apicid(tmp); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(dest) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((INT_DEST_MODE == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((INT_DELIVERY_MODE != dest_LowestPrio) ? - HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - - write_ht_irq_msg(irq, &msg); - - set_irq_chip_and_handler_name(irq, &ht_irq_chip, - handle_edge_irq, "edge"); - } - return err; -} -#endif /* CONFIG_HT_IRQ */ - -/* -------------------------------------------------------------------------- - ACPI-based IOAPIC Configuration - -------------------------------------------------------------------------- */ - -#ifdef CONFIG_ACPI - -#ifdef CONFIG_X86_32 -int __init io_apic_get_unique_id(int ioapic, int apic_id) -{ - union IO_APIC_reg_00 reg_00; - static physid_mask_t apic_id_map = PHYSID_MASK_NONE; - physid_mask_t tmp; - unsigned long flags; - int i = 0; - - /* - * The P4 platform supports up to 256 APIC IDs on two separate APIC - * buses (one for LAPICs, one for IOAPICs), where predecessors only - * supports up to 16 on one shared APIC bus. - * - * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full - * advantage of new APIC bus architecture. - */ - - if (physids_empty(apic_id_map)) - apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); - - spin_lock_irqsave(&ioapic_lock, flags); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - if (apic_id >= get_physical_broadcast()) { - printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " - "%d\n", ioapic, apic_id, reg_00.bits.ID); - apic_id = reg_00.bits.ID; - } - - /* - * Every APIC in a system must have a unique ID or we get lots of nice - * 'stuck on smp_invalidate_needed IPI wait' messages. - */ - if (check_apicid_used(apic_id_map, apic_id)) { - - for (i = 0; i < get_physical_broadcast(); i++) { - if (!check_apicid_used(apic_id_map, i)) - break; - } - - if (i == get_physical_broadcast()) - panic("Max apic_id exceeded!\n"); - - printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " - "trying %d\n", ioapic, apic_id, i); - - apic_id = i; - } - - tmp = apicid_to_cpu_present(apic_id); - physids_or(apic_id_map, apic_id_map, tmp); - - if (reg_00.bits.ID != apic_id) { - reg_00.bits.ID = apic_id; - - spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(ioapic, 0, reg_00.raw); - reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); - - /* Sanity check */ - if (reg_00.bits.ID != apic_id) { - printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); - return -1; - } - } - - apic_printk(APIC_VERBOSE, KERN_INFO - "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); - - return apic_id; -} - -int __init io_apic_get_version(int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.version; -} -#endif - -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - -int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) -{ - if (!IO_APIC_IRQ(irq)) { - apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); - return -EINVAL; - } - - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= 16) - add_pin_to_irq(irq, ioapic, pin); - - setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); - - return 0; -} - - -int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) -{ - int i; - - if (skip_ioapic_setup) - return -1; - - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].mp_irqtype == mp_INT && - mp_irqs[i].mp_srcbusirq == bus_irq) - break; - if (i >= mp_irq_entries) - return -1; - - *trigger = irq_trigger(i); - *polarity = irq_polarity(i); - return 0; -} - -#endif /* CONFIG_ACPI */ - -/* - * This function currently is only a helper for the i386 smp boot process where - * we need to reprogram the ioredtbls to cater for the cpus which have come online - * so mask in all cases should simply be TARGET_CPUS - */ -#ifdef CONFIG_SMP -void __init setup_ioapic_dest(void) -{ - int pin, ioapic, irq, irq_entry; - struct irq_cfg *cfg; - - if (skip_ioapic_setup == 1) - return; - - for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { - for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { - irq_entry = find_irq_entry(ioapic, pin, mp_INT); - if (irq_entry == -1) - continue; - irq = pin_2_irq(irq_entry, ioapic, pin); - - /* setup_IO_APIC_irqs could fail to get vector for some device - * when you have too many devices, because at that time only boot - * cpu is online. - */ - cfg = irq_cfg(irq); - if (!cfg->vector) - setup_IO_APIC_irq(ioapic, pin, irq, - irq_trigger(irq_entry), - irq_polarity(irq_entry)); -#ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif - else - set_ioapic_affinity_irq(irq, TARGET_CPUS); - } - - } -} -#endif - -#ifdef CONFIG_X86_64 -#define IOAPIC_RESOURCE_NAME_SIZE 11 - -static struct resource *ioapic_resources; - -static struct resource * __init ioapic_setup_resources(void) -{ - unsigned long n; - struct resource *res; - char *mem; - int i; - - if (nr_ioapics <= 0) - return NULL; - - n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); - n *= nr_ioapics; - - mem = alloc_bootmem(n); - res = (void *)mem; - - if (mem != NULL) { - mem += sizeof(struct resource) * nr_ioapics; - - for (i = 0; i < nr_ioapics; i++) { - res[i].name = mem; - res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); - mem += IOAPIC_RESOURCE_NAME_SIZE; - } - } - - ioapic_resources = res; - - return res; -} -#endif - -void __init ioapic_init_mappings(void) -{ - unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; -#ifdef CONFIG_X86_64 - struct resource *ioapic_res; - - ioapic_res = ioapic_setup_resources(); -#endif - for (i = 0; i < nr_ioapics; i++) { - if (smp_found_config) { - ioapic_phys = mp_ioapics[i].mp_apicaddr; -#ifdef CONFIG_X86_32 - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } -#endif - } else { -#ifdef CONFIG_X86_32 -fake_ioapic_page: -#endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); - ioapic_phys = __pa(ioapic_phys); - } - set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); - idx++; - -#ifdef CONFIG_X86_64 - if (ioapic_res != NULL) { - ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; - ioapic_res++; - } -#endif - } -} - -#ifdef CONFIG_X86_64 -static int __init ioapic_insert_resources(void) -{ - int i; - struct resource *r = ioapic_resources; - - if (!r) { - printk(KERN_ERR - "IO APIC resources could be not be allocated.\n"); - return -1; - } - - for (i = 0; i < nr_ioapics; i++) { - insert_resource(&iomem_resource, r); - r++; - } - - return 0; -} - -/* Insert the IO APIC resources after PCI initialization has occured to handle - * IO APICS that are mapped in on a BAR in PCI space. */ -late_initcall(ioapic_insert_resources); -#endif -- cgit v1.2.3 From c691cc84529ec88ccb32b174535bb61875888c90 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:43 -0700 Subject: io_apic: make 32 bit have io_apic resource in /proc/iomem Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index fba6d6ee348..7e303e0967a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3802,7 +3802,6 @@ void __init setup_ioapic_dest(void) } #endif -#ifdef CONFIG_X86_64 #define IOAPIC_RESOURCE_NAME_SIZE 11 static struct resource *ioapic_resources; @@ -3838,17 +3837,14 @@ static struct resource * __init ioapic_setup_resources(void) return res; } -#endif void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; int i; -#ifdef CONFIG_X86_64 struct resource *ioapic_res; ioapic_res = ioapic_setup_resources(); -#endif for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; @@ -3877,17 +3873,14 @@ fake_ioapic_page: __fix_to_virt(idx), ioapic_phys); idx++; -#ifdef CONFIG_X86_64 if (ioapic_res != NULL) { ioapic_res->start = ioapic_phys; ioapic_res->end = ioapic_phys + (4 * 1024) - 1; ioapic_res++; } -#endif } } -#ifdef CONFIG_X86_64 static int __init ioapic_insert_resources(void) { int i; @@ -3910,4 +3903,3 @@ static int __init ioapic_insert_resources(void) /* Insert the IO APIC resources after PCI initialization has occured to handle * IO APICS that are mapped in on a BAR in PCI space. */ late_initcall(ioapic_insert_resources); -#endif -- cgit v1.2.3 From 4e738e2f307113feaedebae147c3e0d072e39648 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:47 -0700 Subject: x86: unify mask_IO_APIC_irq use MACRO for 32 bit too Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 80 +++++++++++++---------------------------------- 1 file changed, 21 insertions(+), 59 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7e303e0967a..099696109ca 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -610,18 +610,7 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } -#ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - struct io_apic __iomem *io_apic = io_apic_base(apic); - readl(&io_apic->data); -} - -#define __DO_ACTION(R, ACTION, FINAL) \ +#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ \ { \ int pin; \ @@ -636,7 +625,8 @@ static inline void io_apic_sync(unsigned int apic) break; \ pin = entry->pin; \ reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ + reg ACTION_DISABLE; \ + reg ACTION_ENABLE; \ io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ FINAL; \ if (!entry->next) \ @@ -645,66 +635,38 @@ static inline void io_apic_sync(unsigned int apic) } \ } -#define DO_ACTION(name,R,ACTION, FINAL) \ +#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ \ static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION, FINAL) - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic)) + __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) /* mask = 0 */ -DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, ) +DO_ACTION(__unmask, 0, |= 0, &= ~IO_APIC_REDIR_MASKED, ) -#else - -static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable) +#ifdef CONFIG_X86_64 +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) { - struct irq_cfg *cfg; - struct irq_pin_list *entry; - unsigned int pin, reg; - - cfg = irq_cfg(irq); - entry = cfg->irq_2_pin; - for (;;) { - if (!entry) - break; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin*2); - reg &= ~disable; - reg |= enable; - io_apic_modify(entry->apic, 0x10 + pin*2, reg); - if (!entry->next) - break; - entry = entry->next; - } + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); } /* mask = 1 */ -static void __mask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0); -} +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic)) -/* mask = 0 */ -static void __unmask_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED); -} +#else + +/* mask = 1 */ +DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, ) /* mask = 1, trigger = 0 */ -static void __mask_and_edge_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER); -} +DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, ) /* mask = 0, trigger = 1 */ -static void __unmask_and_level_IO_APIC_irq(unsigned int irq) -{ - __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED); -} +DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, ) #endif -- cgit v1.2.3 From 3eb2cce84beae8fd41de950569cafd5bca7edd5d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:48 -0700 Subject: x86: unify ack_apic_edge use code in 64 to replace move_native_irq(irq, desc); in 32 bit Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 73 +++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 099696109ca..a466b04ad5a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -389,7 +389,6 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned writel(value, &io_apic->data); } -#ifdef CONFIG_X86_64 static bool io_apic_level_ack_pending(unsigned int irq) { struct irq_pin_list *entry; @@ -419,7 +418,6 @@ static bool io_apic_level_ack_pending(unsigned int irq) return false; } -#endif union entry_union { struct { u32 w1, w2; }; @@ -2398,9 +2396,16 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } -#ifdef CONFIG_X86_64 +#ifdef CONFIG_X86_32 +atomic_t irq_mis_count; +#endif + static void ack_apic_level(unsigned int irq) { +#ifdef CONFIG_X86_32 + unsigned long v; + int i; +#endif int do_unmask_irq = 0; irq_complete_move(irq); @@ -2412,6 +2417,31 @@ static void ack_apic_level(unsigned int irq) } #endif +#ifdef CONFIG_X86_32 + /* + * It appears there is an erratum which affects at least version 0x11 + * of I/O APIC (that's the 82093AA and cores integrated into various + * chipsets). Under certain conditions a level-triggered interrupt is + * erroneously delivered as edge-triggered one but the respective IRR + * bit gets set nevertheless. As a result the I/O unit expects an EOI + * message but it will never arrive and further interrupts are blocked + * from the source. The exact reason is so far unknown, but the + * phenomenon was observed when two consecutive interrupt requests + * from a given source get delivered to the same CPU and the source is + * temporarily disabled in between. + * + * A workaround is to simulate an EOI message manually. We achieve it + * by setting the trigger mode to edge and then to level when the edge + * trigger mode gets detected in the TMR of a local APIC for a + * level-triggered interrupt. We mask the source for the time of the + * operation to prevent an edge-triggered interrupt escaping meanwhile. + * The idea is from Manfred Spraul. --macro + */ + i = irq_cfg(irq)->vector; + + v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); +#endif + /* * We must acknowledge the irq before we move it or the acknowledge will * not propagate properly. @@ -2450,41 +2480,8 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq(irq); } -} -#else -atomic_t irq_mis_count; -static void ack_apic_level(unsigned int irq) -{ - unsigned long v; - int i; - - irq_complete_move(irq); - move_native_irq(irq); - /* - * It appears there is an erratum which affects at least version 0x11 - * of I/O APIC (that's the 82093AA and cores integrated into various - * chipsets). Under certain conditions a level-triggered interrupt is - * erroneously delivered as edge-triggered one but the respective IRR - * bit gets set nevertheless. As a result the I/O unit expects an EOI - * message but it will never arrive and further interrupts are blocked - * from the source. The exact reason is so far unknown, but the - * phenomenon was observed when two consecutive interrupt requests - * from a given source get delivered to the same CPU and the source is - * temporarily disabled in between. - * - * A workaround is to simulate an EOI message manually. We achieve it - * by setting the trigger mode to edge and then to level when the edge - * trigger mode gets detected in the TMR of a local APIC for a - * level-triggered interrupt. We mask the source for the time of the - * operation to prevent an edge-triggered interrupt escaping meanwhile. - * The idea is from Manfred Spraul. --macro - */ - i = irq_cfg(irq)->vector; - - v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); - - ack_APIC_irq(); +#ifdef CONFIG_X86_32 if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); spin_lock(&ioapic_lock); @@ -2492,8 +2489,8 @@ static void ack_apic_level(unsigned int irq) __unmask_and_level_IO_APIC_irq(irq); spin_unlock(&ioapic_lock); } -} #endif +} static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", -- cgit v1.2.3 From 29ccbbf232c035b8c7ff0c5060fbe30a66ed9b99 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:49 -0700 Subject: x86: remove first_free_entry/pin_map_size no user now Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 7 ------- arch/x86/kernel/setup.c | 4 ---- 2 files changed, 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index a466b04ad5a..5de2d38812a 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -72,13 +72,6 @@ int sis_apic_bug = -1; static DEFINE_SPINLOCK(ioapic_lock); static DEFINE_SPINLOCK(vector_lock); -int first_free_entry; -/* - * Rough estimation of how many shared IRQs there are, can - * be changed anytime. - */ -int pin_map_size; - /* * # of IRQ routing registers */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 02e3a669797..d90c659b70e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1074,10 +1074,6 @@ void __init setup_arch(char **cmdline_p) nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif -#ifdef CONFIG_X86_IO_APIC - pin_map_size = nr_irqs * 2; - first_free_entry = nr_irqs; -#endif init_apic_mappings(); ioapic_init_mappings(); -- cgit v1.2.3 From ffd5aae7817fba22c5c3e304a31c44fa0a4e9a97 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:50 -0700 Subject: x86: print local APIC of APs one by one instead of print that of all APs at the time Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 5de2d38812a..bf0e66d7303 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1777,7 +1777,12 @@ __apicdebuginit(void) print_local_APIC(void *dummy) __apicdebuginit(void) print_all_local_APICs(void) { - on_each_cpu(print_local_APIC, NULL, 1); + int cpu; + + preempt_disable(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, print_local_APIC, NULL, 1); + preempt_enable(); } __apicdebuginit(void) print_PIC(void) -- cgit v1.2.3 From 8f09cd20a24c5f13c571bc73ddcd47be0af3b70f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:51 -0700 Subject: x86: make HAVE_SPARSE_IRQ support selectable Ingo said sparse_irq is some intrusive. need to make it selectable to make it simple, remove irq_desc as parameter in some functions. (ack, eoi, set_affinity). may need to make member if irq_chip to take irq_desc, or struct irq later. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 12 +++++++- arch/x86/kernel/io_apic.c | 71 ++++++++++++++++++++++++++++++++--------------- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 4 files changed, 62 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b157e94637b..600584b7a49 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -34,7 +34,6 @@ config X86 select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_DYN_ARRAY - select HAVE_SPARSE_IRQ config ARCH_DEFCONFIG string @@ -238,6 +237,17 @@ config SMP If you don't know what to do here, say N. +config HAVE_SPARSE_IRQ + bool "Support sparse irq numbering" + depends on PCI_MSI || HT_IRQ + default y + help + This enables support for sparse irq, esp for msi/msi-x. the irq + number will be bus/dev/fn + 12bit. You may need if you have lots of + cards supports msi-x installed. + + If you don't know what to do here, say Y. + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index bf0e66d7303..f853b667fa5 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -107,7 +107,9 @@ struct irq_cfg; struct irq_pin_list; struct irq_cfg { unsigned int irq; +#ifdef CONFIG_HAVE_SPARSE_IRQ struct irq_cfg *next; +#endif struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; @@ -137,20 +139,6 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = { }; static struct irq_cfg irq_cfg_init = { .irq = -1U, }; -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); static void init_one_irq_cfg(struct irq_cfg *cfg) { @@ -158,7 +146,9 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) } static struct irq_cfg *irq_cfgx; +#ifdef CONFIG_HAVE_SPARSE_IRQ static struct irq_cfg *irq_cfgx_free; +#endif static void __init init_work(void *data) { struct dyn_array *da = data; @@ -174,15 +164,34 @@ static void __init init_work(void *data) for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); +#ifdef CONFIG_HAVE_SPARSE_IRQ for (i = 1; i < *da->nr; i++) cfg[i-1].next = &cfg[i]; irq_cfgx_free = &irq_cfgx[legacy_count]; irq_cfgx[legacy_count - 1].next = NULL; +#endif +} + +#ifdef CONFIG_HAVE_SPARSE_IRQ +/* need to be biger than size of irq_cfg_legacy */ +static int nr_irq_cfg = 32; + +static int __init parse_nr_irq_cfg(char *arg) +{ + if (arg) { + nr_irq_cfg = simple_strtoul(arg, NULL, 0); + if (nr_irq_cfg < 32) + nr_irq_cfg = 32; + } + return 0; } -#define for_each_irq_cfg(cfg) \ - for (cfg = irq_cfgx; cfg; cfg = cfg->next) +early_param("nr_irq_cfg", parse_nr_irq_cfg); + +#define for_each_irq_cfg(irqX, cfg) \ + for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U) + DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); @@ -273,7 +282,26 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) #endif return cfg; } +#else + +#define for_each_irq_cfg(irq, cfg) \ + for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq]) + +DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); +struct irq_cfg *irq_cfg(unsigned int irq) +{ + if (irq < nr_irqs) + return &irq_cfgx[irq]; + + return NULL; +} +struct irq_cfg *irq_cfg_alloc(unsigned int irq) +{ + return irq_cfg(irq); +} + +#endif /* * This is performance-critical, we want to do it O(1) * @@ -1282,11 +1310,10 @@ void __setup_vector_irq(int cpu) struct irq_cfg *cfg; /* Mark the inuse vectors */ - for_each_irq_cfg(cfg) { + for_each_irq_cfg(irq, cfg) { if (!cpu_isset(cpu, cfg->domain)) continue; vector = cfg->vector; - irq = cfg->irq; per_cpu(vector_irq, cpu)[vector] = irq; } /* Mark the free vectors */ @@ -1563,6 +1590,7 @@ __apicdebuginit(void) print_IO_APIC(void) union IO_APIC_reg_03 reg_03; unsigned long flags; struct irq_cfg *cfg; + unsigned int irq; if (apic_verbosity == APIC_QUIET) return; @@ -1651,11 +1679,11 @@ __apicdebuginit(void) print_IO_APIC(void) } } printk(KERN_DEBUG "IRQ to pin mappings:\n"); - for_each_irq_cfg(cfg) { + for_each_irq_cfg(irq, cfg) { struct irq_pin_list *entry = cfg->irq_2_pin; if (!entry) continue; - printk(KERN_DEBUG "IRQ%d ", cfg->irq); + printk(KERN_DEBUG "IRQ%d ", irq); for (;;) { printk("-> %d:%d", entry->apic, entry->pin); if (!entry->next) @@ -2535,8 +2563,7 @@ static inline void init_IO_APIC_traps(void) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - for_each_irq_cfg(cfg) { - irq = cfg->irq; + for_each_irq_cfg(irq, cfg) { if (IO_APIC_IRQ(irq) && !cfg->vector) { /* * Hmm.. We don't have an entry for this, diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index cc929f2f84f..b2e1082cf5a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -269,7 +269,7 @@ int show_interrupts(struct seq_file *p, void *v) struct irqaction * action; unsigned long flags; unsigned int entries; - struct irq_desc *desc; + struct irq_desc *desc = NULL; int tail = 0; #ifdef CONFIG_HAVE_SPARSE_IRQ diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 348a11168c2..c2fca89a3f7 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -74,7 +74,7 @@ int show_interrupts(struct seq_file *p, void *v) struct irqaction * action; unsigned long flags; unsigned int entries; - struct irq_desc *desc; + struct irq_desc *desc = NULL; int tail = 0; #ifdef CONFIG_HAVE_SPARSE_IRQ -- cgit v1.2.3 From 9d6a4d0823b3b8e29156f5e698b5a68687afad32 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 19 Aug 2008 20:50:52 -0700 Subject: x86: probe nr_irqs even only mptable is used for !CONFIG_HAVE_SPARSE_IRQ fix: In file included from arch/x86/kernel/early-quirks.c:18: include/asm/io_apic.h: In function 'probe_nr_irqs': include/asm/io_apic.h:209: error: 'NR_IRQS' undeclared (first use in this function) include/asm/io_apic.h:209: error: (Each undeclared identifier is reported only once include/asm/io_apic.h:209: error: for each function it appears in.) v2: fix by Ingo Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 25 ------------------------- arch/x86/kernel/io_apic.c | 43 ++++++++++++++++++++++++++++++------------- arch/x86/kernel/setup.c | 6 +++--- 3 files changed, 33 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 3e9d163fd92..5fef4fece4a 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -957,29 +957,6 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } -int get_nr_irqs_via_madt(void) -{ - int idx; - int nr = 0; - - for (idx = 0; idx < nr_ioapics; idx++) { - if (mp_ioapic_routing[idx].gsi_end > nr) - nr = mp_ioapic_routing[idx].gsi_end; - } - - nr++; - - /* double it for hotplug and msi and nmi */ - nr <<= 1; - - /* something wrong ? */ - if (nr < 32) - nr = 32; - - return nr; - -} - static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { @@ -1278,8 +1255,6 @@ static int __init acpi_parse_madt_ioapic_entries(void) } - nr_irqs = get_nr_irqs_via_madt(); - count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, nr_irqs); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f853b667fa5..f7e80262cbb 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3596,6 +3596,36 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ +int __init io_apic_get_redir_entries (int ioapic) +{ + union IO_APIC_reg_01 reg_01; + unsigned long flags; + + spin_lock_irqsave(&ioapic_lock, flags); + reg_01.raw = io_apic_read(ioapic, 1); + spin_unlock_irqrestore(&ioapic_lock, flags); + + return reg_01.bits.entries; +} + +int __init probe_nr_irqs(void) +{ + int idx; + int nr = 0; + + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx); + + /* double it for hotplug and msi and nmi */ + nr <<= 1; + + /* something wrong ? */ + if (nr < 32) + nr = 32; + + return nr; +} + /* -------------------------------------------------------------------------- ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ @@ -3690,19 +3720,6 @@ int __init io_apic_get_version(int ioapic) } #endif -int __init io_apic_get_redir_entries (int ioapic) -{ - union IO_APIC_reg_01 reg_01; - unsigned long flags; - - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - - return reg_01.bits.entries; -} - - int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) { if (!IO_APIC_IRQ(irq)) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d90c659b70e..61335306696 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1069,15 +1069,15 @@ void __init setup_arch(char **cmdline_p) prefill_possible_map(); #ifdef CONFIG_X86_64 - /* need to wait for nr_cpu_ids settle down */ - if (nr_irqs == NR_IRQS) - nr_irqs = 32 * nr_cpu_ids + 224; init_cpu_to_node(); #endif init_apic_mappings(); ioapic_init_mappings(); + /* need to wait for io_apic is mapped */ + nr_irqs = probe_nr_irqs(); + kvm_guest_init(); e820_reserve_resources(); -- cgit v1.2.3 From bf9d3cf73e8caf0b3ae0b7f508a9f251536f5ff4 Mon Sep 17 00:00:00 2001 From: Alex Nixon Date: Mon, 18 Aug 2008 22:17:08 -0700 Subject: xen: Fix bug `do_IRQ: cannot handle IRQ -1 vector 0x6 cpu 1' Following commit 9c3f2468d8339866d9ef6a25aae31a8909c6be0d, do_IRQ() looks up the IRQ number in the per-cpu variable vector_irq. This commit makes Xen initialise an identity vector_irq map for both X86_32 and X86_64. Signed-off-by: Alex Nixon Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/xen/irq.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 28b85ab8422..bb042608c60 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c @@ -21,7 +21,6 @@ void xen_force_evtchn_callback(void) static void __init __xen_init_IRQ(void) { -#ifdef CONFIG_X86_64 int i; /* Create identity vector->irq map */ @@ -31,7 +30,6 @@ static void __init __xen_init_IRQ(void) for_each_possible_cpu(cpu) per_cpu(vector_irq, cpu)[i] = i; } -#endif /* CONFIG_X86_64 */ xen_init_IRQ(); } -- cgit v1.2.3 From 0c425cec64eb0c0d0dd7037c21a25585cbe3636c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 18 Aug 2008 13:04:26 +0200 Subject: warning: fix arch x86 kernel io_apic c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fix warning: arch/x86/kernel/io_apic.c: In function ‘print_local_APIC’: arch/x86/kernel/io_apic.c:1786: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘u64’ arch/x86/kernel/io_apic.c:1787: warning: format ‘%08x’ expects type ‘unsigned int’, but argument 2 has type ‘u64’ By creating uniform behavior on 32-bit and 64-bit and printing out the ICR value in two 32-bit words. Code has changed: text data bss dec hex filename 22901 19650 17040 59591 e8c7 io_apic.o.before 22899 19650 17040 59589 e8c5 io_apic.o.after Due to the 32-bit cast narrowing the printed out value on 64-bit. Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f7e80262cbb..34c74cf5c24 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1774,8 +1774,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy) } icr = apic_icr_read(); - printk(KERN_DEBUG "... APIC ICR: %08x\n", icr); - printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32); + printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); + printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); v = apic_read(APIC_LVTT); printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); -- cgit v1.2.3 From e89eb43863c2d9f11a3bbe766766fe646e6c50d9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 20 Aug 2008 20:46:25 -0700 Subject: x86: sparse_irq needs spin_lock in allocations Suresh Siddha noticed that we should have a spinlock around it. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 34c74cf5c24..7ca55669047 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -146,6 +146,12 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) } static struct irq_cfg *irq_cfgx; + +/* + * Protect the irq_cfgx_free freelist: + */ +static DEFINE_SPINLOCK(irq_cfg_lock); + #ifdef CONFIG_HAVE_SPARSE_IRQ static struct irq_cfg *irq_cfgx_free; #endif @@ -213,8 +219,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) static struct irq_cfg *irq_cfg_alloc(unsigned int irq) { struct irq_cfg *cfg, *cfg_pri; - int i; + unsigned long flags; int count = 0; + int i; cfg_pri = cfg = irq_cfgx; while (cfg) { @@ -226,6 +233,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) count++; } + spin_lock_irqsave(&irq_cfg_lock, flags); if (!irq_cfgx_free) { unsigned long phys; unsigned long total_bytes; @@ -263,6 +271,9 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) else irq_cfgx = cfg; cfg->irq = irq; + + spin_unlock_irqrestore(&irq_cfg_lock, flags); + printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); #ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG { -- cgit v1.2.3 From a2d332fa3445160519de03c350a59602ac1c3df9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 12:56:32 -0700 Subject: x86: fix 32-bit ioapic lockup with sparseirqs Missed two lines when copying. Fix panic on one of Ingo's machines that need to adjust ioapic id when acpi off/ 32bit. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 7ca55669047..4e44fd1f466 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2077,6 +2077,8 @@ static void __init setup_ioapic_ids_from_mpc(void) reg_00.bits.ID = mp_ioapics[apic].mp_apicid; spin_lock_irqsave(&ioapic_lock, flags); + io_apic_write(apic, 0, reg_00.raw); + spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check -- cgit v1.2.3 From 052c0bff9b83a578654dfa513d6e3d0b3795f1e8 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 13:10:09 -0700 Subject: x86: fix probe_nr_irqs for xen otherwise Xen is _completely_ unusable with 5 or more VCPUs. (when !CONFIG_HAVE_SPARSE_IRQ). based on Alex Nixon's patch. also add +1 offset after redir_entries Signed-off-by: Yinghai Lu Acked-by: Jeremy Fitzhardinge Acked-by: Alex Nixon Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4e44fd1f466..d28128e0392 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3625,16 +3625,21 @@ int __init probe_nr_irqs(void) { int idx; int nr = 0; +#ifndef CONFIG_XEN + int nr_min = 32; +#else + int nr_min = NR_IRQS; +#endif for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx); + nr += io_apic_get_redir_entries(idx) + 1; /* double it for hotplug and msi and nmi */ nr <<= 1; /* something wrong ? */ - if (nr < 32) - nr = 32; + if (nr < nr_min) + nr = nr_min; return nr; } -- cgit v1.2.3 From 2699574b3c04351f5a23c8a842e17c303d7ebb6f Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Thu, 21 Aug 2008 11:26:43 -0700 Subject: x86: VMI, initialize IRQ vector Initialize vector_irq for the vmi used vector, to point to correct irq. Signed-off-by: Alok N Kataria Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmiclock_32.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 6953859fe28..254ee07f863 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -235,11 +235,14 @@ static void __devinit vmi_time_init_clockevent(void) void __init vmi_time_init(void) { + unsigned int cpu; /* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */ outb_pit(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ vmi_time_init_clockevent(); setup_irq(0, &vmi_clock_action); + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; } #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.2.3 From db4b5525caafd846ec20f95afbc6403c792e22cf Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:39 -0700 Subject: x86: apic_64.c - setup_APIC_timer has to be __cpuinit function There is no need to hold this code if CPU_HOTPLUG is not defined. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index cd860856a0b..8f551276681 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -400,7 +400,7 @@ static void lapic_timer_broadcast(cpumask_t mask) * Setup the local APIC timer for this CPU. Copy the initilized values * of the boot CPU and register the clock event in the framework. */ -static void setup_APIC_timer(void) +static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); -- cgit v1.2.3 From 7c37e48b5125fdb0acac846a0a3af42806175d44 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:40 -0700 Subject: x86: apic - introduce get_physical_broadcast for 64bit We don't really use it now on 64bit mode but could reserve it for future. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 8f551276681..bb46711a0b1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -244,6 +244,16 @@ void __cpuinit enable_NMI_through_LVT0(void) apic_write(APIC_LVT0, v); } +#ifdef CONFIG_X86_32 +/** + * get_physical_broadcast - Get number of physical broadcast IDs + */ +int get_physical_broadcast(void) +{ + return modern_apic() ? 0xff : 0xf; +} +#endif + /** * lapic_get_maxlvt - get the maximum number of local vector table entries */ -- cgit v1.2.3 From 920fa7a507c3b1004a9ebe07a2c9d38605b3406a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:41 -0700 Subject: x86: apic - unify setup_apicpmtimer Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 10 ++++++++++ arch/x86/kernel/apic_64.c | 2 ++ 2 files changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 21c831d96af..df6ed603e54 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1774,6 +1774,16 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); +#ifdef CONFIG_X86_64 +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + static int __init apic_set_verbosity(char *arg) { if (!arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index bb46711a0b1..ddc5b245faf 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1810,6 +1810,7 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); +#ifdef CONFIG_X86_64 static __init int setup_apicpmtimer(char *s) { apic_calibrate_pmtmr = 1; @@ -1817,6 +1818,7 @@ static __init int setup_apicpmtimer(char *s) return 0; } __setup("apicpmtimer", setup_apicpmtimer); +#endif static int __init apic_set_verbosity(char *arg) { -- cgit v1.2.3 From 80e5609cabd7e4321769701a70297f819a15b08d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:42 -0700 Subject: x86: apic_64.c - add sanity check for spurious vector definition Do not check for SPUTIOUS_APIC_VECTOR definition twice. Check it once - is what we need. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ddc5b245faf..4587e16f73e 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -46,6 +46,13 @@ #include #include +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; static int apic_calibrate_pmtmr __initdata; @@ -939,8 +946,6 @@ void __cpuinit setup_local_APIC(void) preempt_disable(); value = apic_read(APIC_LVR); - BUILD_BUG_ON((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f); - /* * Double-check whether this APIC is really registered. * This is meaningless in clustered apic mode, so we skip it. -- cgit v1.2.3 From 89c38c2867ebe37c4c5aee23e7fa1bffb025b171 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:43 -0700 Subject: x86: apic - unify setup_local_APIC - remove useless read of APIC_LVR - wrap with preempt_disable/enable - check for integrated APIC just in place v2: fix by Yinghai Lu. fix lapic_is_integrated using let 64-bit too have pic_mode Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 21 ++++++++++++++----- arch/x86/kernel/apic_64.c | 51 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 62 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index df6ed603e54..f8c1251984e 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1033,9 +1033,10 @@ static void __cpuinit lapic_setup_esr(void) */ void __cpuinit setup_local_APIC(void) { - unsigned long value, integrated; + unsigned int value; int i, j; +#ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (esr_disable) { apic_write(APIC_ESR, 0); @@ -1043,14 +1044,16 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); } +#endif - integrated = lapic_is_integrated(); + preempt_disable(); /* * Double-check whether this APIC is really registered. + * This is meaningless in clustered apic mode, so we skip it. */ if (!apic_id_registered()) - WARN_ON_ONCE(1); + BUG(); /* * Intel recommends to set DFR, LDR and TPR before enabling @@ -1096,6 +1099,7 @@ void __cpuinit setup_local_APIC(void) */ value |= APIC_SPIV_APIC_ENABLED; +#ifdef CONFIG_X86_32 /* * Some unknown Intel IO/APIC (or APIC) errata is biting us with * certain networking cards. If high frequency interrupts are @@ -1116,8 +1120,13 @@ void __cpuinit setup_local_APIC(void) * See also the comment in end_level_ioapic_irq(). --macro */ - /* Enable focus processor (bit==0) */ + /* + * - enable focus processor (bit==0) + * - 64bit mode always use processor focus + * so no need to set it + */ value &= ~APIC_SPIV_FOCUS_DISABLED; +#endif /* * Set spurious IRQ vector @@ -1154,9 +1163,11 @@ void __cpuinit setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; - if (!integrated) /* 82489DX */ + if (!lapic_is_integrated()) /* 82489DX */ value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); + + preempt_enable(); } void __cpuinit end_local_APIC_setup(void) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 4587e16f73e..283968d4e02 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -76,6 +76,8 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; */ unsigned int apic_verbosity; +int pic_mode; + /* Have we found an MP table */ int smp_found_config; @@ -943,8 +945,17 @@ void __cpuinit setup_local_APIC(void) unsigned int value; int i, j; +#ifdef CONFIG_X86_32 + /* Pound the ESR really hard over the head with a big hammer - mbligh */ + if (esr_disable) { + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + } +#endif + preempt_disable(); - value = apic_read(APIC_LVR); /* * Double-check whether this APIC is really registered. @@ -997,7 +1008,34 @@ void __cpuinit setup_local_APIC(void) */ value |= APIC_SPIV_APIC_ENABLED; - /* We always use processor focus */ +#ifdef CONFIG_X86_32 + /* + * Some unknown Intel IO/APIC (or APIC) errata is biting us with + * certain networking cards. If high frequency interrupts are + * happening on a particular IOAPIC pin, plus the IOAPIC routing + * entry is masked/unmasked at a high rate as well then sooner or + * later IOAPIC line gets 'stuck', no more interrupts are received + * from the device. If focus CPU is disabled then the hang goes + * away, oh well :-( + * + * [ This bug can be reproduced easily with a level-triggered + * PCI Ne2000 networking cards and PII/PIII processors, dual + * BX chipset. ] + */ + /* + * Actually disabling the focus CPU check just makes the hang less + * frequent as it makes the interrupt distributon model be more + * like LRU than MRU (the short-term load is more even across CPUs). + * See also the comment in end_level_ioapic_irq(). --macro + */ + + /* + * - enable focus processor (bit==0) + * - 64bit mode always use processor focus + * so no need to set it + */ + value &= ~APIC_SPIV_FOCUS_DISABLED; +#endif /* * Set spurious IRQ vector @@ -1016,14 +1054,14 @@ void __cpuinit setup_local_APIC(void) * TODO: set up through-local-APIC from through-I/O-APIC? --macro */ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!smp_processor_id() && !value) { + if (!smp_processor_id() && (pic_mode || !value)) { value = APIC_DM_EXTINT; apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", - smp_processor_id()); + smp_processor_id()); } else { value = APIC_DM_EXTINT | APIC_LVT_MASKED; apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", - smp_processor_id()); + smp_processor_id()); } apic_write(APIC_LVT0, value); @@ -1034,7 +1072,10 @@ void __cpuinit setup_local_APIC(void) value = APIC_DM_NMI; else value = APIC_DM_NMI | APIC_LVT_MASKED; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; apic_write(APIC_LVT1, value); + preempt_enable(); } -- cgit v1.2.3 From 457cc52d4670bcf1470606a108bbf35aac28eb7f Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:44 -0700 Subject: x86: apic_32.c should use __cpuinit section All callers are __init or __cpuinit so there is no need to hold this code without CPU_HOTPLUG being set. Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index f8c1251984e..4ca3717b302 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -383,7 +383,7 @@ static void lapic_timer_broadcast(cpumask_t mask) * Setup the local APIC timer for this CPU. Copy the initilized values * of the boot CPU and register the clock event in the framework. */ -static void __devinit setup_APIC_timer(void) +static void __cpuinit setup_APIC_timer(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); @@ -652,7 +652,7 @@ void __init setup_boot_APIC_clock(void) setup_APIC_timer(); } -void __devinit setup_secondary_APIC_clock(void) +void __cpuinit setup_secondary_APIC_clock(void) { setup_APIC_timer(); } @@ -1713,7 +1713,7 @@ static struct sys_device device_lapic = { .cls = &lapic_sysclass, }; -static void __devinit apic_pm_activate(void) +static void __cpuinit apic_pm_activate(void) { apic_pm_state.active = 1; } -- cgit v1.2.3 From 6460bc73aac970135104a0bc407c2c8b85394d59 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 24 Aug 2008 02:01:45 -0700 Subject: x86: apic - unify smp_apic_timer_interrupt Signed-off-by: Cyrill Gorcunov Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 3 +++ arch/x86/kernel/apic_64.c | 2 ++ 2 files changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 4ca3717b302..913d9924b10 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -718,6 +718,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); local_apic_timer_interrupt(); irq_exit(); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 283968d4e02..2e109119f64 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -625,7 +625,9 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) * Besides, if we don't timer interrupts ignore the global * interrupt lock, which is the WrongThing (tm) to do. */ +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); local_apic_timer_interrupt(); irq_exit(); -- cgit v1.2.3 From b3c5117050e8028d48b2fa0ea09c7a50dd7f3414 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:46 -0700 Subject: x86: apic_xx.c order variables Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 46 +++++++++++++++++++++++---------------------- arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 913d9924b10..a54512bea62 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -50,16 +50,37 @@ # error SPURIOUS_APIC_VECTOR definition error #endif -unsigned long mp_lapic_addr; - +#ifdef CONFIG_X86_32 /* * Knob to control our willingness to enable the local APIC. * * +1=force-enable */ static int force_enable_local_apic; -int disable_apic; +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + force_enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); +#endif +#ifdef CONFIG_X86_64 +static int apic_calibrate_pmtmr __initdata; +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + +unsigned long mp_lapic_addr; +int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ @@ -1742,15 +1763,6 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); static int __init setup_disableapic(char *arg) { @@ -1788,16 +1800,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -#ifdef CONFIG_X86_64 -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - static int __init apic_set_verbosity(char *arg) { if (!arg) { diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 2e109119f64..c40a900e155 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -53,16 +53,44 @@ # error SPURIOUS_APIC_VECTOR definition error #endif -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; +#ifdef CONFIG_X86_32 +/* + * Knob to control our willingness to enable the local APIC. + * + * +1=force-enable + */ +static int force_enable_local_apic; +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + force_enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); +#endif + +#ifdef CONFIG_X86_64 static int apic_calibrate_pmtmr __initdata; -int disable_apic; +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + int disable_x2apic; int x2apic; - /* x2apic enabled before OS handover */ int x2apic_preenabled; +unsigned long mp_lapic_addr; +int disable_apic; +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +static int disable_apic_timer __cpuinitdata; /* Local APIC timer works in C2 */ int local_apic_timer_c2_ok; EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); @@ -113,8 +141,6 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events); static unsigned long apic_phys; -unsigned long mp_lapic_addr; - /* * Get the LAPIC version */ @@ -1858,16 +1884,6 @@ static int __init parse_nolapic_timer(char *arg) } early_param("nolapic_timer", parse_nolapic_timer); -#ifdef CONFIG_X86_64 -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - static int __init apic_set_verbosity(char *arg) { if (!arg) { -- cgit v1.2.3 From 49899eacce79ce39faf531dad3e00f771eba2eb1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:47 -0700 Subject: x86: use HAVE_X2APIC in apic_64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_64.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index c40a900e155..d3ec746aede 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -82,10 +82,23 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif -int disable_x2apic; +#ifdef CONFIG_X86_64 +#define HAVE_X2APIC +#endif + +#ifdef HAVE_X2APIC int x2apic; /* x2apic enabled before OS handover */ int x2apic_preenabled; +int disable_x2apic; +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); +#endif unsigned long mp_lapic_addr; int disable_apic; @@ -228,6 +241,7 @@ static struct apic_ops xapic_ops = { struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +#ifdef HAVE_X2APIC static void x2apic_wait_icr_idle(void) { /* no need to wait for icr idle in x2apic */ @@ -261,6 +275,7 @@ static struct apic_ops x2apic_ops = { .wait_icr_idle = x2apic_wait_icr_idle, .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, }; +#endif /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 @@ -1125,6 +1140,7 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +#ifdef HAVE_X2APIC void check_x2apic(void) { int msr, msr2; @@ -1243,6 +1259,7 @@ end: return; } +#endif /* HAVE_X2APIC */ /* * Detect and enable local APICs on non-SMP boards. @@ -1291,10 +1308,12 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { +#ifdef HAVE_X2APIC if (x2apic) { boot_cpu_physical_apicid = read_apic_id(); return; } +#endif /* * If no local APIC can be found then set up a fake all @@ -1335,8 +1354,9 @@ int __init APIC_init_uniprocessor(void) printk(KERN_INFO "Apic disabled by BIOS\n"); return -1; } - +#ifdef HAVE_X2APIC enable_IR_x2apic(); +#endif setup_apic_routing(); verify_local_APIC(); @@ -1672,7 +1692,7 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef CONFIG_X86_64 +#ifdef HAVE_X2APIC if (x2apic) enable_x2apic(); else @@ -1836,15 +1856,6 @@ __cpuinit int apic_is_clustered_box(void) return (clusters > 2); } -static __init int setup_nox2apic(char *str) -{ - disable_x2apic = 1; - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC); - return 0; -} -early_param("nox2apic", setup_nox2apic); - - /* * APIC command line parameters */ -- cgit v1.2.3 From 3491998dd54f6d4ef7344518fe5463b299fdf537 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:48 -0700 Subject: x86: add hard_smp_prossor_id with MACRO in io_apic_xx.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 7 +++++++ arch/x86/kernel/apic_64.c | 2 ++ 2 files changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a54512bea62..93718ffb9b9 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1600,6 +1600,13 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +#ifdef CONFIG_X86_64 +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} +#endif + /* * Power management */ diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index d3ec746aede..eeb69838c2f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1612,10 +1612,12 @@ void __cpuinit generic_processor_info(int apicid, int version) cpu_set(cpu, cpu_present_map); } +#ifdef CONFIG_X86_64 int hard_smp_processor_id(void) { return read_apic_id(); } +#endif /* * Power management -- cgit v1.2.3 From f28c0ae21d80ffd6eb0987901c5273843387e341 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:49 -0700 Subject: x86: make apic_32/64.c more like except x2apic, detec_init_APIC, and calibrating_APIC_clock Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 125 +++++++++++++++++++++++++++++++++++++++++----- arch/x86/kernel/apic_64.c | 10 +++- 2 files changed, 122 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 93718ffb9b9..bfac26a1609 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -66,6 +66,9 @@ static int __init parse_lapic(char *arg) return 0; } early_param("lapic", parse_lapic); +/* Local APIC was disabled by the BIOS and enabled by the kernel */ +static int enabled_via_apicbase; + #endif #ifdef CONFIG_X86_64 @@ -131,9 +134,6 @@ static struct clock_event_device lapic_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - static unsigned long apic_phys; /* @@ -240,6 +240,7 @@ void __cpuinit enable_NMI_through_LVT0(void) apic_write(APIC_LVT0, v); } +#ifdef CONFIG_X86_32 /** * get_physical_broadcast - Get number of physical broadcast IDs */ @@ -247,6 +248,7 @@ int get_physical_broadcast(void) { return modern_apic() ? 0xff : 0xf; } +#endif /** * lapic_get_maxlvt - get the maximum number of local vector table entries @@ -1291,6 +1293,32 @@ no_apic: return -1; } +#ifdef CONFIG_X86_64 +void __init early_init_lapic_mapping(void) +{ + unsigned long phys_addr; + + /* + * If no local APIC can be found then go out + * : it means there is no mpatable and MADT + */ + if (!smp_found_config) + return; + + phys_addr = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, phys_addr); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + boot_cpu_physical_apicid = read_apic_id(); +} +#endif + /** * init_apic_mappings - initialize APIC mappings */ @@ -1308,8 +1336,8 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - printk(KERN_DEBUG "mapped APIC to %08lx (%08lx)\n", APIC_BASE, - apic_phys); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, apic_phys); /* * Fetch the APIC ID of the BSP in case we have a @@ -1317,14 +1345,12 @@ void __init init_apic_mappings(void) */ if (boot_cpu_physical_apicid == -1U) boot_cpu_physical_apicid = read_apic_id(); - } /* * This initializes the IO-APIC and APIC hardware if this is * a UP kernel. */ - int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) @@ -1682,11 +1708,6 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); -#ifdef CONFIG_X86_64 - if (x2apic) - enable_x2apic(); - else -#endif { /* * Make sure the APICBASE points to the right address @@ -1770,7 +1791,87 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ +#ifdef CONFIG_X86_64 +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. Use available data to take a good guess. + * If in doubt, go HPET. + */ +__cpuinit int apic_is_clustered_box(void) +{ + int i, clusters, zeros; + unsigned id; + u16 *bios_cpu_apicid; + DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + + /* + * there is not this kind of box with AMD CPU yet. + * Some AMD box with quadcore cpu and 8 sockets apicid + * will be [4, 0x23] or [8, 0x27] could be thought to + * vsmp box still need checking... + */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) + return 0; + + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); + + for (i = 0; i < NR_CPUS; i++) { + /* are we being called early in kernel startup? */ + if (bios_cpu_apicid) { + id = bios_cpu_apicid[i]; + } + else if (i < nr_cpu_ids) { + if (cpu_present(i)) + id = per_cpu(x86_bios_cpu_apicid, i); + else + continue; + } + else + break; + if (id != BAD_APICID) + __set_bit(APIC_CLUSTERID(id), clustermap); + } + + /* Problem: Partially populated chassis may not have CPUs in some of + * the APIC clusters they have been allocated. Only present CPUs have + * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. + * Since clusters are allocated sequentially, count zeros only if + * they are bounded by ones. + */ + clusters = 0; + zeros = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (test_bit(i, clustermap)) { + clusters += 1 + zeros; + zeros = 0; + } else + ++zeros; + } + + /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards + */ + if (is_vsmp_box() && clusters > 1) + return 1; + + /* + * If clusters > 2, then should be multi-chassis. + * May have to revisit this when multi-core + hyperthreaded CPUs come + * out, but AFAIK this will work even for them. + */ + return (clusters > 2); +} +#endif + +/* + * APIC command line parameters + */ static int __init setup_disableapic(char *arg) { disable_apic = 1; diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index eeb69838c2f..dca6c246e73 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -69,6 +69,9 @@ static int __init parse_lapic(char *arg) return 0; } early_param("lapic", parse_lapic); +/* Local APIC was disabled by the BIOS and enabled by the kernel */ +static int enabled_via_apicbase; + #endif #ifdef CONFIG_X86_64 @@ -1279,6 +1282,7 @@ static int __init detect_init_APIC(void) return 0; } +#ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) { unsigned long phys_addr; @@ -1302,6 +1306,7 @@ void __init early_init_lapic_mapping(void) */ boot_cpu_physical_apicid = read_apic_id(); } +#endif /** * init_apic_mappings - initialize APIC mappings @@ -1334,7 +1339,8 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - boot_cpu_physical_apicid = read_apic_id(); + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = read_apic_id(); } /* @@ -1782,6 +1788,7 @@ static void apic_pm_activate(void) { } #endif /* CONFIG_PM */ +#ifdef CONFIG_X86_64 /* * apic_is_clustered_box() -- Check if we can expect good TSC * @@ -1857,6 +1864,7 @@ __cpuinit int apic_is_clustered_box(void) */ return (clusters > 2); } +#endif /* * APIC command line parameters -- cgit v1.2.3 From fa2bd35a8d5c88c03b638c72daf7f38a132d0e8c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:50 -0700 Subject: x86: merge APIC_init_uniprocessor Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 51 +++++++++++++++++++++++++++++++++++++++++------ arch/x86/kernel/apic_64.c | 48 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index bfac26a1609..8f8b0e1f3eb 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1355,6 +1355,17 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { +#ifdef CONFIG_X86_64 + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } +#else if (!smp_found_config && !cpu_has_apic) return -1; @@ -1368,34 +1379,62 @@ int __init APIC_init_uniprocessor(void) clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; } +#endif +#ifdef HAVE_X2APIC + enable_IR_x2apic(); +#endif +#ifdef CONFIG_X86_64 + setup_apic_routing(); +#endif verify_local_APIC(); - connect_bsp_APIC(); +#ifdef CONFIG_X86_64 + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +#else /* * Hack: In case of kdump, after a crash, kernel might be booting * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid * might be zero if read from MP tables. Get it from LAPIC. */ -#ifdef CONFIG_CRASH_DUMP +# ifdef CONFIG_CRASH_DUMP boot_cpu_physical_apicid = read_apic_id(); +# endif #endif physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); +#ifdef CONFIG_X86_64 + /* + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP + */ + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); +#endif + #ifdef CONFIG_X86_IO_APIC if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) #endif localise_nmi_watchdog(); end_local_APIC_setup(); + #ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +# ifdef CONFIG_X86_64 + else + nr_ioapics = 0; +# endif #endif + +#ifdef CONFIG_X86_64 + setup_boot_APIC_clock(); + check_nmi_watchdog(); +#else setup_boot_clock(); +#endif return 0; } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index dca6c246e73..16a30c22b07 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1351,6 +1351,7 @@ int apic_version[MAX_APICS]; int __init APIC_init_uniprocessor(void) { +#ifdef CONFIG_X86_64 if (disable_apic) { printk(KERN_INFO "Apic disabled\n"); return -1; @@ -1360,37 +1361,78 @@ int __init APIC_init_uniprocessor(void) printk(KERN_INFO "Apic disabled by BIOS\n"); return -1; } +#else + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. + */ + if (!cpu_has_apic && + APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + return -1; + } +#endif + #ifdef HAVE_X2APIC enable_IR_x2apic(); #endif +#ifdef CONFIG_X86_64 setup_apic_routing(); +#endif verify_local_APIC(); - connect_bsp_APIC(); - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); +#ifdef CONFIG_X86_64 apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); - +#else + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +# ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = read_apic_id(); +# endif +#endif + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); setup_local_APIC(); +#ifdef CONFIG_X86_64 /* * Now enable IO-APICs, actually call clear_IO_APIC * We need clear_IO_APIC before enabling vector on BP */ if (!skip_ioapic_setup && nr_ioapics) enable_IO_APIC(); +#endif +#ifdef CONFIG_X86_IO_APIC if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) +#endif localise_nmi_watchdog(); end_local_APIC_setup(); +#ifdef CONFIG_X86_IO_APIC if (smp_found_config && !skip_ioapic_setup && nr_ioapics) setup_IO_APIC(); +# ifdef CONFIG_X86_64 else nr_ioapics = 0; +# endif +#endif + +#ifdef CONFIG_X86_64 setup_boot_APIC_clock(); check_nmi_watchdog(); +#else + setup_boot_clock(); +#endif + return 0; } -- cgit v1.2.3 From be7a656fe131cba088912bcafb079b029320504d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:51 -0700 Subject: x86: copy detect_init_APIC to the other Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 20 ++++++++++++ arch/x86/kernel/apic_64.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 101 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 8f8b0e1f3eb..1103e05aa1b 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1214,6 +1214,25 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +#ifdef CONFIG_X86_64 +/* + * Detect and enable local APICs on non-SMP boards. + * Original code written by Keir Fraser. + * On AMD64 we trust the BIOS - if it says no APIC it is likely + * not correctly set up (usually the APIC timer won't work etc.) + */ +static int __init detect_init_APIC(void) +{ + if (!cpu_has_apic) { + printk(KERN_INFO "No local APIC present\n"); + return -1; + } + + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + boot_cpu_physical_apicid = 0; + return 0; +} +#else /* * Detect and initialize APIC */ @@ -1292,6 +1311,7 @@ no_apic: printk(KERN_INFO "No local APIC present or hardware disabled\n"); return -1; } +#endif #ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 16a30c22b07..b7268f5c8b1 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -684,7 +684,6 @@ int setup_profiling_timer(unsigned int multiplier) return -EINVAL; } - /* * Local APIC start and shutdown */ @@ -1264,6 +1263,7 @@ end: } #endif /* HAVE_X2APIC */ +#ifdef CONFIG_X86_64 /* * Detect and enable local APICs on non-SMP boards. * Original code written by Keir Fraser. @@ -1281,6 +1281,86 @@ static int __init detect_init_APIC(void) boot_cpu_physical_apicid = 0; return 0; } +#else +/* + * Detect and initialize APIC + */ +static int __init detect_init_APIC(void) +{ + u32 h, l, features; + + /* Disabled by kernel option? */ + if (disable_apic) + return -1; + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || + (boot_cpu_data.x86 == 15)) + break; + goto no_apic; + case X86_VENDOR_INTEL: + if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || + (boot_cpu_data.x86 == 5 && cpu_has_apic)) + break; + goto no_apic; + default: + goto no_apic; + } + + if (!cpu_has_apic) { + /* + * Over-ride BIOS and try to enable the local APIC only if + * "lapic" specified. + */ + if (!force_enable_local_apic) { + printk(KERN_INFO "Local APIC disabled by BIOS -- " + "you can enable it with \"lapic\"\n"); + return -1; + } + /* + * Some BIOSes disable the local APIC in the APIC_BASE + * MSR. This can only be done in software for Intel P6 or later + * and AMD K7 (Model > 1) or later. + */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + printk(KERN_INFO + "Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + } + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + printk(KERN_WARNING "Could not enable APIC!\n"); + return -1; + } + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* The BIOS may have set up the APIC at some other address */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + + printk(KERN_INFO "Found and enabled local APIC!\n"); + + apic_pm_activate(); + + return 0; + +no_apic: + printk(KERN_INFO "No local APIC present or hardware disabled\n"); + return -1; +} +#endif #ifdef CONFIG_X86_64 void __init early_init_lapic_mapping(void) -- cgit v1.2.3 From 773763df7de881e65ff2600c024c9ce2dde64750 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:52 -0700 Subject: x86: merge header files in apic_xx.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 8 ++++++++ arch/x86/kernel/apic_64.c | 7 ++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 1103e05aa1b..0ec8321e687 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -36,8 +38,14 @@ #include #include #include +#include #include #include +#include +#include +#include +#include +#include #include #include diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index b7268f5c8b1..ebe417b4d7f 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -24,9 +24,11 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -34,8 +36,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -43,8 +47,9 @@ #include #include -#include #include +#include +#include /* * Sanity check -- cgit v1.2.3 From dc1528dd864a0b79fa67b60b3ca5674fe94fdce5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:53 -0700 Subject: x86: apic unify smp_spurious/error_interrupt Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 24 +++++++++++++++++++++--- arch/x86/kernel/apic_64.c | 24 ++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 0ec8321e687..60a901b2d4f 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -1474,10 +1474,17 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_spurious_interrupt(void) +#else void smp_spurious_interrupt(struct pt_regs *regs) +#endif { - unsigned long v; + u32 v; +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); /* * Check if this really is a spurious interrupt and ACK it @@ -1488,20 +1495,31 @@ void smp_spurious_interrupt(struct pt_regs *regs) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); +#ifdef CONFIG_X86_64 + add_pda(irq_spurious_count, 1); +#else /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); __get_cpu_var(irq_stat).irq_spurious_count++; +#endif irq_exit(); } /* * This interrupt should never happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_error_interrupt(void) +#else void smp_error_interrupt(struct pt_regs *regs) +#endif { - unsigned long v, v1; + u32 v, v1; +#ifdef CONFIG_X86_64 + exit_idle(); +#endif irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); @@ -1520,7 +1538,7 @@ void smp_error_interrupt(struct pt_regs *regs) 6: Received illegal vector 7: Illegal register address */ - printk(KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n", + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", smp_processor_id(), v , v1); irq_exit(); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index ebe417b4d7f..c728885e4f4 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -1528,10 +1528,17 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 asmlinkage void smp_spurious_interrupt(void) +#else +void smp_spurious_interrupt(struct pt_regs *regs) +#endif { - unsigned int v; + u32 v; + +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); /* * Check if this really is a spurious interrupt and ACK it @@ -1542,18 +1549,31 @@ asmlinkage void smp_spurious_interrupt(void) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); +#ifdef CONFIG_X86_64 add_pda(irq_spurious_count, 1); +#else + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " + "should never happen.\n", smp_processor_id()); + __get_cpu_var(irq_stat).irq_spurious_count++; +#endif irq_exit(); } /* * This interrupt should never happen with our APIC/SMP architecture */ +#ifdef CONFIG_X86_64 asmlinkage void smp_error_interrupt(void) +#else +void smp_error_interrupt(struct pt_regs *regs) +#endif { - unsigned int v, v1; + u32 v, v1; +#ifdef CONFIG_X86_64 exit_idle(); +#endif irq_enter(); /* First tickle the hardware, only then report what went on. -- REW */ v = apic_read(APIC_ESR); -- cgit v1.2.3 From 2f04fa888d270951b9e0fe9e641ddd560d77ad1b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:54 -0700 Subject: x86: apic copy calibrate_APIC_clock to each other in apic_32/64.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 86 +++++++++++++++++++ arch/x86/kernel/apic_64.c | 215 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 60a901b2d4f..05498c37f8d 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -424,6 +424,90 @@ static void __cpuinit setup_APIC_timer(void) clockevents_register_device(levt); } +#ifdef CONFIG_X86_64 +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. + * + * We want to do the calibration only once since we + * want to have local timer irqs syncron. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +#define TICK_COUNT 100000000 + +static int __init calibrate_APIC_clock(void) +{ + unsigned apic, apic_start; + unsigned long tsc, tsc_start; + int result; + + local_irq_disable(); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + * + * No interrupt enable ! + */ + __setup_APIC_LVTT(250000000, 0, 0); + + apic_start = apic_read(APIC_TMCCT); +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ + apic = apic_read(APIC_TMCCT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscll(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscll(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic_start - apic) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * tsc_khz / + (tsc - tsc_start); + } + + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + + printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", + result / 1000 / 1000, result / 1000 % 1000); + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (result * APIC_DIVISOR) / HZ; + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + return 0; +} + +#else /* * In this functions we calibrate APIC bus clocks to the external timer. * @@ -635,6 +719,8 @@ static int __init calibrate_APIC_clock(void) return 0; } +#endif + /* * Setup the boot APIC * diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index c728885e4f4..6e99af5ce67 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -478,6 +478,7 @@ static void __cpuinit setup_APIC_timer(void) clockevents_register_device(levt); } +#ifdef CONFIG_X86_64 /* * In this function we calibrate APIC bus clocks to the external * timer. Unfortunately we cannot use jiffies and the timer irq @@ -560,6 +561,220 @@ static int __init calibrate_APIC_clock(void) return 0; } +#else +/* + * In this functions we calibrate APIC bus clocks to the external timer. + * + * We want to do the calibration only once since we want to have local timer + * irqs syncron. CPUs connected by the same APIC bus have the very same bus + * frequency. + * + * This was previously done by reading the PIT/HPET and waiting for a wrap + * around to find out, that a tick has elapsed. I have a box, where the PIT + * readout is broken, so it never gets out of the wait loop again. This was + * also reported by others. + * + * Monitoring the jiffies value is inaccurate and the clockevents + * infrastructure allows us to do a simple substitution of the interrupt + * handler. + * + * The calibration routine also uses the pm_timer when possible, as the PIT + * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes + * back to normal later in the boot process). + */ + +#define LAPIC_CAL_LOOPS (HZ/10) + +static __initdata int lapic_cal_loops = -1; +static __initdata long lapic_cal_t1, lapic_cal_t2; +static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; +static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; + +/* + * Temporary interrupt handler. + */ +static void __init lapic_cal_handler(struct clock_event_device *dev) +{ + unsigned long long tsc = 0; + long tapic = apic_read(APIC_TMCCT); + unsigned long pm = acpi_pm_read_early(); + + if (cpu_has_tsc) + rdtscll(tsc); + + switch (lapic_cal_loops++) { + case 0: + lapic_cal_t1 = tapic; + lapic_cal_tsc1 = tsc; + lapic_cal_pm1 = pm; + lapic_cal_j1 = jiffies; + break; + + case LAPIC_CAL_LOOPS: + lapic_cal_t2 = tapic; + lapic_cal_tsc2 = tsc; + if (pm < lapic_cal_pm1) + pm += ACPI_PM_OVRRUN; + lapic_cal_pm2 = pm; + lapic_cal_j2 = jiffies; + break; + } +} + +static int __init calibrate_APIC_clock(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); + const long pm_100ms = PMTMR_TICKS_PER_SEC/10; + const long pm_thresh = pm_100ms/100; + void (*real_handler)(struct clock_event_device *dev); + unsigned long deltaj; + long delta, deltapm; + int pm_referenced = 0; + + local_irq_disable(); + + /* Replace the global interrupt handler */ + real_handler = global_clock_event->event_handler; + global_clock_event->event_handler = lapic_cal_handler; + + /* + * Setup the APIC counter to 1e9. There is no way the lapic + * can underflow in the 100ms detection time frame + */ + __setup_APIC_LVTT(1000000000, 0, 0); + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Restore the real event handler */ + global_clock_event->event_handler = real_handler; + + /* Build delta t1-t2 as apic timer counts down */ + delta = lapic_cal_t1 - lapic_cal_t2; + apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + + /* Check, if the PM timer is available */ + deltapm = lapic_cal_pm2 - lapic_cal_pm1; + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + if (deltapm) { + unsigned long mult; + u64 res; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64) deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64) delta) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long) res, delta); + delta = (long) res; + } + pm_referenced = 1; + } + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + + apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); + apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); + apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", + calibration_result); + + if (cpu_has_tsc) { + delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); + apic_printk(APIC_VERBOSE, "..... CPU clock speed is " + "%ld.%04ld MHz.\n", + (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + } + + apic_printk(APIC_VERBOSE, "..... host bus clock speed is " + "%u.%04u MHz.\n", + calibration_result / (1000000 / HZ), + calibration_result % (1000000 / HZ)); + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + local_irq_enable(); + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; + + /* We trust the pm timer based calibration */ + if (!pm_referenced) { + apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); + + /* + * Setup the apic timer manually + */ + levt->event_handler = lapic_cal_handler; + lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); + lapic_cal_loops = -1; + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Stop the lapic timer */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); + + local_irq_enable(); + + /* Jiffies delta */ + deltaj = lapic_cal_j2 - lapic_cal_j1; + apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); + + /* Check, if the jiffies result is consistent */ + if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) + apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); + else + levt->features |= CLOCK_EVT_FEAT_DUMMY; + } else + local_irq_enable(); + + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { + printk(KERN_WARNING + "APIC timer disabled due to verification failure.\n"); + return -1; + } + + return 0; +} + +#endif + /* * Setup the boot APIC * -- cgit v1.2.3 From 6c15822752a13748241e1a34068f2b15b660d28e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:55 -0700 Subject: x86: apic copy apic_64.c to apic_32.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic_32.c | 188 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 188 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 05498c37f8d..0b11d6e3f09 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -90,6 +90,24 @@ static __init int setup_apicpmtimer(char *s) __setup("apicpmtimer", setup_apicpmtimer); #endif +#ifdef CONFIG_X86_64 +#define HAVE_X2APIC +#endif + +#ifdef HAVE_X2APIC +int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; +int disable_x2apic; +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); +#endif + unsigned long mp_lapic_addr; int disable_apic; /* Disable local APIC timer from the kernel commandline or via dmi quirk */ @@ -231,6 +249,42 @@ static struct apic_ops xapic_ops = { struct apic_ops __read_mostly *apic_ops = &xapic_ops; EXPORT_SYMBOL_GPL(apic_ops); +#ifdef HAVE_X2APIC +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; +#endif + /** * enable_NMI_through_LVT0 - enable NMI through local vector table 0 */ @@ -1308,6 +1362,127 @@ void __cpuinit end_local_APIC_setup(void) apic_pm_activate(); } +#ifdef HAVE_X2APIC +void check_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + + if (msr & X2APIC_ENABLE) { + printk("x2apic enabled by BIOS, switching to x2apic ops\n"); + x2apic_preenabled = x2apic = 1; + apic_ops = &x2apic_ops; + } +} + +void enable_x2apic(void) +{ + int msr, msr2; + + rdmsr(MSR_IA32_APICBASE, msr, msr2); + if (!(msr & X2APIC_ENABLE)) { + printk("Enabling x2apic\n"); + wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); + } +} + +void enable_IR_x2apic(void) +{ +#ifdef CONFIG_INTR_REMAP + int ret; + unsigned long flags; + + if (!cpu_has_x2apic) + return; + + if (!x2apic_preenabled && disable_x2apic) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of nox2apic\n"); + return; + } + + if (x2apic_preenabled && disable_x2apic) + panic("Bios already enabled x2apic, can't enforce nox2apic"); + + if (!x2apic_preenabled && skip_ioapic_setup) { + printk(KERN_INFO + "Skipped enabling x2apic and Interrupt-remapping " + "because of skipping io-apic setup\n"); + return; + } + + ret = dmar_table_init(); + if (ret) { + printk(KERN_INFO + "dmar_table_init() failed with %d:\n", ret); + + if (x2apic_preenabled) + panic("x2apic enabled by bios. But IR enabling failed"); + else + printk(KERN_INFO + "Not enabling x2apic,Intr-remapping\n"); + return; + } + + local_irq_save(flags); + mask_8259A(); + save_mask_IO_APIC_setup(); + + ret = enable_intr_remapping(1); + + if (ret && x2apic_preenabled) { + local_irq_restore(flags); + panic("x2apic enabled by bios. But IR enabling failed"); + } + + if (ret) + goto end; + + if (!x2apic) { + x2apic = 1; + apic_ops = &x2apic_ops; + enable_x2apic(); + } +end: + if (ret) + /* + * IR enabling failed + */ + restore_IO_APIC_setup(); + else + reinit_intr_remapped_IO_APIC(x2apic_preenabled); + + unmask_8259A(); + local_irq_restore(flags); + + if (!ret) { + if (!x2apic_preenabled) + printk(KERN_INFO + "Enabled x2apic and interrupt-remapping\n"); + else + printk(KERN_INFO + "Enabled Interrupt-remapping\n"); + } else + printk(KERN_ERR + "Failed to enable Interrupt-remapping and x2apic\n"); +#else + if (!cpu_has_x2apic) + return; + + if (x2apic_preenabled) + panic("x2apic enabled prior OS handover," + " enable CONFIG_INTR_REMAP"); + + printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " + " and x2apic\n"); +#endif + + return; +} +#endif /* HAVE_X2APIC */ + #ifdef CONFIG_X86_64 /* * Detect and enable local APICs on non-SMP boards. @@ -1438,6 +1613,13 @@ void __init early_init_lapic_mapping(void) */ void __init init_apic_mappings(void) { +#ifdef HAVE_X2APIC + if (x2apic) { + boot_cpu_physical_apicid = read_apic_id(); + return; + } +#endif + /* * If no local APIC can be found then set up a fake all * zeroes page to simulate the local APIC and another @@ -1501,6 +1683,7 @@ int __init APIC_init_uniprocessor(void) #ifdef CONFIG_X86_64 setup_apic_routing(); #endif + verify_local_APIC(); connect_bsp_APIC(); @@ -1879,6 +2062,11 @@ static int lapic_resume(struct sys_device *dev) local_irq_save(flags); +#ifdef HAVE_X2APIC + if (x2apic) + enable_x2apic(); + else +#endif { /* * Make sure the APICBASE points to the right address -- cgit v1.2.3 From 0f611ffaea81b8e1c69682188ba1ccaf7683a2ba Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 02:01:56 -0700 Subject: x86: rename apic_32.c and apic_64.c to apic.c Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/apic.c | 2311 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/apic_32.c | 2312 --------------------------------------------- arch/x86/kernel/apic_64.c | 2311 -------------------------------------------- 4 files changed, 2312 insertions(+), 4624 deletions(-) create mode 100644 arch/x86/kernel/apic.c delete mode 100644 arch/x86/kernel/apic_32.c delete mode 100644 arch/x86/kernel/apic_64.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 7264f9c3af8..45cecc59d89 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -60,7 +60,7 @@ obj-$(CONFIG_X86_32_SMP) += smpcommon.o obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o obj-$(CONFIG_X86_MPPARSE) += mpparse.o -obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o +obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c new file mode 100644 index 00000000000..6e99af5ce67 --- /dev/null +++ b/arch/x86/kernel/apic.c @@ -0,0 +1,2311 @@ +/* + * Local APIC handling, local APIC timers + * + * (c) 1999, 2000 Ingo Molnar + * + * Fixes + * Maciej W. Rozycki : Bits for genuine 82489DX APICs; + * thanks to Eric Gilmore + * and Rolf G. Tews + * for testing these extensively. + * Maciej W. Rozycki : Various updates and fixes. + * Mikael Pettersson : Power Management for UP-APIC. + * Pavel Machek and + * Mikael Pettersson : PM converted to driver model. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * Sanity check + */ +#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +# error SPURIOUS_APIC_VECTOR definition error +#endif + +#ifdef CONFIG_X86_32 +/* + * Knob to control our willingness to enable the local APIC. + * + * +1=force-enable + */ +static int force_enable_local_apic; +/* + * APIC command line parameters + */ +static int __init parse_lapic(char *arg) +{ + force_enable_local_apic = 1; + return 0; +} +early_param("lapic", parse_lapic); +/* Local APIC was disabled by the BIOS and enabled by the kernel */ +static int enabled_via_apicbase; + +#endif + +#ifdef CONFIG_X86_64 +static int apic_calibrate_pmtmr __initdata; +static __init int setup_apicpmtimer(char *s) +{ + apic_calibrate_pmtmr = 1; + notsc_setup(NULL); + return 0; +} +__setup("apicpmtimer", setup_apicpmtimer); +#endif + +#ifdef CONFIG_X86_64 +#define HAVE_X2APIC +#endif + +#ifdef HAVE_X2APIC +int x2apic; +/* x2apic enabled before OS handover */ +int x2apic_preenabled; +int disable_x2apic; +static __init int setup_nox2apic(char *str) +{ + disable_x2apic = 1; + setup_clear_cpu_cap(X86_FEATURE_X2APIC); + return 0; +} +early_param("nox2apic", setup_nox2apic); +#endif + +unsigned long mp_lapic_addr; +int disable_apic; +/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +static int disable_apic_timer __cpuinitdata; +/* Local APIC timer works in C2 */ +int local_apic_timer_c2_ok; +EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); + +int first_system_vector = 0xfe; + +char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; + +/* + * Debug level, exported for io_apic.c + */ +unsigned int apic_verbosity; + +int pic_mode; + +/* Have we found an MP table */ +int smp_found_config; + +static struct resource lapic_resource = { + .name = "Local APIC", + .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +}; + +static unsigned int calibration_result; + +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt); +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt); +static void lapic_timer_broadcast(cpumask_t mask); +static void apic_pm_activate(void); + +/* + * The local apic timer can be used for any function which is CPU local. + */ +static struct clock_event_device lapic_clockevent = { + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT + | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_mode = lapic_timer_setup, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, +}; +static DEFINE_PER_CPU(struct clock_event_device, lapic_events); + +static unsigned long apic_phys; + +/* + * Get the LAPIC version + */ +static inline int lapic_get_version(void) +{ + return GET_APIC_VERSION(apic_read(APIC_LVR)); +} + +/* + * Check, if the APIC is integrated or a separate chip + */ +static inline int lapic_is_integrated(void) +{ +#ifdef CONFIG_X86_64 + return 1; +#else + return APIC_INTEGRATED(lapic_get_version()); +#endif +} + +/* + * Check, whether this is a modern or a first generation APIC + */ +static int modern_apic(void) +{ + /* AMD systems use old APIC versions, so check the CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && + boot_cpu_data.x86 >= 0xf) + return 1; + return lapic_get_version() >= 0x14; +} + +/* + * Paravirt kernels also might be using these below ops. So we still + * use generic apic_read()/apic_write(), which might be pointing to different + * ops in PARAVIRT case. + */ +void xapic_wait_icr_idle(void) +{ + while (apic_read(APIC_ICR) & APIC_ICR_BUSY) + cpu_relax(); +} + +u32 safe_xapic_wait_icr_idle(void) +{ + u32 send_status; + int timeout; + + timeout = 0; + do { + send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; + if (!send_status) + break; + udelay(100); + } while (timeout++ < 1000); + + return send_status; +} + +void xapic_icr_write(u32 low, u32 id) +{ + apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); + apic_write(APIC_ICR, low); +} + +u64 xapic_icr_read(void) +{ + u32 icr1, icr2; + + icr2 = apic_read(APIC_ICR2); + icr1 = apic_read(APIC_ICR); + + return icr1 | ((u64)icr2 << 32); +} + +static struct apic_ops xapic_ops = { + .read = native_apic_mem_read, + .write = native_apic_mem_write, + .icr_read = xapic_icr_read, + .icr_write = xapic_icr_write, + .wait_icr_idle = xapic_wait_icr_idle, + .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +}; + +struct apic_ops __read_mostly *apic_ops = &xapic_ops; +EXPORT_SYMBOL_GPL(apic_ops); + +#ifdef HAVE_X2APIC +static void x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return; +} + +static u32 safe_x2apic_wait_icr_idle(void) +{ + /* no need to wait for icr idle in x2apic */ + return 0; +} + +void x2apic_icr_write(u32 low, u32 id) +{ + wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +} + +u64 x2apic_icr_read(void) +{ + unsigned long val; + + rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); + return val; +} + +static struct apic_ops x2apic_ops = { + .read = native_apic_msr_read, + .write = native_apic_msr_write, + .icr_read = x2apic_icr_read, + .icr_write = x2apic_icr_write, + .wait_icr_idle = x2apic_wait_icr_idle, + .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +}; +#endif + +/** + * enable_NMI_through_LVT0 - enable NMI through local vector table 0 + */ +void __cpuinit enable_NMI_through_LVT0(void) +{ + unsigned int v; + + /* unmask and set to NMI */ + v = APIC_DM_NMI; + + /* Level triggered for 82489DX (32bit mode) */ + if (!lapic_is_integrated()) + v |= APIC_LVT_LEVEL_TRIGGER; + + apic_write(APIC_LVT0, v); +} + +#ifdef CONFIG_X86_32 +/** + * get_physical_broadcast - Get number of physical broadcast IDs + */ +int get_physical_broadcast(void) +{ + return modern_apic() ? 0xff : 0xf; +} +#endif + +/** + * lapic_get_maxlvt - get the maximum number of local vector table entries + */ +int lapic_get_maxlvt(void) +{ + unsigned int v; + + v = apic_read(APIC_LVR); + /* + * - we always have APIC integrated on 64bit mode + * - 82489DXs do not report # of LVT entries + */ + return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; +} + +/* + * Local APIC timer + */ + +/* Clock divisor */ +#ifdef CONFG_X86_64 +#define APIC_DIVISOR 1 +#else +#define APIC_DIVISOR 16 +#endif + +/* + * This function sets up the local APIC timer, with a timeout of + * 'clocks' APIC bus clock. During calibration we actually call + * this function twice on the boot CPU, once with a bogus timeout + * value, second time for real. The other (noncalibrating) CPUs + * call this function only once, with the real, calibrated value. + * + * We do reads before writes even if unnecessary, to get around the + * P5 APIC double write bug. + */ +static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) +{ + unsigned int lvtt_value, tmp_value; + + lvtt_value = LOCAL_TIMER_VECTOR; + if (!oneshot) + lvtt_value |= APIC_LVT_TIMER_PERIODIC; + if (!lapic_is_integrated()) + lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); + + if (!irqen) + lvtt_value |= APIC_LVT_MASKED; + + apic_write(APIC_LVTT, lvtt_value); + + /* + * Divide PICLK by 16 + */ + tmp_value = apic_read(APIC_TDCR); + apic_write(APIC_TDCR, + (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | + APIC_TDR_DIV_16); + + if (!oneshot) + apic_write(APIC_TMICT, clocks / APIC_DIVISOR); +} + +/* + * Setup extended LVT, AMD specific (K8, family 10h) + * + * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and + * MCE interrupts are supported. Thus MCE offset must be set to 0. + * + * If mask=1, the LVT entry does not generate interrupts while mask=0 + * enables the vector. See also the BKDGs. + */ + +#define APIC_EILVT_LVTOFF_MCE 0 +#define APIC_EILVT_LVTOFF_IBS 1 + +static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +{ + unsigned long reg = (lvt_off << 4) + APIC_EILVT0; + unsigned int v = (mask << 16) | (msg_type << 8) | vector; + + apic_write(reg, v); +} + +u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_MCE; +} + +u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +{ + setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); + return APIC_EILVT_LVTOFF_IBS; +} +EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); + +/* + * Program the next event, relative to now + */ +static int lapic_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + apic_write(APIC_TMICT, delta); + return 0; +} + +/* + * Setup the lapic timer in periodic or oneshot mode + */ +static void lapic_timer_setup(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + unsigned long flags; + unsigned int v; + + /* Lapic used as dummy for broadcast ? */ + if (evt->features & CLOCK_EVT_FEAT_DUMMY) + return; + + local_irq_save(flags); + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + case CLOCK_EVT_MODE_ONESHOT: + __setup_APIC_LVTT(calibration_result, + mode != CLOCK_EVT_MODE_PERIODIC, 1); + break; + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + v = apic_read(APIC_LVTT); + v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); + apic_write(APIC_LVTT, v); + break; + case CLOCK_EVT_MODE_RESUME: + /* Nothing to do here */ + break; + } + + local_irq_restore(flags); +} + +/* + * Local APIC timer broadcast function + */ +static void lapic_timer_broadcast(cpumask_t mask) +{ +#ifdef CONFIG_SMP + send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +#endif +} + +/* + * Setup the local APIC timer for this CPU. Copy the initilized values + * of the boot CPU and register the clock event in the framework. + */ +static void __cpuinit setup_APIC_timer(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); + + memcpy(levt, &lapic_clockevent, sizeof(*levt)); + levt->cpumask = cpumask_of_cpu(smp_processor_id()); + + clockevents_register_device(levt); +} + +#ifdef CONFIG_X86_64 +/* + * In this function we calibrate APIC bus clocks to the external + * timer. Unfortunately we cannot use jiffies and the timer irq + * to calibrate, since some later bootup code depends on getting + * the first irq? Ugh. + * + * We want to do the calibration only once since we + * want to have local timer irqs syncron. CPUs connected + * by the same APIC bus have the very same bus frequency. + * And we want to have irqs off anyways, no accidental + * APIC irq that way. + */ + +#define TICK_COUNT 100000000 + +static int __init calibrate_APIC_clock(void) +{ + unsigned apic, apic_start; + unsigned long tsc, tsc_start; + int result; + + local_irq_disable(); + + /* + * Put whatever arbitrary (but long enough) timeout + * value into the APIC clock, we just want to get the + * counter running for calibration. + * + * No interrupt enable ! + */ + __setup_APIC_LVTT(250000000, 0, 0); + + apic_start = apic_read(APIC_TMCCT); +#ifdef CONFIG_X86_PM_TIMER + if (apic_calibrate_pmtmr && pmtmr_ioport) { + pmtimer_wait(5000); /* 5ms wait */ + apic = apic_read(APIC_TMCCT); + result = (apic_start - apic) * 1000L / 5; + } else +#endif + { + rdtscll(tsc_start); + + do { + apic = apic_read(APIC_TMCCT); + rdtscll(tsc); + } while ((tsc - tsc_start) < TICK_COUNT && + (apic_start - apic) < TICK_COUNT); + + result = (apic_start - apic) * 1000L * tsc_khz / + (tsc - tsc_start); + } + + local_irq_enable(); + + printk(KERN_DEBUG "APIC timer calibration result %d\n", result); + + printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", + result / 1000 / 1000, result / 1000 % 1000); + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (result * APIC_DIVISOR) / HZ; + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + return 0; +} + +#else +/* + * In this functions we calibrate APIC bus clocks to the external timer. + * + * We want to do the calibration only once since we want to have local timer + * irqs syncron. CPUs connected by the same APIC bus have the very same bus + * frequency. + * + * This was previously done by reading the PIT/HPET and waiting for a wrap + * around to find out, that a tick has elapsed. I have a box, where the PIT + * readout is broken, so it never gets out of the wait loop again. This was + * also reported by others. + * + * Monitoring the jiffies value is inaccurate and the clockevents + * infrastructure allows us to do a simple substitution of the interrupt + * handler. + * + * The calibration routine also uses the pm_timer when possible, as the PIT + * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes + * back to normal later in the boot process). + */ + +#define LAPIC_CAL_LOOPS (HZ/10) + +static __initdata int lapic_cal_loops = -1; +static __initdata long lapic_cal_t1, lapic_cal_t2; +static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; +static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; + +/* + * Temporary interrupt handler. + */ +static void __init lapic_cal_handler(struct clock_event_device *dev) +{ + unsigned long long tsc = 0; + long tapic = apic_read(APIC_TMCCT); + unsigned long pm = acpi_pm_read_early(); + + if (cpu_has_tsc) + rdtscll(tsc); + + switch (lapic_cal_loops++) { + case 0: + lapic_cal_t1 = tapic; + lapic_cal_tsc1 = tsc; + lapic_cal_pm1 = pm; + lapic_cal_j1 = jiffies; + break; + + case LAPIC_CAL_LOOPS: + lapic_cal_t2 = tapic; + lapic_cal_tsc2 = tsc; + if (pm < lapic_cal_pm1) + pm += ACPI_PM_OVRRUN; + lapic_cal_pm2 = pm; + lapic_cal_j2 = jiffies; + break; + } +} + +static int __init calibrate_APIC_clock(void) +{ + struct clock_event_device *levt = &__get_cpu_var(lapic_events); + const long pm_100ms = PMTMR_TICKS_PER_SEC/10; + const long pm_thresh = pm_100ms/100; + void (*real_handler)(struct clock_event_device *dev); + unsigned long deltaj; + long delta, deltapm; + int pm_referenced = 0; + + local_irq_disable(); + + /* Replace the global interrupt handler */ + real_handler = global_clock_event->event_handler; + global_clock_event->event_handler = lapic_cal_handler; + + /* + * Setup the APIC counter to 1e9. There is no way the lapic + * can underflow in the 100ms detection time frame + */ + __setup_APIC_LVTT(1000000000, 0, 0); + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Restore the real event handler */ + global_clock_event->event_handler = real_handler; + + /* Build delta t1-t2 as apic timer counts down */ + delta = lapic_cal_t1 - lapic_cal_t2; + apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); + + /* Check, if the PM timer is available */ + deltapm = lapic_cal_pm2 - lapic_cal_pm1; + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + if (deltapm) { + unsigned long mult; + u64 res; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64) deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64) delta) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long) res, delta); + delta = (long) res; + } + pm_referenced = 1; + } + + /* Calculate the scaled math multiplication factor */ + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, + lapic_clockevent.shift); + lapic_clockevent.max_delta_ns = + clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); + lapic_clockevent.min_delta_ns = + clockevent_delta2ns(0xF, &lapic_clockevent); + + calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + + apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); + apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); + apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", + calibration_result); + + if (cpu_has_tsc) { + delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); + apic_printk(APIC_VERBOSE, "..... CPU clock speed is " + "%ld.%04ld MHz.\n", + (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), + (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); + } + + apic_printk(APIC_VERBOSE, "..... host bus clock speed is " + "%u.%04u MHz.\n", + calibration_result / (1000000 / HZ), + calibration_result % (1000000 / HZ)); + + /* + * Do a sanity check on the APIC calibration result + */ + if (calibration_result < (1000000 / HZ)) { + local_irq_enable(); + printk(KERN_WARNING + "APIC frequency too slow, disabling apic timer\n"); + return -1; + } + + levt->features &= ~CLOCK_EVT_FEAT_DUMMY; + + /* We trust the pm timer based calibration */ + if (!pm_referenced) { + apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); + + /* + * Setup the apic timer manually + */ + levt->event_handler = lapic_cal_handler; + lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); + lapic_cal_loops = -1; + + /* Let the interrupts run */ + local_irq_enable(); + + while (lapic_cal_loops <= LAPIC_CAL_LOOPS) + cpu_relax(); + + local_irq_disable(); + + /* Stop the lapic timer */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); + + local_irq_enable(); + + /* Jiffies delta */ + deltaj = lapic_cal_j2 - lapic_cal_j1; + apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); + + /* Check, if the jiffies result is consistent */ + if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) + apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); + else + levt->features |= CLOCK_EVT_FEAT_DUMMY; + } else + local_irq_enable(); + + if (levt->features & CLOCK_EVT_FEAT_DUMMY) { + printk(KERN_WARNING + "APIC timer disabled due to verification failure.\n"); + return -1; + } + + return 0; +} + +#endif + +/* + * Setup the boot APIC + * + * Calibrate and verify the result. + */ +void __init setup_boot_APIC_clock(void) +{ + /* + * The local apic timer can be disabled via the kernel + * commandline or from the CPU detection code. Register the lapic + * timer as a dummy clock event source on SMP systems, so the + * broadcast mechanism is used. On UP systems simply ignore it. + */ + if (disable_apic_timer) { + printk(KERN_INFO "Disabling APIC timer\n"); + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) { + lapic_clockevent.mult = 1; + setup_APIC_timer(); + } + return; + } + + apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" + "calibrating APIC timer ...\n"); + + if (calibrate_APIC_clock()) { + /* No broadcast on UP ! */ + if (num_possible_cpus() > 1) + setup_APIC_timer(); + return; + } + + /* + * If nmi_watchdog is set to IO_APIC, we need the + * PIT/HPET going. Otherwise register lapic as a dummy + * device. + */ + if (nmi_watchdog != NMI_IO_APIC) + lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; + else + printk(KERN_WARNING "APIC timer registered as dummy," + " due to nmi_watchdog=%d!\n", nmi_watchdog); + + /* Setup the lapic or request the broadcast */ + setup_APIC_timer(); +} + +void __cpuinit setup_secondary_APIC_clock(void) +{ + setup_APIC_timer(); +} + +/* + * The guts of the apic timer interrupt + */ +static void local_apic_timer_interrupt(void) +{ + int cpu = smp_processor_id(); + struct clock_event_device *evt = &per_cpu(lapic_events, cpu); + + /* + * Normally we should not be here till LAPIC has been initialized but + * in some cases like kdump, its possible that there is a pending LAPIC + * timer interrupt from previous kernel's context and is delivered in + * new kernel the moment interrupts are enabled. + * + * Interrupts are enabled early and LAPIC is setup much later, hence + * its possible that when we get here evt->event_handler is NULL. + * Check for event_handler being NULL and discard the interrupt as + * spurious. + */ + if (!evt->event_handler) { + printk(KERN_WARNING + "Spurious LAPIC timer interrupt on cpu %d\n", cpu); + /* Switch it off */ + lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); + return; + } + + /* + * the NMI deadlock-detector uses this. + */ +#ifdef CONFIG_X86_64 + add_pda(apic_timer_irqs, 1); +#else + per_cpu(irq_stat, cpu).apic_timer_irqs++; +#endif + + evt->event_handler(evt); +} + +/* + * Local APIC timer interrupt. This is the most natural way for doing + * local interrupts, but local timer interrupts can be emulated by + * broadcast interrupts too. [in case the hw doesn't support APIC timers] + * + * [ if a single-CPU system runs an SMP kernel then we call the local + * interrupt as well. Thus we cannot inline the local irq ... ] + */ +void smp_apic_timer_interrupt(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + + /* + * NOTE! We'd better ACK the irq immediately, + * because timer handling can be slow. + */ + ack_APIC_irq(); + /* + * update_process_times() expects us to have done irq_enter(). + * Besides, if we don't timer interrupts ignore the global + * interrupt lock, which is the WrongThing (tm) to do. + */ +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + local_apic_timer_interrupt(); + irq_exit(); + + set_irq_regs(old_regs); +} + +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + +/* + * Local APIC start and shutdown + */ + +/** + * clear_local_APIC - shutdown the local APIC + * + * This is called, when a CPU is disabled and before rebooting, so the state of + * the local APIC has no dangling leftovers. Also used to cleanout any BIOS + * leftovers during boot. + */ +void clear_local_APIC(void) +{ + int maxlvt; + u32 v; + + /* APIC hasn't been mapped yet */ + if (!apic_phys) + return; + + maxlvt = lapic_get_maxlvt(); + /* + * Masking an LVT entry can trigger a local APIC error + * if the vector is zero. Mask LVTERR first to prevent this. + */ + if (maxlvt >= 3) { + v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ + apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); + } + /* + * Careful: we have to set masks only first to deassert + * any level-triggered sources. + */ + v = apic_read(APIC_LVTT); + apic_write(APIC_LVTT, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT0); + apic_write(APIC_LVT0, v | APIC_LVT_MASKED); + v = apic_read(APIC_LVT1); + apic_write(APIC_LVT1, v | APIC_LVT_MASKED); + if (maxlvt >= 4) { + v = apic_read(APIC_LVTPC); + apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); + } + + /* lets not touch this if we didn't frob it */ +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) + if (maxlvt >= 5) { + v = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); + } +#endif + /* + * Clean APIC state for other OSs: + */ + apic_write(APIC_LVTT, APIC_LVT_MASKED); + apic_write(APIC_LVT0, APIC_LVT_MASKED); + apic_write(APIC_LVT1, APIC_LVT_MASKED); + if (maxlvt >= 3) + apic_write(APIC_LVTERR, APIC_LVT_MASKED); + if (maxlvt >= 4) + apic_write(APIC_LVTPC, APIC_LVT_MASKED); + + /* Integrated APIC (!82489DX) ? */ + if (lapic_is_integrated()) { + if (maxlvt > 3) + /* Clear ESR due to Pentium errata 3AP and 11AP */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } +} + +/** + * disable_local_APIC - clear and disable the local APIC + */ +void disable_local_APIC(void) +{ + unsigned int value; + + clear_local_APIC(); + + /* + * Disable APIC (implies clearing of registers + * for 82489DX!). + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* + * When LAPIC was disabled by the BIOS and enabled by the kernel, + * restore the disabled state. + */ + if (enabled_via_apicbase) { + unsigned int l, h; + + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_ENABLE; + wrmsr(MSR_IA32_APICBASE, l, h); + } +#endif +} + +/* + * If Linux enabled the LAPIC against the BIOS default disable it down before + * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and + * not power-off. Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. + */ +void lapic_shutdown(void) +{ + unsigned long flags; + + if (!cpu_has_apic) + return; + + local_irq_save(flags); + +#ifdef CONFIG_X86_32 + if (!enabled_via_apicbase) + clear_local_APIC(); + else +#endif + disable_local_APIC(); + + + local_irq_restore(flags); +} + +/* + * This is to verify that we're looking at a real local APIC. + * Check these against your board if the CPUs aren't getting + * started for no apparent reason. + */ +int __init verify_local_APIC(void) +{ + unsigned int reg0, reg1; + + /* + * The version register is read-only in a real APIC. + */ + reg0 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); + apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); + reg1 = apic_read(APIC_LVR); + apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); + + /* + * The two version reads above should print the same + * numbers. If the second one is different, then we + * poke at a non-APIC. + */ + if (reg1 != reg0) + return 0; + + /* + * Check if the version looks reasonably. + */ + reg1 = GET_APIC_VERSION(reg0); + if (reg1 == 0x00 || reg1 == 0xff) + return 0; + reg1 = lapic_get_maxlvt(); + if (reg1 < 0x02 || reg1 == 0xff) + return 0; + + /* + * The ID register is read/write in a real APIC. + */ + reg0 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); + apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); + reg1 = apic_read(APIC_ID); + apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); + apic_write(APIC_ID, reg0); + if (reg1 != (reg0 ^ APIC_ID_MASK)) + return 0; + + /* + * The next two are just to see if we have sane values. + * They're only really relevant if we're in Virtual Wire + * compatibility mode, but most boxes are anymore. + */ + reg0 = apic_read(APIC_LVT0); + apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); + reg1 = apic_read(APIC_LVT1); + apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); + + return 1; +} + +/** + * sync_Arb_IDs - synchronize APIC bus arbitration IDs + */ +void __init sync_Arb_IDs(void) +{ + /* + * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not + * needed on AMD. + */ + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) + return; + + /* + * Wait for idle. + */ + apic_wait_icr_idle(); + + apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); + apic_write(APIC_ICR, APIC_DEST_ALLINC | + APIC_INT_LEVELTRIG | APIC_DM_INIT); +} + +/* + * An initial setup of the virtual wire mode. + */ +void __init init_bsp_APIC(void) +{ + unsigned int value; + + /* + * Don't do the setup now if we have a SMP BIOS as the + * through-I/O-APIC virtual wire mode might be active. + */ + if (smp_found_config || !cpu_has_apic) + return; + + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + + /* + * Enable APIC. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV, value); + + /* + * Set up the virtual wire mode. + */ + apic_write(APIC_LVT0, APIC_DM_EXTINT); + value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + apic_write(APIC_LVT1, value); +} + +static void __cpuinit lapic_setup_esr(void) +{ + unsigned long oldvalue, value, maxlvt; + if (lapic_is_integrated() && !esr_disable) { + if (esr_disable) { + /* + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh + */ + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; + } + /* !82489DX */ + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08lx after: 0x%08lx\n", + oldvalue, value); + } else { + printk(KERN_INFO "No ESR for 82489DX.\n"); + } +} + + +/** + * setup_local_APIC - setup the local APIC + */ +void __cpuinit setup_local_APIC(void) +{ + unsigned int value; + int i, j; + +#ifdef CONFIG_X86_32 + /* Pound the ESR really hard over the head with a big hammer - mbligh */ + if (esr_disable) { + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + } +#endif + + preempt_disable(); + + /* + * Double-check whether this APIC is really registered. + * This is meaningless in clustered apic mode, so we skip it. + */ + if (!apic_id_registered()) + BUG(); + + /* + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ + init_apic_ldr(); + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. + */ + value = apic_read(APIC_TASKPRI); + value &= ~APIC_TPRI_MASK; + apic_write(APIC_TASKPRI, value); + + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1< 1) || + (boot_cpu_data.x86 == 15)) + break; + goto no_apic; + case X86_VENDOR_INTEL: + if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || + (boot_cpu_data.x86 == 5 && cpu_has_apic)) + break; + goto no_apic; + default: + goto no_apic; + } + + if (!cpu_has_apic) { + /* + * Over-ride BIOS and try to enable the local APIC only if + * "lapic" specified. + */ + if (!force_enable_local_apic) { + printk(KERN_INFO "Local APIC disabled by BIOS -- " + "you can enable it with \"lapic\"\n"); + return -1; + } + /* + * Some BIOSes disable the local APIC in the APIC_BASE + * MSR. This can only be done in software for Intel P6 or later + * and AMD K7 (Model > 1) or later. + */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (!(l & MSR_IA32_APICBASE_ENABLE)) { + printk(KERN_INFO + "Local APIC disabled by BIOS -- reenabling.\n"); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + wrmsr(MSR_IA32_APICBASE, l, h); + enabled_via_apicbase = 1; + } + } + /* + * The APIC feature bit should now be enabled + * in `cpuid' + */ + features = cpuid_edx(1); + if (!(features & (1 << X86_FEATURE_APIC))) { + printk(KERN_WARNING "Could not enable APIC!\n"); + return -1; + } + set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; + + /* The BIOS may have set up the APIC at some other address */ + rdmsr(MSR_IA32_APICBASE, l, h); + if (l & MSR_IA32_APICBASE_ENABLE) + mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; + + printk(KERN_INFO "Found and enabled local APIC!\n"); + + apic_pm_activate(); + + return 0; + +no_apic: + printk(KERN_INFO "No local APIC present or hardware disabled\n"); + return -1; +} +#endif + +#ifdef CONFIG_X86_64 +void __init early_init_lapic_mapping(void) +{ + unsigned long phys_addr; + + /* + * If no local APIC can be found then go out + * : it means there is no mpatable and MADT + */ + if (!smp_found_config) + return; + + phys_addr = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, phys_addr); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, phys_addr); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + boot_cpu_physical_apicid = read_apic_id(); +} +#endif + +/** + * init_apic_mappings - initialize APIC mappings + */ +void __init init_apic_mappings(void) +{ +#ifdef HAVE_X2APIC + if (x2apic) { + boot_cpu_physical_apicid = read_apic_id(); + return; + } +#endif + + /* + * If no local APIC can be found then set up a fake all + * zeroes page to simulate the local APIC and another + * one for the IO-APIC. + */ + if (!smp_found_config && detect_init_APIC()) { + apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); + apic_phys = __pa(apic_phys); + } else + apic_phys = mp_lapic_addr; + + set_fixmap_nocache(FIX_APIC_BASE, apic_phys); + apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + APIC_BASE, apic_phys); + + /* + * Fetch the APIC ID of the BSP in case we have a + * default configuration (or the MP table is broken). + */ + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = read_apic_id(); +} + +/* + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. + */ +int apic_version[MAX_APICS]; + +int __init APIC_init_uniprocessor(void) +{ +#ifdef CONFIG_X86_64 + if (disable_apic) { + printk(KERN_INFO "Apic disabled\n"); + return -1; + } + if (!cpu_has_apic) { + disable_apic = 1; + printk(KERN_INFO "Apic disabled by BIOS\n"); + return -1; + } +#else + if (!smp_found_config && !cpu_has_apic) + return -1; + + /* + * Complain if the BIOS pretends there is one. + */ + if (!cpu_has_apic && + APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); + return -1; + } +#endif + +#ifdef HAVE_X2APIC + enable_IR_x2apic(); +#endif +#ifdef CONFIG_X86_64 + setup_apic_routing(); +#endif + + verify_local_APIC(); + connect_bsp_APIC(); + +#ifdef CONFIG_X86_64 + apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +#else + /* + * Hack: In case of kdump, after a crash, kernel might be booting + * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid + * might be zero if read from MP tables. Get it from LAPIC. + */ +# ifdef CONFIG_CRASH_DUMP + boot_cpu_physical_apicid = read_apic_id(); +# endif +#endif + physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); + setup_local_APIC(); + +#ifdef CONFIG_X86_64 + /* + * Now enable IO-APICs, actually call clear_IO_APIC + * We need clear_IO_APIC before enabling vector on BP + */ + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); +#endif + +#ifdef CONFIG_X86_IO_APIC + if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) +#endif + localise_nmi_watchdog(); + end_local_APIC_setup(); + +#ifdef CONFIG_X86_IO_APIC + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +# ifdef CONFIG_X86_64 + else + nr_ioapics = 0; +# endif +#endif + +#ifdef CONFIG_X86_64 + setup_boot_APIC_clock(); + check_nmi_watchdog(); +#else + setup_boot_clock(); +#endif + + return 0; +} + +/* + * Local APIC interrupts + */ + +/* + * This interrupt should _never_ happen with our APIC/SMP architecture + */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_spurious_interrupt(void) +#else +void smp_spurious_interrupt(struct pt_regs *regs) +#endif +{ + u32 v; + +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + /* + * Check if this really is a spurious interrupt and ACK it + * if it is a vectored one. Just in case... + * Spurious interrupts should not be ACKed. + */ + v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); + if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) + ack_APIC_irq(); + +#ifdef CONFIG_X86_64 + add_pda(irq_spurious_count, 1); +#else + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ + printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " + "should never happen.\n", smp_processor_id()); + __get_cpu_var(irq_stat).irq_spurious_count++; +#endif + irq_exit(); +} + +/* + * This interrupt should never happen with our APIC/SMP architecture + */ +#ifdef CONFIG_X86_64 +asmlinkage void smp_error_interrupt(void) +#else +void smp_error_interrupt(struct pt_regs *regs) +#endif +{ + u32 v, v1; + +#ifdef CONFIG_X86_64 + exit_idle(); +#endif + irq_enter(); + /* First tickle the hardware, only then report what went on. -- REW */ + v = apic_read(APIC_ESR); + apic_write(APIC_ESR, 0); + v1 = apic_read(APIC_ESR); + ack_APIC_irq(); + atomic_inc(&irq_err_count); + + /* Here is what the APIC error bits mean: + 0: Send CS error + 1: Receive CS error + 2: Send accept error + 3: Receive accept error + 4: Reserved + 5: Send illegal vector + 6: Received illegal vector + 7: Illegal register address + */ + printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", + smp_processor_id(), v , v1); + irq_exit(); +} + +/** + * connect_bsp_APIC - attach the APIC to the interrupt system + */ +void __init connect_bsp_APIC(void) +{ +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + /* + * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's + * local APIC to INT and NMI lines. + */ + apic_printk(APIC_VERBOSE, "leaving PIC mode, " + "enabling APIC mode.\n"); + outb(0x70, 0x22); + outb(0x01, 0x23); + } +#endif + enable_apic_mode(); +} + +/** + * disconnect_bsp_APIC - detach the APIC from the interrupt system + * @virt_wire_setup: indicates, whether virtual wire mode is selected + * + * Virtual wire mode is necessary to deliver legacy interrupts even when the + * APIC is disabled. + */ +void disconnect_bsp_APIC(int virt_wire_setup) +{ + unsigned int value; + +#ifdef CONFIG_X86_32 + if (pic_mode) { + /* + * Put the board back into PIC mode (has an effect only on + * certain older boards). Note that APIC interrupts, including + * IPIs, won't work beyond this point! The only exception are + * INIT IPIs. + */ + apic_printk(APIC_VERBOSE, "disabling APIC mode, " + "entering PIC mode.\n"); + outb(0x70, 0x22); + outb(0x00, 0x23); + return; + } +#endif + + /* Go back to Virtual Wire compatibility mode */ + + /* For the spurious interrupt use vector F, and enable it */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + value |= 0xf; + apic_write(APIC_SPIV, value); + + if (!virt_wire_setup) { + /* + * For LVT0 make it edge triggered, active high, + * external and enabled + */ + value = apic_read(APIC_LVT0); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); + apic_write(APIC_LVT0, value); + } else { + /* Disable LVT0 */ + apic_write(APIC_LVT0, APIC_LVT_MASKED); + } + + /* + * For LVT1 make it edge triggered, active high, + * nmi and enabled + */ + value = apic_read(APIC_LVT1); + value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | + APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | + APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); + value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; + value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); + apic_write(APIC_LVT1, value); +} + +void __cpuinit generic_processor_info(int apicid, int version) +{ + int cpu; + cpumask_t tmp_map; + + /* + * Validate version + */ + if (version == 0x0) { + printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " + "fixing up to 0x10. (tell your hw vendor)\n", + version); + version = 0x10; + } + apic_version[apicid] = version; + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + + physid_set(apicid, phys_cpu_present_map); + if (apicid == boot_cpu_physical_apicid) { + /* + * x86_bios_cpu_apicid is required to have processors listed + * in same order as logical cpu numbers. Hence the first + * entry is BSP, and so on. + */ + cpu = 0; + } + if (apicid > max_physical_apicid) + max_physical_apicid = apicid; + +#ifdef CONFIG_X86_32 + /* + * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y + * but we need to work other dependencies like SMP_SUSPEND etc + * before this can be done without some confusion. + * if (CPU_HOTPLUG_ENABLED || num_processors > 8) + * - Ashok Raj + */ + if (max_physical_apicid >= 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } +#endif + +#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) + /* are we being called early in kernel startup? */ + if (early_per_cpu_ptr(x86_cpu_to_apicid)) { + u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); + u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); + + cpu_to_apicid[cpu] = apicid; + bios_cpu_apicid[cpu] = apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = apicid; + per_cpu(x86_bios_cpu_apicid, cpu) = apicid; + } +#endif + + cpu_set(cpu, cpu_possible_map); + cpu_set(cpu, cpu_present_map); +} + +#ifdef CONFIG_X86_64 +int hard_smp_processor_id(void) +{ + return read_apic_id(); +} +#endif + +/* + * Power management + */ +#ifdef CONFIG_PM + +static struct { + /* + * 'active' is true if the local APIC was enabled by us and + * not the BIOS; this signifies that we are also responsible + * for disabling it before entering apm/acpi suspend + */ + int active; + /* r/w apic fields */ + unsigned int apic_id; + unsigned int apic_taskpri; + unsigned int apic_ldr; + unsigned int apic_dfr; + unsigned int apic_spiv; + unsigned int apic_lvtt; + unsigned int apic_lvtpc; + unsigned int apic_lvt0; + unsigned int apic_lvt1; + unsigned int apic_lvterr; + unsigned int apic_tmict; + unsigned int apic_tdcr; + unsigned int apic_thmr; +} apic_pm_state; + +static int lapic_suspend(struct sys_device *dev, pm_message_t state) +{ + unsigned long flags; + int maxlvt; + + if (!apic_pm_state.active) + return 0; + + maxlvt = lapic_get_maxlvt(); + + apic_pm_state.apic_id = apic_read(APIC_ID); + apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); + apic_pm_state.apic_ldr = apic_read(APIC_LDR); + apic_pm_state.apic_dfr = apic_read(APIC_DFR); + apic_pm_state.apic_spiv = apic_read(APIC_SPIV); + apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); + if (maxlvt >= 4) + apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); + apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); + apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); + apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); + apic_pm_state.apic_tmict = apic_read(APIC_TMICT); + apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) + if (maxlvt >= 5) + apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +#endif + + local_irq_save(flags); + disable_local_APIC(); + local_irq_restore(flags); + return 0; +} + +static int lapic_resume(struct sys_device *dev) +{ + unsigned int l, h; + unsigned long flags; + int maxlvt; + + if (!apic_pm_state.active) + return 0; + + maxlvt = lapic_get_maxlvt(); + + local_irq_save(flags); + +#ifdef HAVE_X2APIC + if (x2apic) + enable_x2apic(); + else +#endif + { + /* + * Make sure the APICBASE points to the right address + * + * FIXME! This will be wrong if we ever support suspend on + * SMP! We'll need to do this as part of the CPU restore! + */ + rdmsr(MSR_IA32_APICBASE, l, h); + l &= ~MSR_IA32_APICBASE_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; + wrmsr(MSR_IA32_APICBASE, l, h); + } + + apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); + apic_write(APIC_ID, apic_pm_state.apic_id); + apic_write(APIC_DFR, apic_pm_state.apic_dfr); + apic_write(APIC_LDR, apic_pm_state.apic_ldr); + apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); + apic_write(APIC_SPIV, apic_pm_state.apic_spiv); + apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); + apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); +#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) + if (maxlvt >= 5) + apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +#endif + if (maxlvt >= 4) + apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); + apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); + apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); + apic_write(APIC_TMICT, apic_pm_state.apic_tmict); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + + local_irq_restore(flags); + + return 0; +} + +/* + * This device has no shutdown method - fully functioning local APICs + * are needed on every CPU up until machine_halt/restart/poweroff. + */ + +static struct sysdev_class lapic_sysclass = { + .name = "lapic", + .resume = lapic_resume, + .suspend = lapic_suspend, +}; + +static struct sys_device device_lapic = { + .id = 0, + .cls = &lapic_sysclass, +}; + +static void __cpuinit apic_pm_activate(void) +{ + apic_pm_state.active = 1; +} + +static int __init init_lapic_sysfs(void) +{ + int error; + + if (!cpu_has_apic) + return 0; + /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ + + error = sysdev_class_register(&lapic_sysclass); + if (!error) + error = sysdev_register(&device_lapic); + return error; +} +device_initcall(init_lapic_sysfs); + +#else /* CONFIG_PM */ + +static void apic_pm_activate(void) { } + +#endif /* CONFIG_PM */ + +#ifdef CONFIG_X86_64 +/* + * apic_is_clustered_box() -- Check if we can expect good TSC + * + * Thus far, the major user of this is IBM's Summit2 series: + * + * Clustered boxes may have unsynced TSC problems if they are + * multi-chassis. Use available data to take a good guess. + * If in doubt, go HPET. + */ +__cpuinit int apic_is_clustered_box(void) +{ + int i, clusters, zeros; + unsigned id; + u16 *bios_cpu_apicid; + DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); + + /* + * there is not this kind of box with AMD CPU yet. + * Some AMD box with quadcore cpu and 8 sockets apicid + * will be [4, 0x23] or [8, 0x27] could be thought to + * vsmp box still need checking... + */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) + return 0; + + bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); + bitmap_zero(clustermap, NUM_APIC_CLUSTERS); + + for (i = 0; i < NR_CPUS; i++) { + /* are we being called early in kernel startup? */ + if (bios_cpu_apicid) { + id = bios_cpu_apicid[i]; + } + else if (i < nr_cpu_ids) { + if (cpu_present(i)) + id = per_cpu(x86_bios_cpu_apicid, i); + else + continue; + } + else + break; + + if (id != BAD_APICID) + __set_bit(APIC_CLUSTERID(id), clustermap); + } + + /* Problem: Partially populated chassis may not have CPUs in some of + * the APIC clusters they have been allocated. Only present CPUs have + * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. + * Since clusters are allocated sequentially, count zeros only if + * they are bounded by ones. + */ + clusters = 0; + zeros = 0; + for (i = 0; i < NUM_APIC_CLUSTERS; i++) { + if (test_bit(i, clustermap)) { + clusters += 1 + zeros; + zeros = 0; + } else + ++zeros; + } + + /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are + * not guaranteed to be synced between boards + */ + if (is_vsmp_box() && clusters > 1) + return 1; + + /* + * If clusters > 2, then should be multi-chassis. + * May have to revisit this when multi-core + hyperthreaded CPUs come + * out, but AFAIK this will work even for them. + */ + return (clusters > 2); +} +#endif + +/* + * APIC command line parameters + */ +static int __init setup_disableapic(char *arg) +{ + disable_apic = 1; + setup_clear_cpu_cap(X86_FEATURE_APIC); + return 0; +} +early_param("disableapic", setup_disableapic); + +/* same as disableapic, for compatibility */ +static int __init setup_nolapic(char *arg) +{ + return setup_disableapic(arg); +} +early_param("nolapic", setup_nolapic); + +static int __init parse_lapic_timer_c2_ok(char *arg) +{ + local_apic_timer_c2_ok = 1; + return 0; +} +early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); + +static int __init parse_disable_apic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("noapictimer", parse_disable_apic_timer); + +static int __init parse_nolapic_timer(char *arg) +{ + disable_apic_timer = 1; + return 0; +} +early_param("nolapic_timer", parse_nolapic_timer); + +static int __init apic_set_verbosity(char *arg) +{ + if (!arg) { +#ifdef CONFIG_X86_64 + skip_ioapic_setup = 0; + return 0; +#endif + return -EINVAL; + } + + if (strcmp("debug", arg) == 0) + apic_verbosity = APIC_DEBUG; + else if (strcmp("verbose", arg) == 0) + apic_verbosity = APIC_VERBOSE; + else { + printk(KERN_WARNING "APIC Verbosity level %s not recognised" + " use apic=verbose or apic=debug\n", arg); + return -EINVAL; + } + + return 0; +} +early_param("apic", apic_set_verbosity); + +static int __init lapic_insert_resource(void) +{ + if (!apic_phys) + return -1; + + /* Put local APIC into the resource map. */ + lapic_resource.start = apic_phys; + lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; + insert_resource(&iomem_resource, &lapic_resource); + + return 0; +} + +/* + * need call insert after e820_reserve_resources() + * that is using request_resource + */ +late_initcall(lapic_insert_resource); diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c deleted file mode 100644 index 0b11d6e3f09..00000000000 --- a/arch/x86/kernel/apic_32.c +++ /dev/null @@ -1,2312 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error -#endif - -#ifdef CONFIG_X86_32 -/* - * Knob to control our willingness to enable the local APIC. - * - * +1=force-enable - */ -static int force_enable_local_apic; -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - -#endif - -#ifdef CONFIG_X86_64 -static int apic_calibrate_pmtmr __initdata; -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - -#ifdef CONFIG_X86_64 -#define HAVE_X2APIC -#endif - -#ifdef HAVE_X2APIC -int x2apic; -/* x2apic enabled before OS handover */ -int x2apic_preenabled; -int disable_x2apic; -static __init int setup_nox2apic(char *str) -{ - disable_x2apic = 1; - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - return 0; -} -early_param("nox2apic", setup_nox2apic); -#endif - -unsigned long mp_lapic_addr; -int disable_apic; -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - -/* - * Debug level, exported for io_apic.c - */ -unsigned int apic_verbosity; - -int pic_mode; - -/* Have we found an MP table */ -int smp_found_config; - -static struct resource lapic_resource = { - .name = "Local APIC", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -static unsigned int calibration_result; - -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); -static void apic_pm_activate(void); - -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -static unsigned long apic_phys; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a separate chip - */ -static inline int lapic_is_integrated(void) -{ -#ifdef CONFIG_X86_64 - return 1; -#else - return APIC_INTEGRATED(lapic_get_version()); -#endif -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -u32 safe_xapic_wait_icr_idle(void) -{ - u32 send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -void xapic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 xapic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -#ifdef HAVE_X2APIC -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; -#endif - -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void __cpuinit enable_NMI_through_LVT0(void) -{ - unsigned int v; - - /* unmask and set to NMI */ - v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - - apic_write(APIC_LVT0, v); -} - -#ifdef CONFIG_X86_32 -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 0xff : 0xf; -} -#endif - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v; - - v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else -#define APIC_DIVISOR 16 -#endif - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -} - -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - * - * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} -EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(cpumask_t mask) -{ -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - -/* - * Setup the local APIC timer for this CPU. Copy the initilized values - * of the boot CPU and register the clock event in the framework. - */ -static void __cpuinit setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); - - clockevents_register_device(levt); -} - -#ifdef CONFIG_X86_64 -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -#define TICK_COUNT 100000000 - -static int __init calibrate_APIC_clock(void) -{ - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! - */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); - - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); - - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (result * APIC_DIVISOR) / HZ; - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - return 0; -} - -#else -/* - * In this functions we calibrate APIC bus clocks to the external timer. - * - * We want to do the calibration only once since we want to have local timer - * irqs syncron. CPUs connected by the same APIC bus have the very same bus - * frequency. - * - * This was previously done by reading the PIT/HPET and waiting for a wrap - * around to find out, that a tick has elapsed. I have a box, where the PIT - * readout is broken, so it never gets out of the wait loop again. This was - * also reported by others. - * - * Monitoring the jiffies value is inaccurate and the clockevents - * infrastructure allows us to do a simple substitution of the interrupt - * handler. - * - * The calibration routine also uses the pm_timer when possible, as the PIT - * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes - * back to normal later in the boot process). - */ - -#define LAPIC_CAL_LOOPS (HZ/10) - -static __initdata int lapic_cal_loops = -1; -static __initdata long lapic_cal_t1, lapic_cal_t2; -static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; -static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; - -/* - * Temporary interrupt handler. - */ -static void __init lapic_cal_handler(struct clock_event_device *dev) -{ - unsigned long long tsc = 0; - long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); - - if (cpu_has_tsc) - rdtscll(tsc); - - switch (lapic_cal_loops++) { - case 0: - lapic_cal_t1 = tapic; - lapic_cal_tsc1 = tsc; - lapic_cal_pm1 = pm; - lapic_cal_j1 = jiffies; - break; - - case LAPIC_CAL_LOOPS: - lapic_cal_t2 = tapic; - lapic_cal_tsc2 = tsc; - if (pm < lapic_cal_pm1) - pm += ACPI_PM_OVRRUN; - lapic_cal_pm2 = pm; - lapic_cal_j2 = jiffies; - break; - } -} - -static int __init calibrate_APIC_clock(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; - void (*real_handler)(struct clock_event_device *dev); - unsigned long deltaj; - long delta, deltapm; - int pm_referenced = 0; - - local_irq_disable(); - - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - - /* - * Setup the APIC counter to 1e9. There is no way the lapic - * can underflow in the 100ms detection time frame - */ - __setup_APIC_LVTT(1000000000, 0, 0); - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - - /* Build delta t1-t2 as apic timer counts down */ - delta = lapic_cal_t1 - lapic_cal_t2; - apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; - - apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - calibration_result); - - if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); - apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); - } - - apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%u.%04u MHz.\n", - calibration_result / (1000000 / HZ), - calibration_result % (1000000 / HZ)); - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - local_irq_enable(); - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - - /* We trust the pm timer based calibration */ - if (!pm_referenced) { - apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); - - /* - * Setup the apic timer manually - */ - levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); - lapic_cal_loops = -1; - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); - - local_irq_enable(); - - /* Jiffies delta */ - deltaj = lapic_cal_j2 - lapic_cal_j1; - apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); - - /* Check, if the jiffies result is consistent */ - if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) - apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); - else - levt->features |= CLOCK_EVT_FEAT_DUMMY; - } else - local_irq_enable(); - - if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - printk(KERN_WARNING - "APIC timer disabled due to verification failure.\n"); - return -1; - } - - return 0; -} - -#endif - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) { - lapic_clockevent.mult = 1; - setup_APIC_timer(); - } - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - if (calibrate_APIC_clock()) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __cpuinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - /* - * the NMI deadlock-detector uses this. - */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt; - u32 v; - - /* APIC hasn't been mapped yet */ - if (!apic_phys) - return; - - maxlvt = lapic_get_maxlvt(); - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif - /* - * Clean APIC state for other OSs: - */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned int value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -#endif -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic) - return; - - local_irq_save(flags); - -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif - disable_local_APIC(); - - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned int value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else -#endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. - */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT1, value); -} - -static void __cpuinit lapic_setup_esr(void) -{ - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); - } -} - - -/** - * setup_local_APIC - setup the local APIC - */ -void __cpuinit setup_local_APIC(void) -{ - unsigned int value; - int i, j; - -#ifdef CONFIG_X86_32 - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } -#endif - - preempt_disable(); - - /* - * Double-check whether this APIC is really registered. - * This is meaningless in clustered apic mode, so we skip it. - */ - if (!apic_id_registered()) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - init_apic_ldr(); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1< 1) || - (boot_cpu_data.x86 == 15)) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Over-ride BIOS and try to enable the local APIC only if - * "lapic" specified. - */ - if (!force_enable_local_apic) { - printk(KERN_INFO "Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); - return -1; - } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - printk(KERN_INFO - "Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - printk(KERN_WARNING "Could not enable APIC!\n"); - return -1; - } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - printk(KERN_INFO "Found and enabled local APIC!\n"); - - apic_pm_activate(); - - return 0; - -no_apic: - printk(KERN_INFO "No local APIC present or hardware disabled\n"); - return -1; -} -#endif - -#ifdef CONFIG_X86_64 -void __init early_init_lapic_mapping(void) -{ - unsigned long phys_addr; - - /* - * If no local APIC can be found then go out - * : it means there is no mpatable and MADT - */ - if (!smp_found_config) - return; - - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - boot_cpu_physical_apicid = read_apic_id(); -} -#endif - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ -#ifdef HAVE_X2APIC - if (x2apic) { - boot_cpu_physical_apicid = read_apic_id(); - return; - } -#endif - - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = read_apic_id(); -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int apic_version[MAX_APICS]; - -int __init APIC_init_uniprocessor(void) -{ -#ifdef CONFIG_X86_64 - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; - } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - return -1; - } -#endif - -#ifdef HAVE_X2APIC - enable_IR_x2apic(); -#endif -#ifdef CONFIG_X86_64 - setup_apic_routing(); -#endif - - verify_local_APIC(); - connect_bsp_APIC(); - -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. - */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); - -#ifdef CONFIG_X86_64 - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling vector on BP - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); -#endif - -#ifdef CONFIG_X86_IO_APIC - if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) -#endif - localise_nmi_watchdog(); - end_local_APIC_setup(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -# ifdef CONFIG_X86_64 - else - nr_ioapics = 0; -# endif -#endif - -#ifdef CONFIG_X86_64 - setup_boot_APIC_clock(); - check_nmi_watchdog(); -#else - setup_boot_clock(); -#endif - - return 0; -} - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_spurious_interrupt(void) -#else -void smp_spurious_interrupt(struct pt_regs *regs) -#endif -{ - u32 v; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - -#ifdef CONFIG_X86_64 - add_pda(irq_spurious_count, 1); -#else - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); - __get_cpu_var(irq_stat).irq_spurious_count++; -#endif - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_error_interrupt(void) -#else -void smp_error_interrupt(struct pt_regs *regs) -#endif -{ - u32 v, v1; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -#endif - enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - unsigned int value; - -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - return; - } -#endif - - /* Go back to Virtual Wire compatibility mode */ - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); -} - -void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu; - cpumask_t tmp_map; - - /* - * Validate version - */ - if (version == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - physid_set(apicid, phys_cpu_present_map); - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - } - if (apicid > max_physical_apicid) - max_physical_apicid = apicid; - -#ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } -#endif - - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); -} - -#ifdef CONFIG_X86_64 -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} -#endif - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - local_irq_save(flags); - -#ifdef HAVE_X2APIC - if (x2apic) - enable_x2apic(); - else -#endif - { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - local_irq_restore(flags); - - return 0; -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct sysdev_class lapic_sysclass = { - .name = "lapic", - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -#ifdef CONFIG_X86_64 -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < NR_CPUS; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } - else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } - else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. - */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) - return 1; - - /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. - */ - return (clusters > 2); -} -#endif - -/* - * APIC command line parameters - */ -static int __init setup_disableapic(char *arg) -{ - disable_apic = 1; - setup_clear_cpu_cap(X86_FEATURE_APIC); - return 0; -} -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ - return setup_disableapic(arg); -} -early_param("nolapic", setup_nolapic); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("nolapic_timer", parse_nolapic_timer); - -static int __init apic_set_verbosity(char *arg) -{ - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - ioapic_force = 1; - return 0; -#endif - return -EINVAL; - } - - if (strcmp("debug", arg) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - -static int __init lapic_insert_resource(void) -{ - if (!apic_phys) - return -1; - - /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; - insert_resource(&iomem_resource, &lapic_resource); - - return 0; -} - -/* - * need call insert after e820_reserve_resources() - * that is using request_resource - */ -late_initcall(lapic_insert_resource); diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c deleted file mode 100644 index 6e99af5ce67..00000000000 --- a/arch/x86/kernel/apic_64.c +++ /dev/null @@ -1,2311 +0,0 @@ -/* - * Local APIC handling, local APIC timers - * - * (c) 1999, 2000 Ingo Molnar - * - * Fixes - * Maciej W. Rozycki : Bits for genuine 82489DX APICs; - * thanks to Eric Gilmore - * and Rolf G. Tews - * for testing these extensively. - * Maciej W. Rozycki : Various updates and fixes. - * Mikael Pettersson : Power Management for UP-APIC. - * Pavel Machek and - * Mikael Pettersson : PM converted to driver model. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* - * Sanity check - */ -#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) -# error SPURIOUS_APIC_VECTOR definition error -#endif - -#ifdef CONFIG_X86_32 -/* - * Knob to control our willingness to enable the local APIC. - * - * +1=force-enable - */ -static int force_enable_local_apic; -/* - * APIC command line parameters - */ -static int __init parse_lapic(char *arg) -{ - force_enable_local_apic = 1; - return 0; -} -early_param("lapic", parse_lapic); -/* Local APIC was disabled by the BIOS and enabled by the kernel */ -static int enabled_via_apicbase; - -#endif - -#ifdef CONFIG_X86_64 -static int apic_calibrate_pmtmr __initdata; -static __init int setup_apicpmtimer(char *s) -{ - apic_calibrate_pmtmr = 1; - notsc_setup(NULL); - return 0; -} -__setup("apicpmtimer", setup_apicpmtimer); -#endif - -#ifdef CONFIG_X86_64 -#define HAVE_X2APIC -#endif - -#ifdef HAVE_X2APIC -int x2apic; -/* x2apic enabled before OS handover */ -int x2apic_preenabled; -int disable_x2apic; -static __init int setup_nox2apic(char *str) -{ - disable_x2apic = 1; - setup_clear_cpu_cap(X86_FEATURE_X2APIC); - return 0; -} -early_param("nox2apic", setup_nox2apic); -#endif - -unsigned long mp_lapic_addr; -int disable_apic; -/* Disable local APIC timer from the kernel commandline or via dmi quirk */ -static int disable_apic_timer __cpuinitdata; -/* Local APIC timer works in C2 */ -int local_apic_timer_c2_ok; -EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); - -int first_system_vector = 0xfe; - -char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; - -/* - * Debug level, exported for io_apic.c - */ -unsigned int apic_verbosity; - -int pic_mode; - -/* Have we found an MP table */ -int smp_found_config; - -static struct resource lapic_resource = { - .name = "Local APIC", - .flags = IORESOURCE_MEM | IORESOURCE_BUSY, -}; - -static unsigned int calibration_result; - -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt); -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt); -static void lapic_timer_broadcast(cpumask_t mask); -static void apic_pm_activate(void); - -/* - * The local apic timer can be used for any function which is CPU local. - */ -static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT - | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_mode = lapic_timer_setup, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, -}; -static DEFINE_PER_CPU(struct clock_event_device, lapic_events); - -static unsigned long apic_phys; - -/* - * Get the LAPIC version - */ -static inline int lapic_get_version(void) -{ - return GET_APIC_VERSION(apic_read(APIC_LVR)); -} - -/* - * Check, if the APIC is integrated or a separate chip - */ -static inline int lapic_is_integrated(void) -{ -#ifdef CONFIG_X86_64 - return 1; -#else - return APIC_INTEGRATED(lapic_get_version()); -#endif -} - -/* - * Check, whether this is a modern or a first generation APIC - */ -static int modern_apic(void) -{ - /* AMD systems use old APIC versions, so check the CPU */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && - boot_cpu_data.x86 >= 0xf) - return 1; - return lapic_get_version() >= 0x14; -} - -/* - * Paravirt kernels also might be using these below ops. So we still - * use generic apic_read()/apic_write(), which might be pointing to different - * ops in PARAVIRT case. - */ -void xapic_wait_icr_idle(void) -{ - while (apic_read(APIC_ICR) & APIC_ICR_BUSY) - cpu_relax(); -} - -u32 safe_xapic_wait_icr_idle(void) -{ - u32 send_status; - int timeout; - - timeout = 0; - do { - send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; - if (!send_status) - break; - udelay(100); - } while (timeout++ < 1000); - - return send_status; -} - -void xapic_icr_write(u32 low, u32 id) -{ - apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); - apic_write(APIC_ICR, low); -} - -u64 xapic_icr_read(void) -{ - u32 icr1, icr2; - - icr2 = apic_read(APIC_ICR2); - icr1 = apic_read(APIC_ICR); - - return icr1 | ((u64)icr2 << 32); -} - -static struct apic_ops xapic_ops = { - .read = native_apic_mem_read, - .write = native_apic_mem_write, - .icr_read = xapic_icr_read, - .icr_write = xapic_icr_write, - .wait_icr_idle = xapic_wait_icr_idle, - .safe_wait_icr_idle = safe_xapic_wait_icr_idle, -}; - -struct apic_ops __read_mostly *apic_ops = &xapic_ops; -EXPORT_SYMBOL_GPL(apic_ops); - -#ifdef HAVE_X2APIC -static void x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return; -} - -static u32 safe_x2apic_wait_icr_idle(void) -{ - /* no need to wait for icr idle in x2apic */ - return 0; -} - -void x2apic_icr_write(u32 low, u32 id) -{ - wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); -} - -u64 x2apic_icr_read(void) -{ - unsigned long val; - - rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); - return val; -} - -static struct apic_ops x2apic_ops = { - .read = native_apic_msr_read, - .write = native_apic_msr_write, - .icr_read = x2apic_icr_read, - .icr_write = x2apic_icr_write, - .wait_icr_idle = x2apic_wait_icr_idle, - .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, -}; -#endif - -/** - * enable_NMI_through_LVT0 - enable NMI through local vector table 0 - */ -void __cpuinit enable_NMI_through_LVT0(void) -{ - unsigned int v; - - /* unmask and set to NMI */ - v = APIC_DM_NMI; - - /* Level triggered for 82489DX (32bit mode) */ - if (!lapic_is_integrated()) - v |= APIC_LVT_LEVEL_TRIGGER; - - apic_write(APIC_LVT0, v); -} - -#ifdef CONFIG_X86_32 -/** - * get_physical_broadcast - Get number of physical broadcast IDs - */ -int get_physical_broadcast(void) -{ - return modern_apic() ? 0xff : 0xf; -} -#endif - -/** - * lapic_get_maxlvt - get the maximum number of local vector table entries - */ -int lapic_get_maxlvt(void) -{ - unsigned int v; - - v = apic_read(APIC_LVR); - /* - * - we always have APIC integrated on 64bit mode - * - 82489DXs do not report # of LVT entries - */ - return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; -} - -/* - * Local APIC timer - */ - -/* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else -#define APIC_DIVISOR 16 -#endif - -/* - * This function sets up the local APIC timer, with a timeout of - * 'clocks' APIC bus clock. During calibration we actually call - * this function twice on the boot CPU, once with a bogus timeout - * value, second time for real. The other (noncalibrating) CPUs - * call this function only once, with the real, calibrated value. - * - * We do reads before writes even if unnecessary, to get around the - * P5 APIC double write bug. - */ -static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) -{ - unsigned int lvtt_value, tmp_value; - - lvtt_value = LOCAL_TIMER_VECTOR; - if (!oneshot) - lvtt_value |= APIC_LVT_TIMER_PERIODIC; - if (!lapic_is_integrated()) - lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); - - if (!irqen) - lvtt_value |= APIC_LVT_MASKED; - - apic_write(APIC_LVTT, lvtt_value); - - /* - * Divide PICLK by 16 - */ - tmp_value = apic_read(APIC_TDCR); - apic_write(APIC_TDCR, - (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | - APIC_TDR_DIV_16); - - if (!oneshot) - apic_write(APIC_TMICT, clocks / APIC_DIVISOR); -} - -/* - * Setup extended LVT, AMD specific (K8, family 10h) - * - * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and - * MCE interrupts are supported. Thus MCE offset must be set to 0. - * - * If mask=1, the LVT entry does not generate interrupts while mask=0 - * enables the vector. See also the BKDGs. - */ - -#define APIC_EILVT_LVTOFF_MCE 0 -#define APIC_EILVT_LVTOFF_IBS 1 - -static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) -{ - unsigned long reg = (lvt_off << 4) + APIC_EILVT0; - unsigned int v = (mask << 16) | (msg_type << 8) | vector; - - apic_write(reg, v); -} - -u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_MCE; -} - -u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) -{ - setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); - return APIC_EILVT_LVTOFF_IBS; -} -EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); - -/* - * Program the next event, relative to now - */ -static int lapic_next_event(unsigned long delta, - struct clock_event_device *evt) -{ - apic_write(APIC_TMICT, delta); - return 0; -} - -/* - * Setup the lapic timer in periodic or oneshot mode - */ -static void lapic_timer_setup(enum clock_event_mode mode, - struct clock_event_device *evt) -{ - unsigned long flags; - unsigned int v; - - /* Lapic used as dummy for broadcast ? */ - if (evt->features & CLOCK_EVT_FEAT_DUMMY) - return; - - local_irq_save(flags); - - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - case CLOCK_EVT_MODE_ONESHOT: - __setup_APIC_LVTT(calibration_result, - mode != CLOCK_EVT_MODE_PERIODIC, 1); - break; - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - v = apic_read(APIC_LVTT); - v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); - apic_write(APIC_LVTT, v); - break; - case CLOCK_EVT_MODE_RESUME: - /* Nothing to do here */ - break; - } - - local_irq_restore(flags); -} - -/* - * Local APIC timer broadcast function - */ -static void lapic_timer_broadcast(cpumask_t mask) -{ -#ifdef CONFIG_SMP - send_IPI_mask(mask, LOCAL_TIMER_VECTOR); -#endif -} - -/* - * Setup the local APIC timer for this CPU. Copy the initilized values - * of the boot CPU and register the clock event in the framework. - */ -static void __cpuinit setup_APIC_timer(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - - memcpy(levt, &lapic_clockevent, sizeof(*levt)); - levt->cpumask = cpumask_of_cpu(smp_processor_id()); - - clockevents_register_device(levt); -} - -#ifdef CONFIG_X86_64 -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -#define TICK_COUNT 100000000 - -static int __init calibrate_APIC_clock(void) -{ - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! - */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); - - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); - - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (result * APIC_DIVISOR) / HZ; - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - return 0; -} - -#else -/* - * In this functions we calibrate APIC bus clocks to the external timer. - * - * We want to do the calibration only once since we want to have local timer - * irqs syncron. CPUs connected by the same APIC bus have the very same bus - * frequency. - * - * This was previously done by reading the PIT/HPET and waiting for a wrap - * around to find out, that a tick has elapsed. I have a box, where the PIT - * readout is broken, so it never gets out of the wait loop again. This was - * also reported by others. - * - * Monitoring the jiffies value is inaccurate and the clockevents - * infrastructure allows us to do a simple substitution of the interrupt - * handler. - * - * The calibration routine also uses the pm_timer when possible, as the PIT - * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes - * back to normal later in the boot process). - */ - -#define LAPIC_CAL_LOOPS (HZ/10) - -static __initdata int lapic_cal_loops = -1; -static __initdata long lapic_cal_t1, lapic_cal_t2; -static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; -static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; -static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; - -/* - * Temporary interrupt handler. - */ -static void __init lapic_cal_handler(struct clock_event_device *dev) -{ - unsigned long long tsc = 0; - long tapic = apic_read(APIC_TMCCT); - unsigned long pm = acpi_pm_read_early(); - - if (cpu_has_tsc) - rdtscll(tsc); - - switch (lapic_cal_loops++) { - case 0: - lapic_cal_t1 = tapic; - lapic_cal_tsc1 = tsc; - lapic_cal_pm1 = pm; - lapic_cal_j1 = jiffies; - break; - - case LAPIC_CAL_LOOPS: - lapic_cal_t2 = tapic; - lapic_cal_tsc2 = tsc; - if (pm < lapic_cal_pm1) - pm += ACPI_PM_OVRRUN; - lapic_cal_pm2 = pm; - lapic_cal_j2 = jiffies; - break; - } -} - -static int __init calibrate_APIC_clock(void) -{ - struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; - void (*real_handler)(struct clock_event_device *dev); - unsigned long deltaj; - long delta, deltapm; - int pm_referenced = 0; - - local_irq_disable(); - - /* Replace the global interrupt handler */ - real_handler = global_clock_event->event_handler; - global_clock_event->event_handler = lapic_cal_handler; - - /* - * Setup the APIC counter to 1e9. There is no way the lapic - * can underflow in the 100ms detection time frame - */ - __setup_APIC_LVTT(1000000000, 0, 0); - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Restore the real event handler */ - global_clock_event->event_handler = real_handler; - - /* Build delta t1-t2 as apic timer counts down */ - delta = lapic_cal_t1 - lapic_cal_t2; - apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); - - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; - - apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); - apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); - apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - calibration_result); - - if (cpu_has_tsc) { - delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); - apic_printk(APIC_VERBOSE, "..... CPU clock speed is " - "%ld.%04ld MHz.\n", - (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), - (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); - } - - apic_printk(APIC_VERBOSE, "..... host bus clock speed is " - "%u.%04u MHz.\n", - calibration_result / (1000000 / HZ), - calibration_result % (1000000 / HZ)); - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - local_irq_enable(); - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - - /* We trust the pm timer based calibration */ - if (!pm_referenced) { - apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); - - /* - * Setup the apic timer manually - */ - levt->event_handler = lapic_cal_handler; - lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); - lapic_cal_loops = -1; - - /* Let the interrupts run */ - local_irq_enable(); - - while (lapic_cal_loops <= LAPIC_CAL_LOOPS) - cpu_relax(); - - local_irq_disable(); - - /* Stop the lapic timer */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); - - local_irq_enable(); - - /* Jiffies delta */ - deltaj = lapic_cal_j2 - lapic_cal_j1; - apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); - - /* Check, if the jiffies result is consistent */ - if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) - apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); - else - levt->features |= CLOCK_EVT_FEAT_DUMMY; - } else - local_irq_enable(); - - if (levt->features & CLOCK_EVT_FEAT_DUMMY) { - printk(KERN_WARNING - "APIC timer disabled due to verification failure.\n"); - return -1; - } - - return 0; -} - -#endif - -/* - * Setup the boot APIC - * - * Calibrate and verify the result. - */ -void __init setup_boot_APIC_clock(void) -{ - /* - * The local apic timer can be disabled via the kernel - * commandline or from the CPU detection code. Register the lapic - * timer as a dummy clock event source on SMP systems, so the - * broadcast mechanism is used. On UP systems simply ignore it. - */ - if (disable_apic_timer) { - printk(KERN_INFO "Disabling APIC timer\n"); - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) { - lapic_clockevent.mult = 1; - setup_APIC_timer(); - } - return; - } - - apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" - "calibrating APIC timer ...\n"); - - if (calibrate_APIC_clock()) { - /* No broadcast on UP ! */ - if (num_possible_cpus() > 1) - setup_APIC_timer(); - return; - } - - /* - * If nmi_watchdog is set to IO_APIC, we need the - * PIT/HPET going. Otherwise register lapic as a dummy - * device. - */ - if (nmi_watchdog != NMI_IO_APIC) - lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; - else - printk(KERN_WARNING "APIC timer registered as dummy," - " due to nmi_watchdog=%d!\n", nmi_watchdog); - - /* Setup the lapic or request the broadcast */ - setup_APIC_timer(); -} - -void __cpuinit setup_secondary_APIC_clock(void) -{ - setup_APIC_timer(); -} - -/* - * The guts of the apic timer interrupt - */ -static void local_apic_timer_interrupt(void) -{ - int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(lapic_events, cpu); - - /* - * Normally we should not be here till LAPIC has been initialized but - * in some cases like kdump, its possible that there is a pending LAPIC - * timer interrupt from previous kernel's context and is delivered in - * new kernel the moment interrupts are enabled. - * - * Interrupts are enabled early and LAPIC is setup much later, hence - * its possible that when we get here evt->event_handler is NULL. - * Check for event_handler being NULL and discard the interrupt as - * spurious. - */ - if (!evt->event_handler) { - printk(KERN_WARNING - "Spurious LAPIC timer interrupt on cpu %d\n", cpu); - /* Switch it off */ - lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); - return; - } - - /* - * the NMI deadlock-detector uses this. - */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif - - evt->event_handler(evt); -} - -/* - * Local APIC timer interrupt. This is the most natural way for doing - * local interrupts, but local timer interrupts can be emulated by - * broadcast interrupts too. [in case the hw doesn't support APIC timers] - * - * [ if a single-CPU system runs an SMP kernel then we call the local - * interrupt as well. Thus we cannot inline the local irq ... ] - */ -void smp_apic_timer_interrupt(struct pt_regs *regs) -{ - struct pt_regs *old_regs = set_irq_regs(regs); - - /* - * NOTE! We'd better ACK the irq immediately, - * because timer handling can be slow. - */ - ack_APIC_irq(); - /* - * update_process_times() expects us to have done irq_enter(). - * Besides, if we don't timer interrupts ignore the global - * interrupt lock, which is the WrongThing (tm) to do. - */ -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - local_apic_timer_interrupt(); - irq_exit(); - - set_irq_regs(old_regs); -} - -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} - -/* - * Local APIC start and shutdown - */ - -/** - * clear_local_APIC - shutdown the local APIC - * - * This is called, when a CPU is disabled and before rebooting, so the state of - * the local APIC has no dangling leftovers. Also used to cleanout any BIOS - * leftovers during boot. - */ -void clear_local_APIC(void) -{ - int maxlvt; - u32 v; - - /* APIC hasn't been mapped yet */ - if (!apic_phys) - return; - - maxlvt = lapic_get_maxlvt(); - /* - * Masking an LVT entry can trigger a local APIC error - * if the vector is zero. Mask LVTERR first to prevent this. - */ - if (maxlvt >= 3) { - v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ - apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); - } - /* - * Careful: we have to set masks only first to deassert - * any level-triggered sources. - */ - v = apic_read(APIC_LVTT); - apic_write(APIC_LVTT, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT0); - apic_write(APIC_LVT0, v | APIC_LVT_MASKED); - v = apic_read(APIC_LVT1); - apic_write(APIC_LVT1, v | APIC_LVT_MASKED); - if (maxlvt >= 4) { - v = apic_read(APIC_LVTPC); - apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); - } - - /* lets not touch this if we didn't frob it */ -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL) - if (maxlvt >= 5) { - v = apic_read(APIC_LVTTHMR); - apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); - } -#endif - /* - * Clean APIC state for other OSs: - */ - apic_write(APIC_LVTT, APIC_LVT_MASKED); - apic_write(APIC_LVT0, APIC_LVT_MASKED); - apic_write(APIC_LVT1, APIC_LVT_MASKED); - if (maxlvt >= 3) - apic_write(APIC_LVTERR, APIC_LVT_MASKED); - if (maxlvt >= 4) - apic_write(APIC_LVTPC, APIC_LVT_MASKED); - - /* Integrated APIC (!82489DX) ? */ - if (lapic_is_integrated()) { - if (maxlvt > 3) - /* Clear ESR due to Pentium errata 3AP and 11AP */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - } -} - -/** - * disable_local_APIC - clear and disable the local APIC - */ -void disable_local_APIC(void) -{ - unsigned int value; - - clear_local_APIC(); - - /* - * Disable APIC (implies clearing of registers - * for 82489DX!). - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* - * When LAPIC was disabled by the BIOS and enabled by the kernel, - * restore the disabled state. - */ - if (enabled_via_apicbase) { - unsigned int l, h; - - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_ENABLE; - wrmsr(MSR_IA32_APICBASE, l, h); - } -#endif -} - -/* - * If Linux enabled the LAPIC against the BIOS default disable it down before - * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and - * not power-off. Additionally clear all LVT entries before disable_local_APIC - * for the case where Linux didn't enable the LAPIC. - */ -void lapic_shutdown(void) -{ - unsigned long flags; - - if (!cpu_has_apic) - return; - - local_irq_save(flags); - -#ifdef CONFIG_X86_32 - if (!enabled_via_apicbase) - clear_local_APIC(); - else -#endif - disable_local_APIC(); - - - local_irq_restore(flags); -} - -/* - * This is to verify that we're looking at a real local APIC. - * Check these against your board if the CPUs aren't getting - * started for no apparent reason. - */ -int __init verify_local_APIC(void) -{ - unsigned int reg0, reg1; - - /* - * The version register is read-only in a real APIC. - */ - reg0 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); - apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); - reg1 = apic_read(APIC_LVR); - apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); - - /* - * The two version reads above should print the same - * numbers. If the second one is different, then we - * poke at a non-APIC. - */ - if (reg1 != reg0) - return 0; - - /* - * Check if the version looks reasonably. - */ - reg1 = GET_APIC_VERSION(reg0); - if (reg1 == 0x00 || reg1 == 0xff) - return 0; - reg1 = lapic_get_maxlvt(); - if (reg1 < 0x02 || reg1 == 0xff) - return 0; - - /* - * The ID register is read/write in a real APIC. - */ - reg0 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); - apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); - reg1 = apic_read(APIC_ID); - apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); - apic_write(APIC_ID, reg0); - if (reg1 != (reg0 ^ APIC_ID_MASK)) - return 0; - - /* - * The next two are just to see if we have sane values. - * They're only really relevant if we're in Virtual Wire - * compatibility mode, but most boxes are anymore. - */ - reg0 = apic_read(APIC_LVT0); - apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); - reg1 = apic_read(APIC_LVT1); - apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); - - return 1; -} - -/** - * sync_Arb_IDs - synchronize APIC bus arbitration IDs - */ -void __init sync_Arb_IDs(void) -{ - /* - * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not - * needed on AMD. - */ - if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return; - - /* - * Wait for idle. - */ - apic_wait_icr_idle(); - - apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); - apic_write(APIC_ICR, APIC_DEST_ALLINC | - APIC_INT_LEVELTRIG | APIC_DM_INIT); -} - -/* - * An initial setup of the virtual wire mode. - */ -void __init init_bsp_APIC(void) -{ - unsigned int value; - - /* - * Don't do the setup now if we have a SMP BIOS as the - * through-I/O-APIC virtual wire mode might be active. - */ - if (smp_found_config || !cpu_has_apic) - return; - - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - - /* - * Enable APIC. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - -#ifdef CONFIG_X86_32 - /* This bit is reserved on P4/Xeon and should be cleared */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - (boot_cpu_data.x86 == 15)) - value &= ~APIC_SPIV_FOCUS_DISABLED; - else -#endif - value |= APIC_SPIV_FOCUS_DISABLED; - value |= SPURIOUS_APIC_VECTOR; - apic_write(APIC_SPIV, value); - - /* - * Set up the virtual wire mode. - */ - apic_write(APIC_LVT0, APIC_DM_EXTINT); - value = APIC_DM_NMI; - if (!lapic_is_integrated()) /* 82489DX */ - value |= APIC_LVT_LEVEL_TRIGGER; - apic_write(APIC_LVT1, value); -} - -static void __cpuinit lapic_setup_esr(void) -{ - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); - - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); - /* - * spec says clear errors after enabling vector. - */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); - } -} - - -/** - * setup_local_APIC - setup the local APIC - */ -void __cpuinit setup_local_APIC(void) -{ - unsigned int value; - int i, j; - -#ifdef CONFIG_X86_32 - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } -#endif - - preempt_disable(); - - /* - * Double-check whether this APIC is really registered. - * This is meaningless in clustered apic mode, so we skip it. - */ - if (!apic_id_registered()) - BUG(); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - init_apic_ldr(); - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI, value); - - /* - * After a crash, we no longer service the interrupts and a pending - * interrupt from previous kernel might still have ISR bit set. - * - * Most probably by now CPU has serviced that pending interrupt and - * it might not have done the ack_APIC_irq() because it thought, - * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it - * does not clear the ISR bit and cpu thinks it has already serivced - * the interrupt. Hence a vector might get locked. It was noticed - * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. - */ - for (i = APIC_ISR_NR - 1; i >= 0; i--) { - value = apic_read(APIC_ISR + i*0x10); - for (j = 31; j >= 0; j--) { - if (value & (1< 1) || - (boot_cpu_data.x86 == 15)) - break; - goto no_apic; - case X86_VENDOR_INTEL: - if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || - (boot_cpu_data.x86 == 5 && cpu_has_apic)) - break; - goto no_apic; - default: - goto no_apic; - } - - if (!cpu_has_apic) { - /* - * Over-ride BIOS and try to enable the local APIC only if - * "lapic" specified. - */ - if (!force_enable_local_apic) { - printk(KERN_INFO "Local APIC disabled by BIOS -- " - "you can enable it with \"lapic\"\n"); - return -1; - } - /* - * Some BIOSes disable the local APIC in the APIC_BASE - * MSR. This can only be done in software for Intel P6 or later - * and AMD K7 (Model > 1) or later. - */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (!(l & MSR_IA32_APICBASE_ENABLE)) { - printk(KERN_INFO - "Local APIC disabled by BIOS -- reenabling.\n"); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; - wrmsr(MSR_IA32_APICBASE, l, h); - enabled_via_apicbase = 1; - } - } - /* - * The APIC feature bit should now be enabled - * in `cpuid' - */ - features = cpuid_edx(1); - if (!(features & (1 << X86_FEATURE_APIC))) { - printk(KERN_WARNING "Could not enable APIC!\n"); - return -1; - } - set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; - - /* The BIOS may have set up the APIC at some other address */ - rdmsr(MSR_IA32_APICBASE, l, h); - if (l & MSR_IA32_APICBASE_ENABLE) - mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; - - printk(KERN_INFO "Found and enabled local APIC!\n"); - - apic_pm_activate(); - - return 0; - -no_apic: - printk(KERN_INFO "No local APIC present or hardware disabled\n"); - return -1; -} -#endif - -#ifdef CONFIG_X86_64 -void __init early_init_lapic_mapping(void) -{ - unsigned long phys_addr; - - /* - * If no local APIC can be found then go out - * : it means there is no mpatable and MADT - */ - if (!smp_found_config) - return; - - phys_addr = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, phys_addr); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, phys_addr); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - boot_cpu_physical_apicid = read_apic_id(); -} -#endif - -/** - * init_apic_mappings - initialize APIC mappings - */ -void __init init_apic_mappings(void) -{ -#ifdef HAVE_X2APIC - if (x2apic) { - boot_cpu_physical_apicid = read_apic_id(); - return; - } -#endif - - /* - * If no local APIC can be found then set up a fake all - * zeroes page to simulate the local APIC and another - * one for the IO-APIC. - */ - if (!smp_found_config && detect_init_APIC()) { - apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); - apic_phys = __pa(apic_phys); - } else - apic_phys = mp_lapic_addr; - - set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", - APIC_BASE, apic_phys); - - /* - * Fetch the APIC ID of the BSP in case we have a - * default configuration (or the MP table is broken). - */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = read_apic_id(); -} - -/* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. - */ -int apic_version[MAX_APICS]; - -int __init APIC_init_uniprocessor(void) -{ -#ifdef CONFIG_X86_64 - if (disable_apic) { - printk(KERN_INFO "Apic disabled\n"); - return -1; - } - if (!cpu_has_apic) { - disable_apic = 1; - printk(KERN_INFO "Apic disabled by BIOS\n"); - return -1; - } -#else - if (!smp_found_config && !cpu_has_apic) - return -1; - - /* - * Complain if the BIOS pretends there is one. - */ - if (!cpu_has_apic && - APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); - return -1; - } -#endif - -#ifdef HAVE_X2APIC - enable_IR_x2apic(); -#endif -#ifdef CONFIG_X86_64 - setup_apic_routing(); -#endif - - verify_local_APIC(); - connect_bsp_APIC(); - -#ifdef CONFIG_X86_64 - apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); -#else - /* - * Hack: In case of kdump, after a crash, kernel might be booting - * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid - * might be zero if read from MP tables. Get it from LAPIC. - */ -# ifdef CONFIG_CRASH_DUMP - boot_cpu_physical_apicid = read_apic_id(); -# endif -#endif - physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); - setup_local_APIC(); - -#ifdef CONFIG_X86_64 - /* - * Now enable IO-APICs, actually call clear_IO_APIC - * We need clear_IO_APIC before enabling vector on BP - */ - if (!skip_ioapic_setup && nr_ioapics) - enable_IO_APIC(); -#endif - -#ifdef CONFIG_X86_IO_APIC - if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) -#endif - localise_nmi_watchdog(); - end_local_APIC_setup(); - -#ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -# ifdef CONFIG_X86_64 - else - nr_ioapics = 0; -# endif -#endif - -#ifdef CONFIG_X86_64 - setup_boot_APIC_clock(); - check_nmi_watchdog(); -#else - setup_boot_clock(); -#endif - - return 0; -} - -/* - * Local APIC interrupts - */ - -/* - * This interrupt should _never_ happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_spurious_interrupt(void) -#else -void smp_spurious_interrupt(struct pt_regs *regs) -#endif -{ - u32 v; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* - * Check if this really is a spurious interrupt and ACK it - * if it is a vectored one. Just in case... - * Spurious interrupts should not be ACKed. - */ - v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); - if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) - ack_APIC_irq(); - -#ifdef CONFIG_X86_64 - add_pda(irq_spurious_count, 1); -#else - /* see sw-dev-man vol 3, chapter 7.4.13.5 */ - printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " - "should never happen.\n", smp_processor_id()); - __get_cpu_var(irq_stat).irq_spurious_count++; -#endif - irq_exit(); -} - -/* - * This interrupt should never happen with our APIC/SMP architecture - */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_error_interrupt(void) -#else -void smp_error_interrupt(struct pt_regs *regs) -#endif -{ - u32 v, v1; - -#ifdef CONFIG_X86_64 - exit_idle(); -#endif - irq_enter(); - /* First tickle the hardware, only then report what went on. -- REW */ - v = apic_read(APIC_ESR); - apic_write(APIC_ESR, 0); - v1 = apic_read(APIC_ESR); - ack_APIC_irq(); - atomic_inc(&irq_err_count); - - /* Here is what the APIC error bits mean: - 0: Send CS error - 1: Receive CS error - 2: Send accept error - 3: Receive accept error - 4: Reserved - 5: Send illegal vector - 6: Received illegal vector - 7: Illegal register address - */ - printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", - smp_processor_id(), v , v1); - irq_exit(); -} - -/** - * connect_bsp_APIC - attach the APIC to the interrupt system - */ -void __init connect_bsp_APIC(void) -{ -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Do not trust the local APIC being empty at bootup. - */ - clear_local_APIC(); - /* - * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's - * local APIC to INT and NMI lines. - */ - apic_printk(APIC_VERBOSE, "leaving PIC mode, " - "enabling APIC mode.\n"); - outb(0x70, 0x22); - outb(0x01, 0x23); - } -#endif - enable_apic_mode(); -} - -/** - * disconnect_bsp_APIC - detach the APIC from the interrupt system - * @virt_wire_setup: indicates, whether virtual wire mode is selected - * - * Virtual wire mode is necessary to deliver legacy interrupts even when the - * APIC is disabled. - */ -void disconnect_bsp_APIC(int virt_wire_setup) -{ - unsigned int value; - -#ifdef CONFIG_X86_32 - if (pic_mode) { - /* - * Put the board back into PIC mode (has an effect only on - * certain older boards). Note that APIC interrupts, including - * IPIs, won't work beyond this point! The only exception are - * INIT IPIs. - */ - apic_printk(APIC_VERBOSE, "disabling APIC mode, " - "entering PIC mode.\n"); - outb(0x70, 0x22); - outb(0x00, 0x23); - return; - } -#endif - - /* Go back to Virtual Wire compatibility mode */ - - /* For the spurious interrupt use vector F, and enable it */ - value = apic_read(APIC_SPIV); - value &= ~APIC_VECTOR_MASK; - value |= APIC_SPIV_APIC_ENABLED; - value |= 0xf; - apic_write(APIC_SPIV, value); - - if (!virt_wire_setup) { - /* - * For LVT0 make it edge triggered, active high, - * external and enabled - */ - value = apic_read(APIC_LVT0); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); - apic_write(APIC_LVT0, value); - } else { - /* Disable LVT0 */ - apic_write(APIC_LVT0, APIC_LVT_MASKED); - } - - /* - * For LVT1 make it edge triggered, active high, - * nmi and enabled - */ - value = apic_read(APIC_LVT1); - value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | - APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | - APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); - value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; - value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); - apic_write(APIC_LVT1, value); -} - -void __cpuinit generic_processor_info(int apicid, int version) -{ - int cpu; - cpumask_t tmp_map; - - /* - * Validate version - */ - if (version == 0x0) { - printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - num_processors++; - cpus_complement(tmp_map, cpu_present_map); - cpu = first_cpu(tmp_map); - - physid_set(apicid, phys_cpu_present_map); - if (apicid == boot_cpu_physical_apicid) { - /* - * x86_bios_cpu_apicid is required to have processors listed - * in same order as logical cpu numbers. Hence the first - * entry is BSP, and so on. - */ - cpu = 0; - } - if (apicid > max_physical_apicid) - max_physical_apicid = apicid; - -#ifdef CONFIG_X86_32 - /* - * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y - * but we need to work other dependencies like SMP_SUSPEND etc - * before this can be done without some confusion. - * if (CPU_HOTPLUG_ENABLED || num_processors > 8) - * - Ashok Raj - */ - if (max_physical_apicid >= 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - -#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) - /* are we being called early in kernel startup? */ - if (early_per_cpu_ptr(x86_cpu_to_apicid)) { - u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); - u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - - cpu_to_apicid[cpu] = apicid; - bios_cpu_apicid[cpu] = apicid; - } else { - per_cpu(x86_cpu_to_apicid, cpu) = apicid; - per_cpu(x86_bios_cpu_apicid, cpu) = apicid; - } -#endif - - cpu_set(cpu, cpu_possible_map); - cpu_set(cpu, cpu_present_map); -} - -#ifdef CONFIG_X86_64 -int hard_smp_processor_id(void) -{ - return read_apic_id(); -} -#endif - -/* - * Power management - */ -#ifdef CONFIG_PM - -static struct { - /* - * 'active' is true if the local APIC was enabled by us and - * not the BIOS; this signifies that we are also responsible - * for disabling it before entering apm/acpi suspend - */ - int active; - /* r/w apic fields */ - unsigned int apic_id; - unsigned int apic_taskpri; - unsigned int apic_ldr; - unsigned int apic_dfr; - unsigned int apic_spiv; - unsigned int apic_lvtt; - unsigned int apic_lvtpc; - unsigned int apic_lvt0; - unsigned int apic_lvt1; - unsigned int apic_lvterr; - unsigned int apic_tmict; - unsigned int apic_tdcr; - unsigned int apic_thmr; -} apic_pm_state; - -static int lapic_suspend(struct sys_device *dev, pm_message_t state) -{ - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - apic_pm_state.apic_id = apic_read(APIC_ID); - apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); - apic_pm_state.apic_ldr = apic_read(APIC_LDR); - apic_pm_state.apic_dfr = apic_read(APIC_DFR); - apic_pm_state.apic_spiv = apic_read(APIC_SPIV); - apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); - if (maxlvt >= 4) - apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); - apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); - apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); - apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); - apic_pm_state.apic_tmict = apic_read(APIC_TMICT); - apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); -#endif - - local_irq_save(flags); - disable_local_APIC(); - local_irq_restore(flags); - return 0; -} - -static int lapic_resume(struct sys_device *dev) -{ - unsigned int l, h; - unsigned long flags; - int maxlvt; - - if (!apic_pm_state.active) - return 0; - - maxlvt = lapic_get_maxlvt(); - - local_irq_save(flags); - -#ifdef HAVE_X2APIC - if (x2apic) - enable_x2apic(); - else -#endif - { - /* - * Make sure the APICBASE points to the right address - * - * FIXME! This will be wrong if we ever support suspend on - * SMP! We'll need to do this as part of the CPU restore! - */ - rdmsr(MSR_IA32_APICBASE, l, h); - l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; - wrmsr(MSR_IA32_APICBASE, l, h); - } - - apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); - apic_write(APIC_ID, apic_pm_state.apic_id); - apic_write(APIC_DFR, apic_pm_state.apic_dfr); - apic_write(APIC_LDR, apic_pm_state.apic_ldr); - apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); - apic_write(APIC_SPIV, apic_pm_state.apic_spiv); - apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); - apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); -#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) - if (maxlvt >= 5) - apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); -#endif - if (maxlvt >= 4) - apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); - apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); - apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); - apic_write(APIC_TMICT, apic_pm_state.apic_tmict); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); - - local_irq_restore(flags); - - return 0; -} - -/* - * This device has no shutdown method - fully functioning local APICs - * are needed on every CPU up until machine_halt/restart/poweroff. - */ - -static struct sysdev_class lapic_sysclass = { - .name = "lapic", - .resume = lapic_resume, - .suspend = lapic_suspend, -}; - -static struct sys_device device_lapic = { - .id = 0, - .cls = &lapic_sysclass, -}; - -static void __cpuinit apic_pm_activate(void) -{ - apic_pm_state.active = 1; -} - -static int __init init_lapic_sysfs(void) -{ - int error; - - if (!cpu_has_apic) - return 0; - /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ - - error = sysdev_class_register(&lapic_sysclass); - if (!error) - error = sysdev_register(&device_lapic); - return error; -} -device_initcall(init_lapic_sysfs); - -#else /* CONFIG_PM */ - -static void apic_pm_activate(void) { } - -#endif /* CONFIG_PM */ - -#ifdef CONFIG_X86_64 -/* - * apic_is_clustered_box() -- Check if we can expect good TSC - * - * Thus far, the major user of this is IBM's Summit2 series: - * - * Clustered boxes may have unsynced TSC problems if they are - * multi-chassis. Use available data to take a good guess. - * If in doubt, go HPET. - */ -__cpuinit int apic_is_clustered_box(void) -{ - int i, clusters, zeros; - unsigned id; - u16 *bios_cpu_apicid; - DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); - - /* - * there is not this kind of box with AMD CPU yet. - * Some AMD box with quadcore cpu and 8 sockets apicid - * will be [4, 0x23] or [8, 0x27] could be thought to - * vsmp box still need checking... - */ - if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) - return 0; - - bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); - bitmap_zero(clustermap, NUM_APIC_CLUSTERS); - - for (i = 0; i < NR_CPUS; i++) { - /* are we being called early in kernel startup? */ - if (bios_cpu_apicid) { - id = bios_cpu_apicid[i]; - } - else if (i < nr_cpu_ids) { - if (cpu_present(i)) - id = per_cpu(x86_bios_cpu_apicid, i); - else - continue; - } - else - break; - - if (id != BAD_APICID) - __set_bit(APIC_CLUSTERID(id), clustermap); - } - - /* Problem: Partially populated chassis may not have CPUs in some of - * the APIC clusters they have been allocated. Only present CPUs have - * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. - * Since clusters are allocated sequentially, count zeros only if - * they are bounded by ones. - */ - clusters = 0; - zeros = 0; - for (i = 0; i < NUM_APIC_CLUSTERS; i++) { - if (test_bit(i, clustermap)) { - clusters += 1 + zeros; - zeros = 0; - } else - ++zeros; - } - - /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are - * not guaranteed to be synced between boards - */ - if (is_vsmp_box() && clusters > 1) - return 1; - - /* - * If clusters > 2, then should be multi-chassis. - * May have to revisit this when multi-core + hyperthreaded CPUs come - * out, but AFAIK this will work even for them. - */ - return (clusters > 2); -} -#endif - -/* - * APIC command line parameters - */ -static int __init setup_disableapic(char *arg) -{ - disable_apic = 1; - setup_clear_cpu_cap(X86_FEATURE_APIC); - return 0; -} -early_param("disableapic", setup_disableapic); - -/* same as disableapic, for compatibility */ -static int __init setup_nolapic(char *arg) -{ - return setup_disableapic(arg); -} -early_param("nolapic", setup_nolapic); - -static int __init parse_lapic_timer_c2_ok(char *arg) -{ - local_apic_timer_c2_ok = 1; - return 0; -} -early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); - -static int __init parse_disable_apic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("noapictimer", parse_disable_apic_timer); - -static int __init parse_nolapic_timer(char *arg) -{ - disable_apic_timer = 1; - return 0; -} -early_param("nolapic_timer", parse_nolapic_timer); - -static int __init apic_set_verbosity(char *arg) -{ - if (!arg) { -#ifdef CONFIG_X86_64 - skip_ioapic_setup = 0; - return 0; -#endif - return -EINVAL; - } - - if (strcmp("debug", arg) == 0) - apic_verbosity = APIC_DEBUG; - else if (strcmp("verbose", arg) == 0) - apic_verbosity = APIC_VERBOSE; - else { - printk(KERN_WARNING "APIC Verbosity level %s not recognised" - " use apic=verbose or apic=debug\n", arg); - return -EINVAL; - } - - return 0; -} -early_param("apic", apic_set_verbosity); - -static int __init lapic_insert_resource(void) -{ - if (!apic_phys) - return -1; - - /* Put local APIC into the resource map. */ - lapic_resource.start = apic_phys; - lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; - insert_resource(&iomem_resource, &lapic_resource); - - return 0; -} - -/* - * need call insert after e820_reserve_resources() - * that is using request_resource - */ -late_initcall(lapic_insert_resource); -- cgit v1.2.3 From e492c5ae85428d4a3815d06bf308c590120b928b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 24 Aug 2008 22:41:26 -0700 Subject: x86: let 64 bit to use 32 bit calibrate_apic_clock Use the 32-bit APIC calibration code - it's more mature. Signed-of-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 88 ++------------------------------------------------ 1 file changed, 2 insertions(+), 86 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 6e99af5ce67..ca5ef71f420 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -478,90 +478,6 @@ static void __cpuinit setup_APIC_timer(void) clockevents_register_device(levt); } -#ifdef CONFIG_X86_64 -/* - * In this function we calibrate APIC bus clocks to the external - * timer. Unfortunately we cannot use jiffies and the timer irq - * to calibrate, since some later bootup code depends on getting - * the first irq? Ugh. - * - * We want to do the calibration only once since we - * want to have local timer irqs syncron. CPUs connected - * by the same APIC bus have the very same bus frequency. - * And we want to have irqs off anyways, no accidental - * APIC irq that way. - */ - -#define TICK_COUNT 100000000 - -static int __init calibrate_APIC_clock(void) -{ - unsigned apic, apic_start; - unsigned long tsc, tsc_start; - int result; - - local_irq_disable(); - - /* - * Put whatever arbitrary (but long enough) timeout - * value into the APIC clock, we just want to get the - * counter running for calibration. - * - * No interrupt enable ! - */ - __setup_APIC_LVTT(250000000, 0, 0); - - apic_start = apic_read(APIC_TMCCT); -#ifdef CONFIG_X86_PM_TIMER - if (apic_calibrate_pmtmr && pmtmr_ioport) { - pmtimer_wait(5000); /* 5ms wait */ - apic = apic_read(APIC_TMCCT); - result = (apic_start - apic) * 1000L / 5; - } else -#endif - { - rdtscll(tsc_start); - - do { - apic = apic_read(APIC_TMCCT); - rdtscll(tsc); - } while ((tsc - tsc_start) < TICK_COUNT && - (apic_start - apic) < TICK_COUNT); - - result = (apic_start - apic) * 1000L * tsc_khz / - (tsc - tsc_start); - } - - local_irq_enable(); - - printk(KERN_DEBUG "APIC timer calibration result %d\n", result); - - printk(KERN_INFO "Detected %d.%03d MHz APIC timer.\n", - result / 1000 / 1000, result / 1000 % 1000); - - /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, - lapic_clockevent.shift); - lapic_clockevent.max_delta_ns = - clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); - lapic_clockevent.min_delta_ns = - clockevent_delta2ns(0xF, &lapic_clockevent); - - calibration_result = (result * APIC_DIVISOR) / HZ; - - /* - * Do a sanity check on the APIC calibration result - */ - if (calibration_result < (1000000 / HZ)) { - printk(KERN_WARNING - "APIC frequency too slow, disabling apic timer\n"); - return -1; - } - - return 0; -} - -#else /* * In this functions we calibrate APIC bus clocks to the external timer. * @@ -659,6 +575,7 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); +#ifdef CONFIG_X86_PM_TIMER /* Check, if the PM timer is available */ deltapm = lapic_cal_pm2 - lapic_cal_pm1; apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); @@ -687,6 +604,7 @@ static int __init calibrate_APIC_clock(void) } pm_referenced = 1; } +#endif /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -773,8 +691,6 @@ static int __init calibrate_APIC_clock(void) return 0; } -#endif - /* * Setup the boot APIC * -- cgit v1.2.3 From 1dd6ba2e179773597e20f17f66049a64e6c4b2ec Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 25 Aug 2008 21:27:26 +0400 Subject: x86: apic - unify smp_spurious/error_interrupt declaration According to entry_64.S we do pass pt_regs pointer into interrupt handlers but don't use them. So we safely may merge the declarations. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index ca5ef71f420..0556f375e40 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1659,11 +1659,7 @@ int __init APIC_init_uniprocessor(void) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_spurious_interrupt(void) -#else void smp_spurious_interrupt(struct pt_regs *regs) -#endif { u32 v; @@ -1694,11 +1690,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) /* * This interrupt should never happen with our APIC/SMP architecture */ -#ifdef CONFIG_X86_64 -asmlinkage void smp_error_interrupt(void) -#else void smp_error_interrupt(struct pt_regs *regs) -#endif { u32 v, v1; -- cgit v1.2.3 From a11b5abef50722e42a7d13f6b799c4f606fcb797 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 3 Sep 2008 16:58:31 -0700 Subject: x2apic: fix reserved APIC register accesses in print_local_APIC() APIC_ARBPRI is a reserved register for XAPIC and beyond. APIC_RRR is a reserved register except for 82489DX, APIC for Pentium processors. APIC_EOI is a write only register. APIC_DFR is reserved in x2apic mode. Access to these registers in x2apic will result in #GP fault. Fix these apic register accesses. Signed-off-by: Yinghai Lu Signed-off-by: Suresh Siddha Cc: Maciej W. Rozycki Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index d28128e0392..93ceb19d1e9 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1751,21 +1751,30 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK); if (APIC_INTEGRATED(ver)) { /* !82489DX */ - v = apic_read(APIC_ARBPRI); - printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, - v & APIC_ARBPRI_MASK); + if (!APIC_XAPIC(ver)) { + v = apic_read(APIC_ARBPRI); + printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v, + v & APIC_ARBPRI_MASK); + } v = apic_read(APIC_PROCPRI); printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v); } - v = apic_read(APIC_EOI); - printk(KERN_DEBUG "... APIC EOI: %08x\n", v); - v = apic_read(APIC_RRR); - printk(KERN_DEBUG "... APIC RRR: %08x\n", v); + /* + * Remote read supported only in the 82489DX and local APIC for + * Pentium processors. + */ + if (!APIC_INTEGRATED(ver) || maxlvt == 3) { + v = apic_read(APIC_RRR); + printk(KERN_DEBUG "... APIC RRR: %08x\n", v); + } + v = apic_read(APIC_LDR); printk(KERN_DEBUG "... APIC LDR: %08x\n", v); - v = apic_read(APIC_DFR); - printk(KERN_DEBUG "... APIC DFR: %08x\n", v); + if (!x2apic_enabled()) { + v = apic_read(APIC_DFR); + printk(KERN_DEBUG "... APIC DFR: %08x\n", v); + } v = apic_read(APIC_SPIV); printk(KERN_DEBUG "... APIC SPIV: %08x\n", v); -- cgit v1.2.3 From 02c1df199c990cd21c09a4dffaa06d4e0b7bf2bf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 4 Sep 2008 20:57:11 +0200 Subject: x86: print out if acpi want physical flat of all Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/genapic_flat_64.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 9eca5ba7a6b..2ec2de8d8c4 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c @@ -179,8 +179,10 @@ static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) * is an example). */ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { + printk(KERN_DEBUG "system APIC only can use physical flat"); return 1; + } #endif return 0; -- cgit v1.2.3 From 676f4a920be27160747439fe71026aa15ec78e5a Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 4 Sep 2008 22:37:49 +0400 Subject: x86: io-apic - use ARRAY_SIZE macro Make the code width a bit shorter with ARRAY_SIZE macro. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 93ceb19d1e9..9b01fdadcb9 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -166,7 +166,7 @@ static void __init init_work(void *data) memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - legacy_count = sizeof(irq_cfg_legacy)/sizeof(irq_cfg_legacy[0]); + legacy_count = ARRAY_SIZE(irq_cfg_legacy); for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); -- cgit v1.2.3 From ac54a6c9371bacb86bee1db23f7d82e8685c7e17 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 4 Sep 2008 22:37:50 +0400 Subject: x86: io-apic - declare irq_cfg_lock for SPARSE_IRQ only We use irq_cfg_lock lock in SPARSE_IRQ only context so move it under #ifdef and compiler will be happy. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9b01fdadcb9..d22fecf828b 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -147,14 +147,15 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) static struct irq_cfg *irq_cfgx; +#ifdef CONFIG_HAVE_SPARSE_IRQ /* * Protect the irq_cfgx_free freelist: */ static DEFINE_SPINLOCK(irq_cfg_lock); -#ifdef CONFIG_HAVE_SPARSE_IRQ static struct irq_cfg *irq_cfgx_free; #endif + static void __init init_work(void *data) { struct dyn_array *da = data; -- cgit v1.2.3 From b40d575bf0679c45aaf9e1161fc51a6b041b7210 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 5 Sep 2008 18:02:16 -0700 Subject: x86: HPET_MSI Refactor code in preparation for HPET_MSI Preparatory patch before the actual HPET MSI changes. Sets up hpet_set_mode and hpet_next_event for the MSI related changes. Just the code refactoring and should be zero functional change. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index acf62fc233d..f7cb5e9e261 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -227,44 +227,44 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } -static void hpet_legacy_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +static void hpet_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt, int timer) { unsigned long cfg, cmp, now; uint64_t delta; switch(mode) { case CLOCK_EVT_MODE_PERIODIC: - delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * hpet_clockevent.mult; - delta >>= hpet_clockevent.shift; + delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; + delta >>= evt->shift; now = hpet_readl(HPET_COUNTER); cmp = now + (unsigned long) delta; - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); /* * The first write after writing TN_SETVAL to the * config register sets the counter value, the second * write sets the period. */ - hpet_writel(cmp, HPET_T0_CMP); + hpet_writel(cmp, HPET_Tn_CMP(timer)); udelay(1); - hpet_writel((unsigned long) delta, HPET_T0_CMP); + hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); break; case CLOCK_EVT_MODE_ONESHOT: - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg &= ~HPET_TN_PERIODIC; cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); break; case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: - cfg = hpet_readl(HPET_T0_CFG); + cfg = hpet_readl(HPET_Tn_CFG(timer)); cfg &= ~HPET_TN_ENABLE; - hpet_writel(cfg, HPET_T0_CFG); + hpet_writel(cfg, HPET_Tn_CFG(timer)); break; case CLOCK_EVT_MODE_RESUME: @@ -273,14 +273,14 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode, } } -static int hpet_legacy_next_event(unsigned long delta, - struct clock_event_device *evt) +static int hpet_next_event(unsigned long delta, + struct clock_event_device *evt, int timer) { u32 cnt; cnt = hpet_readl(HPET_COUNTER); cnt += (u32) delta; - hpet_writel(cnt, HPET_T0_CMP); + hpet_writel(cnt, HPET_Tn_CMP(timer)); /* * We need to read back the CMP register to make sure that @@ -292,6 +292,18 @@ static int hpet_legacy_next_event(unsigned long delta, return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } +static void hpet_legacy_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + hpet_set_mode(mode, evt, 0); +} + +static int hpet_legacy_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + return hpet_next_event(delta, evt, 0); +} + /* * Clock source related code */ -- cgit v1.2.3 From 58ac1e76ce77d515bd5cb65dbc465a040da341c6 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 5 Sep 2008 18:02:17 -0700 Subject: x86: HPET_MSI Basic HPET_MSI setup code Basic HPET MSI setup code. Routines to perform basic MSI read write in HPET memory map and setting up irq_chip for HPET MSI. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 54 +++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index f7cb5e9e261..3f10d16a834 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include @@ -25,6 +27,15 @@ unsigned long hpet_address; static void __iomem *hpet_virt_address; +struct hpet_dev { + struct clock_event_device evt; + unsigned int num; + int cpu; + unsigned int irq; + unsigned int flags; + char name[10]; +}; + unsigned long hpet_readl(unsigned long a) { return readl(hpet_virt_address + a); @@ -304,6 +315,49 @@ static int hpet_legacy_next_event(unsigned long delta, return hpet_next_event(delta, evt, 0); } +/* + * HPET MSI Support + */ + +void hpet_msi_unmask(unsigned int irq) +{ + struct hpet_dev *hdev = get_irq_data(irq); + unsigned long cfg; + + /* unmask it */ + cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg |= HPET_TN_FSB; + hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); +} + +void hpet_msi_mask(unsigned int irq) +{ + unsigned long cfg; + struct hpet_dev *hdev = get_irq_data(irq); + + /* mask it */ + cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); + cfg &= ~HPET_TN_FSB; + hpet_writel(cfg, HPET_Tn_CFG(hdev->num)); +} + +void hpet_msi_write(unsigned int irq, struct msi_msg *msg) +{ + struct hpet_dev *hdev = get_irq_data(irq); + + hpet_writel(msg->data, HPET_Tn_ROUTE(hdev->num)); + hpet_writel(msg->address_lo, HPET_Tn_ROUTE(hdev->num) + 4); +} + +void hpet_msi_read(unsigned int irq, struct msi_msg *msg) +{ + struct hpet_dev *hdev = get_irq_data(irq); + + msg->data = hpet_readl(HPET_Tn_ROUTE(hdev->num)); + msg->address_lo = hpet_readl(HPET_Tn_ROUTE(hdev->num) + 4); + msg->address_hi = 0; +} + /* * Clock source related code */ diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index d22fecf828b..77fa155becf 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -41,6 +41,7 @@ #endif #include #include +#include #include #include @@ -56,6 +57,7 @@ #include #include #include +#include #include #include @@ -3522,6 +3524,68 @@ int arch_setup_dmar_msi(unsigned int irq) } #endif +#ifdef CONFIG_HPET_TIMER + +#ifdef CONFIG_SMP +static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cfg = irq_cfg(irq); + cpus_and(tmp, cfg->domain, mask); + dest = cpu_mask_to_apicid(tmp); + + hpet_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + hpet_msi_write(irq, &msg); + desc = irq_to_desc(irq); + desc->affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip hpet_msi_type = { + .name = "HPET_MSI", + .unmask = hpet_msi_unmask, + .mask = hpet_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = hpet_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_hpet_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + + hpet_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif + #endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support -- cgit v1.2.3 From 4588c1f0354ac96a358b3f9e8e4331c51cf3336f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Sep 2008 14:19:17 +0200 Subject: x86: HPET_MSI Basic HPET_MSI setup code, cleanups small style cleanups. Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 3f10d16a834..03d3655734b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1,39 +1,39 @@ #include #include +#include +#include #include #include #include #include -#include -#include -#include #include +#include +#include #include -#include #include -#include +#include -#define HPET_MASK CLOCKSOURCE_MASK(32) -#define HPET_SHIFT 22 +#define HPET_MASK CLOCKSOURCE_MASK(32) +#define HPET_SHIFT 22 /* FSEC = 10^-15 NSEC = 10^-9 */ -#define FSEC_PER_NSEC 1000000L +#define FSEC_PER_NSEC 1000000L /* * HPET address is set in acpi/boot.c, when an ACPI entry exists */ -unsigned long hpet_address; -static void __iomem *hpet_virt_address; +unsigned long hpet_address; +static void __iomem *hpet_virt_address; struct hpet_dev { - struct clock_event_device evt; - unsigned int num; - int cpu; - unsigned int irq; - unsigned int flags; - char name[10]; + struct clock_event_device evt; + unsigned int num; + int cpu; + unsigned int irq; + unsigned int flags; + char name[10]; }; unsigned long hpet_readl(unsigned long a) @@ -70,7 +70,7 @@ static inline void hpet_clear_mapping(void) static int boot_hpet_disable; int hpet_force_user; -static int __init hpet_setup(char* str) +static int __init hpet_setup(char *str) { if (str) { if (!strncmp("disable", str, 7)) @@ -91,7 +91,7 @@ __setup("nohpet", disable_hpet); static inline int is_hpet_capable(void) { - return (!boot_hpet_disable && hpet_address); + return !boot_hpet_disable && hpet_address; } /* @@ -122,10 +122,10 @@ static void hpet_reserve_platform_timers(unsigned long id) nrtimers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; - memset(&hd, 0, sizeof (hd)); - hd.hd_phys_address = hpet_address; - hd.hd_address = hpet; - hd.hd_nirqs = nrtimers; + memset(&hd, 0, sizeof(hd)); + hd.hd_phys_address = hpet_address; + hd.hd_address = hpet; + hd.hd_nirqs = nrtimers; hpet_reserve_timer(&hd, 0); #ifdef CONFIG_HPET_EMULATE_RTC @@ -141,8 +141,8 @@ static void hpet_reserve_platform_timers(unsigned long id) hd.hd_irq[1] = HPET_LEGACY_RTC; for (i = 2; i < nrtimers; timer++, i++) { - hd.hd_irq[i] = (readl(&timer->hpet_config) & Tn_INT_ROUTE_CNF_MASK) >> - Tn_INT_ROUTE_CNF_SHIFT; + hd.hd_irq[i] = (readl(&timer->hpet_config) & + Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; } hpet_alloc(&hd); @@ -244,7 +244,7 @@ static void hpet_set_mode(enum clock_event_mode mode, unsigned long cfg, cmp, now; uint64_t delta; - switch(mode) { + switch (mode) { case CLOCK_EVT_MODE_PERIODIC: delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; delta >>= evt->shift; -- cgit v1.2.3 From 26afe5f2fbf06ea0765aaa316640c4dd472310c0 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 5 Sep 2008 18:02:18 -0700 Subject: x86: HPET_MSI Initialise per-cpu HPET timers Initialize a per CPU HPET MSI timer when possible. We retain the HPET timer 0 (IRQ 0) and timer 1 (IRQ 8) as is when legacy mode is being used. We setup the remaining HPET timers as per CPU MSI based timers. This per CPU timer will eliminate the need for timer broadcasting with IRQ 0 when there is non-functional LAPIC timer across CPU deep C-states. If there are more CPUs than number of available timers, CPUs that do not find any timer to use will continue using LAPIC and IRQ 0 broadcast. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 295 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 293 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 03d3655734b..31e9191b7e1 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -21,10 +21,19 @@ NSEC = 10^-9 */ #define FSEC_PER_NSEC 1000000L +#define HPET_DEV_USED_BIT 2 +#define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) +#define HPET_DEV_VALID 0x8 +#define HPET_DEV_FSB_CAP 0x1000 +#define HPET_DEV_PERI_CAP 0x2000 + +#define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt) + /* * HPET address is set in acpi/boot.c, when an ACPI entry exists */ unsigned long hpet_address; +unsigned long hpet_num_timers; static void __iomem *hpet_virt_address; struct hpet_dev { @@ -36,6 +45,10 @@ struct hpet_dev { char name[10]; }; +static struct hpet_dev *hpet_devs; + +static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); + unsigned long hpet_readl(unsigned long a) { return readl(hpet_virt_address + a); @@ -145,6 +158,16 @@ static void hpet_reserve_platform_timers(unsigned long id) Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; } + for (i = 0; i < nrtimers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + + hd.hd_irq[hdev->num] = hdev->irq; + hpet_reserve_timer(&hd, hdev->num); + } + hpet_alloc(&hd); } @@ -238,6 +261,8 @@ static void hpet_legacy_clockevent_register(void) printk(KERN_DEBUG "hpet clockevent registered\n"); } +static int hpet_setup_msi_irq(unsigned int irq); + static void hpet_set_mode(enum clock_event_mode mode, struct clock_event_device *evt, int timer) { @@ -279,7 +304,15 @@ static void hpet_set_mode(enum clock_event_mode mode, break; case CLOCK_EVT_MODE_RESUME: - hpet_enable_legacy_int(); + if (timer == 0) { + hpet_enable_legacy_int(); + } else { + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + hpet_setup_msi_irq(hdev->irq); + disable_irq(hdev->irq); + irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); + enable_irq(hdev->irq); + } break; } } @@ -318,7 +351,7 @@ static int hpet_legacy_next_event(unsigned long delta, /* * HPET MSI Support */ - +#ifdef CONFIG_PCI_MSI void hpet_msi_unmask(unsigned int irq) { struct hpet_dev *hdev = get_irq_data(irq); @@ -358,6 +391,253 @@ void hpet_msi_read(unsigned int irq, struct msi_msg *msg) msg->address_hi = 0; } +static void hpet_msi_set_mode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + hpet_set_mode(mode, evt, hdev->num); +} + +static int hpet_msi_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + return hpet_next_event(delta, evt, hdev->num); +} + +static int hpet_setup_msi_irq(unsigned int irq) +{ + if (arch_setup_hpet_msi(irq)) { + destroy_irq(irq); + return -EINVAL; + } + return 0; +} + +static int hpet_assign_irq(struct hpet_dev *dev) +{ + unsigned int irq; + + irq = create_irq(); + if (!irq) + return -EINVAL; + + set_irq_data(irq, dev); + + if (hpet_setup_msi_irq(irq)) + return -EINVAL; + + dev->irq = irq; + return 0; +} + +static irqreturn_t hpet_interrupt_handler(int irq, void *data) +{ + struct hpet_dev *dev = (struct hpet_dev *)data; + struct clock_event_device *hevt = &dev->evt; + + if (!hevt->event_handler) { + printk(KERN_INFO "Spurious HPET timer interrupt on HPET timer %d\n", + dev->num); + return IRQ_HANDLED; + } + + hevt->event_handler(hevt); + return IRQ_HANDLED; +} + +static int hpet_setup_irq(struct hpet_dev *dev) +{ + + if (request_irq(dev->irq, hpet_interrupt_handler, + IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) + return -1; + + disable_irq(dev->irq); + irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); + enable_irq(dev->irq); + + return 0; +} + +/* This should be called in specific @cpu */ +static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) +{ + struct clock_event_device *evt = &hdev->evt; + uint64_t hpet_freq; + + WARN_ON(cpu != smp_processor_id()); + if (!(hdev->flags & HPET_DEV_VALID)) + return; + + if (hpet_setup_msi_irq(hdev->irq)) + return; + + hdev->cpu = cpu; + per_cpu(cpu_hpet_dev, cpu) = hdev; + evt->name = hdev->name; + hpet_setup_irq(hdev); + evt->irq = hdev->irq; + + evt->rating = 110; + evt->features = CLOCK_EVT_FEAT_ONESHOT; + if (hdev->flags & HPET_DEV_PERI_CAP) + evt->features |= CLOCK_EVT_FEAT_PERIODIC; + + evt->set_mode = hpet_msi_set_mode; + evt->set_next_event = hpet_msi_next_event; + evt->shift = 32; + + /* + * The period is a femto seconds value. We need to calculate the + * scaled math multiplication factor for nanosecond to hpet tick + * conversion. + */ + hpet_freq = 1000000000000000ULL; + do_div(hpet_freq, hpet_period); + evt->mult = div_sc((unsigned long) hpet_freq, + NSEC_PER_SEC, evt->shift); + /* Calculate the max delta */ + evt->max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, evt); + /* 5 usec minimum reprogramming delta. */ + evt->min_delta_ns = 5000; + + evt->cpumask = cpumask_of_cpu(hdev->cpu); + clockevents_register_device(evt); +} + +#ifdef CONFIG_HPET +/* Reserve at least one timer for userspace (/dev/hpet) */ +#define RESERVE_TIMERS 1 +#else +#define RESERVE_TIMERS 0 +#endif +void hpet_msi_capability_lookup(unsigned int start_timer) +{ + unsigned int id; + unsigned int num_timers; + unsigned int num_timers_used = 0; + int i; + + id = hpet_readl(HPET_ID); + + num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); + num_timers++; /* Value read out starts from 0 */ + + hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); + if (!hpet_devs) + return; + + hpet_num_timers = num_timers; + + for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { + struct hpet_dev *hdev = &hpet_devs[num_timers_used]; + unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); + + /* Only consider HPET timer with MSI support */ + if (!(cfg & HPET_TN_FSB_CAP)) + continue; + + hdev->flags = 0; + if (cfg & HPET_TN_PERIODIC_CAP) + hdev->flags |= HPET_DEV_PERI_CAP; + hdev->num = i; + + sprintf(hdev->name, "hpet%d", i); + if (hpet_assign_irq(hdev)) + continue; + + hdev->flags |= HPET_DEV_FSB_CAP; + hdev->flags |= HPET_DEV_VALID; + num_timers_used++; + if (num_timers_used == num_possible_cpus()) + break; + } + + printk(KERN_INFO "HPET: %d timers in total, %d timers will be used for per-cpu timer\n", + num_timers, num_timers_used); +} + +static struct hpet_dev *hpet_get_unused_timer(void) +{ + int i; + + if (!hpet_devs) + return NULL; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + if (test_and_set_bit(HPET_DEV_USED_BIT, + (unsigned long *)&hdev->flags)) + continue; + return hdev; + } + return NULL; +} + +struct hpet_work_struct { + struct delayed_work work; + struct completion complete; +}; + +static void hpet_work(struct work_struct *w) +{ + struct hpet_dev *hdev; + int cpu = smp_processor_id(); + struct hpet_work_struct *hpet_work; + + hpet_work = container_of(w, struct hpet_work_struct, work.work); + + hdev = hpet_get_unused_timer(); + if (hdev) + init_one_hpet_msi_clockevent(hdev, cpu); + + complete(&hpet_work->complete); +} + +static int hpet_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct hpet_work_struct work; + struct hpet_dev *hdev = per_cpu(cpu_hpet_dev, cpu); + + switch (action & 0xf) { + case CPU_ONLINE: + INIT_DELAYED_WORK(&work.work, hpet_work); + init_completion(&work.complete); + /* FIXME: add schedule_work_on() */ + schedule_delayed_work_on(cpu, &work.work, 0); + wait_for_completion(&work.complete); + break; + case CPU_DEAD: + if (hdev) { + free_irq(hdev->irq, hdev); + hdev->flags &= ~HPET_DEV_USED; + per_cpu(cpu_hpet_dev, cpu) = NULL; + } + break; + } + return NOTIFY_OK; +} +#else + +void hpet_msi_capability_lookup(unsigned int start_timer) +{ + return; +} + +static int hpet_cpuhp_notify(struct notifier_block *n, + unsigned long action, void *hcpu) +{ + return NOTIFY_OK; +} + +#endif + /* * Clock source related code */ @@ -493,8 +773,10 @@ int __init hpet_enable(void) if (id & HPET_ID_LEGSUP) { hpet_legacy_clockevent_register(); + hpet_msi_capability_lookup(2); return 1; } + hpet_msi_capability_lookup(0); return 0; out_nohpet: @@ -511,6 +793,8 @@ out_nohpet: */ static __init int hpet_late_init(void) { + int cpu; + if (boot_hpet_disable) return -ENODEV; @@ -526,6 +810,13 @@ static __init int hpet_late_init(void) hpet_reserve_platform_timers(hpet_readl(HPET_ID)); + for_each_online_cpu(cpu) { + hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); + } + + /* This notifier should be called after workqueue is ready */ + hotcpu_notifier(hpet_cpuhp_notify, -20); + return 0; } fs_initcall(hpet_late_init); -- cgit v1.2.3 From 3c2cbd2490656fb4b6ede586c557a2b09811a432 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 6 Sep 2008 14:15:33 +0400 Subject: x86: io-apic - code style cleaning for setup_IO_APIC_irqs By changing printout form we are able to shrink (and clean up) code a bit. Former printout example: init IO_APIC IRQs IO-APIC (apicid-pin) 1-1, 1-2, 1-3 not connected. IO-APIC (apicid-pin) 2-1, 2-2, 2-3 not connected. New printout example: init IO_APIC IRQs 1-1 1-2 1-3 (apicid-pin) not connected 2-1 2-2 2-3 (apicid-pin) not connected Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 53 +++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 77fa155becf..4040d575a21 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1518,41 +1518,44 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, static void __init setup_IO_APIC_irqs(void) { - int apic, pin, idx, irq, first_notcon = 1; + int apic, pin, idx, irq; + int notcon = 0; apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - - idx = find_irq_entry(apic,pin,mp_INT); - if (idx == -1) { - if (first_notcon) { - apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin); - first_notcon = 0; - } else - apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin); - continue; - } - if (!first_notcon) { - apic_printk(APIC_VERBOSE, " not connected.\n"); - first_notcon = 1; - } + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + + idx = find_irq_entry(apic, pin, mp_INT); + if (idx == -1) { + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic].mp_apicid, pin); + if (!notcon) + notcon = 1; + continue; + } - irq = pin_2_irq(idx, apic, pin); + irq = pin_2_irq(idx, apic, pin); #ifdef CONFIG_X86_32 - if (multi_timer_check(apic, irq)) - continue; + if (multi_timer_check(apic, irq)) + continue; #endif - add_pin_to_irq(irq, apic, pin); + add_pin_to_irq(irq, apic, pin); - setup_IO_APIC_irq(apic, pin, irq, - irq_trigger(idx), irq_polarity(idx)); - } + setup_IO_APIC_irq(apic, pin, irq, + irq_trigger(idx), irq_polarity(idx)); + } + if (notcon) { + apic_printk(APIC_VERBOSE, + KERN_DEBUG " (apicid-pin) not connected\n"); + notcon = 0; + } } - if (!first_notcon) - apic_printk(APIC_VERBOSE, " not connected.\n"); + if (notcon) + apic_printk(APIC_VERBOSE, + KERN_DEBUG " (apicid-pin) not connected\n"); } /* -- cgit v1.2.3 From 79c09698cac8df16c5539447d5e1047fde9742e5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 7 Sep 2008 17:58:57 -0700 Subject: x86: lapic address print out like io apic addr Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 0556f375e40..f3f50858048 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1548,7 +1548,7 @@ void __init init_apic_mappings(void) apic_phys = mp_lapic_addr; set_fixmap_nocache(FIX_APIC_BASE, apic_phys); - apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", + apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); /* -- cgit v1.2.3 From 2a554fb132cf804477087057b9b0ff2162984507 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 8 Sep 2008 19:38:06 +0400 Subject: x86: io-apic - do not use KERN_DEBUG marker too much Do not use KERN_DEBUG several times on the same line being printed. Introduced by mine previous patch, sorry. Reported-by: Yinghai Lu Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4040d575a21..12e59df026d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1528,11 +1528,16 @@ static void __init setup_IO_APIC_irqs(void) idx = find_irq_entry(apic, pin, mp_INT); if (idx == -1) { - apic_printk(APIC_VERBOSE, - KERN_DEBUG " %d-%d", - mp_ioapics[apic].mp_apicid, pin); - if (!notcon) + if (!notcon) { notcon = 1; + apic_printk(APIC_VERBOSE, + KERN_DEBUG " %d-%d", + mp_ioapics[apic].mp_apicid, + pin); + } else + apic_printk(APIC_VERBOSE, " %d-%d", + mp_ioapics[apic].mp_apicid, + pin); continue; } @@ -1548,14 +1553,14 @@ static void __init setup_IO_APIC_irqs(void) } if (notcon) { apic_printk(APIC_VERBOSE, - KERN_DEBUG " (apicid-pin) not connected\n"); + " (apicid-pin) not connected\n"); notcon = 0; } } if (notcon) apic_printk(APIC_VERBOSE, - KERN_DEBUG " (apicid-pin) not connected\n"); + " (apicid-pin) not connected\n"); } /* -- cgit v1.2.3 From f0ed4e695faf6766927c8cfbda2bc7530c7210c2 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Mon, 8 Sep 2008 10:18:40 -0700 Subject: x86: using HPET in MSI mode and setting up per CPU HPET timers, fix On Sat, Sep 06, 2008 at 06:03:53AM -0700, Ingo Molnar wrote: > > it crashes two testsystems, the fault on a NULL pointer in hpet init, > with: > > initcall print_all_ICs+0x0/0x520 returned 0 after 26 msecs > calling hpet_late_init+0x0/0x1c0 > BUG: unable to handle kernel NULL pointer dereference at 000000000000008c > IP: [] hpet_late_init+0xfe/0x1c0 > PGD 0 > Oops: 0000 [1] SMP > CPU 0 > Modules linked in: > Pid: 1, comm: swapper Not tainted 2.6.27-rc5 #29725 > RIP: 0010:[] [] hpet_late_init+0xfe/0x1c0 > RSP: 0018:ffff88003fa07dd0 EFLAGS: 00010246 > RAX: 0000000000000000 RBX: 0000000000000003 RCX: 0000000000000000 > RDX: ffffc20000000160 RSI: 0000000000000000 RDI: 0000000000000003 > RBP: ffff88003fa07e90 R08: 0000000000000000 R09: ffff88003fa07dd0 > R10: 0000000000000001 R11: 0000000000000000 R12: ffff88003fa07dd0 > R13: 0000000000000002 R14: ffffc20000000000 R15: 000000006f57e511 > FS: 0000000000000000(0000) GS:ffffffff80cf6a80(0000) knlGS:0000000000000000 > CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b > CR2: 000000000000008c CR3: 0000000000201000 CR4: 00000000000006e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process swapper (pid: 1, threadinfo ffff88003fa06000, task ffff88003fa08000) > Stack: 00000000fed00000 ffffc20000000000 0000000100000003 0000000800000002 > 0000000000000000 0000000000000000 0000000000000000 0000000000000000 > 0000000000000000 0000000000000000 0000000000000000 0000000000000000 > Call Trace: > [] ? hpet_late_init+0x0/0x1c0 > [] do_one_initcall+0x45/0x190 > [] ? register_irq_proc+0x19/0xe0 > [] ? early_idt_handler+0x0/0x73 > [] kernel_init+0x14c/0x1b0 > [] ? trace_hardirqs_on_thunk+0x3a/0x3f > [] child_rip+0xa/0x11 > [] ? restore_args+0x0/0x30 > [] ? kernel_init+0x0/0x1b0 > [] ? child_rip+0x0/0x11 > Code: 20 48 83 c1 01 48 39 f1 75 e3 44 89 e8 4c 8b 05 29 29 22 00 31 f6 48 8d 78 01 66 66 90 89 f0 48 8d 04 80 48 c1 e0 05 4a 8d 0c 00 81 8c 00 00 00 08 74 26 8b 81 80 00 00 00 8b 91 88 00 00 00 > RIP [] hpet_late_init+0xfe/0x1c0 > RSP > CR2: 000000000000008c > Kernel panic - not syncing: Fatal exception There was one code path, with CONFIG_PCI_MSI disabled, where we were accessing hpet_devs without initialization. That resulted in the above crash. The change below adds a check for hpet_devs. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Shaohua Li Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 31e9191b7e1..01005aeda7d 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -126,6 +126,24 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) +{ + int i; + + if (!hpet_devs) + return; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + + hd->hd_irq[hdev->num] = hdev->irq; + hpet_reserve_timer(hd, hdev->num); + } +} + static void hpet_reserve_platform_timers(unsigned long id) { struct hpet __iomem *hpet = hpet_virt_address; @@ -158,15 +176,7 @@ static void hpet_reserve_platform_timers(unsigned long id) Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; } - for (i = 0; i < nrtimers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; - - if (!(hdev->flags & HPET_DEV_VALID)) - continue; - - hd.hd_irq[hdev->num] = hdev->irq; - hpet_reserve_timer(&hd, hdev->num); - } + hpet_reserve_msi_timers(&hd); hpet_alloc(&hd); -- cgit v1.2.3 From ba374c9baef910fbc5373541d98c50f15e82c3f8 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Mon, 8 Sep 2008 16:19:09 -0700 Subject: x86: fix HPET compiler error when not using CONFIG_PCI_MSI Added dummy function for hpet_setup_msi_irq(). Signed-off-by: Steven Noonan Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 01005aeda7d..422c577ef69 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -635,6 +635,10 @@ static int hpet_cpuhp_notify(struct notifier_block *n, } #else +static int hpet_setup_msi_irq(unsigned int irq) +{ + return 0; +} void hpet_msi_capability_lookup(unsigned int start_timer) { return; -- cgit v1.2.3 From 87783be4c26d79f5cde245e6e8a9fd2b295e92d7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 10 Sep 2008 22:19:50 +0400 Subject: x86: io-apic - get rid of __DO_ACTION macro Replace __DO_ACTION macro with io_apic_modify_irq function. This allow us to 'grep' definitions being hided by __DO_ACTION macro: __unmask_IO_APIC_irq __mask_IO_APIC_irq __mask_and_edge_IO_APIC_irq __unmask_and_level_IO_APIC_irq Signed-off-by: Cyrill Gorcunov Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 103 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 12e59df026d..ac47384724e 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -643,65 +643,66 @@ static void __init replace_pin_at_irq(unsigned int irq, add_pin_to_irq(irq, newapic, newpin); } -#define __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ - \ -{ \ - int pin; \ - struct irq_cfg *cfg; \ - struct irq_pin_list *entry; \ - \ - cfg = irq_cfg(irq); \ - entry = cfg->irq_2_pin; \ - for (;;) { \ - unsigned int reg; \ - if (!entry) \ - break; \ - pin = entry->pin; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION_DISABLE; \ - reg ACTION_ENABLE; \ - io_apic_modify(entry->apic, 0x10 + R + pin*2, reg); \ - FINAL; \ - if (!entry->next) \ - break; \ - entry = entry->next; \ - } \ -} - -#define DO_ACTION(name,R, ACTION_ENABLE, ACTION_DISABLE, FINAL) \ - \ - static void name##_IO_APIC_irq (unsigned int irq) \ - __DO_ACTION(R, ACTION_ENABLE, ACTION_DISABLE, FINAL) - -/* mask = 0 */ -DO_ACTION(__unmask, 0, |= 0, &= ~IO_APIC_REDIR_MASKED, ) +static inline void io_apic_modify_irq(unsigned int irq, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + int pin; + struct irq_cfg *cfg; + struct irq_pin_list *entry; + + cfg = irq_cfg(irq); + for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { + unsigned int reg; + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); + } +} + +static void __unmask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); +} #ifdef CONFIG_X86_64 -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) +void io_apic_sync(struct irq_pin_list *entry) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + /* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ + struct io_apic __iomem *io_apic; + io_apic = io_apic_base(entry->apic); readl(&io_apic->data); } -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, io_apic_sync(entry->apic)) - -#else - -/* mask = 1 */ -DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, &= ~0, ) - -/* mask = 1, trigger = 0 */ -DO_ACTION(__mask_and_edge, 0, |= IO_APIC_REDIR_MASKED, &= ~IO_APIC_REDIR_LEVEL_TRIGGER, ) +static void __mask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); +} +#else /* CONFIG_X86_32 */ +static void __mask_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); +} -/* mask = 0, trigger = 1 */ -DO_ACTION(__unmask_and_level, 0, |= IO_APIC_REDIR_LEVEL_TRIGGER, &= ~IO_APIC_REDIR_MASKED, ) +static void __mask_and_edge_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} -#endif +static void __unmask_and_level_IO_APIC_irq(unsigned int irq) +{ + io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); +} +#endif /* CONFIG_X86_32 */ static void mask_IO_APIC_irq (unsigned int irq) { -- cgit v1.2.3 From 823b259b80158a5fb694f6784e18b5bae669c599 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Sep 2008 21:56:46 -0700 Subject: x86: print out apic id in hex format Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- arch/x86/kernel/cpu/amd.c | 2 +- arch/x86/kernel/cpu/intel.c | 2 +- arch/x86/kernel/smpboot.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index f3f50858048..11cb1863958 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1586,7 +1586,7 @@ int __init APIC_init_uniprocessor(void) */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", boot_cpu_physical_apicid); clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); return -1; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 32e73520adf..8f1e31db2ad 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -249,7 +249,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) } numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); + printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 99468dbd08d..cce0b6118d5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -174,7 +174,7 @@ static void __cpuinit srat_detect_node(void) node = first_node(node_online_map); numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node); + printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8c3aca7cb34..f84de2ff933 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -543,10 +543,10 @@ static inline void __inquire_remote_apic(int apicid) int timeout; u32 status; - printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); + printk(KERN_INFO "Inquiring remote APIC 0x%x...\n", apicid); for (i = 0; i < ARRAY_SIZE(regs); i++) { - printk(KERN_INFO "... APIC #%d %s: ", apicid, names[i]); + printk(KERN_INFO "... APIC 0x%x %s: ", apicid, names[i]); /* * Wait for idle. @@ -874,7 +874,7 @@ do_rest: start_ip = setup_trampoline(); /* So we see what's up */ - printk(KERN_INFO "Booting processor %d/%d ip %lx\n", + printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", cpu, apicid, start_ip); /* -- cgit v1.2.3 From 9df08f109572e88fbdab9a61d5cb0f0eae4ac9fa Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 14 Sep 2008 11:55:37 +0400 Subject: x86: apic - lapic_setup_esr does not handle esr_disable - fix it lapic_setup_esr doesn't handle esr_disable inquire. The error brought in during unification process. Fix it. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 63 ++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 11cb1863958..59550cc017d 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1081,40 +1081,43 @@ void __init init_bsp_APIC(void) static void __cpuinit lapic_setup_esr(void) { - unsigned long oldvalue, value, maxlvt; - if (lapic_is_integrated() && !esr_disable) { - if (esr_disable) { - /* - * Something untraceable is creating bad interrupts on - * secondary quads ... for the moment, just leave the - * ESR disabled - we can't do anything useful with the - * errors anyway - mbligh - */ - printk(KERN_INFO "Leaving ESR disabled.\n"); - return; - } - /* !82489DX */ - maxlvt = lapic_get_maxlvt(); - if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ - apic_write(APIC_ESR, 0); - oldvalue = apic_read(APIC_ESR); + unsigned int oldvalue, value, maxlvt; + + if (!lapic_is_integrated()) { + printk(KERN_INFO "No ESR for 82489DX.\n"); + return; + } - /* enables sending errors */ - value = ERROR_APIC_VECTOR; - apic_write(APIC_LVTERR, value); + if (esr_disable) { /* - * spec says clear errors after enabling vector. + * Something untraceable is creating bad interrupts on + * secondary quads ... for the moment, just leave the + * ESR disabled - we can't do anything useful with the + * errors anyway - mbligh */ - if (maxlvt > 3) - apic_write(APIC_ESR, 0); - value = apic_read(APIC_ESR); - if (value != oldvalue) - apic_printk(APIC_VERBOSE, "ESR value before enabling " - "vector: 0x%08lx after: 0x%08lx\n", - oldvalue, value); - } else { - printk(KERN_INFO "No ESR for 82489DX.\n"); + printk(KERN_INFO "Leaving ESR disabled.\n"); + return; } + + maxlvt = lapic_get_maxlvt(); + if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ + apic_write(APIC_ESR, 0); + oldvalue = apic_read(APIC_ESR); + + /* enables sending errors */ + value = ERROR_APIC_VECTOR; + apic_write(APIC_LVTERR, value); + + /* + * spec says clear errors after enabling vector. + */ + if (maxlvt > 3) + apic_write(APIC_ESR, 0); + value = apic_read(APIC_ESR); + if (value != oldvalue) + apic_printk(APIC_VERBOSE, "ESR value before enabling " + "vector: 0x%08x after: 0x%08x\n", + oldvalue, value); } -- cgit v1.2.3 From 08ad776e3c54e6fe8b50939f176538063b69f89e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 14 Sep 2008 11:55:38 +0400 Subject: x86: apic - skip writting ESR register if we dont have on On 82489DX we don't have ESR register so we should not write it. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 59550cc017d..d730e8d3172 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1131,7 +1131,7 @@ void __cpuinit setup_local_APIC(void) #ifdef CONFIG_X86_32 /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (esr_disable) { + if (lapic_is_integrated() && esr_disable) { apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); apic_write(APIC_ESR, 0); -- cgit v1.2.3 From b189892de4da4634560657aedd774752094dbfa0 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 12 Sep 2008 23:58:24 +0400 Subject: x86: apic - fix unused vars warning in calibrate_APIC_clock If we don't have CONFIG_X86_PM_TIMER=y compiler warns about unused variables. Move PM timer based calibration into a separate function and make the code cleaner and the compiler happy as well. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 81 +++++++++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index d730e8d3172..784116933c7 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -538,14 +538,51 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) } } +static int __init calibrate_by_pmtimer(long deltapm, long *delta) +{ + const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; + const long pm_thresh = pm_100ms / 100; + unsigned long mult; + u64 res; + +#ifndef CONFIG_X86_PM_TIMER + return -1; +#endif + + apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); + + /* Check, if the PM timer is available */ + if (!deltapm) + return -1; + + mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); + + if (deltapm > (pm_100ms - pm_thresh) && + deltapm < (pm_100ms + pm_thresh)) { + apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); + } else { + res = (((u64)deltapm) * mult) >> 22; + do_div(res, 1000000); + printk(KERN_WARNING "APIC calibration not consistent " + "with PM Timer: %ldms instead of 100ms\n", + (long)res); + /* Correct the lapic counter value */ + res = (((u64)(*delta)) * pm_100ms); + do_div(res, deltapm); + printk(KERN_INFO "APIC delta adjusted to PM-Timer: " + "%lu (%ld)\n", (unsigned long)res, *delta); + *delta = (long)res; + } + + return 0; +} + static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = &__get_cpu_var(lapic_events); - const long pm_100ms = PMTMR_TICKS_PER_SEC/10; - const long pm_thresh = pm_100ms/100; void (*real_handler)(struct clock_event_device *dev); unsigned long deltaj; - long delta, deltapm; + long delta; int pm_referenced = 0; local_irq_disable(); @@ -575,36 +612,9 @@ static int __init calibrate_APIC_clock(void) delta = lapic_cal_t1 - lapic_cal_t2; apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); -#ifdef CONFIG_X86_PM_TIMER - /* Check, if the PM timer is available */ - deltapm = lapic_cal_pm2 - lapic_cal_pm1; - apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); - - if (deltapm) { - unsigned long mult; - u64 res; - - mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); - - if (deltapm > (pm_100ms - pm_thresh) && - deltapm < (pm_100ms + pm_thresh)) { - apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); - } else { - res = (((u64) deltapm) * mult) >> 22; - do_div(res, 1000000); - printk(KERN_WARNING "APIC calibration not consistent " - "with PM Timer: %ldms instead of 100ms\n", - (long)res); - /* Correct the lapic counter value */ - res = (((u64) delta) * pm_100ms); - do_div(res, deltapm); - printk(KERN_INFO "APIC delta adjusted to PM-Timer: " - "%lu (%ld)\n", (unsigned long) res, delta); - delta = (long) res; - } - pm_referenced = 1; - } -#endif + /* we trust the PM based calibration if possible */ + pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, + &delta); /* Calculate the scaled math multiplication factor */ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, @@ -646,7 +656,10 @@ static int __init calibrate_APIC_clock(void) levt->features &= ~CLOCK_EVT_FEAT_DUMMY; - /* We trust the pm timer based calibration */ + /* + * PM timer calibration failed or not turned on + * so lets try APIC timer based calibration + */ if (!pm_referenced) { apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); -- cgit v1.2.3 From 56ffa1a028b9fce3860a247c6fe79fce7cbf425b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 13 Sep 2008 13:11:16 +0400 Subject: x86: io-apic - do not use KERN_DEBUG marker too much, fix Yinghai Lu reported: | > 0 add_pin_to_irq: irq 15 --> apic 0 pin 15 | > IOAPIC[0]: Set routing entry (8-15 -> 0x3f -> IRQ 15 Mode:0 Active:0) | > 8-16 8-17 8-18 8-19 8-20 8-21 8-22 8-23 (apicid-pin) not connected | > 9-0 9-1 9-2 9-3 9-4 9-5 9-6 9-7 9-8 9-9 9-10 9-11 9-12 9-13 9-14 9-15 | > 9-16 9-17 9-18 9-19 9-20 9-21 9-22 9-23 (apicid-pin) not connected | > | | only first one not connected at first, and ... here is a quick fix for this. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ac47384724e..6ce5873a406 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1541,6 +1541,11 @@ static void __init setup_IO_APIC_irqs(void) pin); continue; } + if (notcon) { + apic_printk(APIC_VERBOSE, + " (apicid-pin) not connected\n"); + notcon = 0; + } irq = pin_2_irq(idx, apic, pin); #ifdef CONFIG_X86_32 @@ -1552,11 +1557,6 @@ static void __init setup_IO_APIC_irqs(void) setup_IO_APIC_irq(apic, pin, irq, irq_trigger(idx), irq_polarity(idx)); } - if (notcon) { - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); - notcon = 0; - } } if (notcon) -- cgit v1.2.3 From 9d98598d2fc286c8dbcd0b681168639528428db0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 19 Sep 2008 00:12:26 -0700 Subject: sparseirq: remove some debug print out Signed-off-by: Yinghai Lu Cc: Yinghai Lu Cc: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 6ce5873a406..01419fdc1d5 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -277,23 +277,6 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) spin_unlock_irqrestore(&irq_cfg_lock, flags); - printk(KERN_DEBUG "found new irq_cfg for irq %d\n", cfg->irq); -#ifdef CONFIG_HAVE_SPARSE_IRQ_DEBUG - { - /* dump the results */ - struct irq_cfg *cfg; - unsigned long phys; - unsigned long bytes = sizeof(struct irq_cfg); - - printk(KERN_DEBUG "=========================== %d\n", irq); - printk(KERN_DEBUG "irq_cfg dump after get that for %d\n", irq); - for_each_irq_cfg(cfg) { - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg %d ==> [%#lx - %#lx]\n", cfg->irq, phys, phys + bytes); - } - printk(KERN_DEBUG "===========================\n"); - } -#endif return cfg; } #else @@ -597,7 +580,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) cfg->irq_2_pin = entry; entry->apic = apic; entry->pin = pin; - printk(KERN_DEBUG " 0 add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); return; } @@ -613,7 +595,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry = entry->next; entry->apic = apic; entry->pin = pin; - printk(KERN_DEBUG " x add_pin_to_irq: irq %d --> apic %d pin %d\n", irq, apic, pin); } /* -- cgit v1.2.3 From 5ffa4eb2224343ec3fbd492ffe71e28e797435b7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 18 Sep 2008 23:37:57 +0400 Subject: x86: io-apic - interrupt remapping fix Interrupt remapping could lead to NULL dereference in case of kzalloc failed and memory leak in other way. So fix the both cases. Signed-off-by: Cyrill Gorcunov Cc: "Maciej W. Rozycki" Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 13 ++++++++++--- arch/x86/kernel/io_apic.c | 19 +++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 784116933c7..1f48bd1c9f2 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1360,7 +1360,12 @@ void enable_IR_x2apic(void) local_irq_save(flags); mask_8259A(); - save_mask_IO_APIC_setup(); + + ret = save_mask_IO_APIC_setup(); + if (ret) { + printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); + goto end; + } ret = enable_intr_remapping(1); @@ -1370,14 +1375,15 @@ void enable_IR_x2apic(void) } if (ret) - goto end; + goto end_restore; if (!x2apic) { x2apic = 1; apic_ops = &x2apic_ops; enable_x2apic(); } -end: + +end_restore: if (ret) /* * IR enabling failed @@ -1386,6 +1392,7 @@ end: else reinit_intr_remapped_IO_APIC(x2apic_preenabled); +end: unmask_8259A(); local_irq_restore(flags); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 01419fdc1d5..4cc9cb64c81 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -812,7 +812,7 @@ int save_mask_IO_APIC_setup(void) kzalloc(sizeof(struct IO_APIC_route_entry) * nr_ioapic_registers[apic], GFP_KERNEL); if (!early_ioapic_entries[apic]) - return -ENOMEM; + goto nomem; } for (apic = 0; apic < nr_ioapics; apic++) @@ -826,17 +826,32 @@ int save_mask_IO_APIC_setup(void) ioapic_write_entry(apic, pin, entry); } } + return 0; + +nomem: + for (; apic > 0; apic--) + kfree(early_ioapic_entries[apic]); + kfree(early_ioapic_entries[apic]); + memset(early_ioapic_entries, 0, + ARRAY_SIZE(early_ioapic_entries)); + + return -ENOMEM; } void restore_IO_APIC_setup(void) { int apic, pin; - for (apic = 0; apic < nr_ioapics; apic++) + for (apic = 0; apic < nr_ioapics; apic++) { + if (!early_ioapic_entries[apic]) + break; for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ioapic_write_entry(apic, pin, early_ioapic_entries[apic][pin]); + kfree(early_ioapic_entries[apic]); + early_ioapic_entries[apic] = NULL; + } } void reinit_intr_remapped_IO_APIC(int intr_remapping) -- cgit v1.2.3 From 8da077d6f31da291ee3a7dd559671cb8ca48cbe2 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Tue, 23 Sep 2008 08:42:05 -0500 Subject: x86, uv: fix ordering of calls to uv_system_init & uv_cpu_init Fix problem caused by reordering of the calls to uv_cpu_init() & uv_system_init. Originally, uv_cpu_init() was called AFTER uv_system_init. This order was recently broken as a side-effect of other patches. With this patch, initialization of cpu 0 is now done by the system_init call. Signed-off-by: Jack Steiner Signed-off-by: Ingo Molnar --- arch/x86/kernel/genx2apic_uv_x.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 33581d94a90..b689075bbe2 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -356,7 +356,22 @@ static __init void uv_rtc_init(void) sn_rtc_cycles_per_second = ticks_per_sec; } -static bool uv_system_inited; +/* + * Called on each cpu to initialize the per_cpu UV data area. + * ZZZ hotplug not supported yet + */ +void __cpuinit uv_cpu_init(void) +{ + /* CPU 0 initilization will be done via uv_system_init. */ + if (!uv_blade_info) + return; + + uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; + + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) + set_x2apic_extra_bits(uv_hub_info->pnode); +} + void __init uv_system_init(void) { @@ -448,21 +463,6 @@ void __init uv_system_init(void) map_mmr_high(max_pnode); map_config_high(max_pnode); map_mmioh_high(max_pnode); - uv_system_inited = true; -} -/* - * Called on each cpu to initialize the per_cpu UV data area. - * ZZZ hotplug not supported yet - */ -void __cpuinit uv_cpu_init(void) -{ - BUG_ON(!uv_system_inited); - - uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; - - if (get_uv_system_type() == UV_NON_UNIQUE_APIC) - set_x2apic_extra_bits(uv_hub_info->pnode); + uv_cpu_init(); } - - -- cgit v1.2.3 From 7564676813780e0ba4dacf95338202cb72d2172d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 24 Sep 2008 19:04:36 -0700 Subject: x86: irq no should not use hex in /proc/interrupts Arjan van de Ven noticed that we changed IRQ numbers from decimal to hex in /proc/interrupts - that can break user-space utilities like irqbalanced. Reported-by: Arjan van de Ven Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/irq_32.c | 2 +- arch/x86/kernel/irq_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index b2e1082cf5a..001772ffc91 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -307,7 +307,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%#x: ",i); + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index c2fca89a3f7..ec266109128 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -112,7 +112,7 @@ int show_interrupts(struct seq_file *p, void *v) action = desc->action; if (!action && !any_count) goto skip; - seq_printf(p, "%#x: ",i); + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else -- cgit v1.2.3 From c1370b49cc4f09de5b447ddf688a3988a297dbfc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 23 Sep 2008 23:00:02 +0400 Subject: x86: io-apic - interrupt remapping fix Clean up obscure for() cycle with straight while() form Signed-off-by: Cyrill Gorcunov Cc: "Maciej W. Rozycki" Acked-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4cc9cb64c81..ed68a6f99dc 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -830,9 +830,8 @@ int save_mask_IO_APIC_setup(void) return 0; nomem: - for (; apic > 0; apic--) - kfree(early_ioapic_entries[apic]); - kfree(early_ioapic_entries[apic]); + while (apic >= 0) + kfree(early_ioapic_entries[apic--]); memset(early_ioapic_entries, 0, ARRAY_SIZE(early_ioapic_entries)); -- cgit v1.2.3 From c81bba49a13cb3376654d0cc93dc069fd619ed76 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 25 Sep 2008 11:53:11 -0700 Subject: x86: print out irq nr for msi/ht, v3 v2: fix hpet compiling error v3: Bjorn want to use dev_printk instead Signed-off-by: Yinghai Lu Cc: Bjorn Helgaas Cc: Jesse Barnes Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 3 +++ arch/x86/kernel/io_apic.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 422c577ef69..2913913f4a4 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -467,6 +467,9 @@ static int hpet_setup_irq(struct hpet_dev *dev) irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); enable_irq(dev->irq); + printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", + dev->name, dev->irq); + return 0; } diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index ed68a6f99dc..4ee270d3035 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3354,6 +3354,8 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) #endif set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); + return 0; } @@ -3586,6 +3588,7 @@ int arch_setup_hpet_msi(unsigned int irq) hpet_msi_write(irq, &msg); set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, "edge"); + return 0; } #endif @@ -3682,6 +3685,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); + + dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); } return err; } -- cgit v1.2.3 From 5f79f2f2ad39b5177c52ed08ffd066ea0c1da924 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Wed, 24 Sep 2008 10:03:17 -0700 Subject: hpet: clean up warning Fix the below compile warnings due to recent HPET MSI changes arch/x86/kernel/hpet.c:48: warning: 'hpet_devs' defined but not used arch/x86/kernel/hpet.c:50: warning: 'per_cpu__cpu_hpet_dev' defined but not used Reported-by: Andrew Morton Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 57 +++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 2913913f4a4..77017e834cf 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -45,10 +45,6 @@ struct hpet_dev { char name[10]; }; -static struct hpet_dev *hpet_devs; - -static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); - unsigned long hpet_readl(unsigned long a) { return readl(hpet_virt_address + a); @@ -126,23 +122,8 @@ EXPORT_SYMBOL_GPL(is_hpet_enabled); * timer 0 and timer 1 in case of RTC emulation. */ #ifdef CONFIG_HPET -static void hpet_reserve_msi_timers(struct hpet_data *hd) -{ - int i; - if (!hpet_devs) - return; - - for (i = 0; i < hpet_num_timers; i++) { - struct hpet_dev *hdev = &hpet_devs[i]; - - if (!(hdev->flags & HPET_DEV_VALID)) - continue; - - hd->hd_irq[hdev->num] = hdev->irq; - hpet_reserve_timer(hd, hdev->num); - } -} +static void hpet_reserve_msi_timers(struct hpet_data *hd); static void hpet_reserve_platform_timers(unsigned long id) { @@ -362,6 +343,10 @@ static int hpet_legacy_next_event(unsigned long delta, * HPET MSI Support */ #ifdef CONFIG_PCI_MSI + +static DEFINE_PER_CPU(struct hpet_dev *, cpu_hpet_dev); +static struct hpet_dev *hpet_devs; + void hpet_msi_unmask(unsigned int irq) { struct hpet_dev *hdev = get_irq_data(irq); @@ -525,7 +510,8 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) #else #define RESERVE_TIMERS 0 #endif -void hpet_msi_capability_lookup(unsigned int start_timer) + +static void hpet_msi_capability_lookup(unsigned int start_timer) { unsigned int id; unsigned int num_timers; @@ -571,6 +557,26 @@ void hpet_msi_capability_lookup(unsigned int start_timer) num_timers, num_timers_used); } +#ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) +{ + int i; + + if (!hpet_devs) + return; + + for (i = 0; i < hpet_num_timers; i++) { + struct hpet_dev *hdev = &hpet_devs[i]; + + if (!(hdev->flags & HPET_DEV_VALID)) + continue; + + hd->hd_irq[hdev->num] = hdev->irq; + hpet_reserve_timer(hd, hdev->num); + } +} +#endif + static struct hpet_dev *hpet_get_unused_timer(void) { int i; @@ -642,10 +648,17 @@ static int hpet_setup_msi_irq(unsigned int irq) { return 0; } -void hpet_msi_capability_lookup(unsigned int start_timer) +static void hpet_msi_capability_lookup(unsigned int start_timer) +{ + return; +} + +#ifdef CONFIG_HPET +static void hpet_reserve_msi_timers(struct hpet_data *hd) { return; } +#endif static int hpet_cpuhp_notify(struct notifier_block *n, unsigned long action, void *hcpu) -- cgit v1.2.3 From 4173a0e7371ece227559b44943c6fd456ee470d1 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Thu, 2 Oct 2008 12:18:21 -0500 Subject: x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3 Provide a means for UV interrupt MMRs to be setup with the message to be sent when an MSI is raised. Signed-off-by: Dean Nelson Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/io_apic.c | 68 +++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/uv_irq.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/uv_irq.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 45cecc59d89..acc0e8bf043 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o + obj-y += bios_uv.o uv_irq.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 4ee270d3035..260c95a5e6d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -58,6 +58,8 @@ #include #include #include +#include +#include #include #include @@ -3692,6 +3694,72 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ +#ifdef CONFIG_X86_64 +/* + * Re-target the irq to the specified CPU and enable the specified MMR located + * on the specified blade to allow the sending of MSIs to the specified CPU. + */ +int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, + unsigned long mmr_offset) +{ + const cpumask_t *eligible_cpu = get_cpu_mask(cpu); + struct irq_cfg *cfg; + int mmr_pnode; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long flags; + int err; + + err = assign_irq_vector(irq, *eligible_cpu); + if (err != 0) + return err; + + spin_lock_irqsave(&vector_lock, flags); + set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_name); + spin_unlock_irqrestore(&vector_lock, flags); + + cfg = irq_cfg(irq); + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + + entry->vector = cfg->vector; + entry->delivery_mode = INT_DELIVERY_MODE; + entry->dest_mode = INT_DEST_MODE; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = cpu_mask_to_apicid(*eligible_cpu); + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + return irq; +} + +/* + * Disable the specified MMR located on the specified blade so that MSIs are + * longer allowed to be sent. + */ +void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) +{ + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + int mmr_pnode; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); + + entry->mask = 1; + + mmr_pnode = uv_blade_to_pnode(mmr_blade); + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +} +#endif /* CONFIG_X86_64 */ + int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c new file mode 100644 index 00000000000..6bd26c91c30 --- /dev/null +++ b/arch/x86/kernel/uv_irq.c @@ -0,0 +1,77 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV IRQ functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include + +static void uv_noop(unsigned int irq) +{ +} + +static unsigned int uv_noop_ret(unsigned int irq) +{ + return 0; +} + +static void uv_ack_apic(unsigned int irq) +{ + ack_APIC_irq(); +} + +struct irq_chip uv_irq_chip = { + .name = "UV-CORE", + .startup = uv_noop_ret, + .shutdown = uv_noop, + .enable = uv_noop, + .disable = uv_noop, + .ack = uv_noop, + .mask = uv_noop, + .unmask = uv_noop, + .eoi = uv_ack_apic, + .end = uv_noop, +}; + +/* + * Set up a mapping of an available irq and vector, and enable the specified + * MMR that defines the MSI that is to be sent to the specified CPU when an + * interrupt is raised. + */ +int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, + unsigned long mmr_offset) +{ + int irq; + int ret; + + irq = create_irq(); + if (irq <= 0) + return -EBUSY; + + ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); + if (ret != irq) + destroy_irq(irq); + + return ret; +} +EXPORT_SYMBOL_GPL(uv_setup_irq); + +/* + * Tear down a mapping of an irq and vector, and disable the specified MMR that + * defined the MSI that was to be sent to the specified CPU when an interrupt + * was raised. + * + * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). + */ +void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) +{ + arch_disable_uv_irq(mmr_blade, mmr_offset); + destroy_irq(irq); +} +EXPORT_SYMBOL_GPL(uv_teardown_irq); -- cgit v1.2.3 From 37762b6ffb37f9e21cbc6f80902aa06b7a053fd7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Oct 2008 11:38:37 +0200 Subject: x86, UV: add uv_setup_irq() and uv_teardown_irq() functions, v3, fix fix: arch/x86/kernel/uv_irq.c: In function 'uv_ack_apic': arch/x86/kernel/uv_irq.c:26: error: implicit declaration of function 'ack_APIC_irq' Signed-off-by: Ingo Molnar --- arch/x86/kernel/uv_irq.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index 6bd26c91c30..aeef529917e 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c @@ -10,6 +10,8 @@ #include #include + +#include #include static void uv_noop(unsigned int irq) -- cgit v1.2.3 From a50f70b17541c0060967c6df61133e968bad3652 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:58:54 -0500 Subject: x86: Add UV EFI table entry v4 Look for a UV entry in the EFI tables. Signed-off-by: Russ Anderson Signed-off-by: Paul Jackson Acked-by: Huang Ying Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/efi.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 945a31cdd81..1119d247fe1 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -366,6 +366,10 @@ void __init efi_init(void) SMBIOS_TABLE_GUID)) { efi.smbios = config_tables[i].table; printk(" SMBIOS=0x%lx ", config_tables[i].table); + } else if (!efi_guidcmp(config_tables[i].guid, + UV_SYSTEM_TABLE_GUID)) { + efi.uv_systab = config_tables[i].table; + printk(" UVsystab=0x%lx ", config_tables[i].table); } else if (!efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID)) { efi.hcdp = config_tables[i].table; -- cgit v1.2.3 From 7f5942329e0787087a5e4dced838cee711ac2b58 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:59:15 -0500 Subject: x86: Add UV bios call infrastructure v4 Add the EFI callback function and associated wrapper code. Initialize SAL system table entry info at boot time. Signed-off-by: Russ Anderson Signed-off-by: Paul Jackson Acked-by: Huang Ying Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/bios_uv.c | 101 +++++++++++++++++++++++++++++++-------- arch/x86/kernel/genx2apic_uv_x.c | 1 + 2 files changed, 81 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index fdd585f9c53..5481eb59f78 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -1,8 +1,6 @@ /* * BIOS run time interface routines. * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -16,33 +14,94 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson */ +#include +#include +#include #include -const char * -x86_bios_strerror(long status) +struct uv_systab uv_systab; + +s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) { - const char *str; - switch (status) { - case 0: str = "Call completed without error"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - default: str = "Unknown BIOS status code"; break; - } - return str; + struct uv_systab *tab = &uv_systab; + + if (!tab->function) + /* + * BIOS does not support UV systab + */ + return BIOS_STATUS_UNIMPLEMENTED; + + return efi_call6((void *)__va(tab->function), + (u64)which, a1, a2, a3, a4, a5); } -long -x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, - unsigned long *drift_info) +s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { - struct uv_bios_retval isrv; + unsigned long bios_flags; + s64 ret; + + local_irq_save(bios_flags); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + local_irq_restore(bios_flags); + + return ret; +} + +s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) +{ + s64 ret; + + preempt_disable(); + ret = uv_bios_call(which, a1, a2, a3, a4, a5); + preempt_enable(); - BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); - *ticks_per_second = isrv.v0; - *drift_info = isrv.v1; - return isrv.status; + return ret; +} + +long +x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second, + unsigned long *drift_info) +{ + return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, + (u64)ticks_per_second, 0, 0, 0); } EXPORT_SYMBOL_GPL(x86_bios_freq_base); + + +#ifdef CONFIG_EFI +void uv_bios_init(void) +{ + struct uv_systab *tab; + + if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || + (efi.uv_systab == (unsigned long)NULL)) { + printk(KERN_CRIT "No EFI UV System Table.\n"); + uv_systab.function = (unsigned long)NULL; + return; + } + + tab = (struct uv_systab *)ioremap(efi.uv_systab, + sizeof(struct uv_systab)); + if (strncmp(tab->signature, "UVST", 4) != 0) + printk(KERN_ERR "bad signature in UV system table!"); + + /* + * Copy table to permanent spot for later use. + */ + memcpy(&uv_systab, tab, sizeof(struct uv_systab)); + iounmap(tab); + + printk(KERN_INFO "EFI UV System Table Revision %d\n", tab->revision); +} +#else /* !CONFIG_EFI */ + +void uv_bios_init(void) { } +#endif + diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index b689075bbe2..aa2e5e15bf6 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -427,6 +427,7 @@ void __init uv_system_init(void) gnode_upper = (((unsigned long)node_id.s.node_id) & ~((1 << n_val) - 1)) << m_val; + uv_bios_init(); uv_rtc_init(); for_each_present_cpu(cpu) { -- cgit v1.2.3 From 922402f15a85f7a064926eb1db68cc52bc4d4a91 Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:59:33 -0500 Subject: x86: Add UV partition call v4 Add a bios call to return partitioning related info. Signed-off-by: Russ Anderson Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/bios_uv.c | 44 +++++++++++++++++++++++++++++++++++----- arch/x86/kernel/genx2apic_uv_x.c | 14 +++++++------ 2 files changed, 47 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 5481eb59f78..f0dfe6f17e7 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -23,6 +23,7 @@ #include #include #include +#include struct uv_systab uv_systab; @@ -65,14 +66,47 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return ret; } -long -x86_bios_freq_base(unsigned long clock_type, unsigned long *ticks_per_second, - unsigned long *drift_info) + +long sn_partition_id; +EXPORT_SYMBOL_GPL(sn_partition_id); +long uv_coherency_id; +EXPORT_SYMBOL_GPL(uv_coherency_id); +long uv_region_size; +EXPORT_SYMBOL_GPL(uv_region_size); +int uv_type; + + +s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, + long *region) +{ + s64 ret; + u64 v0, v1; + union partition_info_u part; + + ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, + (u64)(&v0), (u64)(&v1), 0, 0); + if (ret != BIOS_STATUS_SUCCESS) + return ret; + + part.val = v0; + if (uvtype) + *uvtype = part.hub_version; + if (partid) + *partid = part.partition_id; + if (coher) + *coher = part.coherence_id; + if (region) + *region = part.region_size; + return ret; +} + + +s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) { return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, - (u64)ticks_per_second, 0, 0, 0); + (u64)ticks_per_second, 0, 0, 0); } -EXPORT_SYMBOL_GPL(x86_bios_freq_base); +EXPORT_SYMBOL_GPL(uv_bios_freq_base); #ifdef CONFIG_EFI diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index aa2e5e15bf6..bfd532843df 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -341,12 +341,12 @@ static __init void map_mmioh_high(int max_pnode) static __init void uv_rtc_init(void) { - long status, ticks_per_sec, drift; + long status; + u64 ticks_per_sec; - status = - x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, - &drift); - if (status != 0 || ticks_per_sec < 100000) { + status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, + &ticks_per_sec); + if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { printk(KERN_WARNING "unable to determine platform RTC clock frequency, " "guessing.\n"); @@ -428,6 +428,8 @@ void __init uv_system_init(void) ~((1 << n_val) - 1)) << m_val; uv_bios_init(); + uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, + &uv_coherency_id, &uv_region_size); uv_rtc_init(); for_each_present_cpu(cpu) { @@ -449,7 +451,7 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; - uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ + uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; max_pnode = max(pnode, max_pnode); -- cgit v1.2.3 From fc8c2d763bacc02962048fa042e287debb1416aa Mon Sep 17 00:00:00 2001 From: Russ Anderson Date: Fri, 3 Oct 2008 11:59:50 -0500 Subject: x86: Add sysfs entries for UV v4 Create /sys/firmware/sgi_uv sysfs entries for partition_id and coherence_id. Signed-off-by: Russ Anderson Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/uv_sysfs.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/uv_sysfs.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index acc0e8bf043..cb6ade443f0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -108,7 +108,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o # 64 bit specific files ifeq ($(CONFIG_X86_64),y) obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o - obj-y += bios_uv.o uv_irq.o + obj-y += bios_uv.o uv_irq.o uv_sysfs.o obj-y += genx2apic_cluster.o obj-y += genx2apic_phys.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c new file mode 100644 index 00000000000..67f9b9dbf80 --- /dev/null +++ b/arch/x86/kernel/uv_sysfs.c @@ -0,0 +1,72 @@ +/* + * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) Russ Anderson + */ + +#include +#include + +struct kobject *sgi_uv_kobj; + +static ssize_t partition_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); +} + +static ssize_t coherence_id_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); +} + +static struct kobj_attribute partition_id_attr = + __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); + +static struct kobj_attribute coherence_id_attr = + __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); + + +static int __init sgi_uv_sysfs_init(void) +{ + unsigned long ret; + + if (!sgi_uv_kobj) + sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); + if (!sgi_uv_kobj) { + printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); + return -EINVAL; + } + + ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); + return ret; + } + + ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); + if (ret) { + printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); + return ret; + } + + return 0; +} + +device_initcall(sgi_uv_sysfs_init); -- cgit v1.2.3 From 4c66a73f0796dacc2ff0d4af75794ec843ceb3d1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 4 Oct 2008 15:52:15 -0700 Subject: x86: sparse_irq: fix typo in debug print out Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 260c95a5e6d..f959acbc0db 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -257,7 +257,7 @@ static struct irq_cfg *irq_cfg_alloc(unsigned int irq) panic("please boot with nr_irq_cfg= %d\n", count * 2); phys = __pa(cfg); - printk(KERN_DEBUG "irq_irq ==> [%#lx - %#lx]\n", phys, phys + total_bytes); + printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes); for (i = 0; i < nr_irq_cfg; i++) init_one_irq_cfg(&cfg[i]); -- cgit v1.2.3 From 81608f3c254512b906ab78082ec5966b376aacd5 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 10 Oct 2008 19:00:17 +0400 Subject: x86: apic - unify APIC_DIVISOR Use APIC_DIVISOR being set to 16 for both 32/64bit mode. To escape APIC timer underflow during calibration set it to the maximum possible value. Also typo error (CONFG instead of proper CONFIG) fixed. The error was caught by Venkatesh Pallipadi, thanks a lot Venkatesh! See details on http://lkml.org/lkml/2008/10/9/425 Reported-by: Venkatesh Pallipad Signed-off-by: Cyrill Gorcunov Acked-by: "Maciej W. Rozycki" Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 1f48bd1c9f2..04a7f960bbc 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -332,11 +332,7 @@ int lapic_get_maxlvt(void) */ /* Clock divisor */ -#ifdef CONFG_X86_64 -#define APIC_DIVISOR 1 -#else #define APIC_DIVISOR 16 -#endif /* * This function sets up the local APIC timer, with a timeout of @@ -592,10 +588,10 @@ static int __init calibrate_APIC_clock(void) global_clock_event->event_handler = lapic_cal_handler; /* - * Setup the APIC counter to 1e9. There is no way the lapic + * Setup the APIC counter to maximum. There is no way the lapic * can underflow in the 100ms detection time frame */ - __setup_APIC_LVTT(1000000000, 0, 0); + __setup_APIC_LVTT(0xffffffff, 0, 0); /* Let the interrupts run */ local_irq_enable(); -- cgit v1.2.3 From 3235e936c0cc3589309280b6f59e5096779adae3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 13:16:00 +0200 Subject: x86: remove sparse irq from Kconfig This code is not ready yet. Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 600584b7a49..8636ddf2f4a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -237,17 +237,6 @@ config SMP If you don't know what to do here, say N. -config HAVE_SPARSE_IRQ - bool "Support sparse irq numbering" - depends on PCI_MSI || HT_IRQ - default y - help - This enables support for sparse irq, esp for msi/msi-x. the irq - number will be bus/dev/fn + 12bit. You may need if you have lots of - cards supports msi-x installed. - - If you don't know what to do here, say Y. - config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER -- cgit v1.2.3 From 2cc21ef843d4fb7da122239b644a1f6f0aca60a6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 14:16:55 +0200 Subject: genirq: remove sparse irq code This code is not ready, but we need to rip it out instead of rebasing as we would lose the APIC/IO_APIC unification otherwise. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_apic.c | 130 ++-------------------------------------------- arch/x86/kernel/irq_32.c | 8 --- arch/x86/kernel/irq_64.c | 8 --- 3 files changed, 4 insertions(+), 142 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index f959acbc0db..683610517d2 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -111,9 +111,6 @@ struct irq_cfg; struct irq_pin_list; struct irq_cfg { unsigned int irq; -#ifdef CONFIG_HAVE_SPARSE_IRQ - struct irq_cfg *next; -#endif struct irq_pin_list *irq_2_pin; cpumask_t domain; cpumask_t old_domain; @@ -151,15 +148,6 @@ static void init_one_irq_cfg(struct irq_cfg *cfg) static struct irq_cfg *irq_cfgx; -#ifdef CONFIG_HAVE_SPARSE_IRQ -/* - * Protect the irq_cfgx_free freelist: - */ -static DEFINE_SPINLOCK(irq_cfg_lock); - -static struct irq_cfg *irq_cfgx_free; -#endif - static void __init init_work(void *data) { struct dyn_array *da = data; @@ -174,114 +162,7 @@ static void __init init_work(void *data) legacy_count = ARRAY_SIZE(irq_cfg_legacy); for (i = legacy_count; i < *da->nr; i++) init_one_irq_cfg(&cfg[i]); - -#ifdef CONFIG_HAVE_SPARSE_IRQ - for (i = 1; i < *da->nr; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = &irq_cfgx[legacy_count]; - irq_cfgx[legacy_count - 1].next = NULL; -#endif -} - -#ifdef CONFIG_HAVE_SPARSE_IRQ -/* need to be biger than size of irq_cfg_legacy */ -static int nr_irq_cfg = 32; - -static int __init parse_nr_irq_cfg(char *arg) -{ - if (arg) { - nr_irq_cfg = simple_strtoul(arg, NULL, 0); - if (nr_irq_cfg < 32) - nr_irq_cfg = 32; - } - return 0; -} - -early_param("nr_irq_cfg", parse_nr_irq_cfg); - -#define for_each_irq_cfg(irqX, cfg) \ - for (cfg = irq_cfgx, irqX = cfg->irq; cfg; cfg = cfg->next, irqX = cfg ? cfg->irq : -1U) - - -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irq_cfg, PAGE_SIZE, init_work); - -static struct irq_cfg *irq_cfg(unsigned int irq) -{ - struct irq_cfg *cfg; - - cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg = cfg->next; - } - - return NULL; -} - -static struct irq_cfg *irq_cfg_alloc(unsigned int irq) -{ - struct irq_cfg *cfg, *cfg_pri; - unsigned long flags; - int count = 0; - int i; - - cfg_pri = cfg = irq_cfgx; - while (cfg) { - if (cfg->irq == irq) - return cfg; - - cfg_pri = cfg; - cfg = cfg->next; - count++; - } - - spin_lock_irqsave(&irq_cfg_lock, flags); - if (!irq_cfgx_free) { - unsigned long phys; - unsigned long total_bytes; - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_cfg %d\n", nr_irq_cfg); - - total_bytes = sizeof(struct irq_cfg) * nr_irq_cfg; - if (after_bootmem) - cfg = kzalloc(total_bytes, GFP_ATOMIC); - else - cfg = __alloc_bootmem_nopanic(total_bytes, PAGE_SIZE, 0); - - if (!cfg) - panic("please boot with nr_irq_cfg= %d\n", count * 2); - - phys = __pa(cfg); - printk(KERN_DEBUG "irq_cfg ==> [%#lx - %#lx]\n", phys, phys + total_bytes); - - for (i = 0; i < nr_irq_cfg; i++) - init_one_irq_cfg(&cfg[i]); - - for (i = 1; i < nr_irq_cfg; i++) - cfg[i-1].next = &cfg[i]; - - irq_cfgx_free = cfg; - } - - cfg = irq_cfgx_free; - irq_cfgx_free = irq_cfgx_free->next; - cfg->next = NULL; - if (cfg_pri) - cfg_pri->next = cfg; - else - irq_cfgx = cfg; - cfg->irq = irq; - - spin_unlock_irqrestore(&irq_cfg_lock, flags); - - return cfg; } -#else #define for_each_irq_cfg(irq, cfg) \ for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq]) @@ -290,17 +171,16 @@ DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work struct irq_cfg *irq_cfg(unsigned int irq) { - if (irq < nr_irqs) - return &irq_cfgx[irq]; + if (irq < nr_irqs) + return &irq_cfgx[irq]; - return NULL; + return NULL; } struct irq_cfg *irq_cfg_alloc(unsigned int irq) { - return irq_cfg(irq); + return irq_cfg(irq); } -#endif /* * This is performance-critical, we want to do it O(1) * @@ -3068,9 +2948,7 @@ unsigned int create_irq_nr(unsigned int irq_want) unsigned long flags; struct irq_cfg *cfg_new; -#ifndef CONFIG_HAVE_SPARSE_IRQ irq_want = nr_irqs - 1; -#endif irq = 0; spin_lock_irqsave(&vector_lock, flags); diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 001772ffc91..ccf6c1bf712 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -272,20 +272,12 @@ int show_interrupts(struct seq_file *p, void *v) struct irq_desc *desc = NULL; int tail = 0; -#ifdef CONFIG_HAVE_SPARSE_IRQ - desc = (struct irq_desc *)v; - entries = -1U; - i = desc->irq; - if (!desc->next) - tail = 1; -#else entries = nr_irqs - 1; i = *(loff_t *) v; if (i == nr_irqs) tail = 1; else desc = irq_to_desc(i); -#endif if (i == 0) { seq_printf(p, " "); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index ec266109128..21f53b91111 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -77,20 +77,12 @@ int show_interrupts(struct seq_file *p, void *v) struct irq_desc *desc = NULL; int tail = 0; -#ifdef CONFIG_HAVE_SPARSE_IRQ - desc = (struct irq_desc *)v; - entries = -1U; - i = desc->irq; - if (!desc->next) - tail = 1; -#else entries = nr_irqs - 1; i = *(loff_t *) v; if (i == nr_irqs) tail = 1; else desc = irq_to_desc(i); -#endif if (i == 0) { seq_printf(p, " "); -- cgit v1.2.3 From ee32c9732244bde4b9b59eeac2814c23e2b71f8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 14:34:09 +0200 Subject: genirq: remove irq_to_desc_alloc Remove the leftover of sparseirqs. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_apic.c | 6 +----- arch/x86/kernel/irqinit_32.c | 2 +- arch/x86/kernel/irqinit_64.c | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 683610517d2..e03bc0f87ee 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1257,11 +1257,7 @@ static void ioapic_register_intr(int irq, unsigned long trigger) { struct irq_desc *desc; - /* first time to use this irq_desc */ - if (irq < 16) - desc = irq_to_desc(irq); - else - desc = irq_to_desc_alloc(irq); + desc = irq_to_desc(irq); if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 9092103a18e..a8d35998d30 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -70,7 +70,7 @@ void __init init_ISA_irqs (void) */ for (i = 0; i < 16; i++) { /* first time call this irq_desc */ - struct irq_desc *desc = irq_to_desc_alloc(i); + struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; desc->action = NULL; diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index d17fbc26d96..ff023539128 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -144,7 +144,7 @@ void __init init_ISA_irqs(void) for (i = 0; i < 16; i++) { /* first time call this irq_desc */ - struct irq_desc *desc = irq_to_desc_alloc(i); + struct irq_desc *desc = irq_to_desc(i); desc->status = IRQ_DISABLED; desc->action = NULL; -- cgit v1.2.3 From d6c88a507ef0b6afdb013cba4e7804ba7324d99a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Oct 2008 15:27:23 +0200 Subject: genirq: revert dynarray Revert the dynarray changes. They need more thought and polishing. Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 1 - arch/x86/kernel/io_apic.c | 199 +++++++++++++++------------------------ arch/x86/kernel/setup_percpu.c | 8 +- arch/x86/kernel/visws_quirks.c | 2 +- arch/x86/kernel/vmlinux_32.lds.S | 1 - arch/x86/kernel/vmlinux_64.lds.S | 2 - arch/x86/xen/spinlock.c | 2 +- 7 files changed, 78 insertions(+), 137 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8636ddf2f4a..8da6123a60d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -33,7 +33,6 @@ config X86 select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS - select HAVE_DYN_ARRAY config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index e03bc0f87ee..6f80dc2f137 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -107,7 +107,6 @@ static int __init parse_noapic(char *str) } early_param("noapic", parse_noapic); -struct irq_cfg; struct irq_pin_list; struct irq_cfg { unsigned int irq; @@ -120,7 +119,7 @@ struct irq_cfg { }; /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ -static struct irq_cfg irq_cfg_legacy[] __initdata = { +static struct irq_cfg irq_cfgx[NR_IRQS] = { [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, @@ -139,48 +138,26 @@ static struct irq_cfg irq_cfg_legacy[] __initdata = { [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, }; -static struct irq_cfg irq_cfg_init = { .irq = -1U, }; - -static void init_one_irq_cfg(struct irq_cfg *cfg) -{ - memcpy(cfg, &irq_cfg_init, sizeof(struct irq_cfg)); -} - -static struct irq_cfg *irq_cfgx; - -static void __init init_work(void *data) -{ - struct dyn_array *da = data; - struct irq_cfg *cfg; - int legacy_count; - int i; - - cfg = *da->name; - - memcpy(cfg, irq_cfg_legacy, sizeof(irq_cfg_legacy)); - - legacy_count = ARRAY_SIZE(irq_cfg_legacy); - for (i = legacy_count; i < *da->nr; i++) - init_one_irq_cfg(&cfg[i]); -} - #define for_each_irq_cfg(irq, cfg) \ - for (irq = 0, cfg = &irq_cfgx[irq]; irq < nr_irqs; irq++, cfg = &irq_cfgx[irq]) + for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) -DEFINE_DYN_ARRAY(irq_cfgx, sizeof(struct irq_cfg), nr_irqs, PAGE_SIZE, init_work); - -struct irq_cfg *irq_cfg(unsigned int irq) +static struct irq_cfg *irq_cfg(unsigned int irq) { - if (irq < nr_irqs) - return &irq_cfgx[irq]; - - return NULL; + return irq < nr_irqs ? irq_cfgx + irq : NULL; } -struct irq_cfg *irq_cfg_alloc(unsigned int irq) + +static struct irq_cfg *irq_cfg_alloc(unsigned int irq) { return irq_cfg(irq); } +/* + * Rough estimation of how many shared IRQs there are, can be changed + * anytime. + */ +#define MAX_PLUS_SHARED_IRQS NR_IRQS +#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) + /* * This is performance-critical, we want to do it O(1) * @@ -193,59 +170,29 @@ struct irq_pin_list { struct irq_pin_list *next; }; -static struct irq_pin_list *irq_2_pin_head; -/* fill one page ? */ -static int nr_irq_2_pin = 0x100; +static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; static struct irq_pin_list *irq_2_pin_ptr; -static void __init irq_2_pin_init_work(void *data) + +static void __init irq_2_pin_init(void) { - struct dyn_array *da = data; - struct irq_pin_list *pin; + struct irq_pin_list *pin = irq_2_pin_head; int i; - pin = *da->name; - - for (i = 1; i < *da->nr; i++) + for (i = 1; i < PIN_MAP_SIZE; i++) pin[i-1].next = &pin[i]; irq_2_pin_ptr = &pin[0]; } -DEFINE_DYN_ARRAY(irq_2_pin_head, sizeof(struct irq_pin_list), nr_irq_2_pin, PAGE_SIZE, irq_2_pin_init_work); static struct irq_pin_list *get_one_free_irq_2_pin(void) { - struct irq_pin_list *pin; - int i; - - pin = irq_2_pin_ptr; - - if (pin) { - irq_2_pin_ptr = pin->next; - pin->next = NULL; - return pin; - } - - /* - * we run out of pre-allocate ones, allocate more - */ - printk(KERN_DEBUG "try to get more irq_2_pin %d\n", nr_irq_2_pin); - - if (after_bootmem) - pin = kzalloc(sizeof(struct irq_pin_list)*nr_irq_2_pin, - GFP_ATOMIC); - else - pin = __alloc_bootmem_nopanic(sizeof(struct irq_pin_list) * - nr_irq_2_pin, PAGE_SIZE, 0); + struct irq_pin_list *pin = irq_2_pin_ptr; if (!pin) panic("can not get more irq_2_pin\n"); - for (i = 1; i < nr_irq_2_pin; i++) - pin[i-1].next = &pin[i]; - irq_2_pin_ptr = pin->next; pin->next = NULL; - return pin; } @@ -284,8 +231,9 @@ static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned i static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) { struct io_apic __iomem *io_apic = io_apic_base(apic); - if (sis_apic_bug) - writel(reg, &io_apic->index); + + if (sis_apic_bug) + writel(reg, &io_apic->index); writel(value, &io_apic->data); } @@ -1044,11 +992,11 @@ static int pin_2_irq(int idx, int apic, int pin) while (i < apic) irq += nr_ioapic_registers[i++]; irq += pin; - /* + /* * For MPS mode, so far only needed by ES7000 platform */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); + if (ioapic_renumber_irq) + irq = ioapic_renumber_irq(apic, irq); } #ifdef CONFIG_X86_32 @@ -1232,19 +1180,19 @@ static struct irq_chip ir_ioapic_chip; #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) { - int apic, idx, pin; + int apic, idx, pin; - for (apic = 0; apic < nr_ioapics; apic++) { - for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { - idx = find_irq_entry(apic, pin, mp_INT); - if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) - return irq_trigger(idx); - } - } - /* + for (apic = 0; apic < nr_ioapics; apic++) { + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { + idx = find_irq_entry(apic, pin, mp_INT); + if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) + return irq_trigger(idx); + } + } + /* * nonexistent IRQs are edge default */ - return 0; + return 0; } #else static inline int IO_APIC_irq_trigger(int irq) @@ -1509,8 +1457,8 @@ __apicdebuginit(void) print_IO_APIC(void) reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); - if (reg_01.bits.version >= 0x20) - reg_03.raw = io_apic_read(apic, 3); + if (reg_01.bits.version >= 0x20) + reg_03.raw = io_apic_read(apic, 3); spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); @@ -2089,9 +2037,9 @@ static int ioapic_retrigger_irq(unsigned int irq) #else static int ioapic_retrigger_irq(unsigned int irq) { - send_IPI_self(irq_cfg(irq)->vector); + send_IPI_self(irq_cfg(irq)->vector); - return 1; + return 1; } #endif @@ -2189,7 +2137,7 @@ static int migrate_irq_remapped_level(int irq) if (io_apic_level_ack_pending(irq)) { /* - * Interrupt in progress. Migrating irq now will change the + * Interrupt in progress. Migrating irq now will change the * vector information in the IO-APIC RTE and that will confuse * the EOI broadcast performed by cpu. * So, delay the irq migration to the next instance. @@ -2426,28 +2374,28 @@ static void ack_apic_level(unsigned int irq) } static struct irq_chip ioapic_chip __read_mostly = { - .name = "IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_apic_edge, - .eoi = ack_apic_level, + .name = "IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_apic_edge, + .eoi = ack_apic_level, #ifdef CONFIG_SMP - .set_affinity = set_ioapic_affinity_irq, + .set_affinity = set_ioapic_affinity_irq, #endif .retrigger = ioapic_retrigger_irq, }; #ifdef CONFIG_INTR_REMAP static struct irq_chip ir_ioapic_chip __read_mostly = { - .name = "IR-IO-APIC", - .startup = startup_ioapic_irq, - .mask = mask_IO_APIC_irq, - .unmask = unmask_IO_APIC_irq, - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, + .name = "IR-IO-APIC", + .startup = startup_ioapic_irq, + .mask = mask_IO_APIC_irq, + .unmask = unmask_IO_APIC_irq, + .ack = ack_x2apic_edge, + .eoi = ack_x2apic_level, #ifdef CONFIG_SMP - .set_affinity = set_ir_ioapic_affinity_irq, + .set_affinity = set_ir_ioapic_affinity_irq, #endif .retrigger = ioapic_retrigger_irq, }; @@ -2636,8 +2584,8 @@ static inline void __init check_timer(void) local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2822,12 +2770,12 @@ void __init setup_IO_APIC(void) io_apic_irqs = ~PIC_IRQS; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); - /* + /* * Set up IO-APIC IRQ routing. */ #ifdef CONFIG_X86_32 - if (!acpi_ioapic) - setup_ioapic_ids_from_mpc(); + if (!acpi_ioapic) + setup_ioapic_ids_from_mpc(); #endif sync_Arb_IDs(); setup_IO_APIC_irqs(); @@ -2842,9 +2790,9 @@ void __init setup_IO_APIC(void) static int __init io_apic_bug_finalize(void) { - if (sis_apic_bug == -1) - sis_apic_bug = 0; - return 0; + if (sis_apic_bug == -1) + sis_apic_bug = 0; + return 0; } late_initcall(io_apic_bug_finalize); @@ -3199,7 +3147,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) if (index < 0) { printk(KERN_ERR "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); + pci_name(dev)); return -ENOSPC; } return index; @@ -3885,23 +3833,24 @@ static struct resource * __init ioapic_setup_resources(void) void __init ioapic_init_mappings(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; - int i; struct resource *ioapic_res; + int i; + irq_2_pin_init(); ioapic_res = ioapic_setup_resources(); for (i = 0; i < nr_ioapics; i++) { if (smp_found_config) { ioapic_phys = mp_ioapics[i].mp_apicaddr; #ifdef CONFIG_X86_32 - if (!ioapic_phys) { - printk(KERN_ERR - "WARNING: bogus zero IO-APIC " - "address found in MPTABLE, " - "disabling IO/APIC support!\n"); - smp_found_config = 0; - skip_ioapic_setup = 1; - goto fake_ioapic_page; - } + if (!ioapic_phys) { + printk(KERN_ERR + "WARNING: bogus zero IO-APIC " + "address found in MPTABLE, " + "disabling IO/APIC support!\n"); + smp_found_config = 0; + skip_ioapic_setup = 1; + goto fake_ioapic_page; + } #endif } else { #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 2b7dab699e8..410c88f0bfe 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -140,7 +140,7 @@ static void __init setup_cpu_pda_map(void) */ void __init setup_per_cpu_areas(void) { - ssize_t size, old_size, da_size; + ssize_t size, old_size; char *ptr; int cpu; unsigned long align = 1; @@ -150,9 +150,8 @@ void __init setup_per_cpu_areas(void) /* Copy section for each CPU (we discard the original) */ old_size = PERCPU_ENOUGH_ROOM; - da_size = per_cpu_dyn_array_size(&align); align = max_t(unsigned long, PAGE_SIZE, align); - size = roundup(old_size + da_size, align); + size = roundup(old_size, align); printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", size); @@ -182,9 +181,6 @@ void __init setup_per_cpu_areas(void) #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - - per_cpu_alloc_dyn_array(cpu, ptr + old_size); - } printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 817aa55a120..0c9667f0752 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -633,7 +633,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) /* * handle this 'virtual interrupt' as a Cobalt one now. */ - kstat_irqs_this_cpu(desc)++; + kstat_incr_irqs_this_cpu(realirq, desc); if (likely(desc->action != NULL)) handle_IRQ_event(realirq, desc->action); diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index c36007ab394..a9b8560adbc 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S @@ -145,7 +145,6 @@ SECTIONS *(.x86_cpu_dev.init) __x86_cpu_dev_end = .; } - DYN_ARRAY_INIT(8) SECURITY_INIT . = ALIGN(4); .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 30973dbac8c..3245ad72594 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -174,8 +174,6 @@ SECTIONS } __x86_cpu_dev_end = .; - DYN_ARRAY_INIT(8) - SECURITY_INIT . = ALIGN(8); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index bb6bc721b13..5601506f2dd 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -241,7 +241,7 @@ static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enabl ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq)); } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */ - kstat_irqs_this_cpu(irq_to_desc(irq))++; + kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); out: raw_local_irq_restore(flags); -- cgit v1.2.3 From a1aca5de08a0cb840a90fb3f729a5940f8d21185 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 15 Oct 2008 19:29:15 +0200 Subject: genirq: remove artifacts from sparseirq removal Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 1 - arch/x86/kernel/irqinit_32.c | 1 - arch/x86/kernel/vmlinux_64.lds.S | 1 - arch/x86/mm/init_32.c | 3 --- 4 files changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5fef4fece4a..0d1c26a583c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1254,7 +1254,6 @@ static int __init acpi_parse_madt_ioapic_entries(void) return count; } - count = acpi_table_parse_madt(ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, nr_irqs); diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index a8d35998d30..845aa9803e8 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -184,4 +184,3 @@ void __init native_init_IRQ(void) irq_ctx_init(smp_processor_id()); } - diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 3245ad72594..46e05447405 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S @@ -173,7 +173,6 @@ SECTIONS *(.x86_cpu_dev.init) } __x86_cpu_dev_end = .; - SECURITY_INIT . = ALIGN(8); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 91343c2694b..8396868e82c 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -66,7 +66,6 @@ static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; static int __initdata after_init_bootmem; -int after_bootmem; static __init void *alloc_low_page(unsigned long *phys) { @@ -989,8 +988,6 @@ void __init mem_init(void) set_highmem_pages_init(); - after_bootmem = 1; - codesize = (unsigned long) &_etext - (unsigned long) &_text; datasize = (unsigned long) &_edata - (unsigned long) &_etext; initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; -- cgit v1.2.3 From c0c168ca26b54a4a6ad34fc813fe00f275fbc94c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 11:15:28 +0200 Subject: x86: cleanup show_interrupts The sparseirq patches introduced some more ugliness in show_interrupts(). Clean it up all together and make the code easier to read by splitting out the "tail" function which prints the special interrupts. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq_32.c | 160 +++++++++++++++++++++++------------------------ arch/x86/kernel/irq_64.c | 153 ++++++++++++++++++++++---------------------- 2 files changed, 154 insertions(+), 159 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index ccf6c1bf712..8d525765a6c 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -263,22 +263,67 @@ atomic_t irq_err_count; * /proc/interrupts printing: */ +static int show_other_interrupts(struct seq_file *p) +{ + int j; + + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", nmi_count(j)); + seq_printf(p, " Non-maskable interrupts\n"); +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "LOC: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); +#endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count); + seq_printf(p, " Function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); +#endif +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#if defined(CONFIG_X86_IO_APIC) + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif + return 0; +} + int show_interrupts(struct seq_file *p, void *v) { + unsigned long flags, any_count = 0; int i = *(loff_t *) v, j; - struct irqaction * action; - unsigned long flags; - unsigned int entries; - struct irq_desc *desc = NULL; - int tail = 0; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; - entries = nr_irqs - 1; - i = *(loff_t *) v; if (i == nr_irqs) - tail = 1; - else - desc = irq_to_desc(i); + return show_other_interrupts(p); + /* print header */ if (i == 0) { seq_printf(p, " "); for_each_online_cpu(j) @@ -286,88 +331,37 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i <= entries) { - unsigned any_count = 0; - - spin_lock_irqsave(&desc->lock, flags); + desc = irq_to_desc(i); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP - any_count = kstat_irqs(i); + any_count = kstat_irqs(i); #else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); #endif - action = desc->action; - if (!action && !any_count) - goto skip; - seq_printf(p, "%3d: ", i); + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); + seq_printf(p, "%10u ", kstat_irqs(i)); #else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); } - if (tail) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); - seq_printf(p, " Non-maskable interrupts\n"); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#endif -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); -#endif -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", - per_cpu(irq_stat,j).irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - } + seq_putc(p, '\n'); +out: + spin_unlock_irqrestore(&desc->lock, flags); return 0; } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 21f53b91111..4f374294f29 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -68,22 +68,65 @@ static inline void stack_overflow_check(struct pt_regs *regs) * Generic, controller-independent functions: */ +static int show_other_interrupts(struct seq_file *p) +{ + int j; + + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); + seq_printf(p, "LOC: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); + seq_printf(p, " Function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); +#endif + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + + return 0; +} + int show_interrupts(struct seq_file *p, void *v) { - int i, j; - struct irqaction * action; - unsigned long flags; - unsigned int entries; - struct irq_desc *desc = NULL; - int tail = 0; - - entries = nr_irqs - 1; - i = *(loff_t *) v; + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; + if (i == nr_irqs) - tail = 1; - else - desc = irq_to_desc(i); + return show_other_interrupts(p); + /* print header */ if (i == 0) { seq_printf(p, " "); for_each_online_cpu(j) @@ -91,79 +134,37 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - if (i <= entries) { - unsigned any_count = 0; - - spin_lock_irqsave(&desc->lock, flags); + desc = irq_to_desc(i); + spin_lock_irqsave(&desc->lock, flags); #ifndef CONFIG_SMP - any_count = kstat_irqs(i); + any_count = kstat_irqs(i); #else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); #endif - action = desc->action; - if (!action && !any_count) - goto skip; - seq_printf(p, "%3d: ", i); + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%3d: ", i); #ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); + seq_printf(p, "%10u ", kstat_irqs(i)); #else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); #endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - seq_putc(p, '\n'); -skip: - spin_unlock_irqrestore(&desc->lock, flags); - } - - if (tail) { - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_printf(p, " Non-maskable interrupts\n"); - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); - seq_printf(p, "THR: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); - seq_printf(p, " Threshold APIC interrupts\n"); -#endif - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); } + seq_putc(p, '\n'); +out: + spin_unlock_irqrestore(&desc->lock, flags); return 0; } -- cgit v1.2.3 From 6b39ba771e3c78d00e0abcebad270bd4212b28bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 11:32:24 +0200 Subject: x86: unify show_interrupts() and proc helpers show_interrupts() and proc helpers are basically the same for 32 and 64 bit. Move them to a shared source file. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/irq.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 146 ----------------------------------------- arch/x86/kernel/irq_64.c | 132 ------------------------------------- 4 files changed, 167 insertions(+), 279 deletions(-) create mode 100644 arch/x86/kernel/irq.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cb6ade443f0..d7e5a58ee22 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -23,7 +23,7 @@ CFLAGS_hpet.o := $(nostackp) CFLAGS_tsc.o := $(nostackp) obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o -obj-y += traps.o irq_$(BITS).o dumpstack_$(BITS).o +obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time_$(BITS).o ioport.o ldt.o obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c new file mode 100644 index 00000000000..3b912849897 --- /dev/null +++ b/arch/x86/kernel/irq.c @@ -0,0 +1,166 @@ +/* + * Common interrupt code for 32 and 64 bit + */ +#include +#include +#include +#include + +#include +#include +#include + +atomic_t irq_err_count; + +#ifdef CONFIG_X86_32 +# define irq_stats(x) (&per_cpu(irq_stat,x)) +#else +# define irq_stats(x) cpu_pda(x) +#endif +/* + * /proc/interrupts printing: + */ +static int show_other_interrupts(struct seq_file *p) +{ + int j; + + seq_printf(p, "NMI: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "LOC: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); +#endif +#ifdef CONFIG_SMP + seq_printf(p, "RES: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); + seq_printf(p, "CAL: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, " Function call interrupts\n"); + seq_printf(p, "TLB: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); +#endif +#ifdef CONFIG_X86_MCE + seq_printf(p, "TRM: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); +# ifdef CONFIG_X86_64 + seq_printf(p, "THR: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); +# endif +#endif +#ifdef CONFIG_X86_LOCAL_APIC + seq_printf(p, "SPU: "); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); + seq_printf(p, " Spurious interrupts\n"); +#endif + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); +#if defined(CONFIG_X86_IO_APIC) + seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); +#endif + return 0; +} + +int show_interrupts(struct seq_file *p, void *v) +{ + unsigned long flags, any_count = 0; + int i = *(loff_t *) v, j; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; + + if (i == nr_irqs) + return show_other_interrupts(p); + + /* print header */ + if (i == 0) { + seq_printf(p, " "); + for_each_online_cpu(j) + seq_printf(p, "CPU%-8d",j); + seq_putc(p, '\n'); + } + + desc = irq_to_desc(i); + spin_lock_irqsave(&desc->lock, flags); +#ifndef CONFIG_SMP + any_count = kstat_irqs(i); +#else + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); +#endif + action = desc->action; + if (!action && !any_count) + goto out; + + seq_printf(p, "%3d: ", i); +#ifndef CONFIG_SMP + seq_printf(p, "%10u ", kstat_irqs(i)); +#else + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); +#endif + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); + + if (action) { + seq_printf(p, " %s", action->name); + while ((action = action->next) != NULL) + seq_printf(p, ", %s", action->name); + } + + seq_putc(p, '\n'); +out: + spin_unlock_irqrestore(&desc->lock, flags); + return 0; +} + +/* + * /proc/stat helpers + */ +u64 arch_irq_stat_cpu(unsigned int cpu) +{ + u64 sum = irq_stats(cpu)->__nmi_count; + +#ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->apic_timer_irqs; +#endif +#ifdef CONFIG_SMP + sum += irq_stats(cpu)->irq_resched_count; + sum += irq_stats(cpu)->irq_call_count; + sum += irq_stats(cpu)->irq_tlb_count; +#endif +#ifdef CONFIG_X86_MCE + sum += irq_stats(cpu)->irq_thermal_count; +# ifdef CONFIG_X86_64 + sum += irq_stats(cpu)->irq_threshold_count; +#endif +#endif +#ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->irq_spurious_count; +#endif + return sum; +} + +u64 arch_irq_stat(void) +{ + u64 sum = atomic_read(&irq_err_count); + +#ifdef CONFIG_X86_IO_APIC + sum += atomic_read(&irq_mis_count); +#endif + return sum; +} diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 8d525765a6c..6d9bf3936c7 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -253,152 +253,6 @@ unsigned int do_IRQ(struct pt_regs *regs) return 1; } -/* - * Interrupt statistics: - */ - -atomic_t irq_err_count; - -/* - * /proc/interrupts printing: - */ - -static int show_other_interrupts(struct seq_file *p) -{ - int j; - - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", nmi_count(j)); - seq_printf(p, " Non-maskable interrupts\n"); -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#endif -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); -#endif -#ifdef CONFIG_X86_LOCAL_APIC - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); -#endif - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); -#if defined(CONFIG_X86_IO_APIC) - seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); -#endif - return 0; -} - -int show_interrupts(struct seq_file *p, void *v) -{ - unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j; - struct irqaction *action; - struct irq_desc *desc; - - if (i > nr_irqs) - return 0; - - if (i == nr_irqs) - return show_other_interrupts(p); - - /* print header */ - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - desc = irq_to_desc(i); - spin_lock_irqsave(&desc->lock, flags); -#ifndef CONFIG_SMP - any_count = kstat_irqs(i); -#else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); -#endif - action = desc->action; - if (!action && !any_count) - goto out; - - seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -out: - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -/* - * /proc/stat helpers - */ -u64 arch_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = nmi_count(cpu); - -#ifdef CONFIG_X86_LOCAL_APIC - sum += per_cpu(irq_stat, cpu).apic_timer_irqs; -#endif -#ifdef CONFIG_SMP - sum += per_cpu(irq_stat, cpu).irq_resched_count; - sum += per_cpu(irq_stat, cpu).irq_call_count; - sum += per_cpu(irq_stat, cpu).irq_tlb_count; -#endif -#ifdef CONFIG_X86_MCE - sum += per_cpu(irq_stat, cpu).irq_thermal_count; -#endif -#ifdef CONFIG_X86_LOCAL_APIC - sum += per_cpu(irq_stat, cpu).irq_spurious_count; -#endif - return sum; -} - -u64 arch_irq_stat(void) -{ - u64 sum = atomic_read(&irq_err_count); - -#ifdef CONFIG_X86_IO_APIC - sum += atomic_read(&irq_mis_count); -#endif - return sum; -} - #ifdef CONFIG_HOTPLUG_CPU #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 4f374294f29..39ef7feb9ea 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,8 +18,6 @@ #include #include -atomic_t irq_err_count; - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -64,136 +62,6 @@ static inline void stack_overflow_check(struct pt_regs *regs) } #endif -/* - * Generic, controller-independent functions: - */ - -static int show_other_interrupts(struct seq_file *p) -{ - int j; - - seq_printf(p, "NMI: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); - seq_printf(p, " Non-maskable interrupts\n"); - seq_printf(p, "LOC: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); - seq_printf(p, " Local timer interrupts\n"); -#ifdef CONFIG_SMP - seq_printf(p, "RES: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_resched_count); - seq_printf(p, " Rescheduling interrupts\n"); - seq_printf(p, "CAL: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " Function call interrupts\n"); - seq_printf(p, "TLB: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); - seq_printf(p, " TLB shootdowns\n"); -#endif -#ifdef CONFIG_X86_MCE - seq_printf(p, "TRM: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_thermal_count); - seq_printf(p, " Thermal event interrupts\n"); - seq_printf(p, "THR: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_threshold_count); - seq_printf(p, " Threshold APIC interrupts\n"); -#endif - seq_printf(p, "SPU: "); - for_each_online_cpu(j) - seq_printf(p, "%10u ", cpu_pda(j)->irq_spurious_count); - seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); - - return 0; -} - -int show_interrupts(struct seq_file *p, void *v) -{ - unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j; - struct irqaction *action; - struct irq_desc *desc; - - if (i > nr_irqs) - return 0; - - if (i == nr_irqs) - return show_other_interrupts(p); - - /* print header */ - if (i == 0) { - seq_printf(p, " "); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d",j); - seq_putc(p, '\n'); - } - - desc = irq_to_desc(i); - spin_lock_irqsave(&desc->lock, flags); -#ifndef CONFIG_SMP - any_count = kstat_irqs(i); -#else - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); -#endif - action = desc->action; - if (!action && !any_count) - goto out; - - seq_printf(p, "%3d: ", i); -#ifndef CONFIG_SMP - seq_printf(p, "%10u ", kstat_irqs(i)); -#else - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); -#endif - seq_printf(p, " %8s", desc->chip->name); - seq_printf(p, "-%-8s", desc->name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -out: - spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - -/* - * /proc/stat helpers - */ -u64 arch_irq_stat_cpu(unsigned int cpu) -{ - u64 sum = cpu_pda(cpu)->__nmi_count; - - sum += cpu_pda(cpu)->apic_timer_irqs; -#ifdef CONFIG_SMP - sum += cpu_pda(cpu)->irq_resched_count; - sum += cpu_pda(cpu)->irq_call_count; - sum += cpu_pda(cpu)->irq_tlb_count; -#endif -#ifdef CONFIG_X86_MCE - sum += cpu_pda(cpu)->irq_thermal_count; - sum += cpu_pda(cpu)->irq_threshold_count; -#endif - sum += cpu_pda(cpu)->irq_spurious_count; - return sum; -} - -u64 arch_irq_stat(void) -{ - return atomic_read(&irq_err_count); -} - /* * do_IRQ handles all normal device IRQ's (the special * SMP cross-CPU interrupts have their own specific -- cgit v1.2.3 From 249f6d9eab372790579ada8991bba3384c204e06 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Oct 2008 12:18:50 +0200 Subject: x86: move ack_bad_irq() to irq.c Share more duplicated code. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/irq.c | 23 +++++++++++++++++++++++ arch/x86/kernel/irq_32.c | 23 ----------------------- arch/x86/kernel/irq_64.c | 20 -------------------- 3 files changed, 23 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 3b912849897..ccf6c503fc3 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -12,6 +12,29 @@ atomic_t irq_err_count; +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); + +#ifdef CONFIG_X86_LOCAL_APIC + /* + * Currently unexpected vectors happen only on SMP and APIC. + * We _must_ ack these because every local APIC has only N + * irq slots per priority level, and a 'hanging, unacked' IRQ + * holds up an irq slot - in excessive cases (when multiple + * unexpected vectors occur) that might lock up the APIC + * completely. + * But only ack when the APIC is enabled -AK + */ + if (cpu_has_apic) + ack_APIC_irq(); +#endif +} + #ifdef CONFIG_X86_32 # define irq_stats(x) (&per_cpu(irq_stat,x)) #else diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 6d9bf3936c7..a51382672de 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -25,29 +25,6 @@ EXPORT_PER_CPU_SYMBOL(irq_stat); DEFINE_PER_CPU(struct pt_regs *, irq_regs); EXPORT_PER_CPU_SYMBOL(irq_regs); -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); - -#ifdef CONFIG_X86_LOCAL_APIC - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But only ack when the APIC is enabled -AK - */ - if (cpu_has_apic) - ack_APIC_irq(); -#endif -} - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ static int check_stack_overflow(void) diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 39ef7feb9ea..60eb84eb77a 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -18,26 +18,6 @@ #include #include -/* - * 'what should we do if we get a hw irq event on an illegal vector'. - * each architecture has to answer this themselves. - */ -void ack_bad_irq(unsigned int irq) -{ - printk(KERN_WARNING "unexpected IRQ trap at vector %02x\n", irq); - /* - * Currently unexpected vectors happen only on SMP and APIC. - * We _must_ ack these because every local APIC has only N - * irq slots per priority level, and a 'hanging, unacked' IRQ - * holds up an irq slot - in excessive cases (when multiple - * unexpected vectors occur) that might lock up the APIC - * completely. - * But don't ack when the APIC is disabled. -AK - */ - if (!disable_apic) - ack_APIC_irq(); -} - #ifdef CONFIG_DEBUG_STACKOVERFLOW /* * Probabilistic stack overflow check: -- cgit v1.2.3 From 10e0298686be6249b0665465737a6b83446c4d35 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 16 Oct 2008 17:04:22 +0200 Subject: io_apic: make irq_mis_count available on 64-bit too Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 6f80dc2f137..b764d7429c6 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2277,9 +2277,7 @@ static void ack_apic_edge(unsigned int irq) ack_APIC_irq(); } -#ifdef CONFIG_X86_32 atomic_t irq_mis_count; -#endif static void ack_apic_level(unsigned int irq) { -- cgit v1.2.3 From 3038edabf48f01421c621cb77a712b446d3a5d67 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Oct 2008 01:26:27 +0200 Subject: x86 ACPI: fix breakage of resume on 64-bit UP systems with SMP kernel x86 ACPI: Fix breakage of resume on 64-bit UP systems with SMP kernel We are now using per CPU GDT tables in head_64.S and the original early_gdt_descr.address is invalidated after boot by setup_per_cpu_areas(). This breaks resume from suspend to RAM on x86_64 UP systems using SMP kernels, because this part of head_64.S is also executed during the resume and the invalid GDT address causes the system to crash. It doesn't break on 'true' SMP systems, because early_gdt_descr.address is modified every time native_cpu_up() runs. However, during resume it should point to the GDT of the boot CPU rather than to another CPU's GDT. For this reason, during suspend to RAM always make early_gdt_descr.address point to the boot CPU's GDT. This fixes http://bugzilla.kernel.org/show_bug.cgi?id=11568, which is a regression from 2.6.26. Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Cc: Signed-off-by: Ingo Molnar Reported-and-tested-by: Andy Wettstein --- arch/x86/kernel/acpi/sleep.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 426e5d91b63..c44cd6dbfa1 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "realmode/wakeup.h" #include "sleep.h" @@ -98,6 +99,8 @@ int acpi_save_state_mem(void) header->trampoline_segment = setup_trampoline() >> 4; #ifdef CONFIG_SMP stack_start.sp = temp_stack + 4096; + early_gdt_descr.address = + (unsigned long)get_cpu_gdt_table(smp_processor_id()); #endif initial_code = (unsigned long)wakeup_long64; saved_magic = 0x123456789abcdef0; -- cgit v1.2.3 From d768cb6929773060171eee8397a63883f60ddc07 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 25 Aug 2008 15:44:59 -0600 Subject: x86/PCI: follow lspci device/vendor style Use "[%04x:%04x]" for PCI vendor/device IDs to follow the format used by lspci(8). Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 006599db0dc..52a1de1128c 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -493,7 +493,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq if (pirq <= 4) irq = read_config_nybble(router, 0x56, pirq - 1); dev_info(&dev->dev, - "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", + "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n", dev->vendor, dev->device, pirq, irq); return irq; } @@ -501,7 +501,7 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) { dev_info(&dev->dev, - "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", + "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n", dev->vendor, dev->device, pirq, irq); if (pirq <= 4) write_config_nybble(router, 0x56, pirq - 1, irq); @@ -823,7 +823,7 @@ static void __init pirq_find_router(struct irq_router *r) r->get = NULL; r->set = NULL; - DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n", + DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n", rt->rtr_vendor, rt->rtr_device); pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn); @@ -843,7 +843,7 @@ static void __init pirq_find_router(struct irq_router *r) h->probe(r, pirq_router_dev, pirq_router_dev->device)) break; } - dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", + dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n", pirq_router.name, pirq_router_dev->vendor, pirq_router_dev->device); -- cgit v1.2.3 From 37a84ec668ba251ae02cf2c2c664baf6b247ae1f Mon Sep 17 00:00:00 2001 From: Seth Heasley Date: Thu, 28 Aug 2008 15:40:59 -0700 Subject: x86/PCI: irq and pci_ids patch for Intel Ibex Peak DeviceIDs This patch updates the Intel Ibex Peak (PCH) LPC and SMBus Controller DeviceIDs. The LPC Controller ID is set by Firmware within the range of 0x3b00-3b1f. This range is included in pci_ids.h using min and max values, and irq.c now has code to handle the range (in lieu of 32 additions to a SWITCH statement). The SMBus Controller ID is a fixed-value and will not change. Signed-off-by: Seth Heasley Acked-by: Jean Delvare Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 52a1de1128c..bf69dbe08bf 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -590,13 +590,20 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: - case PCI_DEVICE_ID_INTEL_PCH_0: - case PCI_DEVICE_ID_INTEL_PCH_1: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; return 1; } + + if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) && + (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) { + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } + return 0; } -- cgit v1.2.3 From f4432c5caec5fa95ea7eefd00f8e6cee17e2e023 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Mon, 20 Oct 2008 13:31:45 -0400 Subject: Update email addresses. Update assorted email addresses and related info to point to a single current, valid address. additionally - trivial CREDITS entry updates. (Not that this file means much any more) - remove arjans dead redhat.com address from powernow driver Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/cpufreq/longhaul.c | 4 ++-- arch/x86/kernel/cpu/cpufreq/powernow-k6.c | 2 +- arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 4 ++-- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | 2 +- arch/x86/kernel/cpu/mcheck/k7.c | 4 ++-- arch/x86/kernel/cpu/mcheck/mce_32.c | 2 +- arch/x86/kernel/cpu/mcheck/non-fatal.c | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index 06fcce516d5..b0461856acf 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c @@ -1,5 +1,5 @@ /* - * (C) 2001-2004 Dave Jones. + * (C) 2001-2004 Dave Jones. * (C) 2002 Padraig Brady. * * Licensed under the terms of the GNU GPL License version 2. @@ -1019,7 +1019,7 @@ MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor"); module_param(revid_errata, int, 0644); MODULE_PARM_DESC(revid_errata, "Ignore CPU Revision ID"); -MODULE_AUTHOR ("Dave Jones "); +MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index b5ced806a31..c1ac5790c63 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -246,7 +246,7 @@ static void __exit powernow_k6_exit(void) } -MODULE_AUTHOR("Arjan van de Ven , Dave Jones , Dominik Brodowski "); +MODULE_AUTHOR("Arjan van de Ven, Dave Jones , Dominik Brodowski "); MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors."); MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 0a61159d7b7..7c7d56b4313 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -1,6 +1,6 @@ /* * AMD K7 Powernow driver. - * (C) 2003 Dave Jones on behalf of SuSE Labs. + * (C) 2003 Dave Jones on behalf of SuSE Labs. * (C) 2003-2004 Dave Jones * * Licensed under the terms of the GNU GPL License version 2. @@ -692,7 +692,7 @@ static void __exit powernow_exit (void) module_param(acpi_force, int, 0444); MODULE_PARM_DESC(acpi_force, "Force ACPI to be used."); -MODULE_AUTHOR ("Dave Jones "); +MODULE_AUTHOR ("Dave Jones "); MODULE_DESCRIPTION ("Powernow driver for AMD K7 processors."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 84bb395038d..008d23ba491 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -7,7 +7,7 @@ * Support : mark.langsdorf@amd.com * * Based on the powernow-k7.c module written by Dave Jones. - * (C) 2003 Dave Jones on behalf of SuSE Labs + * (C) 2003 Dave Jones on behalf of SuSE Labs * (C) 2004 Dominik Brodowski * (C) 2004 Pavel Machek * Licensed under the terms of the GNU GPL License version 2. diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 191f7263c61..04d0376b64b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -431,7 +431,7 @@ static void __exit speedstep_exit(void) } -MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); +MODULE_AUTHOR ("Dave Jones , Dominik Brodowski "); MODULE_DESCRIPTION ("Speedstep driver for Intel mobile processors on chipsets with ICH-M southbridges."); MODULE_LICENSE ("GPL"); diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index f390c9f6635..dd3af6e7b39 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c @@ -1,6 +1,6 @@ /* - * Athlon/Hammer specific Machine Check Exception Reporting - * (C) Copyright 2002 Dave Jones + * Athlon specific Machine Check Exception Reporting + * (C) Copyright 2002 Dave Jones */ #include diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c index 774d87cfd8c..0ebf3fc6a61 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ b/arch/x86/kernel/cpu/mcheck/mce_32.c @@ -1,6 +1,6 @@ /* * mce.c - x86 Machine Check Exception Reporting - * (c) 2002 Alan Cox , Dave Jones + * (c) 2002 Alan Cox , Dave Jones */ #include diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index cc1fccdd31e..a74af128efc 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c @@ -1,7 +1,7 @@ /* * Non Fatal Machine Check Exception Reporting * - * (C) Copyright 2002 Dave Jones. + * (C) Copyright 2002 Dave Jones. * * This file contains routines to check for non-fatal MCEs every 15s * -- cgit v1.2.3