From cfb361f13c8136de78c406745abc4e4456e6d480 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 18 Sep 2008 15:49:14 +0800 Subject: [IA64] utrace syscall.h support for ia64 Add asm/syscall.h for IA64. Utrace requires this. Signed-off-by: Shaohua Li Signed-off-by: Tony Luck --- arch/ia64/kernel/ptrace.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 2a9943b5947..12b1e9f0b7a 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2199,3 +2199,68 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) #endif return &user_ia64_view; } + +struct syscall_get_set_args { + unsigned int i; + unsigned int n; + unsigned long *args; + struct pt_regs *regs; + int rw; +}; + +static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) +{ + struct syscall_get_set_args *args = data; + struct pt_regs *pt = args->regs; + unsigned long *krbs, cfm, ndirty; + int i, count; + + if (unw_unwind_to_user(info) < 0) + return; + + cfm = pt->cr_ifs; + krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + count = 0; + if (in_syscall(pt)) + count = min_t(int, args->n, cfm & 0x7f); + + for (i = 0; i < count; i++) { + if (args->rw) + *ia64_rse_skip_regs(krbs, ndirty + i + args->i) = + args->args[i]; + else + args->args[i] = *ia64_rse_skip_regs(krbs, + ndirty + i + args->i); + } + + if (!args->rw) { + while (i < args->n) { + args->args[i] = 0; + i++; + } + } +} + +void ia64_syscall_get_set_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned int i, unsigned int n, + unsigned long *args, int rw) +{ + struct syscall_get_set_args data = { + .i = i, + .n = n, + .args = args, + .regs = regs, + .rw = rw, + }; + + if (task == current) + unw_init_running(syscall_get_set_args_cb, &data); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, task); + syscall_get_set_args_cb(&ufi, &data); + } +} -- cgit v1.2.3 From f14488ccfe0f41207e40520fab60dce356ed9e57 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 6 Oct 2008 10:43:06 -0700 Subject: [IA64] utrace use generic trace hook Make IA64 use generic trace hook in some paths. Signed-off-by: Shaohua Li Signed-off-by: Tony Luck --- arch/ia64/kernel/entry.S | 5 +++++ arch/ia64/kernel/perfmon.c | 7 +++---- arch/ia64/kernel/process.c | 21 ++++++--------------- arch/ia64/kernel/ptrace.c | 47 ++++++++++++++-------------------------------- arch/ia64/kernel/signal.c | 8 ++++++++ 5 files changed, 36 insertions(+), 52 deletions(-) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 0dd6c1419d8..7ef0c594f5e 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -534,6 +534,11 @@ GLOBAL_ENTRY(ia64_trace_syscall) stf.spill [r16]=f10 stf.spill [r17]=f11 br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args + cmp.lt p6,p0=r8,r0 // check tracehook + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk strace_error // syscall failed -> adds r16=PT(F6)+16,sp adds r17=PT(F7)+16,sp ;; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index fc8f3509df2..ada4605d122 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -3684,7 +3685,7 @@ pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) PFM_SET_WORK_PENDING(task, 1); - tsk_set_notify_resume(task); + set_notify_resume(task); /* * XXX: send reschedule if task runs on another CPU @@ -5044,8 +5045,6 @@ pfm_handle_work(void) PFM_SET_WORK_PENDING(current, 0); - tsk_clear_notify_resume(current); - regs = task_pt_regs(current); /* @@ -5414,7 +5413,7 @@ pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, str * when coming from ctxsw, current still points to the * previous task, therefore we must work with task and not current. */ - tsk_set_notify_resume(task); + set_notify_resume(task); } /* * defer until state is changed (shorten spin window). the context is locked diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 3ab8373103e..341a0319a5b 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -160,21 +161,6 @@ show_regs (struct pt_regs *regs) show_stack(NULL, NULL); } -void tsk_clear_notify_resume(struct task_struct *tsk) -{ -#ifdef CONFIG_PERFMON - if (tsk->thread.pfm_needs_checking) - return; -#endif - if (test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_RSE)) - return; - clear_ti_thread_flag(task_thread_info(tsk), TIF_NOTIFY_RESUME); -} - -/* - * do_notify_resume_user(): - * Called from notify_resume_user at entry.S, with interrupts disabled. - */ void do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) { @@ -203,6 +189,11 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) ia64_do_signal(scr, in_syscall); } + if (test_thread_flag(TIF_NOTIFY_RESUME)) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(&scr->pt); + } + /* copy user rbs to kernel rbs */ if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) { local_irq_enable(); /* force interrupt enable */ diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 12b1e9f0b7a..92c9689b7d9 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -603,7 +604,7 @@ void ia64_ptrace_stop(void) { if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE)) return; - tsk_set_notify_resume(current); + set_notify_resume(current); unw_init_running(do_sync_rbs, ia64_sync_user_rbs); } @@ -613,7 +614,6 @@ void ia64_ptrace_stop(void) void ia64_sync_krbs(void) { clear_tsk_thread_flag(current, TIF_RESTORE_RSE); - tsk_clear_notify_resume(current); unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs); } @@ -644,7 +644,7 @@ ptrace_attach_sync_user_rbs (struct task_struct *child) spin_lock_irq(&child->sighand->siglock); if (child->state == TASK_STOPPED && !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { - tsk_set_notify_resume(child); + set_notify_resume(child); child->state = TASK_TRACED; stopped = 1; @@ -1232,37 +1232,16 @@ arch_ptrace (struct task_struct *child, long request, long addr, long data) } -static void -syscall_trace (void) -{ - /* - * The 0x80 provides a way for the tracing parent to - * distinguish between a syscall stop and SIGTRAP delivery. - */ - ptrace_notify(SIGTRAP - | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); - - /* - * This isn't the same as continuing with a signal, but it - * will do for normal use. strace only continues with a - * signal if the stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} - /* "asmlinkage" so the input arguments are preserved... */ -asmlinkage void +asmlinkage long syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { - if (test_thread_flag(TIF_SYSCALL_TRACE) - && (current->ptrace & PT_PTRACED)) - syscall_trace(); + if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (tracehook_report_syscall_entry(®s)) + return -ENOSYS; /* copy user rbs to kernel rbs */ if (test_thread_flag(TIF_RESTORE_RSE)) @@ -1283,6 +1262,7 @@ syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, audit_syscall_entry(arch, syscall, arg0, arg1, arg2, arg3); } + return 0; } /* "asmlinkage" so the input arguments are preserved... */ @@ -1292,6 +1272,8 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, long arg4, long arg5, long arg6, long arg7, struct pt_regs regs) { + int step; + if (unlikely(current->audit_context)) { int success = AUDITSC_RESULT(regs.r10); long result = regs.r8; @@ -1301,10 +1283,9 @@ syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, audit_syscall_exit(success, result); } - if ((test_thread_flag(TIF_SYSCALL_TRACE) - || test_thread_flag(TIF_SINGLESTEP)) - && (current->ptrace & PT_PTRACED)) - syscall_trace(); + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(®s, step); /* copy user rbs to kernel rbs */ if (test_thread_flag(TIF_RESTORE_RSE)) @@ -1940,7 +1921,7 @@ gpregs_writeback(struct task_struct *target, { if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) return 0; - tsk_set_notify_resume(target); + set_notify_resume(target); return do_regset_call(do_gpregs_writeback, target, regset, 0, 0, NULL, NULL); } diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 19c5a78636f..e12500a9c44 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -439,6 +440,13 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigse sigaddset(¤t->blocked, sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + /* + * Let tracing know that we've done the handler setup. + */ + tracehook_signal_handler(sig, info, ka, &scr->pt, + test_thread_flag(TIF_SINGLESTEP)); + return 1; } -- cgit v1.2.3 From 749da7912e1270abbbaef04112a7a78febf3f0f4 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:41 +0900 Subject: ia64/pv_ops: fix paravirtualization of ivt.S with CONFIG_SMP=n When CONFIG_SMP=n, three instruction in ivt.S were missed to paravirtualize. paravirtualize them. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/ivt.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index 416a952b19b..f675d8e3385 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -580,7 +580,7 @@ ENTRY(dirty_bit) mov b0=r29 // restore b0 ;; st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE + ITC_D(p0, r18, r16) // install updated PTE #endif mov pr=r31,-1 // restore pr RFI @@ -646,7 +646,7 @@ ENTRY(iaccess_bit) mov b0=r29 // restore b0 ;; st8 [r17]=r18 // store back updated PTE - itc.i r18 // install updated PTE + ITC_I(p0, r18, r16) // install updated PTE #endif /* !CONFIG_SMP */ mov pr=r31,-1 RFI @@ -698,7 +698,7 @@ ENTRY(daccess_bit) or r18=_PAGE_A,r18 // set the accessed bit ;; st8 [r17]=r18 // store back updated PTE - itc.d r18 // install updated PTE + ITC_D(p0, r18, r16) // install updated PTE #endif mov b0=r29 // restore b0 mov pr=r31,-1 -- cgit v1.2.3 From ce1fc742f9703eeda0787b449ac57a780585bc97 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:42 +0900 Subject: ia64/pv_ops: avoid name conflict of get_irq_chip(). The macro get_irq_chip() is defined in linux/include/linux/irq.h which cause name conflict with one in linux/arch/ia64/include/asm/paravirt.h. rename the latter to __get_irq_chip(). Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/paravirt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/paravirt.c b/arch/ia64/kernel/paravirt.c index afaf5b9a2cf..de35d8e8b7d 100644 --- a/arch/ia64/kernel/paravirt.c +++ b/arch/ia64/kernel/paravirt.c @@ -332,7 +332,7 @@ ia64_native_iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val) struct pv_iosapic_ops pv_iosapic_ops = { .pcat_compat_init = ia64_native_iosapic_pcat_compat_init, - .get_irq_chip = ia64_native_iosapic_get_irq_chip, + .__get_irq_chip = ia64_native_iosapic_get_irq_chip, .__read = ia64_native_iosapic_read, .__write = ia64_native_iosapic_write, -- cgit v1.2.3 From 1b4a18fcfadcc51987682fc266bb88ed59d12935 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:44 +0900 Subject: ia64: move function declaration, ia64_cpu_local_tick() from .c to .h eliminate the function declaration ia64_cpu_local_tick() in process.c by defining in arch/ia64/include/asm/timex.h The same function will be used in a different .c file later. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/process.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 3ab8373103e..8de0f460c88 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -251,7 +251,6 @@ default_idle (void) /* We don't actually take CPU down, just spin without interrupts. */ static inline void play_dead(void) { - extern void ia64_cpu_local_tick (void); unsigned int this_cpu = smp_processor_id(); /* Ack it */ -- cgit v1.2.3 From b31c09bd82731600a72c83894e7c6a53b36b6c83 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:49 +0900 Subject: ia64/xen: define several constants for ia64/xen. define several constants for ia64/xen. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/asm-offsets.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 94c44b1ccfd..eaa988baa87 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -16,6 +16,8 @@ #include #include +#include + #include "../kernel/sigframe.h" #include "../kernel/fsyscall_gtod_data.h" @@ -286,4 +288,29 @@ void foo(void) offsetof (struct itc_jitter_data_t, itc_jitter)); DEFINE(IA64_ITC_LASTCYCLE_OFFSET, offsetof (struct itc_jitter_data_t, itc_lastcycle)); + +#ifdef CONFIG_XEN + BLANK(); + +#define DEFINE_MAPPED_REG_OFS(sym, field) \ + DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field))) + + DEFINE_MAPPED_REG_OFS(XSI_PSR_I_ADDR_OFS, interrupt_mask_addr); + DEFINE_MAPPED_REG_OFS(XSI_IPSR_OFS, ipsr); + DEFINE_MAPPED_REG_OFS(XSI_IIP_OFS, iip); + DEFINE_MAPPED_REG_OFS(XSI_IFS_OFS, ifs); + DEFINE_MAPPED_REG_OFS(XSI_PRECOVER_IFS_OFS, precover_ifs); + DEFINE_MAPPED_REG_OFS(XSI_ISR_OFS, isr); + DEFINE_MAPPED_REG_OFS(XSI_IFA_OFS, ifa); + DEFINE_MAPPED_REG_OFS(XSI_IIPA_OFS, iipa); + DEFINE_MAPPED_REG_OFS(XSI_IIM_OFS, iim); + DEFINE_MAPPED_REG_OFS(XSI_IHA_OFS, iha); + DEFINE_MAPPED_REG_OFS(XSI_ITIR_OFS, itir); + DEFINE_MAPPED_REG_OFS(XSI_PSR_IC_OFS, interrupt_collection_enabled); + DEFINE_MAPPED_REG_OFS(XSI_BANKNUM_OFS, banknum); + DEFINE_MAPPED_REG_OFS(XSI_BANK0_R16_OFS, bank0_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_BANK1_R16_OFS, bank1_regs[0]); + DEFINE_MAPPED_REG_OFS(XSI_B0NATS_OFS, vbnat); + DEFINE_MAPPED_REG_OFS(XSI_B1NATS_OFS, vnat); +#endif /* CONFIG_XEN */ } -- cgit v1.2.3 From 080104cd0f708b6bb5a121922801867a29ad63fc Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:17:58 +0900 Subject: ia64/pv_ops/xen: elf note based xen startup. This patch enables elf note based xen startup for IA-64, which gives the kernel an early hint for running on xen like x86 case. In order to avoid the multi entry point, presumably extending booting protocol(i.e. extending struct ia64_boot_param) would be necessary. It probably means that elilo also needs modification. Signed-off-by: Qing He Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/asm-offsets.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index eaa988baa87..742dbb1d5a4 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -17,6 +17,7 @@ #include #include +#include #include "../kernel/sigframe.h" #include "../kernel/fsyscall_gtod_data.h" @@ -292,6 +293,9 @@ void foo(void) #ifdef CONFIG_XEN BLANK(); + DEFINE(XEN_NATIVE_ASM, XEN_NATIVE); + DEFINE(XEN_PV_DOMAIN_ASM, XEN_PV_DOMAIN); + #define DEFINE_MAPPED_REG_OFS(sym, field) \ DEFINE(sym, (XMAPPEDREGS_OFS + offsetof(struct mapped_regs, field))) -- cgit v1.2.3 From 78c2ae4a0ebd1ab46160e163bf4ca1b7e9463301 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:06 +0900 Subject: ia64/pv_ops/xen: define the nubmer of irqs which xen needs. define arch/ia64/include/asm/xen/irq.h to define the number of irqs which xen needs. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/nr-irqs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c index 8273afc32db..ee564575148 100644 --- a/arch/ia64/kernel/nr-irqs.c +++ b/arch/ia64/kernel/nr-irqs.c @@ -10,6 +10,7 @@ #include #include #include +#include void foo(void) { -- cgit v1.2.3 From a0df655ccd0669bd3efc85346dc816833dd1197f Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:09 +0900 Subject: ia64/xen: define xen machine vector for domU. define xen machine vector for domU. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/acpi.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 5d1eb7ee2bf..00936491933 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -52,6 +52,7 @@ #include #include #include +#include #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ @@ -121,6 +122,8 @@ acpi_get_sysname(void) return "uv"; else return "sn2"; + } else if (xen_pv_domain() && !strcmp(hdr->oem_id, "XEN")) { + return "xen"; } return "dig"; @@ -137,6 +140,8 @@ acpi_get_sysname(void) return "uv"; # elif defined (CONFIG_IA64_DIG) return "dig"; +# elif defined (CONFIG_IA64_XEN_GUEST) + return "xen"; # else # error Unknown platform. Fix acpi.c. # endif -- cgit v1.2.3 From f8d1f99f3958c46cdc983743d75d0b31b9accb80 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Fri, 17 Oct 2008 11:18:13 +0900 Subject: ia64/pv_ops: paravirtualized instruction checker. This patch implements a checker to detect instructions which should be paravirtualized instead of direct writing raw instruction. This patch does rough check so that it doesn't fully cover all cases, but it can detects most cases of paravirtualization breakage of hand written assembly codes. Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 18 ++++++++++++++++++ arch/ia64/kernel/paravirt_inst.h | 4 +++- 2 files changed, 21 insertions(+), 1 deletion(-) (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 87fea11aecb..55e6ca8eebd 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -112,5 +112,23 @@ clean-files += $(objtree)/include/asm-ia64/nr-irqs.h ASM_PARAVIRT_OBJS = ivt.o entry.o define paravirtualized_native AFLAGS_$(1) += -D__IA64_ASM_PARAVIRTUALIZED_NATIVE +AFLAGS_pvchk-sed-$(1) += -D__IA64_ASM_PARAVIRTUALIZED_PVCHECK +extra-y += pvchk-$(1) endef $(foreach obj,$(ASM_PARAVIRT_OBJS),$(eval $(call paravirtualized_native,$(obj)))) + +# +# Checker for paravirtualizations of privileged operations. +# +quiet_cmd_pv_check_sed = PVCHK $@ +define cmd_pv_check_sed + sed -f $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed $< > $@ +endef + +$(obj)/pvchk-sed-%.s: $(src)/%.S $(srctree)/arch/$(SRCARCH)/scripts/pvcheck.sed FORCE + $(call if_changed_dep,as_s_S) +$(obj)/pvchk-%.s: $(obj)/pvchk-sed-%.s FORCE + $(call if_changed,pv_check_sed) +$(obj)/pvchk-%.o: $(obj)/pvchk-%.s FORCE + $(call if_changed,as_o_S) +.PRECIOUS: $(obj)/pvchk-sed-%.s $(obj)/pvchk-%.s $(obj)/pvchk-%.o diff --git a/arch/ia64/kernel/paravirt_inst.h b/arch/ia64/kernel/paravirt_inst.h index 5cad6fb2ed1..64d6d810c64 100644 --- a/arch/ia64/kernel/paravirt_inst.h +++ b/arch/ia64/kernel/paravirt_inst.h @@ -20,7 +20,9 @@ * */ -#ifdef __IA64_ASM_PARAVIRTUALIZED_XEN +#ifdef __IA64_ASM_PARAVIRTUALIZED_PVCHECK +#include +#elif defined(__IA64_ASM_PARAVIRTUALIZED_XEN) #include #include #else -- cgit v1.2.3 From 62fdd7678a26efadd6ac5c2869543caff77d2df0 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 17 Oct 2008 12:14:13 -0700 Subject: [IA64] Add Variable Page Size and IA64 Support in Intel IOMMU The patch contains Intel IOMMU IA64 specific code. It defines new machvec dig_vtd, hooks for IOMMU, DMAR table detection, cache line flush function, etc. For a generic kernel with CONFIG_DMAR=y, if Intel IOMMU is detected, dig_vtd is used for machinve vector. Otherwise, kernel falls back to dig machine vector. Kernel parameter "machvec=dig" or "intel_iommu=off" can be used to force kernel to boot dig machine vector. Signed-off-by: Fenghua Yu Signed-off-by: Tony Luck --- arch/ia64/kernel/Makefile | 4 ++ arch/ia64/kernel/acpi.c | 17 ++++++ arch/ia64/kernel/msi_ia64.c | 80 +++++++++++++++++++++++++ arch/ia64/kernel/pci-dma.c | 129 +++++++++++++++++++++++++++++++++++++++++ arch/ia64/kernel/pci-swiotlb.c | 46 +++++++++++++++ arch/ia64/kernel/setup.c | 42 +++++++++----- 6 files changed, 305 insertions(+), 13 deletions(-) create mode 100644 arch/ia64/kernel/pci-dma.c create mode 100644 arch/ia64/kernel/pci-swiotlb.c (limited to 'arch/ia64/kernel') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 87fea11aecb..af0e750705e 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -42,6 +42,10 @@ obj-$(CONFIG_IA64_ESI) += esi.o ifneq ($(CONFIG_IA64_ESI),) obj-y += esi_stub.o # must be in kernel proper endif +obj-$(CONFIG_DMAR) += pci-dma.o +ifeq ($(CONFIG_DMAR), y) +obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +endif # The gate DSO image is built using a special linker script. targets += gate.so gate-syms.o diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 5d1eb7ee2bf..8cc2f8a610c 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -91,6 +91,9 @@ acpi_get_sysname(void) struct acpi_table_rsdp *rsdp; struct acpi_table_xsdt *xsdt; struct acpi_table_header *hdr; +#ifdef CONFIG_DMAR + u64 i, nentries; +#endif rsdp_phys = acpi_find_rsdp(); if (!rsdp_phys) { @@ -123,6 +126,18 @@ acpi_get_sysname(void) return "sn2"; } +#ifdef CONFIG_DMAR + /* Look for Intel IOMMU */ + nentries = (hdr->length - sizeof(*hdr)) / + sizeof(xsdt->table_offset_entry[0]); + for (i = 0; i < nentries; i++) { + hdr = __va(xsdt->table_offset_entry[i]); + if (strncmp(hdr->signature, ACPI_SIG_DMAR, + sizeof(ACPI_SIG_DMAR) - 1) == 0) + return "dig_vtd"; + } +#endif + return "dig"; #else # if defined (CONFIG_IA64_HP_SIM) @@ -137,6 +152,8 @@ acpi_get_sysname(void) return "uv"; # elif defined (CONFIG_IA64_DIG) return "dig"; +# elif defined(CONFIG_IA64_DIG_VTD) + return "dig_vtd"; # else # error Unknown platform. Fix acpi.c. # endif diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 60c6ef67ebb..702a09c1323 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -5,6 +5,7 @@ #include #include #include +#include #include /* @@ -162,3 +163,82 @@ void arch_teardown_msi_irq(unsigned int irq) return ia64_teardown_msi_irq(irq); } + +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + int cpu = first_cpu(mask); + + + if (!cpu_online(cpu)) + return; + + if (irq_prepare_move(irq, cpu)) + return; + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DESTID_MASK; + msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); + + dmar_msi_write(irq, &msg); + irq_desc[irq].affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ia64_ack_msi_irq, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ia64_msi_retrigger_irq, +}; + +static int +msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned dest; + cpumask_t mask; + + cpus_and(mask, irq_to_domain(irq), cpu_online_map); + dest = cpu_physical_id(first_cpu(mask)); + + msg->address_hi = 0; + msg->address_lo = + MSI_ADDR_HEADER | + MSI_ADDR_DESTMODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DESTID_CPU(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + MSI_DATA_DELIVERY_FIXED | + MSI_DATA_VECTOR(cfg->vector); + return 0; +} + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif /* CONFIG_DMAR */ + diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c new file mode 100644 index 00000000000..10a75b55765 --- /dev/null +++ b/arch/ia64/kernel/pci-dma.c @@ -0,0 +1,129 @@ +/* + * Dynamic DMA mapping support. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef CONFIG_DMAR + +#include +#include + +#include +#include + +dma_addr_t bad_dma_address __read_mostly; +EXPORT_SYMBOL(bad_dma_address); + +static int iommu_sac_force __read_mostly; + +int no_iommu __read_mostly; +#ifdef CONFIG_IOMMU_DEBUG +int force_iommu __read_mostly = 1; +#else +int force_iommu __read_mostly; +#endif + +/* Set this to 1 if there is a HW IOMMU in the system */ +int iommu_detected __read_mostly; + +/* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to i386. */ +struct device fallback_dev = { + .bus_id = "fallback device", + .coherent_dma_mask = DMA_32BIT_MASK, + .dma_mask = &fallback_dev.coherent_dma_mask, +}; + +void __init pci_iommu_alloc(void) +{ + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} + +static int __init pci_iommu_init(void) +{ + if (iommu_detected) + intel_iommu_init(); + + return 0; +} + +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); + +void pci_iommu_shutdown(void) +{ + return; +} + +void __init +iommu_dma_init(void) +{ + return; +} + +struct dma_mapping_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); + +int iommu_dma_supported(struct device *dev, u64 mask) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + dev_info(dev, "Disallowing DAC for device\n"); + return 0; + } +#endif + + if (ops->dma_supported_op) + return ops->dma_supported_op(dev, mask); + + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_24BIT_MASK) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This + allows the driver to use cheaper accesses in some cases. + + Problem with this is that if we overflow the IOMMU area and + return DAC as fallback address the device may not handle it + correctly. + + As a special case some controllers have a 39bit address + mode that is as efficient as 32bit (aic79xx). Don't force + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. */ + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + dev_info(dev, "Force SAC with mask %lx\n", mask); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(iommu_dma_supported); + +#endif diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c new file mode 100644 index 00000000000..16c50516dbc --- /dev/null +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -0,0 +1,46 @@ +/* Glue code to lib/swiotlb.c */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +int swiotlb __read_mostly; +EXPORT_SYMBOL(swiotlb); + +struct dma_mapping_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc_coherent = swiotlb_alloc_coherent, + .free_coherent = swiotlb_free_coherent, + .map_single = swiotlb_map_single, + .unmap_single = swiotlb_unmap_single, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, + .sync_single_range_for_device = swiotlb_sync_single_range_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg, + .unmap_sg = swiotlb_unmap_sg, + .dma_supported_op = swiotlb_dma_supported, +}; + +void __init pci_swiotlb_init(void) +{ + if (!iommu_detected) { +#ifdef CONFIG_IA64_GENERIC + swiotlb = 1; + printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n"); + machvec_init("dig"); + swiotlb_init(); + dma_ops = &swiotlb_dma_ops; +#else + panic("Unable to find Intel IOMMU"); +#endif + } +} diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index de636b21567..2a67a74a48f 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -116,6 +116,13 @@ unsigned int num_io_spaces; */ #define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ unsigned long ia64_i_cache_stride_shift = ~0; +/* + * "clflush_cache_range()" needs to know what processor dependent stride size to + * use when it flushes cache lines including both d-cache and i-cache. + */ +/* Safest way to go: 32 bytes by 32 bytes */ +#define CACHE_STRIDE_SHIFT 5 +unsigned long ia64_cache_stride_shift = ~0; /* * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This @@ -847,13 +854,14 @@ setup_per_cpu_areas (void) } /* - * Calculate the max. cache line size. + * Do the following calculations: * - * In addition, the minimum of the i-cache stride sizes is calculated for - * "flush_icache_range()". + * 1. the max. cache line size. + * 2. the minimum of the i-cache stride sizes for "flush_icache_range()". + * 3. the minimum of the cache stride sizes for "clflush_cache_range()". */ static void __cpuinit -get_max_cacheline_size (void) +get_cache_info(void) { unsigned long line_size, max = 1; u64 l, levels, unique_caches; @@ -867,12 +875,14 @@ get_max_cacheline_size (void) max = SMP_CACHE_BYTES; /* Safest setup for "flush_icache_range()" */ ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT; + /* Safest setup for "clflush_cache_range()" */ + ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; goto out; } for (l = 0; l < levels; ++l) { - status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2, - &cci); + /* cache_type (data_or_unified)=2 */ + status = ia64_pal_cache_config_info(l, 2, &cci); if (status != 0) { printk(KERN_ERR "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n", @@ -880,15 +890,21 @@ get_max_cacheline_size (void) max = SMP_CACHE_BYTES; /* The safest setup for "flush_icache_range()" */ cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + /* The safest setup for "clflush_cache_range()" */ + ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; cci.pcci_unified = 1; + } else { + if (cci.pcci_stride < ia64_cache_stride_shift) + ia64_cache_stride_shift = cci.pcci_stride; + + line_size = 1 << cci.pcci_line_size; + if (line_size > max) + max = line_size; } - line_size = 1 << cci.pcci_line_size; - if (line_size > max) - max = line_size; + if (!cci.pcci_unified) { - status = ia64_pal_cache_config_info(l, - /* cache_type (instruction)= */ 1, - &cci); + /* cache_type (instruction)=1*/ + status = ia64_pal_cache_config_info(l, 1, &cci); if (status != 0) { printk(KERN_ERR "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n", @@ -942,7 +958,7 @@ cpu_init (void) } #endif - get_max_cacheline_size(); + get_cache_info(); /* * We can't pass "local_cpu_data" to identify_cpu() because we haven't called -- cgit v1.2.3