Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--  arch/i386/kernel/asm-offsets.c    |  9
-rw-r--r--  arch/i386/kernel/entry.S          | 85
-rw-r--r--  arch/i386/kernel/head.S           |  5
-rw-r--r--  arch/i386/kernel/paravirt.c       | 37
-rw-r--r--  arch/i386/kernel/setup.c          |  2
-rw-r--r--  arch/i386/kernel/smp.c            |  5
-rw-r--r--  arch/i386/kernel/smpboot.c        |  8
-rw-r--r--  arch/i386/kernel/syscall_table.S  |  1
-rw-r--r--  arch/i386/kernel/tsc.c            | 23
-rw-r--r--  arch/i386/kernel/vmi.c            |  4
-rw-r--r--  arch/i386/kernel/vmiclock.c       |  6
-rw-r--r--  arch/i386/kernel/vmlinux.lds.S    |  1
-rw-r--r--  arch/i386/kernel/vsyscall-note.S  | 49
13 files changed, 197 insertions, 38 deletions
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index 27a776c9044..25f7eb51392 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -17,6 +17,8 @@
 #include <asm/thread_info.h>
 #include <asm/elf.h>
 
+#include <xen/interface/xen.h>
+
 #define DEFINE(sym, val) \
 	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
 
@@ -59,6 +61,7 @@ void foo(void)
 	OFFSET(TI_addr_limit, thread_info, addr_limit);
 	OFFSET(TI_restart_block, thread_info, restart_block);
 	OFFSET(TI_sysenter_return, thread_info, sysenter_return);
+	OFFSET(TI_cpu, thread_info, cpu);
 	BLANK();
 
 	OFFSET(GDS_size, Xgt_desc_struct, size);
@@ -115,4 +118,10 @@ void foo(void)
 	OFFSET(PARAVIRT_iret, paravirt_ops, iret);
 	OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
 #endif
+
+#ifdef CONFIG_XEN
+	BLANK();
+	OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
+	OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
+#endif
 }
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 3c3c220488c..32980b83493 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -1023,6 +1023,91 @@ ENTRY(kernel_thread_helper)
 	CFI_ENDPROC
 ENDPROC(kernel_thread_helper)
 
+#ifdef CONFIG_XEN
+ENTRY(xen_hypervisor_callback)
+	CFI_STARTPROC
+	pushl $0
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	TRACE_IRQS_OFF
+
+	/* Check to see if we got the event in the critical
+	   region in xen_iret_direct, after we've reenabled
+	   events and checked for pending events.  This simulates
+	   iret instruction's behaviour where it delivers a
+	   pending interrupt when enabling interrupts. */
+	movl PT_EIP(%esp),%eax
+	cmpl $xen_iret_start_crit,%eax
+	jb   1f
+	cmpl $xen_iret_end_crit,%eax
+	jae  1f
+
+	call xen_iret_crit_fixup
+
+1:	mov %esp, %eax
+	call xen_evtchn_do_upcall
+	jmp  ret_from_intr
+	CFI_ENDPROC
+ENDPROC(xen_hypervisor_callback)
+
+# Hypervisor uses this for application faults while it executes.
+# We get here for two reasons:
+#  1. Fault while reloading DS, ES, FS or GS
+#  2. Fault while executing IRET
+# Category 1 we fix up by reattempting the load, and zeroing the segment
+# register if the load fails.
+# Category 2 we fix up by jumping to do_iret_error. We cannot use the
+# normal Linux return path in this case because if we use the IRET hypercall
+# to pop the stack frame we end up in an infinite loop of failsafe callbacks.
+# We distinguish between categories by maintaining a status value in EAX.
+ENTRY(xen_failsafe_callback)
+	CFI_STARTPROC
+	pushl %eax
+	CFI_ADJUST_CFA_OFFSET 4
+	movl $1,%eax
+1:	mov 4(%esp),%ds
+2:	mov 8(%esp),%es
+3:	mov 12(%esp),%fs
+4:	mov 16(%esp),%gs
+	testl %eax,%eax
+	popl %eax
+	CFI_ADJUST_CFA_OFFSET -4
+	lea 16(%esp),%esp
+	CFI_ADJUST_CFA_OFFSET -16
+	jz 5f
+	addl $16,%esp
+	jmp iret_exc		# EAX != 0 => Category 2 (Bad IRET)
+5:	pushl $0		# EAX == 0 => Category 1 (Bad segment)
+	CFI_ADJUST_CFA_OFFSET 4
+	SAVE_ALL
+	jmp ret_from_exception
+	CFI_ENDPROC
+
+.section .fixup,"ax"
+6:	xorl %eax,%eax
+	movl %eax,4(%esp)
+	jmp 1b
+7:	xorl %eax,%eax
+	movl %eax,8(%esp)
+	jmp 2b
+8:	xorl %eax,%eax
+	movl %eax,12(%esp)
+	jmp 3b
+9:	xorl %eax,%eax
+	movl %eax,16(%esp)
+	jmp 4b
+.previous
+.section __ex_table,"a"
+	.align 4
+	.long 1b,6b
+	.long 2b,7b
+	.long 3b,8b
+	.long 4b,9b
+.previous
+ENDPROC(xen_failsafe_callback)
+
+#endif	/* CONFIG_XEN */
+
 .section .rodata,"a"
 #include "syscall_table.S"
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 82714668d43..7c52b222207 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -510,7 +510,8 @@ ENTRY(_stext)
 /*
  * BSS section
  */
-.section ".bss.page_aligned","w"
+.section ".bss.page_aligned","wa"
+	.align PAGE_SIZE_asm
 ENTRY(swapper_pg_dir)
 	.fill 1024,4,0
 ENTRY(swapper_pg_pmd)
@@ -538,6 +539,8 @@ fault_msg:
 	.ascii "Int %d: CR2 %p  err %p  EIP %p  CS %p  flags %p\n"
 	.asciz "Stack: %p %p %p %p %p %p %p %p\n"
 
+#include "../xen/xen-head.S"
+
 /*
  * The IDT and GDT 'descriptors' are a strange 48-bit object
  * only used by the lidt and lgdt instructions. They are not
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index faab09abca5..53f07a8275e 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -228,6 +228,41 @@ static int __init print_banner(void)
 }
 core_initcall(print_banner);
 
+static struct resource reserve_ioports = {
+	.start = 0,
+	.end = IO_SPACE_LIMIT,
+	.name = "paravirt-ioport",
+	.flags = IORESOURCE_IO | IORESOURCE_BUSY,
+};
+
+static struct resource reserve_iomem = {
+	.start = 0,
+	.end = -1,
+	.name = "paravirt-iomem",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
+/*
+ * Reserve the whole legacy IO space to prevent any legacy drivers
+ * from wasting time probing for their hardware.  This is a fairly
+ * brute-force approach to disabling all non-virtual drivers.
+ *
+ * Note that this must be called very early to have any effect.
+ */
+int paravirt_disable_iospace(void)
+{
+	int ret;
+
+	ret = request_resource(&ioport_resource, &reserve_ioports);
+	if (ret == 0) {
+		ret = request_resource(&iomem_resource, &reserve_iomem);
+		if (ret)
+			release_resource(&reserve_ioports);
+	}
+
+	return ret;
+}
+
 struct paravirt_ops paravirt_ops = {
 	.name = "bare hardware",
 	.paravirt_enabled = 0,
@@ -267,7 +302,7 @@ struct paravirt_ops paravirt_ops = {
 	.write_msr = native_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
-	.get_scheduled_cycles = native_read_tsc,
+	.sched_clock = native_sched_clock,
 	.get_cpu_khz = native_calculate_cpu_khz,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 2d61e65eeb5..74871d066c2 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -601,6 +601,8 @@ void __init setup_arch(char **cmdline_p)
 	 * NOTE: at this point the bootmem allocator is fully available.
 	 */
 
+	paravirt_post_allocator_init();
+
 	dmi_scan_machine();
 
 #ifdef CONFIG_X86_GENERICARCH
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 6299c080f6e..2d35d850202 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -22,6 +22,7 @@
 
 #include <asm/mtrr.h>
 #include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
 #include <mach_apic.h>
 
 /*
@@ -249,13 +250,13 @@ static unsigned long flush_va;
 static DEFINE_SPINLOCK(tlbstate_lock);
 
 /*
- * We cannot call mmdrop() because we are in interrupt context, 
+ * We cannot call mmdrop() because we are in interrupt context,
  * instead update mm->cpu_vm_mask.
  *
  * We need to reload %cr3 since the page tables may be going
  * away from under us..
  */
-static inline void leave_mm (unsigned long cpu)
+void leave_mm(unsigned long cpu)
 {
 	if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
 		BUG();
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 0b2954534b8..5910d3fac56 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -148,7 +148,7 @@ void __init smp_alloc_memory(void)
  * a given CPU
  */
 
-static void __cpuinit smp_store_cpu_info(int id)
+void __cpuinit smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = cpu_data + id;
 
@@ -308,8 +308,7 @@ cpumask_t cpu_coregroup_map(int cpu)
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
-static inline void
-set_cpu_sibling_map(int cpu)
+void set_cpu_sibling_map(int cpu)
 {
 	int i;
 	struct cpuinfo_x86 *c = cpu_data;
@@ -1144,8 +1143,7 @@ void __init native_smp_prepare_boot_cpu(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void
-remove_siblinginfo(int cpu)
+void remove_siblinginfo(int cpu)
 {
 	int sibling;
 	struct cpuinfo_x86 *c = cpu_data;
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index bf6adce5226..8344c70adf6 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -323,3 +323,4 @@ ENTRY(sys_call_table)
 	.long sys_signalfd
 	.long sys_timerfd
 	.long sys_eventfd
+	.long sys_fallocate
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index ea63a30ca3e..252f9010f28 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -84,7 +84,7 @@ static inline int check_tsc_unstable(void)
  *
  *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
-static unsigned long cyc2ns_scale __read_mostly;
+unsigned long cyc2ns_scale __read_mostly;
 
 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
@@ -93,15 +93,10 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz)
 	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
 }
 
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
 {
 	unsigned long long this_offset;
 
@@ -118,12 +113,24 @@ unsigned long long sched_clock(void)
 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 
 	/* read the Time Stamp Counter: */
-	get_scheduled_cycles(this_offset);
+	rdtscll(this_offset);
 
 	/* return the value in ns */
 	return cycles_2_ns(this_offset);
 }
 
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+	return paravirt_sched_clock();
+}
+#else
+unsigned long long sched_clock(void)
+	__attribute__((alias("native_sched_clock")));
+#endif
+
 unsigned long native_calculate_cpu_khz(void)
 {
 	unsigned long long start, end;
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index c12720d7cbc..72042bb7ec9 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -362,7 +362,7 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
 }
 #endif
 
-static void vmi_allocate_pt(u32 pfn)
+static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
 {
 	vmi_set_page_type(pfn, VMI_PAGE_L1);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
@@ -891,7 +891,7 @@ static inline int __init activate_vmi(void)
 	paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
 	paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
-	paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+	paravirt_ops.sched_clock = vmi_sched_clock;
 	paravirt_ops.get_cpu_khz = vmi_cpu_khz;
 
 	/* We have true wallclock functions; disable CMOS clock sync */
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
index 26a37f8a876..f9b845f4e69 100644
--- a/arch/i386/kernel/vmiclock.c
+++ b/arch/i386/kernel/vmiclock.c
@@ -64,10 +64,10 @@ int vmi_set_wallclock(unsigned long now)
 	return 0;
 }
 
-/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
-unsigned long long vmi_get_sched_cycles(void)
+/* paravirt_ops.sched_clock = vmi_sched_clock */
+unsigned long long vmi_sched_clock(void)
 {
-	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
+	return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
 }
 
 /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S
index aa87b06c7c8..00f1bc47d3a 100644
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -88,6 +88,7 @@ SECTIONS
 
   . = ALIGN(4096);
   .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+	*(.data.page_aligned)
 	*(.data.idt)
   }
 
diff --git a/arch/i386/kernel/vsyscall-note.S b/arch/i386/kernel/vsyscall-note.S
index d4b5be4f3d5..271f16a8ca0 100644
--- a/arch/i386/kernel/vsyscall-note.S
+++ b/arch/i386/kernel/vsyscall-note.S
@@ -3,23 +3,40 @@
  * Here we can supply some information useful to userland.
  */
 
-#include <linux/uts.h>
 #include <linux/version.h>
+#include <linux/elfnote.h>
 
-#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type)	\
-	.section name, flags;				\
-	.balign 4;					\
-	.long 1f - 0f;		/* name length */	\
-	.long 3f - 2f;		/* data length */	\
-	.long type;		/* note type */		\
-0:	.asciz vendor;		/* vendor name */	\
-1:	.balign 4;					\
-2:
+/* Ideally this would use UTS_NAME, but using a quoted string here
+   doesn't work.  Remember to change this when changing the
+   kernel's name. */
+ELFNOTE_START(Linux, 0, "a")
+	.long LINUX_VERSION_CODE
+ELFNOTE_END
 
-#define ASM_ELF_NOTE_END				\
-3:	.balign 4;		/* pad out section */	\
-	.previous
+#ifdef CONFIG_XEN
 
-	ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
-	.long LINUX_VERSION_CODE
-	ASM_ELF_NOTE_END
+/*
+ * Add a special note telling glibc's dynamic linker a fake hardware
+ * flavor that it will use to choose the search path for libraries in the
+ * same way it uses real hardware capabilities like "mmx".
+ * We supply "nosegneg" as the fake capability, to indicate that we
+ * do not like negative offsets in instructions using segment overrides,
+ * since we implement those inefficiently.  This makes it possible to
+ * install libraries optimized to avoid those access patterns in someplace
+ * like /lib/i686/tls/nosegneg.  Note that an /etc/ld.so.conf.d/file
+ * corresponding to the bits here is needed to make ldconfig work right.
+ * It should contain:
+ *	hwcap 1 nosegneg
+ * to match the mapping of bit to name that we give here.
+ */
+
+/* Bit used for the pseudo-hwcap for non-negative segments.  We use
+   bit 1 to avoid bugs in some versions of glibc when bit 0 is
+   used; the choice is otherwise arbitrary. */
+#define VDSO_NOTE_NONEGSEG_BIT	1
+
+ELFNOTE_START(GNU, 2, "a")
+	.long 1, 1<<VDSO_NOTE_NONEGSEG_BIT		/* ncaps, mask */
+	.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg"	/* bit, name */
+ELFNOTE_END
+#endif