diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-10-03 19:28:46 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-03 19:28:46 +0200 |
commit | f68ec0c24755e5cdb779be6240925f2175311d84 (patch) | |
tree | a7b7128e61a8456385d82bd1c7ca5f14eecbf2ca /include/asm-x86 | |
parent | 98920dc3d1113b883cbc73e3293446d3525c6042 (diff) | |
parent | 94aca1dac6f6d21f4b07e4864baf7768cabcc6e7 (diff) |
Merge commit 'v2.6.27-rc8' into x86/setup
Diffstat (limited to 'include/asm-x86')
86 files changed, 1488 insertions, 613 deletions
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild index 1e3554596f7..4a8e80cdcfa 100644 --- a/include/asm-x86/Kbuild +++ b/include/asm-x86/Kbuild @@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm header-y += boot.h header-y += bootparam.h header-y += debugreg.h -header-y += kvm.h header-y += ldt.h header-y += msr-index.h header-y += prctl.h @@ -19,7 +18,6 @@ unifdef-y += msr.h unifdef-y += mtrr.h unifdef-y += posix_types_32.h unifdef-y += posix_types_64.h -unifdef-y += ptrace.h unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += vm86.h diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index 635d764dc13..35d1743b57a 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -140,6 +140,8 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) boot_cpu_data.x86_model <= 0x05 && boot_cpu_data.x86_mask < 0x0A) return 1; + else if (boot_cpu_has(X86_FEATURE_AMDC1E)) + return 1; else return max_cstate; } diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index 7bfcb47cc45..dcc81206739 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -27,14 +27,10 @@ /* * some size calculation constants */ -#define DEV_TABLE_ENTRY_SIZE 256 +#define DEV_TABLE_ENTRY_SIZE 32 #define ALIAS_TABLE_ENTRY_SIZE 2 #define RLOOKUP_TABLE_ENTRY_SIZE (sizeof(void *)) -/* helper macros */ -#define LOW_U32(x) ((x) & ((1ULL << 32)-1)) -#define HIGH_U32(x) (LOW_U32((x) >> 32)) - /* Length of the MMIO region for the AMD IOMMU */ #define MMIO_REGION_LENGTH 0x4000 @@ -70,6 +66,9 @@ #define MMIO_EVT_TAIL_OFFSET 0x2018 #define MMIO_STATUS_OFFSET 0x2020 +/* MMIO status bits */ +#define MMIO_STATUS_COM_WAIT_INT_MASK 0x04 + /* feature control bits */ #define CONTROL_IOMMU_EN 0x00ULL #define CONTROL_HT_TUN_EN 0x01ULL @@ -90,6 +89,7 @@ #define CMD_INV_IOMMU_PAGES 0x03 #define CMD_COMPL_WAIT_STORE_MASK 0x01 +#define CMD_COMPL_WAIT_INT_MASK 0x02 #define CMD_INV_IOMMU_PAGES_SIZE_MASK 0x01 #define CMD_INV_IOMMU_PAGES_PDE_MASK 0x02 @@ -100,6 +100,7 @@ #define DEV_ENTRY_TRANSLATION 0x01 #define DEV_ENTRY_IR 0x3d #define DEV_ENTRY_IW 0x3e +#define DEV_ENTRY_NO_PAGE_FAULT 0x62 #define DEV_ENTRY_EX 0x67 #define DEV_ENTRY_SYSMGT1 0x68 #define DEV_ENTRY_SYSMGT2 0x69 @@ -158,78 +159,170 @@ #define MAX_DOMAIN_ID 65536 +/* + * This structure contains generic data for IOMMU protection domains + * independent of their use. + */ struct protection_domain { - spinlock_t lock; - u16 id; - int mode; - u64 *pt_root; - void *priv; + spinlock_t lock; /* mostly used to lock the page table*/ + u16 id; /* the domain id written to the device table */ + int mode; /* paging mode (0-6 levels) */ + u64 *pt_root; /* page table root pointer */ + void *priv; /* private data */ }; +/* + * Data container for a dma_ops specific protection domain + */ struct dma_ops_domain { struct list_head list; + + /* generic protection domain information */ struct protection_domain domain; + + /* size of the aperture for the mappings */ unsigned long aperture_size; + + /* address we start to search for free addresses */ unsigned long next_bit; + + /* address allocation bitmap */ unsigned long *bitmap; + + /* + * Array of PTE pages for the aperture. In this array we save all the + * leaf pages of the domain page table used for the aperture. This way + * we don't need to walk the page table to find a specific PTE. We can + * just calculate its address in constant time. + */ u64 **pte_pages; }; +/* + * Structure where we save information about one hardware AMD IOMMU in the + * system. + */ struct amd_iommu { struct list_head list; + + /* locks the accesses to the hardware */ spinlock_t lock; + /* device id of this IOMMU */ u16 devid; + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ u16 cap_ptr; + /* physical address of MMIO space */ u64 mmio_phys; + /* virtual address of MMIO space */ u8 *mmio_base; + + /* capabilities of that IOMMU read from ACPI */ u32 cap; + + /* first device this IOMMU handles. read from PCI */ u16 first_device; + /* last device this IOMMU handles. read from PCI */ u16 last_device; + + /* start of exclusion range of that IOMMU */ u64 exclusion_start; + /* length of exclusion range of that IOMMU */ u64 exclusion_length; + /* command buffer virtual address */ u8 *cmd_buf; + /* size of command buffer */ u32 cmd_buf_size; + /* if one, we need to send a completion wait command */ int need_sync; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; +/* + * List with all IOMMUs in the system. This list is not locked because it is + * only written and read at driver initialization or suspend time + */ extern struct list_head amd_iommu_list; +/* + * Structure defining one entry in the device table + */ struct dev_table_entry { u32 data[8]; }; +/* + * One entry for unity mappings parsed out of the ACPI table. + */ struct unity_map_entry { struct list_head list; + + /* starting device id this entry is used for (including) */ u16 devid_start; + /* end device id this entry is used for (including) */ u16 devid_end; + + /* start address to unity map (including) */ u64 address_start; + /* end address to unity map (including) */ u64 address_end; + + /* required protection */ int prot; }; +/* + * List of all unity mappings. It is not locked because as runtime it is only + * read. It is created at ACPI table parsing time. + */ extern struct list_head amd_iommu_unity_map; -/* data structures for device handling */ +/* + * Data structures for device handling + */ + +/* + * Device table used by hardware. Read and write accesses by software are + * locked with the amd_iommu_pd_table lock. + */ extern struct dev_table_entry *amd_iommu_dev_table; + +/* + * Alias table to find requestor ids to device ids. Not locked because only + * read on runtime. + */ extern u16 *amd_iommu_alias_table; + +/* + * Reverse lookup table to find the IOMMU which translates a specific device. + */ extern struct amd_iommu **amd_iommu_rlookup_table; +/* size of the dma_ops aperture as power of 2 */ extern unsigned amd_iommu_aperture_order; +/* largest PCI device id we expect translation requests for */ extern u16 amd_iommu_last_bdf; /* data structures for protection domain handling */ extern struct protection_domain **amd_iommu_pd_table; + +/* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; +/* will be 1 if device isolation is enabled */ extern int amd_iommu_isolate; +/* takes a PCI device id and prints it out in a readable form */ static inline void print_devid(u16 devid, int nl) { int bus = devid >> 8; @@ -241,4 +334,11 @@ static inline void print_devid(u16 devid, int nl) printk("\n"); } +/* takes bus and device/function and returns the device id + * FIXME: should that be in generic PCI code? */ +static inline u16 calc_devid(u8 bus, u8 devfn) +{ + return (((u16)bus) << 8) | devfn; +} + #endif diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 4e2c1e517f0..133c998161c 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -3,6 +3,8 @@ #include <linux/pm.h> #include <linux/delay.h> + +#include <asm/alternative.h> #include <asm/fixmap.h> #include <asm/apicdef.h> #include <asm/processor.h> @@ -10,8 +12,6 @@ #define ARCH_APICTIMER_STOPS_ON_C3 1 -#define Dprintk(x...) - /* * Debugging macros */ @@ -35,7 +35,7 @@ extern void generic_apic_probe(void); #ifdef CONFIG_X86_LOCAL_APIC -extern int apic_verbosity; +extern unsigned int apic_verbosity; extern int local_apic_timer_c2_ok; extern int ioapic_force; @@ -48,7 +48,6 @@ extern int disable_apic; #include <asm/paravirt.h> #else #define apic_write native_apic_write -#define apic_write_atomic native_apic_write_atomic #define apic_read native_apic_read #define setup_boot_clock setup_boot_APIC_clock #define setup_secondary_clock setup_secondary_APIC_clock @@ -58,12 +57,11 @@ extern int is_vsmp_box(void); static inline void native_apic_write(unsigned long reg, u32 v) { - *((volatile u32 *)(APIC_BASE + reg)) = v; -} + volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); -static inline void native_apic_write_atomic(unsigned long reg, u32 v) -{ - (void)xchg((u32 *)(APIC_BASE + reg), v); + alternative_io("movl %0, %1", "xchgl %0, %1", X86_FEATURE_11AP, + ASM_OUTPUT2("=r" (v), "=m" (*addr)), + ASM_OUTPUT2("0" (v), "m" (*addr))); } static inline u32 native_apic_read(unsigned long reg) @@ -75,16 +73,6 @@ extern void apic_wait_icr_idle(void); extern u32 safe_apic_wait_icr_idle(void); extern int get_physical_broadcast(void); -#ifdef CONFIG_X86_GOOD_APIC -# define FORCE_READ_AROUND_WRITE 0 -# define apic_read_around(x) -# define apic_write_around(x, y) apic_write((x), (y)) -#else -# define FORCE_READ_AROUND_WRITE 1 -# define apic_read_around(x) apic_read(x) -# define apic_write_around(x, y) apic_write_atomic((x), (y)) -#endif - static inline void ack_APIC_irq(void) { /* @@ -95,7 +83,7 @@ static inline void ack_APIC_irq(void) */ /* Docs say use 0 for future compatibility */ - apic_write_around(APIC_EOI, 0); + apic_write(APIC_EOI, 0); } extern int lapic_get_maxlvt(void); diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h index 768aee8a04e..8411750ceb6 100644 --- a/include/asm-x86/arch_hooks.h +++ b/include/asm-x86/arch_hooks.h @@ -21,6 +21,7 @@ extern void intr_init_hook(void); extern void pre_intr_init_hook(void); extern void pre_setup_arch_hook(void); extern void trap_init_hook(void); +extern void pre_time_init_hook(void); extern void time_init_hook(void); extern void mca_nmi_hook(void); diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h index a0095191c02..91c7d03e65b 100644 --- a/include/asm-x86/atomic_64.h +++ b/include/asm-x86/atomic_64.h @@ -228,7 +228,7 @@ static inline void atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) - : "ir" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter)); } /** @@ -242,7 +242,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) - : "ir" (i), "m" (v->counter)); + : "er" (i), "m" (v->counter)); } /** @@ -260,7 +260,7 @@ static inline int atomic64_sub_and_test(long i, atomic64_t *v) asm volatile(LOCK_PREFIX "subq %2,%0; sete %1" : "=m" (v->counter), "=qm" (c) - : "ir" (i), "m" (v->counter) : "memory"); + : "er" (i), "m" (v->counter) : "memory"); return c; } @@ -341,7 +341,7 @@ static inline int atomic64_add_negative(long i, atomic64_t *v) asm volatile(LOCK_PREFIX "addq %2,%0; sets %1" : "=m" (v->counter), "=qm" (c) - : "ir" (i), "m" (v->counter) : "memory"); + : "er" (i), "m" (v->counter) : "memory"); return c; } diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index 96b1829cea1..cfb2b64f76e 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -356,7 +356,7 @@ static inline unsigned long ffz(unsigned long word) * __fls: find last set bit in word * @word: The word to search * - * Undefined if no zero exists, so code should check against ~0UL first. + * Undefined if no set bit exists, so code should check against 0 first. */ static inline unsigned long __fls(unsigned long word) { diff --git a/include/asm-x86/calling.h b/include/asm-x86/calling.h index f13e62e2cb3..2bc162e0ec6 100644 --- a/include/asm-x86/calling.h +++ b/include/asm-x86/calling.h @@ -104,7 +104,7 @@ .endif .endm - .macro LOAD_ARGS offset + .macro LOAD_ARGS offset, skiprax=0 movq \offset(%rsp), %r11 movq \offset+8(%rsp), %r10 movq \offset+16(%rsp), %r9 @@ -113,7 +113,10 @@ movq \offset+48(%rsp), %rdx movq \offset+56(%rsp), %rsi movq \offset+64(%rsp), %rdi + .if \skiprax + .else movq \offset+72(%rsp), %rax + .endif .endm #define REST_SKIP 6*8 @@ -165,4 +168,3 @@ .macro icebp .byte 0xf1 .endm - diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index 75ef959db32..cfcfb0a806b 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -72,13 +72,16 @@ #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ -#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ +#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ +#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ +#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ #define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ #define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ +#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ +#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ +#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -90,6 +93,7 @@ #define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ #define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ #define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ @@ -188,6 +192,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) #define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) +#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index 87a715367a1..3c034f48fdb 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h @@ -5,6 +5,9 @@ struct dev_archdata { #ifdef CONFIG_ACPI void *acpi_handle; #endif +#ifdef CONFIG_X86_64 +struct dma_mapping_ops *dma_ops; +#endif #ifdef CONFIG_DMAR void *iommu; /* hook for IOMMU specific extension */ #endif diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index a1a4dc7fe6e..ad9cd6d49bf 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -14,11 +14,11 @@ extern dma_addr_t bad_dma_address; extern int iommu_merge; extern struct device fallback_dev; extern int panic_on_overflow; -extern int forbid_dac; extern int force_iommu; struct dma_mapping_ops { - int (*mapping_error)(dma_addr_t dma_addr); + int (*mapping_error)(struct device *dev, + dma_addr_t dma_addr); void* (*alloc_coherent)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp); void (*free_coherent)(struct device *dev, size_t size, @@ -57,14 +57,32 @@ struct dma_mapping_ops { int is_phys; }; -extern const struct dma_mapping_ops *dma_ops; +extern struct dma_mapping_ops *dma_ops; -static inline int dma_mapping_error(dma_addr_t dma_addr) +static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) { - if (dma_ops->mapping_error) - return dma_ops->mapping_error(dma_addr); +#ifdef CONFIG_X86_32 + return dma_ops; +#else + if (unlikely(!dev) || !dev->archdata.dma_ops) + return dma_ops; + else + return dev->archdata.dma_ops; +#endif +} + +/* Make sure we keep the same behaviour */ +static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +{ +#ifdef CONFIG_X86_32 + return 0; +#else + struct dma_mapping_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); return (dma_addr == bad_dma_address); +#endif } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) @@ -84,44 +102,53 @@ static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction); + return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); } static inline void dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_single) - dma_ops->unmap_single(dev, addr, size, direction); + if (ops->unmap_single) + ops->unmap_single(dev, addr, size, direction); } static inline int dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_sg(hwdev, sg, nents, direction); + return ops->map_sg(hwdev, sg, nents, direction); } static inline void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->unmap_sg) - dma_ops->unmap_sg(hwdev, sg, nents, direction); + if (ops->unmap_sg) + ops->unmap_sg(hwdev, sg, nents, direction); } static inline void dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_cpu) - dma_ops->sync_single_for_cpu(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_cpu) + ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -129,10 +156,11 @@ static inline void dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_device) - dma_ops->sync_single_for_device(hwdev, dma_handle, size, - direction); + if (ops->sync_single_for_device) + ops->sync_single_for_device(hwdev, dma_handle, size, direction); flush_write_buffers(); } @@ -140,11 +168,12 @@ static inline void dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_cpu) - dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, - size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_cpu) + ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, + size, direction); flush_write_buffers(); } @@ -153,11 +182,12 @@ dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, unsigned long offset, size_t size, int direction) { - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_device) - dma_ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, direction); + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); + if (ops->sync_single_range_for_device) + ops->sync_single_range_for_device(hwdev, dma_handle, + offset, size, direction); flush_write_buffers(); } @@ -165,9 +195,11 @@ static inline void dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_cpu) - dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_cpu) + ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -175,9 +207,11 @@ static inline void dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int nelems, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(hwdev); + BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_device) - dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction); + if (ops->sync_sg_for_device) + ops->sync_sg_for_device(hwdev, sg, nelems, direction); flush_write_buffers(); } @@ -186,9 +220,11 @@ static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction) { + struct dma_mapping_ops *ops = get_dma_ops(dev); + BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(dev, page_to_phys(page)+offset, - size, direction); + return ops->map_single(dev, page_to_phys(page) + offset, + size, direction); } static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, @@ -213,25 +249,5 @@ static inline int dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) -#ifdef CONFIG_X86_32 -# define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -extern int -dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags); - -extern void -dma_release_declared_memory(struct device *dev); - -extern void * -dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size); -#endif /* CONFIG_X86_32 */ +#include <asm-generic/dma-coherent.h> #endif diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 06633b01dd5..16a31e2c7c5 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -90,6 +90,14 @@ static inline void e820_mark_nosave_regions(unsigned long limit_pfn) } #endif +#ifdef CONFIG_MEMTEST +extern void early_memtest(unsigned long start, unsigned long end); +#else +static inline void early_memtest(unsigned long start, unsigned long end) +{ +} +#endif + extern unsigned long end_user_pfn; extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h index 7ed2bd7a7f5..d4f2b0abe92 100644 --- a/include/asm-x86/efi.h +++ b/include/asm-x86/efi.h @@ -86,7 +86,7 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) -extern void *efi_ioremap(unsigned long addr, unsigned long size); +extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); #endif /* CONFIG_X86_32 */ diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h index aae2f0501a4..f1ac2b2167d 100644 --- a/include/asm-x86/fixmap_32.h +++ b/include/asm-x86/fixmap_32.h @@ -90,13 +90,13 @@ enum fixed_addresses { * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. * - * We round it up to the next 512 pages boundary so that we + * We round it up to the next 256 pages boundary so that we * can have a single pgd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 #define FIX_BTMAPS_NESTING 4 - FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 - - (__end_of_permanent_fixed_addresses & 511), + FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - + (__end_of_permanent_fixed_addresses & 255), FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1, FIX_WP_TEST, #ifdef CONFIG_ACPI diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h index c184441133f..5c68b32ee1c 100644 --- a/include/asm-x86/ftrace.h +++ b/include/asm-x86/ftrace.h @@ -1,5 +1,5 @@ #ifndef _ASM_X86_FTRACE -#define _ASM_SPARC64_FTRACE +#define _ASM_X86_FTRACE #ifdef CONFIG_FTRACE #define MCOUNT_ADDR ((long)(mcount)) diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 33b9aeeb35a..3f62a83887f 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -2,7 +2,6 @@ #define _ASM_X8664_GART_H 1 #include <asm/e820.h> -#include <asm/iommu.h> extern void set_up_gart_resume(u32, u32); diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h index b02ea6e17de..754d635f90f 100644 --- a/include/asm-x86/genapic_32.h +++ b/include/asm-x86/genapic_32.h @@ -118,6 +118,7 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define get_uv_system_type() UV_NONE #define is_uv_system() 0 #define uv_wakeup_secondary(a, b) 1 +#define uv_system_init() do {} while (0) #endif diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 0f8504627c4..a47d6312913 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -42,6 +42,7 @@ extern int is_uv_system(void); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern void uv_cpu_init(void); +extern void uv_system_init(void); extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern void setup_apic_routing(void); diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index bb06027fc83..2c1cda0b8a8 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -50,6 +50,7 @@ extern int geode_get_dev_base(unsigned int dev); #define MSR_PIC_YSEL_HIGH 0x51400021 #define MSR_PIC_ZSEL_LOW 0x51400022 #define MSR_PIC_ZSEL_HIGH 0x51400023 +#define MSR_PIC_IRQM_LPC 0x51400025 #define MSR_MFGPT_IRQ 0x51400028 #define MSR_MFGPT_NR 0x51400029 @@ -237,7 +238,7 @@ static inline u16 geode_mfgpt_read(int timer, u16 reg) } extern int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable); -extern int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable); +extern int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable); extern int geode_mfgpt_alloc_timer(int timer, int domain); #define geode_mfgpt_setup_irq(t, c, i) geode_mfgpt_set_irq((t), (c), (i), 1) diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h index ff87fca0caf..c4c91b37c10 100644 --- a/include/asm-x86/gpio.h +++ b/include/asm-x86/gpio.h @@ -1,6 +1,56 @@ +/* + * Generic GPIO API implementation for x86. + * + * Derived from the generic GPIO API for powerpc: + * + * Copyright (c) 2007-2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov <avorontsov@ru.mvista.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + #ifndef _ASM_I386_GPIO_H #define _ASM_I386_GPIO_H -#include <gpio.h> +#include <asm-generic/gpio.h> + +#ifdef CONFIG_GPIOLIB + +/* + * Just call gpiolib. + */ +static inline int gpio_get_value(unsigned int gpio) +{ + return __gpio_get_value(gpio); +} + +static inline void gpio_set_value(unsigned int gpio, int value) +{ + __gpio_set_value(gpio, value); +} + +static inline int gpio_cansleep(unsigned int gpio) +{ + return __gpio_cansleep(gpio); +} + +/* + * Not implemented, yet. + */ +static inline int gpio_to_irq(unsigned int gpio) +{ + return -ENOSYS; +} + +static inline int irq_to_gpio(unsigned int irq) +{ + return -EINVAL; +} + +#endif /* CONFIG_GPIOLIB */ #endif /* _ASM_I386_GPIO_H */ diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h index 14171a4924f..439a9acc132 100644 --- a/include/asm-x86/hugetlb.h +++ b/include/asm-x86/hugetlb.h @@ -14,11 +14,13 @@ static inline int is_hugepage_only_range(struct mm_struct *mm, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. */ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) { - if (len & ~HPAGE_MASK) + struct hstate *h = hstate_file(file); + if (len & ~huge_page_mask(h)) return -EINVAL; - if (addr & ~HPAGE_MASK) + if (addr & ~huge_page_mask(h)) return -EINVAL; return 0; } @@ -26,7 +28,7 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) { } -static inline void hugetlb_free_pgd_range(struct mmu_gather **tlb, +static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling) diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index 77ba51df566..edd0b95f14d 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -98,9 +98,17 @@ extern void (*const interrupt[NR_IRQS])(void); #else typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); -extern spinlock_t vector_lock; #endif -extern void setup_vector_irq(int cpu); + +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64) +extern void lock_vector_lock(void); +extern void unlock_vector_lock(void); +extern void __setup_vector_irq(int cpu); +#else +static inline void lock_vector_lock(void) {} +static inline void unlock_vector_lock(void) {} +static inline void __setup_vector_irq(int cpu) {} +#endif #endif /* !ASSEMBLY_ */ diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 37672f79dcc..56d00e31aec 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -13,6 +13,7 @@ #include <linux/sched.h> #include <linux/kernel_stat.h> #include <linux/regset.h> +#include <linux/hardirq.h> #include <asm/asm.h> #include <asm/processor.h> #include <asm/sigcontext.h> @@ -62,8 +63,6 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) #else : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); #endif - if (unlikely(err)) - init_fpu(current); return err; } @@ -137,60 +136,6 @@ static inline void __save_init_fpu(struct task_struct *tsk) task_thread_info(tsk)->status &= ~TS_USEDFPU; } -/* - * Signal frame handlers. - */ - -static inline int save_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err = 0; - - BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.xstate->fxsave)); - - if ((unsigned long)buf % 16) - printk("save_i387: bad fpstate %p\n", buf); - - if (!used_math()) - return 0; - clear_used_math(); /* trigger finit */ - if (task_thread_info(tsk)->status & TS_USEDFPU) { - err = save_i387_checking((struct i387_fxsave_struct __user *) - buf); - if (err) - return err; - task_thread_info(tsk)->status &= ~TS_USEDFPU; - stts(); - } else { - if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, - sizeof(struct i387_fxsave_struct))) - return -1; - } - return 1; -} - -/* - * This restores directly out of user space. Exceptions are handled. - */ -static inline int restore_i387(struct _fpstate __user *buf) -{ - struct task_struct *tsk = current; - int err; - - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } - - if (!(task_thread_info(current)->status & TS_USEDFPU)) { - clts(); - task_thread_info(current)->status |= TS_USEDFPU; - } - return restore_fpu_checking((__force struct i387_fxsave_struct *)buf); -} - #else /* CONFIG_X86_32 */ extern void finit(void); @@ -290,6 +235,37 @@ static inline void kernel_fpu_end(void) preempt_enable(); } +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context aswell. To prevent these kernel instructions + * in interrupt context interact wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +static inline int irq_ts_save(void) +{ + /* + * If we are in process context, we are ok to take a spurious DNA fault. + * Otherwise, doing clts() in process context require pre-emption to + * be disabled or some heavy lifting like kernel_fpu_begin() + */ + if (!in_interrupt()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} + +static inline void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} + #ifdef CONFIG_X86_64 static inline void save_init_fpu(struct task_struct *tsk) diff --git a/include/asm-x86/ide.h b/include/asm-x86/ide.h deleted file mode 100644 index cf9c98e5bdb..00000000000 --- a/include/asm-x86/ide.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 1994-1996 Linus Torvalds & authors - */ - -/* - * This file contains the i386 architecture specific IDE code. - */ - -#ifndef __ASMi386_IDE_H -#define __ASMi386_IDE_H - -#ifdef __KERNEL__ - - -#ifndef MAX_HWIFS -# ifdef CONFIG_BLK_DEV_IDEPCI -#define MAX_HWIFS 10 -# else -#define MAX_HWIFS 6 -# endif -#endif - -static __inline__ int ide_default_irq(unsigned long base) -{ - switch (base) { - case 0x1f0: return 14; - case 0x170: return 15; - case 0x1e8: return 11; - case 0x168: return 10; - case 0x1e0: return 8; - case 0x160: return 12; - default: - return 0; - } -} - -static __inline__ unsigned long ide_default_io_base(int index) -{ - /* - * If PCI is present then it is not safe to poke around - * the other legacy IDE ports. Only 0x1f0 and 0x170 are - * defined compatibility mode ports for PCI. A user can - * override this using ide= but we must default safe. - */ - if (no_pci_devices()) { - switch(index) { - case 2: return 0x1e8; - case 3: return 0x168; - case 4: return 0x1e0; - case 5: return 0x160; - } - } - switch (index) { - case 0: return 0x1f0; - case 1: return 0x170; - default: - return 0; - } -} - -#include <asm-generic/ide_iops.h> - -#endif /* __KERNEL__ */ - -#endif /* __ASMi386_IDE_H */ diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h index d240e5b30a4..cbb64912361 100644 --- a/include/asm-x86/idle.h +++ b/include/asm-x86/idle.h @@ -10,4 +10,6 @@ void idle_notifier_register(struct notifier_block *n); void enter_idle(void); void exit_idle(void); +void c1e_remove_cpu(int cpu); + #endif diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index bf5d629b3a3..0f954dc89cb 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -21,7 +21,7 @@ extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); #define build_mmio_read(name, size, type, reg, barrier) \ static inline type name(const volatile void __iomem *addr) \ -{ type ret; asm volatile("mov" size " %1,%0":"=" reg (ret) \ +{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \ :"m" (*(volatile type __force *)addr) barrier); return ret; } #define build_mmio_write(name, size, type, reg, barrier) \ @@ -29,13 +29,13 @@ static inline void name(type val, volatile void __iomem *addr) \ { asm volatile("mov" size " %0,%1": :reg (val), \ "m" (*(volatile type __force *)addr) barrier); } -build_mmio_read(readb, "b", unsigned char, "q", :"memory") -build_mmio_read(readw, "w", unsigned short, "r", :"memory") -build_mmio_read(readl, "l", unsigned int, "r", :"memory") +build_mmio_read(readb, "b", unsigned char, "=q", :"memory") +build_mmio_read(readw, "w", unsigned short, "=r", :"memory") +build_mmio_read(readl, "l", unsigned int, "=r", :"memory") -build_mmio_read(__readb, "b", unsigned char, "q", ) -build_mmio_read(__readw, "w", unsigned short, "r", ) -build_mmio_read(__readl, "l", unsigned int, "r", ) +build_mmio_read(__readb, "b", unsigned char, "=q", ) +build_mmio_read(__readw, "w", unsigned short, "=r", ) +build_mmio_read(__readl, "l", unsigned int, "=r", ) build_mmio_write(writeb, "b", unsigned char, "q", :"memory") build_mmio_write(writew, "w", unsigned short, "r", :"memory") @@ -59,8 +59,8 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #define mmiowb() barrier() #ifdef CONFIG_X86_64 -build_mmio_read(readq, "q", unsigned long, "r", :"memory") -build_mmio_read(__readq, "q", unsigned long, "r", ) +build_mmio_read(readq, "q", unsigned long, "=r", :"memory") +build_mmio_read(__readq, "q", unsigned long, "=r", ) build_mmio_write(writeq, "q", unsigned long, "r", :"memory") build_mmio_write(__writeq, "q", unsigned long, "r", ) diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 4df44ed5407..e876d89ac15 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -110,6 +110,8 @@ static inline void *phys_to_virt(unsigned long address) */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); /* * The default ioremap() behavior is non-cached: diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index ddd8058a502..22995c5c5ad 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -175,6 +175,8 @@ extern void early_iounmap(void *addr, unsigned long size); */ extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, + unsigned long prot_val); /* * The default ioremap() behavior is non-cached: diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 068c9a40aa5..5f888cc5be4 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -3,9 +3,12 @@ extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); +extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); + #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; extern int gart_iommu_aperture_allowed; @@ -25,10 +28,18 @@ extern void gart_iommu_hole_init(void); static inline void early_gart_iommu_check(void) { } - +static inline void gart_iommu_init(void) +{ +} static inline void gart_iommu_shutdown(void) { } +static inline void gart_parse_options(char *options) +{ +} +static inline void gart_iommu_hole_init(void) +{ +} #endif #endif diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index 196d63c28aa..bb1c09f7a76 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -122,7 +122,7 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) * - mbligh */ local_irq_save(flags); - for_each_cpu_mask(query_cpu, mask) { + for_each_cpu_mask_nr(query_cpu, mask) { __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), vector, APIC_DEST_PHYSICAL); } diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index 90b1d1f12f0..a48c7f2dbdc 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -76,6 +76,7 @@ #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa #define THRESHOLD_APIC_VECTOR 0xf9 +#define UV_BAU_MESSAGE 0xf8 #define INVALIDATE_TLB_VECTOR_END 0xf7 #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ @@ -109,7 +110,15 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#if !defined(CONFIG_X86_VOYAGER) +#ifdef CONFIG_X86_64 +# if NR_CPUS < MAX_IO_APICS +# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +# else +# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +# endif +# define NR_IRQ_VECTORS NR_IRQS + +#elif !defined(CONFIG_X86_VOYAGER) # if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h index 8f855a15f64..4246ab7dc98 100644 --- a/include/asm-x86/kexec.h +++ b/include/asm-x86/kexec.h @@ -10,14 +10,15 @@ # define VA_PTE_0 5 # define PA_PTE_1 6 # define VA_PTE_1 7 +# define PA_SWAP_PAGE 8 # ifdef CONFIG_X86_PAE -# define PA_PMD_0 8 -# define VA_PMD_0 9 -# define PA_PMD_1 10 -# define VA_PMD_1 11 -# define PAGES_NR 12 +# define PA_PMD_0 9 +# define VA_PMD_0 10 +# define PA_PMD_1 11 +# define VA_PMD_1 12 +# define PAGES_NR 13 # else -# define PAGES_NR 8 +# define PAGES_NR 9 # endif #else # define PA_CONTROL_PAGE 0 @@ -40,6 +41,10 @@ # define PAGES_NR 17 #endif +#ifdef CONFIG_X86_32 +# define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +#endif + #ifndef __ASSEMBLY__ #include <linux/string.h> @@ -62,7 +67,7 @@ /* Maximum address we can use for the control code buffer */ # define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE -# define KEXEC_CONTROL_CODE_SIZE 4096 +# define KEXEC_CONTROL_PAGE_SIZE 4096 /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_386 @@ -78,7 +83,7 @@ # define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) /* Allocate one page for the pdp and the second for the code */ -# define KEXEC_CONTROL_CODE_SIZE (4096UL + 4096UL) +# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) /* The native architecture */ # define KEXEC_ARCH KEXEC_ARCH_X86_64 @@ -152,11 +157,12 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } #ifdef CONFIG_X86_32 -asmlinkage NORET_TYPE void +asmlinkage unsigned long relocate_kernel(unsigned long indirection_page, unsigned long control_page, unsigned long start_address, - unsigned int has_pae) ATTRIB_NORET; + unsigned int has_pae, + unsigned int preserve_context); #else NORET_TYPE void relocate_kernel(unsigned long indirection_page, diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h index 484c47554f3..94d63db1036 100644 --- a/include/asm-x86/kgdb.h +++ b/include/asm-x86/kgdb.h @@ -39,12 +39,13 @@ enum regnames { GDB_FS, /* 14 */ GDB_GS, /* 15 */ }; +#define NUMREGBYTES ((GDB_GS+1)*4) #else /* ! CONFIG_X86_32 */ -enum regnames { +enum regnames64 { GDB_AX, /* 0 */ - GDB_DX, /* 1 */ + GDB_BX, /* 1 */ GDB_CX, /* 2 */ - GDB_BX, /* 3 */ + GDB_DX, /* 3 */ GDB_SI, /* 4 */ GDB_DI, /* 5 */ GDB_BP, /* 6 */ @@ -58,18 +59,15 @@ enum regnames { GDB_R14, /* 14 */ GDB_R15, /* 15 */ GDB_PC, /* 16 */ - GDB_PS, /* 17 */ }; -#endif /* CONFIG_X86_32 */ -/* - * Number of bytes of registers: - */ -#ifdef CONFIG_X86_32 -# define NUMREGBYTES 64 -#else -# define NUMREGBYTES ((GDB_PS+1)*8) -#endif +enum regnames32 { + GDB_PS = 34, + GDB_CS, + GDB_SS, +}; +#define NUMREGBYTES ((GDB_SS+1)*4) +#endif /* CONFIG_X86_32 */ static inline void arch_kgdb_breakpoint(void) { diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 80eefef2cc7..6f1840812e5 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h @@ -228,5 +228,6 @@ struct kvm_pit_state { #define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12) #define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13) #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) +#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) #endif diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 844f2a89afb..c2e34c27590 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -13,6 +13,7 @@ #include <linux/types.h> #include <linux/mm.h> +#include <linux/mmu_notifier.h> #include <linux/kvm.h> #include <linux/kvm_para.h> @@ -27,6 +28,7 @@ #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_PIO_PAGE_OFFSET 1 +#define KVM_COALESCED_MMIO_PAGE_OFFSET 2 #define CR3_PAE_RESERVED_BITS ((X86_CR3_PWT | X86_CR3_PCD) - 1) #define CR3_NONPAE_RESERVED_BITS ((PAGE_SIZE-1) & ~(X86_CR3_PWT | X86_CR3_PCD)) @@ -79,6 +81,7 @@ #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 40 +#define KVM_NR_VAR_MTRR 8 extern spinlock_t kvm_lock; extern struct list_head vm_list; @@ -109,12 +112,12 @@ enum { }; enum { + VCPU_SREG_ES, VCPU_SREG_CS, + VCPU_SREG_SS, VCPU_SREG_DS, - VCPU_SREG_ES, VCPU_SREG_FS, VCPU_SREG_GS, - VCPU_SREG_SS, VCPU_SREG_TR, VCPU_SREG_LDTR, }; @@ -243,11 +246,13 @@ struct kvm_vcpu_arch { gfn_t last_pt_write_gfn; int last_pt_write_count; u64 *last_pte_updated; + gfn_t last_pte_gfn; struct { gfn_t gfn; /* presumed gfn during guest pte update */ pfn_t pfn; /* pfn corresponding to that gfn */ int largepage; + unsigned long mmu_seq; } update_pte; struct i387_fxsave_struct host_fx_image; @@ -287,6 +292,10 @@ struct kvm_vcpu_arch { unsigned int hv_clock_tsc_khz; unsigned int time_offset; struct page *time_page; + + bool nmi_pending; + + u64 mtrr[0x100]; }; struct kvm_mem_alias { @@ -344,6 +353,7 @@ struct kvm_vcpu_stat { u32 mmio_exits; u32 signal_exits; u32 irq_window_exits; + u32 nmi_window_exits; u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; @@ -379,7 +389,6 @@ struct kvm_x86_ops { void (*prepare_guest_switch)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); - void (*vcpu_decache)(struct kvm_vcpu *vcpu); int (*set_guest_debug)(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg); @@ -497,6 +506,10 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value); +void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, + int type_bits, int seg); + int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); @@ -515,6 +528,8 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, u32 error_code); +void kvm_inject_nmi(struct kvm_vcpu *vcpu); + void fx_init(struct kvm_vcpu *vcpu); int emulator_read_std(unsigned long addr, @@ -543,6 +558,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); void kvm_enable_tdp(void); +void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); @@ -554,55 +570,53 @@ static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) return (struct kvm_mmu_page *)page_private(page); } -static inline u16 read_fs(void) +static inline u16 kvm_read_fs(void) { u16 seg; asm("mov %%fs, %0" : "=g"(seg)); return seg; } -static inline u16 read_gs(void) +static inline u16 kvm_read_gs(void) { u16 seg; asm("mov %%gs, %0" : "=g"(seg)); return seg; } -static inline u16 read_ldt(void) +static inline u16 kvm_read_ldt(void) { u16 ldt; asm("sldt %0" : "=g"(ldt)); return ldt; } -static inline void load_fs(u16 sel) +static inline void kvm_load_fs(u16 sel) { asm("mov %0, %%fs" : : "rm"(sel)); } -static inline void load_gs(u16 sel) +static inline void kvm_load_gs(u16 sel) { asm("mov %0, %%gs" : : "rm"(sel)); } -#ifndef load_ldt -static inline void load_ldt(u16 sel) +static inline void kvm_load_ldt(u16 sel) { asm("lldt %0" : : "rm"(sel)); } -#endif -static inline void get_idt(struct descriptor_table *table) +static inline void kvm_get_idt(struct descriptor_table *table) { asm("sidt %0" : "=m"(*table)); } -static inline void get_gdt(struct descriptor_table *table) +static inline void kvm_get_gdt(struct descriptor_table *table) { asm("sgdt %0" : "=m"(*table)); } -static inline unsigned long read_tr_base(void) +static inline unsigned long kvm_read_tr_base(void) { u16 tr; asm("str %0" : "=g"(tr)); @@ -619,17 +633,17 @@ static inline unsigned long read_msr(unsigned long msr) } #endif -static inline void fx_save(struct i387_fxsave_struct *image) +static inline void kvm_fx_save(struct i387_fxsave_struct *image) { asm("fxsave (%0)":: "r" (image)); } -static inline void fx_restore(struct i387_fxsave_struct *image) +static inline void kvm_fx_restore(struct i387_fxsave_struct *image) { asm("fxrstor (%0)":: "r" (image)); } -static inline void fx_finit(void) +static inline void kvm_fx_finit(void) { asm("finit"); } @@ -691,4 +705,34 @@ enum { trace_mark(kvm_trace_##name, "%u %p %u %u %u %u %u %u", KVM_TRC_##evt, \ vcpu, 0, 0, 0, 0, 0, 0) +#ifdef CONFIG_64BIT +# define KVM_EX_ENTRY ".quad" +# define KVM_EX_PUSH "pushq" +#else +# define KVM_EX_ENTRY ".long" +# define KVM_EX_PUSH "pushl" +#endif + +/* + * Hardware virtualization extension instructions may fault if a + * reboot turns off virtualization while processes are running. + * Trap the fault and ignore the instruction if that happens. + */ +asmlinkage void kvm_handle_fault_on_reboot(void); + +#define __kvm_handle_fault_on_reboot(insn) \ + "666: " insn "\n\t" \ + ".pushsection .fixup, \"ax\" \n" \ + "667: \n\t" \ + KVM_EX_PUSH " $666b \n\t" \ + "jmp kvm_handle_fault_on_reboot \n\t" \ + ".popsection \n\t" \ + ".pushsection __ex_table, \"a\" \n\t" \ + KVM_EX_ENTRY " 666b, 667b \n\t" \ + ".popsection" + +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_age_hva(struct kvm *kvm, unsigned long hva); + #endif diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h index b877bbd2d3a..4e8c1e48d91 100644 --- a/include/asm-x86/kvm_x86_emulate.h +++ b/include/asm-x86/kvm_x86_emulate.h @@ -124,7 +124,8 @@ struct decode_cache { u8 rex_prefix; struct operand src; struct operand dst; - unsigned long *override_base; + bool has_seg_override; + u8 seg_override; unsigned int d; unsigned long regs[NR_VCPU_REGS]; unsigned long eip; @@ -134,6 +135,7 @@ struct decode_cache { u8 modrm_reg; u8 modrm_rm; u8 use_modrm_ea; + bool rip_relative; unsigned long modrm_ea; void *modrm_ptr; unsigned long modrm_val; @@ -150,12 +152,7 @@ struct x86_emulate_ctxt { /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; - unsigned long cs_base; - unsigned long ds_base; - unsigned long es_base; - unsigned long ss_base; - unsigned long gs_base; - unsigned long fs_base; + u32 cs_base; /* decode cache */ diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h index 017c8c19ad8..c3b9dc6970c 100644 --- a/include/asm-x86/mach-bigsmp/mach_apic.h +++ b/include/asm-x86/mach-bigsmp/mach_apic.h @@ -63,9 +63,9 @@ static inline void init_apic_ldr(void) unsigned long val; int cpu = smp_processor_id(); - apic_write_around(APIC_DFR, APIC_DFR_VALUE); + apic_write(APIC_DFR, APIC_DFR_VALUE); val = calculate_ldr(cpu); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } static inline void setup_apic_routing(void) diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index 0b2cde5e1b7..f3226b9a6b8 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -46,10 +46,10 @@ static inline void init_apic_ldr(void) { unsigned long val; - apic_write_around(APIC_DFR, APIC_DFR_VALUE); + apic_write(APIC_DFR, APIC_DFR_VALUE); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } static inline int apic_id_registered(void) diff --git a/include/asm-x86/mach-default/smpboot_hooks.h b/include/asm-x86/mach-default/smpboot_hooks.h index 56d001b9dce..dbab36d64d4 100644 --- a/include/asm-x86/mach-default/smpboot_hooks.h +++ b/include/asm-x86/mach-default/smpboot_hooks.h @@ -12,11 +12,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) { CMOS_WRITE(0xa, 0xf); local_flush_tlb(); - Dprintk("1.\n"); + pr_debug("1.\n"); *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; - Dprintk("2.\n"); + pr_debug("2.\n"); *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; - Dprintk("3.\n"); + pr_debug("3.\n"); } static inline void smpboot_restore_warm_reset_vector(void) diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h index fbc8ad256f5..0a3fdf93067 100644 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ b/include/asm-x86/mach-es7000/mach_apic.h @@ -66,9 +66,9 @@ static inline void init_apic_ldr(void) unsigned long val; int cpu = smp_processor_id(); - apic_write_around(APIC_DFR, APIC_DFR_VALUE); + apic_write(APIC_DFR, APIC_DFR_VALUE); val = calculate_ldr(cpu); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } #ifndef CONFIG_X86_GENERICARCH diff --git a/include/asm-x86/mach-generic/mach_mpspec.h b/include/asm-x86/mach-generic/mach_mpspec.h index 9ef0b941bb2..c83c120be53 100644 --- a/include/asm-x86/mach-generic/mach_mpspec.h +++ b/include/asm-x86/mach-generic/mach_mpspec.h @@ -7,4 +7,6 @@ /* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. */ #define MAX_MP_BUSSES 260 +extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, + char *productid); #endif /* __ASM_MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h index 1f76c2e7023..c47e2ab5c5c 100644 --- a/include/asm-x86/mach-summit/mach_apic.h +++ b/include/asm-x86/mach-summit/mach_apic.h @@ -63,10 +63,10 @@ static inline void init_apic_ldr(void) * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); id = my_cluster | (1UL << count); - apic_write_around(APIC_DFR, APIC_DFR_VALUE); + apic_write(APIC_DFR, APIC_DFR_VALUE); val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; val |= SET_APIC_LOGICAL_ID(id); - apic_write_around(APIC_LDR, val); + apic_write(APIC_LDR, val); } static inline int multi_timer_check(int apic, int irq) @@ -122,7 +122,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) static inline physid_mask_t apicid_to_cpu_present(int apicid) { - return physid_mask_of_physid(0); + return physid_mask_of_physid(apicid); } static inline void setup_portio_remap(void) diff --git a/include/asm-x86/mach-visws/entry_arch.h b/include/asm-x86/mach-visws/entry_arch.h deleted file mode 100644 index 86be554342d..00000000000 --- a/include/asm-x86/mach-visws/entry_arch.h +++ /dev/null @@ -1,5 +0,0 @@ -/* - * VISWS uses the standard Linux entry points: - */ - -#include "../mach-default/entry_arch.h" diff --git a/include/asm-x86/mach-visws/mach_apic.h b/include/asm-x86/mach-visws/mach_apic.h deleted file mode 100644 index 6943e7a1d0e..00000000000 --- a/include/asm-x86/mach-visws/mach_apic.h +++ /dev/null @@ -1 +0,0 @@ -#include "../mach-default/mach_apic.h" diff --git a/include/asm-x86/mach-visws/mach_apicdef.h b/include/asm-x86/mach-visws/mach_apicdef.h deleted file mode 100644 index 42711d152a9..00000000000 --- a/include/asm-x86/mach-visws/mach_apicdef.h +++ /dev/null @@ -1 +0,0 @@ -#include "../mach-default/mach_apicdef.h" diff --git a/include/asm-x86/mach-visws/setup_arch.h b/include/asm-x86/mach-visws/setup_arch.h deleted file mode 100644 index fa4766ca2d1..00000000000 --- a/include/asm-x86/mach-visws/setup_arch.h +++ /dev/null @@ -1 +0,0 @@ -#include "../mach-default/setup_arch.h" diff --git a/include/asm-x86/mach-visws/smpboot_hooks.h b/include/asm-x86/mach-visws/smpboot_hooks.h deleted file mode 100644 index e4433ca8871..00000000000 --- a/include/asm-x86/mach-visws/smpboot_hooks.h +++ /dev/null @@ -1 +0,0 @@ -#include "../mach-default/smpboot_hooks.h" diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h index 94f1fd79e22..531eaa58745 100644 --- a/include/asm-x86/mce.h +++ b/include/asm-x86/mce.h @@ -92,6 +92,7 @@ extern int mce_disabled; void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, device_mce); +extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); diff --git a/include/asm-x86/mman.h b/include/asm-x86/mman.h index c1682b542da..90bc4108a4f 100644 --- a/include/asm-x86/mman.h +++ b/include/asm-x86/mman.h @@ -12,6 +12,7 @@ #define MAP_NORESERVE 0x4000 /* don't check for reservations */ #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ #define MAP_NONBLOCK 0x10000 /* do not block on IO */ +#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index 95beda07c6f..e293ab81e85 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -3,7 +3,7 @@ #ifdef CONFIG_PCI_MMCONFIG extern void __cpuinit fam10h_check_enable_mmcfg(void); -extern void __init check_enable_amd_mmconf_dmi(void); +extern void __cpuinit check_enable_amd_mmconf_dmi(void); #else static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index b2298a22756..5862e646065 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -97,10 +97,16 @@ static inline int pfn_valid(int pfn) reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags)) #define alloc_bootmem(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_nopanic(x) \ + __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ + __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_nopanic(x) \ + __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \ + __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) #define alloc_bootmem_node(pgdat, x) \ diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index ca110ee73f0..2362cfda1fb 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -52,14 +52,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, { DECLARE_ARGS(val, low, high); - asm volatile("2: rdmsr ; xor %0,%0\n" + asm volatile("2: rdmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" - "3: mov %3,%0 ; jmp 1b\n\t" + "3: mov %[fault],%[err] ; jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) - : "=r" (*err), EAX_EDX_RET(val, low, high) - : "c" (msr), "i" (-EFAULT)); + : [err] "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr), [fault] "i" (-EFAULT)); return EAX_EDX_VAL(val, low, high); } @@ -73,15 +73,15 @@ static inline int native_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int err; - asm volatile("2: wrmsr ; xor %0,%0\n" + asm volatile("2: wrmsr ; xor %[err],%[err]\n" "1:\n\t" ".section .fixup,\"ax\"\n\t" - "3: mov %4,%0 ; jmp 1b\n\t" + "3: mov %[fault],%[err] ; jmp 1b\n\t" ".previous\n\t" _ASM_EXTABLE(2b, 3b) - : "=a" (err) + : [err] "=a" (err) : "c" (msr), "0" (low), "d" (high), - "i" (-EFAULT) + [fault] "i" (-EFAULT) : "memory"); return err; } @@ -192,19 +192,20 @@ do { \ #define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) #ifdef CONFIG_SMP -void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); -void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); - int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); #else /* CONFIG_SMP */ -static inline void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) +static inline int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { rdmsr(msr_no, *l, *h); + return 0; } -static inline void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { wrmsr(msr_no, l, h); + return 0; } static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) diff --git a/include/asm-x86/namei.h b/include/asm-x86/namei.h deleted file mode 100644 index 415ef5d9550..00000000000 --- a/include/asm-x86/namei.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _ASM_X86_NAMEI_H -#define _ASM_X86_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* _ASM_X86_NAMEI_H */ diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index 28d7b4533b1..49982110e4d 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -18,8 +18,11 @@ (ie, 32-bit PAE). */ #define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) -/* PTE_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ -#define PTE_MASK ((pteval_t)PHYSICAL_PAGE_MASK) +/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) + +/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +#define PTE_FLAGS_MASK (~PTE_PFN_MASK) #define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) @@ -29,8 +32,7 @@ #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK) +#define HUGE_MAX_HSTATE 2 #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -144,6 +146,11 @@ static inline pteval_t native_pte_val(pte_t pte) return pte.pte; } +static inline pteval_t native_pte_flags(pte_t pte) +{ + return native_pte_val(pte) & PTE_FLAGS_MASK; +} + #define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) @@ -165,7 +172,7 @@ static inline pteval_t native_pte_val(pte_t pte) #endif #define pte_val(x) native_pte_val(x) -#define pte_flags(x) native_pte_val(x) +#define pte_flags(x) native_pte_flags(x) #define __pte(x) native_make_pte(x) #endif /* CONFIG_PARAVIRT */ diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index ef5e8ec6a6a..fbbde93f12d 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -205,7 +205,6 @@ struct pv_apic_ops { * these shouldn't be in this interface. */ void (*apic_write)(unsigned long reg, u32 v); - void (*apic_write_atomic)(unsigned long reg, u32 v); u32 (*apic_read)(unsigned long reg); void (*setup_boot_clock)(void); void (*setup_secondary_clock)(void); @@ -326,6 +325,15 @@ struct pv_mmu_ops { unsigned long phys, pgprot_t flags); }; +struct raw_spinlock; +struct pv_lock_ops { + int (*spin_is_locked)(struct raw_spinlock *lock); + int (*spin_is_contended)(struct raw_spinlock *lock); + void (*spin_lock)(struct raw_spinlock *lock); + int (*spin_trylock)(struct raw_spinlock *lock); + void (*spin_unlock)(struct raw_spinlock *lock); +}; + /* This contains all the paravirt structures: we get a convenient * number for each function using the offset which we use to indicate * what to patch. */ @@ -336,6 +344,7 @@ struct paravirt_patch_template { struct pv_irq_ops pv_irq_ops; struct pv_apic_ops pv_apic_ops; struct pv_mmu_ops pv_mmu_ops; + struct pv_lock_ops pv_lock_ops; }; extern struct pv_info pv_info; @@ -345,6 +354,7 @@ extern struct pv_cpu_ops pv_cpu_ops; extern struct pv_irq_ops pv_irq_ops; extern struct pv_apic_ops pv_apic_ops; extern struct pv_mmu_ops pv_mmu_ops; +extern struct pv_lock_ops pv_lock_ops; #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) @@ -896,11 +906,6 @@ static inline void apic_write(unsigned long reg, u32 v) PVOP_VCALL2(pv_apic_ops.apic_write, reg, v); } -static inline void apic_write_atomic(unsigned long reg, u32 v) -{ - PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v); -} - static inline u32 apic_read(unsigned long reg) { return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg); @@ -1083,6 +1088,9 @@ static inline pteval_t pte_flags(pte_t pte) ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, pte.pte); +#ifdef CONFIG_PARAVIRT_DEBUG + BUG_ON(ret & PTE_PFN_MASK); +#endif return ret; } @@ -1374,6 +1382,37 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) +void paravirt_use_bytelocks(void); + +#ifdef CONFIG_SMP + +static inline int __raw_spin_is_locked(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); +} + +static inline int __raw_spin_is_contended(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); +} + +static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) +{ + PVOP_VCALL1(pv_lock_ops.spin_lock, lock); +} + +static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); +} + +static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) +{ + PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); +} + +#endif + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch_site { u8 *instr; /* original instructions */ @@ -1396,8 +1435,8 @@ extern struct paravirt_patch_site __parainstructions[], * caller saved registers but the argument parameter */ #define PV_SAVE_REGS "pushq %%rdi;" #define PV_RESTORE_REGS "popq %%rdi;" -#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx" -#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx" +#define PV_EXTRA_CLOBBERS EXTRA_CLOBBERS, "rcx" , "rdx", "rsi" +#define PV_VEXTRA_CLOBBERS EXTRA_CLOBBERS, "rdi", "rcx" , "rdx", "rsi" #define PV_FLAGS_ARG "D" #endif @@ -1458,6 +1497,7 @@ static inline unsigned long __raw_local_irq_save(void) return f; } + /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef __PVOP_CALL @@ -1489,8 +1529,26 @@ static inline unsigned long __raw_local_irq_save(void) #ifdef CONFIG_X86_64 -#define PV_SAVE_REGS pushq %rax; pushq %rdi; pushq %rcx; pushq %rdx -#define PV_RESTORE_REGS popq %rdx; popq %rcx; popq %rdi; popq %rax +#define PV_SAVE_REGS \ + push %rax; \ + push %rcx; \ + push %rdx; \ + push %rsi; \ + push %rdi; \ + push %r8; \ + push %r9; \ + push %r10; \ + push %r11 +#define PV_RESTORE_REGS \ + pop %r11; \ + pop %r10; \ + pop %r9; \ + pop %r8; \ + pop %rdi; \ + pop %rsi; \ + pop %rdx; \ + pop %rcx; \ + pop %rax #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) #define PARA_INDIRECT(addr) *addr(%rip) diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h index 912a3a17b9d..f643a3a92da 100644 --- a/include/asm-x86/percpu.h +++ b/include/asm-x86/percpu.h @@ -22,6 +22,32 @@ DECLARE_PER_CPU(struct x8664_pda, pda); +/* + * These are supposed to be implemented as a single instruction which + * operates on the per-cpu data base segment. x86-64 doesn't have + * that yet, so this is a fairly inefficient workaround for the + * meantime. The single instruction is atomic with respect to + * preemption and interrupts, so we need to explicitly disable + * interrupts here to achieve the same effect. However, because it + * can be used from within interrupt-disable/enable, we can't actually + * disable interrupts; disabling preemption is enough. + */ +#define x86_read_percpu(var) \ + ({ \ + typeof(per_cpu_var(var)) __tmp; \ + preempt_disable(); \ + __tmp = __get_cpu_var(var); \ + preempt_enable(); \ + __tmp; \ + }) + +#define x86_write_percpu(var, val) \ + do { \ + preempt_disable(); \ + __get_cpu_var(var) = (val); \ + preempt_enable(); \ + } while(0) + #else /* CONFIG_X86_64 */ #ifdef __ASSEMBLY__ @@ -156,7 +182,7 @@ do { \ DEFINE_PER_CPU(_type, _name) = _initvalue; \ __typeof__(_type) _name##_early_map[NR_CPUS] __initdata = \ { [0 ... NR_CPUS-1] = _initvalue }; \ - __typeof__(_type) *_name##_early_ptr = _name##_early_map + __typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map #define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \ EXPORT_PER_CPU_SYMBOL(_name) diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h index c93dbb6c262..105057f3403 100644 --- a/include/asm-x86/pgtable-3level.h +++ b/include/asm-x86/pgtable-3level.h @@ -25,7 +25,7 @@ static inline int pud_none(pud_t pud) static inline int pud_bad(pud_t pud) { - return (pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; + return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; } static inline int pud_present(pud_t pud) @@ -120,9 +120,9 @@ static inline void pud_clear(pud_t *pudp) write_cr3(pgd); } -#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK)) +#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK)) -#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK)) +#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) /* Find an entry in the second-level page table.. */ @@ -160,7 +160,7 @@ static inline int pte_none(pte_t pte) static inline unsigned long pte_pfn(pte_t pte) { - return (pte_val(pte) & PTE_MASK) >> PAGE_SHIFT; + return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; } /* diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 49cbd76b954..04caa2f544d 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -18,6 +18,7 @@ #define _PAGE_BIT_UNUSED2 10 #define _PAGE_BIT_UNUSED3 11 #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) @@ -34,6 +35,8 @@ #define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) +#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) +#define __HAVE_ARCH_PTE_SPECIAL #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) @@ -53,8 +56,8 @@ _PAGE_DIRTY) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_PCD | _PAGE_PWT | \ - _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ + _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) #define _PAGE_CACHE_WB (0) @@ -180,7 +183,7 @@ static inline int pte_exec(pte_t pte) static inline int pte_special(pte_t pte) { - return 0; + return pte_val(pte) & _PAGE_SPECIAL; } static inline int pmd_large(pmd_t pte) @@ -246,7 +249,7 @@ static inline pte_t pte_clrglobal(pte_t pte) static inline pte_t pte_mkspecial(pte_t pte) { - return pte; + return __pte(pte_val(pte) | _PAGE_SPECIAL); } extern pteval_t __supported_pte_mask; @@ -286,7 +289,7 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) return __pgprot(preservebits | addbits); } -#define pte_pgprot(x) __pgprot(pte_flags(x) & ~PTE_MASK) +#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) @@ -302,6 +305,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); +#ifdef CONFIG_X86_32 +extern void native_pagetable_setup_start(pgd_t *base); +extern void native_pagetable_setup_done(pgd_t *base); +#else +static inline void native_pagetable_setup_start(pgd_t *base) {} +static inline void native_pagetable_setup_done(pgd_t *base) {} +#endif + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT */ @@ -333,6 +344,16 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte); #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) + +static inline void __init paravirt_pagetable_setup_start(pgd_t *base) +{ + native_pagetable_setup_start(base); +} + +static inline void __init paravirt_pagetable_setup_done(pgd_t *base) +{ + native_pagetable_setup_done(base); +} #endif /* CONFIG_PARAVIRT */ #endif /* __ASSEMBLY__ */ diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index ec871c420d7..5c3b26567a9 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -88,7 +88,7 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val((x))) #define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) -#define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) @@ -139,7 +139,7 @@ static inline int pud_large(pud_t pud) { return 0; } #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) #define pmd_page_vaddr(pmd) \ - ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK)) + ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK)) #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ @@ -171,21 +171,6 @@ do { \ */ #define update_mmu_cache(vma, address, pte) do { } while (0) -extern void native_pagetable_setup_start(pgd_t *base); -extern void native_pagetable_setup_done(pgd_t *base); - -#ifndef CONFIG_PARAVIRT -static inline void __init paravirt_pagetable_setup_start(pgd_t *base) -{ - native_pagetable_setup_start(base); -} - -static inline void __init paravirt_pagetable_setup_done(pgd_t *base) -{ - native_pagetable_setup_done(base); -} -#endif /* !CONFIG_PARAVIRT */ - #endif /* !__ASSEMBLY__ */ /* diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index fa7208b483c..549144d03d9 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -16,6 +16,8 @@ extern pud_t level3_kernel_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; +extern pmd_t level2_fixmap_pgt[512]; +extern pmd_t level2_ident_pgt[512]; extern pgd_t init_level4_pgt[]; #define swapper_pg_dir init_level4_pgt @@ -149,24 +151,24 @@ static inline void native_pgd_clear(pgd_t *pgd) #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) #define VMEMMAP_START _AC(0xffffe20000000000, UL) #define MODULES_VADDR _AC(0xffffffffa0000000, UL) -#define MODULES_END _AC(0xfffffffffff00000, UL) +#define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) #ifndef __ASSEMBLY__ static inline int pgd_bad(pgd_t pgd) { - return (pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE; + return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; } static inline int pud_bad(pud_t pud) { - return (pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE; + return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; } static inline int pmd_bad(pmd_t pmd) { - return (pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE; + return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; } #define pte_none(x) (!pte_val((x))) @@ -191,7 +193,7 @@ static inline int pmd_bad(pmd_t pmd) * Level 4 access. */ #define pgd_page_vaddr(pgd) \ - ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK)) + ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) #define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) #define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) static inline int pgd_large(pgd_t pgd) { return 0; } @@ -214,7 +216,7 @@ static inline int pud_large(pud_t pte) } /* PMD - Level 2 access */ -#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK)) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) #define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index 092b39b3a7e..eff2ecd7fff 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -88,10 +88,12 @@ #define CX86_ARR_BASE 0xc4 #define CX86_RCR_BASE 0xdc +#ifdef __KERNEL__ #ifdef CONFIG_VM86 #define X86_VM_MASK X86_EFLAGS_VM #else #define X86_VM_MASK 0 /* No VM86 support */ #endif +#endif #endif /* __ASM_I386_PROCESSOR_FLAGS_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 55402d2ab93..4df3e2f6fb5 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -134,7 +134,7 @@ extern __u32 cleared_cpu_caps[NCAPINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) -#define current_cpu_data cpu_data(smp_processor_id()) +#define current_cpu_data __get_cpu_var(cpu_info) #else #define cpu_data(cpu) boot_cpu_data #define current_cpu_data boot_cpu_data @@ -722,14 +722,35 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); -extern int force_mwait; - extern void select_idle_routine(const struct cpuinfo_x86 *c); extern unsigned long boot_option_idle_override; extern unsigned long idle_halt; extern unsigned long idle_nomwait; +/* + * on systems with caches, caches must be flashed as the absolute + * last instruction before going into a suspended halt. Otherwise, + * dirty data can linger in the cache and become stale on resume, + * leading to strange errors. + * + * perform a variety of operations to guarantee that the compiler + * will not reorder instructions. wbinvd itself is serializing + * so the processor will not reorder. + * + * Systems without cache can just go into halt. + */ +static inline void wbinvd_halt(void) +{ + mb(); + /* check for clflush to determine if wbinvd is legal */ + if (cpu_has_clflush) + asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory"); + else + while (1) + halt(); +} + extern void enable_sep_cpu(void); extern int sysenter_setup(void); diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index f224eb3c315..72e7b9db29b 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -73,11 +73,11 @@ #ifdef __x86_64__ # define PTRACE_ARCH_PRCTL 30 -#else -# define PTRACE_SYSEMU 31 -# define PTRACE_SYSEMU_SINGLESTEP 32 #endif +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 + #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ #ifndef __ASSEMBLY__ diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index adec887dd7c..5c2ff4bc298 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -41,6 +41,12 @@ # define NEED_3DNOW 0 #endif +#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64) +# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31)) +#else +# define NEED_NOPL 0 +#endif + #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) @@ -67,7 +73,7 @@ #define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW) #define REQUIRED_MASK2 0 -#define REQUIRED_MASK3 0 +#define REQUIRED_MASK3 (NEED_NOPL) #define REQUIRED_MASK4 0 #define REQUIRED_MASK5 0 #define REQUIRED_MASK6 0 diff --git a/include/asm-x86/semaphore.h b/include/asm-x86/semaphore.h deleted file mode 100644 index d9b2034ed1d..00000000000 --- a/include/asm-x86/semaphore.h +++ /dev/null @@ -1 +0,0 @@ -#include <linux/semaphore.h> diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index 90ab2225e71..a07c6f1c01e 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -19,13 +19,28 @@ static inline int is_visws_box(void) { return 0; } /* * Any setup quirks to be performed? */ -extern int (*arch_time_init_quirk)(void); -extern int (*arch_pre_intr_init_quirk)(void); -extern int (*arch_intr_init_quirk)(void); -extern int (*arch_trap_init_quirk)(void); -extern char * (*arch_memory_setup_quirk)(void); -extern int (*mach_get_smp_config_quirk)(unsigned int early); -extern int (*mach_find_smp_config_quirk)(unsigned int reserve); +struct mpc_config_processor; +struct mpc_config_bus; +struct mp_config_oemtable; +struct x86_quirks { + int (*arch_pre_time_init)(void); + int (*arch_time_init)(void); + int (*arch_pre_intr_init)(void); + int (*arch_intr_init)(void); + int (*arch_trap_init)(void); + char * (*arch_memory_setup)(void); + int (*mach_get_smp_config)(unsigned int early); + int (*mach_find_smp_config)(unsigned int reserve); + + int *mpc_record; + int (*mpc_apic_id)(struct mpc_config_processor *m); + void (*mpc_oem_bus_info)(struct mpc_config_bus *m, char *name); + void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); + void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, + unsigned short oemsize); +}; + +extern struct x86_quirks *x86_quirks; #ifndef CONFIG_PARAVIRT #define paravirt_post_allocator_init() do {} while (0) @@ -76,6 +91,7 @@ extern unsigned long init_pg_tables_start; extern unsigned long init_pg_tables_end; #else +void __init x86_64_init_pda(void); void __init x86_64_start_kernel(char *real_mode); void __init x86_64_start_reservations(char *real_mode_data); diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h index f15186d39c6..6dac49364e9 100644 --- a/include/asm-x86/signal.h +++ b/include/asm-x86/signal.h @@ -181,12 +181,12 @@ typedef struct sigaltstack { #ifdef __KERNEL__ #include <asm/sigcontext.h> -#ifdef __386__ +#ifdef __i386__ #define __HAVE_ARCH_SIG_BITOPS #define sigaddset(set,sig) \ - (__builtin_constantp(sig) \ + (__builtin_constant_p(sig) \ ? __const_sigaddset((set), (sig)) \ : __gen_sigaddset((set), (sig))) diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index c2784b3e0b7..3c877f74f27 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -25,6 +25,8 @@ extern cpumask_t cpu_callin_map; extern void (*mtrr_hook)(void); extern void zap_low_mappings(void); +extern int __cpuinit get_local_pda(int cpu); + extern int smp_num_siblings; extern unsigned int num_processors; extern cpumask_t cpu_initialized; diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index 21e89bf92f1..e39c790dbfd 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -6,7 +6,7 @@ #include <asm/page.h> #include <asm/processor.h> #include <linux/compiler.h> - +#include <asm/paravirt.h> /* * Your basic SMP spinlocks, allowing only a single CPU anywhere * @@ -54,21 +54,21 @@ * much between them in performance though, especially as locks are out of line. */ #if (NR_CPUS < 256) -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); return (((tmp >> 8) & 0xff) != (tmp & 0xff)); } -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); - return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1; + return (((tmp >> 8) - tmp) & 0xff) > 1; } -static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) { short inc = 0x0100; @@ -87,9 +87,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) : "memory", "cc"); } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) - -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) { int tmp; short new; @@ -110,7 +108,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) return tmp; } -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incb %0" : "+m" (lock->slock) @@ -118,21 +116,21 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) : "memory", "cc"); } #else -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); return (((tmp >> 16) & 0xffff) != (tmp & 0xffff)); } -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); - return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1; + return (((tmp >> 16) - tmp) & 0xffff) > 1; } -static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) { int inc = 0x00010000; int tmp; @@ -153,9 +151,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) : "memory", "cc"); } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) - -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) { int tmp; int new; @@ -177,7 +173,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) return tmp; } -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incw %0" : "+m" (lock->slock) @@ -186,6 +182,98 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) } #endif +#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) + +#ifdef CONFIG_PARAVIRT +/* + * Define virtualization-friendly old-style lock byte lock, for use in + * pv_lock_ops if desired. + * + * This differs from the pre-2.6.24 spinlock by always using xchgb + * rather than decb to take the lock; this allows it to use a + * zero-initialized lock structure. It also maintains a 1-byte + * contention counter, so that we can implement + * __byte_spin_is_contended. + */ +struct __byte_spinlock { + s8 lock; + s8 spinners; +}; + +static inline int __byte_spin_is_locked(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + return bl->lock != 0; +} + +static inline int __byte_spin_is_contended(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + return bl->spinners != 0; +} + +static inline void __byte_spin_lock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + s8 val = 1; + + asm("1: xchgb %1, %0\n" + " test %1,%1\n" + " jz 3f\n" + " " LOCK_PREFIX "incb %2\n" + "2: rep;nop\n" + " cmpb $1, %0\n" + " je 2b\n" + " " LOCK_PREFIX "decb %2\n" + " jmp 1b\n" + "3:" + : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); +} + +static inline int __byte_spin_trylock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + u8 old = 1; + + asm("xchgb %1,%0" + : "+m" (bl->lock), "+q" (old) : : "memory"); + + return old == 0; +} + +static inline void __byte_spin_unlock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + smp_wmb(); + bl->lock = 0; +} +#else /* !CONFIG_PARAVIRT */ +static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +{ + return __ticket_spin_is_locked(lock); +} + +static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +{ + return __ticket_spin_is_contended(lock); +} + +static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +{ + __ticket_spin_lock(lock); +} + +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +{ + return __ticket_spin_trylock(lock); +} + +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +{ + __ticket_spin_unlock(lock); +} +#endif /* CONFIG_PARAVIRT */ + static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { while (__raw_spin_is_locked(lock)) diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h index 9029cf78cf5..06c071c9eee 100644 --- a/include/asm-x86/spinlock_types.h +++ b/include/asm-x86/spinlock_types.h @@ -5,7 +5,7 @@ # error "please don't include this file directly" #endif -typedef struct { +typedef struct raw_spinlock { unsigned int slock; } raw_spinlock_t; diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h index f5d9e74b1e4..2730b351afc 100644 --- a/include/asm-x86/swiotlb.h +++ b/include/asm-x86/swiotlb.h @@ -35,7 +35,7 @@ extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction); -extern int swiotlb_dma_mapping_error(dma_addr_t dma_addr); +extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); extern void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); @@ -45,12 +45,14 @@ extern int swiotlb_force; #ifdef CONFIG_SWIOTLB extern int swiotlb; +extern void pci_swiotlb_init(void); #else #define swiotlb 0 +static inline void pci_swiotlb_init(void) +{ +} #endif -extern void pci_swiotlb_init(void); - static inline void dma_mark_clean(void *addr, size_t size) {} #endif /* _ASM_SWIOTLB_H */ diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index 895339d2bc0..da0a675adf9 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -75,13 +75,10 @@ struct thread_info { #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ #define TIF_IRET 5 /* force IRET */ -#ifdef CONFIG_X86_32 #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ -#endif #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ -#define TIF_HRTICK_RESCHED 11 /* reprogram hrtick timer */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_IA32 17 /* 32bit process */ #define TIF_FORK 18 /* ret_from_fork */ @@ -100,15 +97,10 @@ struct thread_info { #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_IRET (1 << TIF_IRET) -#ifdef CONFIG_X86_32 #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) -#else -#define _TIF_SYSCALL_EMU 0 -#endif #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) -#define _TIF_HRTICK_RESCHED (1 << TIF_HRTICK_RESCHED) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_IA32 (1 << TIF_IA32) #define _TIF_FORK (1 << TIF_FORK) @@ -121,18 +113,27 @@ struct thread_info { #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) +/* work to do in syscall_trace_enter() */ +#define _TIF_WORK_SYSCALL_ENTRY \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \ + _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) + +/* work to do in syscall_trace_leave() */ +#define _TIF_WORK_SYSCALL_EXIT \ + (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP) + /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ (0x0000FFFF & \ - ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP| \ - _TIF_SECCOMP|_TIF_SYSCALL_EMU)) + ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \ + _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) /* work to do on any return to user space */ #define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY|_TIF_HRTICK_RESCHED) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ @@ -151,6 +152,8 @@ struct thread_info { #define THREAD_FLAGS GFP_KERNEL #endif +#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR + #define alloc_thread_info(tsk) \ ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h new file mode 100644 index 00000000000..a4b65a71bd6 --- /dev/null +++ b/include/asm-x86/traps.h @@ -0,0 +1,66 @@ +#ifndef _ASM_X86_TRAPS_H +#define _ASM_X86_TRAPS_H + +/* Common in X86_32 and X86_64 */ +asmlinkage void divide_error(void); +asmlinkage void debug(void); +asmlinkage void nmi(void); +asmlinkage void int3(void); +asmlinkage void overflow(void); +asmlinkage void bounds(void); +asmlinkage void invalid_op(void); +asmlinkage void device_not_available(void); +asmlinkage void coprocessor_segment_overrun(void); +asmlinkage void invalid_TSS(void); +asmlinkage void segment_not_present(void); +asmlinkage void stack_segment(void); +asmlinkage void general_protection(void); +asmlinkage void page_fault(void); +asmlinkage void coprocessor_error(void); +asmlinkage void simd_coprocessor_error(void); +asmlinkage void alignment_check(void); +asmlinkage void spurious_interrupt_bug(void); +#ifdef CONFIG_X86_MCE +asmlinkage void machine_check(void); +#endif /* CONFIG_X86_MCE */ + +void do_divide_error(struct pt_regs *, long); +void do_overflow(struct pt_regs *, long); +void do_bounds(struct pt_regs *, long); +void do_coprocessor_segment_overrun(struct pt_regs *, long); +void do_invalid_TSS(struct pt_regs *, long); +void do_segment_not_present(struct pt_regs *, long); +void do_stack_segment(struct pt_regs *, long); +void do_alignment_check(struct pt_regs *, long); +void do_invalid_op(struct pt_regs *, long); +void do_general_protection(struct pt_regs *, long); +void do_nmi(struct pt_regs *, long); + +extern int panic_on_unrecovered_nmi; +extern int kstack_depth_to_print; + +#ifdef CONFIG_X86_32 + +void do_iret_error(struct pt_regs *, long); +void do_int3(struct pt_regs *, long); +void do_debug(struct pt_regs *, long); +void math_error(void __user *); +void do_coprocessor_error(struct pt_regs *, long); +void do_simd_coprocessor_error(struct pt_regs *, long); +void do_spurious_interrupt_bug(struct pt_regs *, long); +unsigned long patch_espfix_desc(unsigned long, unsigned long); +asmlinkage void math_emulate(long); + +#else /* CONFIG_X86_32 */ + +asmlinkage void double_fault(void); + +asmlinkage void do_int3(struct pt_regs *, long); +asmlinkage void do_stack_segment(struct pt_regs *, long); +asmlinkage void do_debug(struct pt_regs *, unsigned long); +asmlinkage void do_coprocessor_error(struct pt_regs *); +asmlinkage void do_simd_coprocessor_error(struct pt_regs *); +asmlinkage void do_spurious_interrupt_bug(struct pt_regs *); + +#endif /* CONFIG_X86_32 */ +#endif /* _ASM_X86_TRAPS_H */ diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index f6fa4d841bb..5f702d1d521 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -451,3 +451,4 @@ extern struct movsl_mask { #endif #endif + diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index 8317d94771d..d7394673b77 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h @@ -332,6 +332,12 @@ #define __NR_fallocate 324 #define __NR_timerfd_settime 325 #define __NR_timerfd_gettime 326 +#define __NR_signalfd4 327 +#define __NR_eventfd2 328 +#define __NR_epoll_create1 329 +#define __NR_dup3 330 +#define __NR_pipe2 331 +#define __NR_inotify_init1 332 #ifdef __KERNEL__ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index 9c1a4a3470d..3a341d79179 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -639,6 +639,20 @@ __SYSCALL(__NR_fallocate, sys_fallocate) __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) #define __NR_timerfd_gettime 287 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) +#define __NR_paccept 288 +__SYSCALL(__NR_paccept, sys_paccept) +#define __NR_signalfd4 289 +__SYSCALL(__NR_signalfd4, sys_signalfd4) +#define __NR_eventfd2 290 +__SYSCALL(__NR_eventfd2, sys_eventfd2) +#define __NR_epoll_create1 291 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_dup3 292 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR_pipe2 293 +__SYSCALL(__NR_pipe2, sys_pipe2) +#define __NR_inotify_init1 294 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) #ifndef __NO_STUBS diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h new file mode 100644 index 00000000000..aa73362ff5d --- /dev/null +++ b/include/asm-x86/uv/bios.h @@ -0,0 +1,68 @@ +#ifndef _ASM_X86_BIOS_H +#define _ASM_X86_BIOS_H + +/* + * BIOS layer definitions. + * + * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/rtc.h> + +#define BIOS_FREQ_BASE 0x01000001 + +enum { + BIOS_FREQ_BASE_PLATFORM = 0, + BIOS_FREQ_BASE_INTERVAL_TIMER = 1, + BIOS_FREQ_BASE_REALTIME_CLOCK = 2 +}; + +# define BIOS_CALL(result, a0, a1, a2, a3, a4, a5, a6, a7) \ + do { \ + /* XXX - the real call goes here */ \ + result.status = BIOS_STATUS_UNIMPLEMENTED; \ + isrv.v0 = 0; \ + isrv.v1 = 0; \ + } while (0) + +enum { + BIOS_STATUS_SUCCESS = 0, + BIOS_STATUS_UNIMPLEMENTED = -1, + BIOS_STATUS_EINVAL = -2, + BIOS_STATUS_ERROR = -3 +}; + +struct uv_bios_retval { + /* + * A zero status value indicates call completed without error. + * A negative status value indicates reason of call failure. + * A positive status value indicates success but an + * informational value should be printed (e.g., "reboot for + * change to take effect"). + */ + s64 status; + u64 v0; + u64 v1; + u64 v2; +}; + +extern long +x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, + unsigned long *drift_info); +extern const char *x86_bios_strerror(long status); + +#endif /* _ASM_X86_BIOS_H */ diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h index 91ac0dfb758..610b6b308e9 100644 --- a/include/asm-x86/uv/uv_bau.h +++ b/include/asm-x86/uv/uv_bau.h @@ -40,11 +40,6 @@ #define UV_ACTIVATION_DESCRIPTOR_SIZE 32 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 -#define UV_BAU_MESSAGE 200 -/* - * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h - * To be dynamically allocated in the future - */ #define UV_NET_ENDPOINT_INTD 0x38 #define UV_DESC_BASE_PNODE_SHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49 diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h index 86e085e003d..8e18fb80f5e 100644 --- a/include/asm-x86/vdso.h +++ b/include/asm-x86/vdso.h @@ -36,4 +36,12 @@ extern const char VDSO32_PRELINK[]; extern void __user __kernel_sigreturn; extern void __user __kernel_rt_sigreturn; +/* + * These symbols are defined by vdso32.S to mark the bounds + * of the ELF DSO images included therein. + */ +extern const char vdso32_int80_start, vdso32_int80_end; +extern const char vdso32_syscall_start, vdso32_syscall_end; +extern const char vdso32_sysenter_start, vdso32_sysenter_end; + #endif /* asm-x86/vdso.h */ diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h index f8d57ea1f05..8ded7472002 100644 --- a/include/asm-x86/xen/events.h +++ b/include/asm-x86/xen/events.h @@ -5,6 +5,7 @@ enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, XEN_CALL_FUNCTION_SINGLE_VECTOR, + XEN_SPIN_UNLOCK_VECTOR, XEN_NR_IPIS, }; diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index 2a4f9b41d68..91cb7fd5c12 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -40,83 +40,157 @@ #include <xen/interface/sched.h> #include <xen/interface/physdev.h> +/* + * The hypercall asms have to meet several constraints: + * - Work on 32- and 64-bit. + * The two architectures put their arguments in different sets of + * registers. + * + * - Work around asm syntax quirks + * It isn't possible to specify one of the rNN registers in a + * constraint, so we use explicit register variables to get the + * args into the right place. + * + * - Mark all registers as potentially clobbered + * Even unused parameters can be clobbered by the hypervisor, so we + * need to make sure gcc knows it. + * + * - Avoid compiler bugs. + * This is the tricky part. Because x86_32 has such a constrained + * register set, gcc versions below 4.3 have trouble generating + * code when all the arg registers and memory are trashed by the + * asm. There are syntactically simpler ways of achieving the + * semantics below, but they cause the compiler to crash. + * + * The only combination I found which works is: + * - assign the __argX variables first + * - list all actually used parameters as "+r" (__argX) + * - clobber the rest + * + * The result certainly isn't pretty, and it really shows up cpp's + * weakness as as macro language. Sorry. (But let's just give thanks + * there aren't more than 5 arguments...) + */ + extern struct { char _entry[32]; } hypercall_page[]; +#define __HYPERCALL "call hypercall_page+%c[offset]" +#define __HYPERCALL_ENTRY(x) \ + [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0])) + +#ifdef CONFIG_X86_32 +#define __HYPERCALL_RETREG "eax" +#define __HYPERCALL_ARG1REG "ebx" +#define __HYPERCALL_ARG2REG "ecx" +#define __HYPERCALL_ARG3REG "edx" +#define __HYPERCALL_ARG4REG "esi" +#define __HYPERCALL_ARG5REG "edi" +#else +#define __HYPERCALL_RETREG "rax" +#define __HYPERCALL_ARG1REG "rdi" +#define __HYPERCALL_ARG2REG "rsi" +#define __HYPERCALL_ARG3REG "rdx" +#define __HYPERCALL_ARG4REG "r10" +#define __HYPERCALL_ARG5REG "r8" +#endif + +#define __HYPERCALL_DECLS \ + register unsigned long __res asm(__HYPERCALL_RETREG); \ + register unsigned long __arg1 asm(__HYPERCALL_ARG1REG) = __arg1; \ + register unsigned long __arg2 asm(__HYPERCALL_ARG2REG) = __arg2; \ + register unsigned long __arg3 asm(__HYPERCALL_ARG3REG) = __arg3; \ + register unsigned long __arg4 asm(__HYPERCALL_ARG4REG) = __arg4; \ + register unsigned long __arg5 asm(__HYPERCALL_ARG5REG) = __arg5; + +#define __HYPERCALL_0PARAM "=r" (__res) +#define __HYPERCALL_1PARAM __HYPERCALL_0PARAM, "+r" (__arg1) +#define __HYPERCALL_2PARAM __HYPERCALL_1PARAM, "+r" (__arg2) +#define __HYPERCALL_3PARAM __HYPERCALL_2PARAM, "+r" (__arg3) +#define __HYPERCALL_4PARAM __HYPERCALL_3PARAM, "+r" (__arg4) +#define __HYPERCALL_5PARAM __HYPERCALL_4PARAM, "+r" (__arg5) + +#define __HYPERCALL_0ARG() +#define __HYPERCALL_1ARG(a1) \ + __HYPERCALL_0ARG() __arg1 = (unsigned long)(a1); +#define __HYPERCALL_2ARG(a1,a2) \ + __HYPERCALL_1ARG(a1) __arg2 = (unsigned long)(a2); +#define __HYPERCALL_3ARG(a1,a2,a3) \ + __HYPERCALL_2ARG(a1,a2) __arg3 = (unsigned long)(a3); +#define __HYPERCALL_4ARG(a1,a2,a3,a4) \ + __HYPERCALL_3ARG(a1,a2,a3) __arg4 = (unsigned long)(a4); +#define __HYPERCALL_5ARG(a1,a2,a3,a4,a5) \ + __HYPERCALL_4ARG(a1,a2,a3,a4) __arg5 = (unsigned long)(a5); + +#define __HYPERCALL_CLOBBER5 "memory" +#define __HYPERCALL_CLOBBER4 __HYPERCALL_CLOBBER5, __HYPERCALL_ARG5REG +#define __HYPERCALL_CLOBBER3 __HYPERCALL_CLOBBER4, __HYPERCALL_ARG4REG +#define __HYPERCALL_CLOBBER2 __HYPERCALL_CLOBBER3, __HYPERCALL_ARG3REG +#define __HYPERCALL_CLOBBER1 __HYPERCALL_CLOBBER2, __HYPERCALL_ARG2REG +#define __HYPERCALL_CLOBBER0 __HYPERCALL_CLOBBER1, __HYPERCALL_ARG1REG + #define _hypercall0(type, name) \ ({ \ - long __res; \ - asm volatile ( \ - "call %[call]" \ - : "=a" (__res) \ - : [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ - : "memory" ); \ + __HYPERCALL_DECLS; \ + __HYPERCALL_0ARG(); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_0PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER0); \ (type)__res; \ }) #define _hypercall1(type, name, a1) \ ({ \ - long __res, __ign1; \ - asm volatile ( \ - "call %[call]" \ - : "=a" (__res), "=b" (__ign1) \ - : "1" ((long)(a1)), \ - [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ - : "memory" ); \ + __HYPERCALL_DECLS; \ + __HYPERCALL_1ARG(a1); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_1PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER1); \ (type)__res; \ }) #define _hypercall2(type, name, a1, a2) \ ({ \ - long __res, __ign1, __ign2; \ - asm volatile ( \ - "call %[call]" \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ - : "memory" ); \ + __HYPERCALL_DECLS; \ + __HYPERCALL_2ARG(a1, a2); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_2PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER2); \ (type)__res; \ }) #define _hypercall3(type, name, a1, a2, a3) \ ({ \ - long __res, __ign1, __ign2, __ign3; \ - asm volatile ( \ - "call %[call]" \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)), \ - [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ - : "memory" ); \ + __HYPERCALL_DECLS; \ + __HYPERCALL_3ARG(a1, a2, a3); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_3PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER3); \ (type)__res; \ }) #define _hypercall4(type, name, a1, a2, a3, a4) \ ({ \ - long __res, __ign1, __ign2, __ign3, __ign4; \ - asm volatile ( \ - "call %[call]" \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" (__ign4) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)), "4" ((long)(a4)), \ - [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ - : "memory" ); \ + __HYPERCALL_DECLS; \ + __HYPERCALL_4ARG(a1, a2, a3, a4); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_4PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER4); \ (type)__res; \ }) #define _hypercall5(type, name, a1, a2, a3, a4, a5) \ ({ \ - long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ - asm volatile ( \ - "call %[call]" \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)), "4" ((long)(a4)), \ - "5" ((long)(a5)), \ - [call] "m" (hypercall_page[__HYPERVISOR_##name]) \ - : "memory" ); \ + __HYPERCALL_DECLS; \ + __HYPERCALL_5ARG(a1, a2, a3, a4, a5); \ + asm volatile (__HYPERCALL \ + : __HYPERCALL_5PARAM \ + : __HYPERCALL_ENTRY(name) \ + : __HYPERCALL_CLOBBER5); \ (type)__res; \ }) @@ -152,6 +226,7 @@ HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp) return _hypercall2(int, stack_switch, ss, esp); } +#ifdef CONFIG_X86_32 static inline int HYPERVISOR_set_callbacks(unsigned long event_selector, unsigned long event_address, @@ -162,6 +237,17 @@ HYPERVISOR_set_callbacks(unsigned long event_selector, event_selector, event_address, failsafe_selector, failsafe_address); } +#else /* CONFIG_X86_64 */ +static inline int +HYPERVISOR_set_callbacks(unsigned long event_address, + unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, + syscall_address); +} +#endif /* CONFIG_X86_{32,64} */ static inline int HYPERVISOR_callback_op(int cmd, void *arg) @@ -223,12 +309,12 @@ static inline int HYPERVISOR_update_va_mapping(unsigned long va, pte_t new_val, unsigned long flags) { - unsigned long pte_hi = 0; -#ifdef CONFIG_X86_PAE - pte_hi = new_val.pte_high; -#endif - return _hypercall4(int, update_va_mapping, va, - new_val.pte_low, pte_hi, flags); + if (sizeof(new_val) == sizeof(long)) + return _hypercall3(int, update_va_mapping, va, + new_val.pte, flags); + else + return _hypercall4(int, update_va_mapping, va, + new_val.pte, new_val.pte >> 32, flags); } static inline int @@ -281,12 +367,13 @@ static inline int HYPERVISOR_update_va_mapping_otherdomain(unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) { - unsigned long pte_hi = 0; -#ifdef CONFIG_X86_PAE - pte_hi = new_val.pte_high; -#endif - return _hypercall5(int, update_va_mapping_otherdomain, va, - new_val.pte_low, pte_hi, flags, domid); + if (sizeof(new_val) == sizeof(long)) + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val.pte, flags, domid); + else + return _hypercall5(int, update_va_mapping_otherdomain, va, + new_val.pte, new_val.pte >> 32, + flags, domid); } static inline int @@ -301,6 +388,14 @@ HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args) return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); } +#ifdef CONFIG_X86_64 +static inline int +HYPERVISOR_set_segment_base(int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} +#endif + static inline int HYPERVISOR_suspend(unsigned long srec) { @@ -327,14 +422,14 @@ MULTI_update_va_mapping(struct multicall_entry *mcl, unsigned long va, { mcl->op = __HYPERVISOR_update_va_mapping; mcl->args[0] = va; -#ifdef CONFIG_X86_PAE - mcl->args[1] = new_val.pte_low; - mcl->args[2] = new_val.pte_high; -#else - mcl->args[1] = new_val.pte_low; - mcl->args[2] = 0; -#endif - mcl->args[3] = flags; + if (sizeof(new_val) == sizeof(long)) { + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + } else { + mcl->args[1] = new_val.pte; + mcl->args[2] = new_val.pte >> 32; + mcl->args[3] = flags; + } } static inline void @@ -354,15 +449,16 @@ MULTI_update_va_mapping_otherdomain(struct multicall_entry *mcl, unsigned long v { mcl->op = __HYPERVISOR_update_va_mapping_otherdomain; mcl->args[0] = va; -#ifdef CONFIG_X86_PAE - mcl->args[1] = new_val.pte_low; - mcl->args[2] = new_val.pte_high; -#else - mcl->args[1] = new_val.pte_low; - mcl->args[2] = 0; -#endif - mcl->args[3] = flags; - mcl->args[4] = domid; + if (sizeof(new_val) == sizeof(long)) { + mcl->args[1] = new_val.pte; + mcl->args[2] = flags; + mcl->args[3] = domid; + } else { + mcl->args[1] = new_val.pte; + mcl->args[2] = new_val.pte >> 32; + mcl->args[3] = flags; + mcl->args[4] = domid; + } } static inline void @@ -370,10 +466,15 @@ MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, struct desc_struct desc) { mcl->op = __HYPERVISOR_update_descriptor; - mcl->args[0] = maddr; - mcl->args[1] = maddr >> 32; - mcl->args[2] = desc.a; - mcl->args[3] = desc.b; + if (sizeof(maddr) == sizeof(long)) { + mcl->args[0] = maddr; + mcl->args[1] = *(unsigned long *)&desc; + } else { + mcl->args[0] = maddr; + mcl->args[1] = maddr >> 32; + mcl->args[2] = desc.a; + mcl->args[3] = desc.b; + } } static inline void diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h index 8e15dd28c91..04ee0610014 100644 --- a/include/asm-x86/xen/hypervisor.h +++ b/include/asm-x86/xen/hypervisor.h @@ -35,7 +35,6 @@ #include <linux/types.h> #include <linux/kernel.h> -#include <linux/version.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h index 6227000a1e8..9d810f2538a 100644 --- a/include/asm-x86/xen/interface.h +++ b/include/asm-x86/xen/interface.h @@ -1,13 +1,13 @@ /****************************************************************************** * arch-x86_32.h * - * Guest OS interface to x86 32-bit Xen. + * Guest OS interface to x86 Xen. * * Copyright (c) 2004, K A Fraser */ -#ifndef __XEN_PUBLIC_ARCH_X86_32_H__ -#define __XEN_PUBLIC_ARCH_X86_32_H__ +#ifndef __ASM_X86_XEN_INTERFACE_H +#define __ASM_X86_XEN_INTERFACE_H #ifdef __XEN__ #define __DEFINE_GUEST_HANDLE(name, type) \ @@ -57,6 +57,17 @@ DEFINE_GUEST_HANDLE(long); DEFINE_GUEST_HANDLE(void); #endif +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 32 + /* * SEGMENT DESCRIPTOR TABLES */ @@ -71,58 +82,21 @@ DEFINE_GUEST_HANDLE(void); #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) /* - * These flat segments are in the Xen-private section of every GDT. Since these - * are also present in the initial GDT, many OSes will be able to avoid - * installing their own GDT. - */ -#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ -#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ -#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ -#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ -#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ -#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ - -#define FLAT_KERNEL_CS FLAT_RING1_CS -#define FLAT_KERNEL_DS FLAT_RING1_DS -#define FLAT_KERNEL_SS FLAT_RING1_SS -#define FLAT_USER_CS FLAT_RING3_CS -#define FLAT_USER_DS FLAT_RING3_DS -#define FLAT_USER_SS FLAT_RING3_SS - -/* And the trap vector is... */ -#define TRAP_INSTR "int $0x82" - -/* - * Virtual addresses beyond this are not modifiable by guest OSes. The - * machine->physical mapping table starts at this address, read-only. - */ -#ifdef CONFIG_X86_PAE -#define __HYPERVISOR_VIRT_START 0xF5800000 -#else -#define __HYPERVISOR_VIRT_START 0xFC000000 -#endif - -#ifndef HYPERVISOR_VIRT_START -#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) -#endif - -#ifndef machine_to_phys_mapping -#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) -#endif - -/* Maximum number of virtual CPUs in multi-processor guests. */ -#define MAX_VIRT_CPUS 32 - -#ifndef __ASSEMBLY__ - -/* * Send an array of these to HYPERVISOR_set_trap_table() + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: Noone may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter */ #define TI_GET_DPL(_ti) ((_ti)->flags & 3) #define TI_GET_IF(_ti) ((_ti)->flags & 4) #define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl)) #define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2)) +#ifndef __ASSEMBLY__ struct trap_info { uint8_t vector; /* exception vector */ uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ @@ -131,32 +105,21 @@ struct trap_info { }; DEFINE_GUEST_HANDLE_STRUCT(trap_info); -struct cpu_user_regs { - uint32_t ebx; - uint32_t ecx; - uint32_t edx; - uint32_t esi; - uint32_t edi; - uint32_t ebp; - uint32_t eax; - uint16_t error_code; /* private */ - uint16_t entry_vector; /* private */ - uint32_t eip; - uint16_t cs; - uint8_t saved_upcall_mask; - uint8_t _pad0; - uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ - uint32_t esp; - uint16_t ss, _pad1; - uint16_t es, _pad2; - uint16_t ds, _pad3; - uint16_t fs, _pad4; - uint16_t gs, _pad5; +struct arch_shared_info { + unsigned long max_pfn; /* max pfn that appears in table */ + /* Frame containing list of mfns containing list of mfns containing p2m. */ + unsigned long pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; }; -DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); +#endif /* !__ASSEMBLY__ */ -typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ +#ifdef CONFIG_X86_32 +#include "interface_32.h" +#else +#include "interface_64.h" +#endif +#ifndef __ASSEMBLY__ /* * The following is all CPU context. Note that the fpu_ctxt block is filled * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. @@ -173,33 +136,29 @@ struct vcpu_guest_context { unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ unsigned long event_callback_cs; /* CS:EIP of event callback */ unsigned long event_callback_eip; unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; + unsigned long syscall_callback_eip; +#endif unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif }; DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); - -struct arch_shared_info { - unsigned long max_pfn; /* max pfn that appears in table */ - /* Frame containing list of mfns containing list of mfns containing p2m. */ - unsigned long pfn_to_mfn_frame_list_list; - unsigned long nmi_reason; -}; - -struct arch_vcpu_info { - unsigned long cr2; - unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ -}; - -struct xen_callback { - unsigned long cs; - unsigned long eip; -}; -#endif /* !__ASSEMBLY__ */ +#endif /* !__ASSEMBLY__ */ /* * Prefix forces emulation of some non-trapping instructions. @@ -213,4 +172,4 @@ struct xen_callback { #define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" #endif -#endif +#endif /* __ASM_X86_XEN_INTERFACE_H */ diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h new file mode 100644 index 00000000000..d8ac41d5db8 --- /dev/null +++ b/include/asm-x86/xen/interface_32.h @@ -0,0 +1,97 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __ASM_X86_XEN_INTERFACE_32_H +#define __ASM_X86_XEN_INTERFACE_32_H + + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +/* And the trap vector is... */ +#define TRAP_INSTR "int $0x82" + +/* + * Virtual addresses beyond this are not modifiable by guest OSes. The + * machine->physical mapping table starts at this address, read-only. + */ +#define __HYPERVISOR_VIRT_START 0xF5800000 + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(struct vcpu_info) == 64 */ +}; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#define XEN_CALLBACK(__cs, __eip) \ + ((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) }) +#endif /* !__ASSEMBLY__ */ + + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. + * + * Note that Xen is using the fact that the pagetable base is always + * page-aligned, and putting the 12 MSB of the address into the 12 LSB + * of cr3. + */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +#endif /* __ASM_X86_XEN_INTERFACE_32_H */ diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h new file mode 100644 index 00000000000..842266ce96e --- /dev/null +++ b/include/asm-x86/xen/interface_64.h @@ -0,0 +1,159 @@ +#ifndef __ASM_X86_XEN_INTERFACE_64_H +#define __ASM_X86_XEN_INTERFACE_64_H + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. + */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to iteself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +DEFINE_GUEST_HANDLE_STRUCT(cpu_user_regs); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; + +typedef unsigned long xen_callback_t; + +#define XEN_CALLBACK(__cs, __rip) \ + ((unsigned long)(__rip)) + +#endif /* !__ASSEMBLY__ */ + + +#endif /* __ASM_X86_XEN_INTERFACE_64_H */ diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index 377c04591c1..7b3835d3b77 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -124,7 +124,7 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) static inline unsigned long pte_mfn(pte_t pte) { - return (pte.pte & PTE_MASK) >> PAGE_SHIFT; + return (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; } static inline pte_t mfn_pte(unsigned long page_nr, pgprot_t pgprot) @@ -148,13 +148,17 @@ static inline pte_t __pte_ma(pteval_t x) } #define pmd_val_ma(v) ((v).pmd) +#ifdef __PAGETABLE_PUD_FOLDED #define pud_val_ma(v) ((v).pgd.pgd) +#else +#define pud_val_ma(v) ((v).pud) +#endif #define __pmd_ma(x) ((pmd_t) { (x) } ) #define pgd_val_ma(x) ((x).pgd) -xmaddr_t arbitrary_virt_to_machine(unsigned long address); +xmaddr_t arbitrary_virt_to_machine(void *address); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); |