From a2f1b424900715ed9d1699c3bb88a434a2b42bc0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Add 4GB DMA32 zone Add a new 4GB GFP_DMA32 zone between the GFP_DMA and GFP_NORMAL zones. As a bit of historical background: when the x86-64 port was originally designed we had some discussion if we should use a 16MB DMA zone like i386 or a 4GB DMA zone like IA64 or both. Both was ruled out at this point because it was in early 2.4 when VM is still quite shakey and had bad troubles even dealing with one DMA zone. We settled on the 16MB DMA zone mainly because we worried about older soundcards and the floppy. But this has always caused problems since then because device drivers had trouble getting enough DMA able memory. These days the VM works much better and the wide use of NUMA has proven it can deal with many zones successfully. So this patch adds both zones. This helps drivers who need a lot of memory below 4GB because their hardware is not accessing more (graphic drivers - proprietary and free ones, video frame buffer drivers, sound drivers etc.). Previously they could only use IOMMU+16MB GFP_DMA, which was not enough memory. Another common problem is that hardware who has full memory addressing for >4GB misses it for some control structures in memory (like transmit rings or other metadata). They tended to allocate memory in the 16MB GFP_DMA or the IOMMU/swiotlb then using pci_alloc_consistent, but that can tie up a lot of precious 16MB GFPDMA/IOMMU/swiotlb memory (even on AMD systems the IOMMU tends to be quite small) especially if you have many devices. With the new zone pci_alloc_consistent can just put this stuff into memory below 4GB which works better. One argument was still if the zone should be 4GB or 2GB. The main motivation for 2GB would be an unnamed not so unpopular hardware raid controller (mostly found in older machines from a particular four letter company) who has a strange 2GB restriction in firmware. But that one works ok with swiotlb/IOMMU anyways, so it doesn't really need GFP_DMA32. I chose 4GB to be compatible with IA64 and because it seems to be the most common restriction. The new zone is so far added only for x86-64. For other architectures who don't set up this new zone nothing changes. Architectures can set a compatibility define in Kconfig CONFIG_DMA_IS_DMA32 that will define GFP_DMA32 as GFP_DMA. Otherwise it's a nop because on 32bit architectures it's normally not needed because GFP_NORMAL (=0) is DMA able enough. One problem is still that GFP_DMA means different things on different architectures. e.g. some drivers used to have #ifdef ia64 use GFP_DMA (trusting it to be 4GB) #elif __x86_64__ (use other hacks like the swiotlb because 16MB is not enough) ... . This was quite ugly and is now obsolete. These should be now converted to use GFP_DMA32 unconditionally. I haven't done this yet. Or best only use pci_alloc_consistent/dma_alloc_coherent which will use GFP_DMA32 transparently. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/dma.h | 11 +++++++++-- include/asm-x86_64/proto.h | 2 ++ include/linux/gfp.h | 11 +++++++++++ include/linux/mmzone.h | 16 +++++++++------- 4 files changed, 31 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/dma.h b/include/asm-x86_64/dma.h index 16fa3a064d0..6f2a817b6a7 100644 --- a/include/asm-x86_64/dma.h +++ b/include/asm-x86_64/dma.h @@ -72,8 +72,15 @@ #define MAX_DMA_CHANNELS 8 -/* The maximum address that we can perform a DMA transfer to on this platform */ -#define MAX_DMA_ADDRESS (PAGE_OFFSET+0x1000000) + +/* 16MB ISA DMA zone */ +#define MAX_DMA_PFN ((16*1024*1024) >> PAGE_SHIFT) + +/* 4GB broken PCI/AGP hardware bus master zone */ +#define MAX_DMA32_PFN ((4UL*1024*1024*1024) >> PAGE_SHIFT) + +/* Compat define for old dma zone */ +#define MAX_DMA_ADDRESS ((unsigned long)__va(MAX_DMA_PFN << PAGE_SHIFT)) /* 8237 DMA controllers */ #define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index dbb37b0adb4..c251152a065 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -22,6 +22,8 @@ extern void mtrr_bp_init(void); #define mtrr_bp_init() do {} while (0) #endif extern void init_memory_mapping(unsigned long start, unsigned long end); +extern void size_zones(unsigned long *z, unsigned long *h, + unsigned long start_pfn, unsigned long end_pfn); extern void system_call(void); extern int kernel_syscall(void); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index c3779432a72..4351e6bb5a7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -14,6 +14,13 @@ struct vm_area_struct; /* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) +#ifdef CONFIG_DMA_IS_DMA32 +#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ +#elif BITS_PER_LONG < 64 +#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ +#else +#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */ +#endif /* * Action modifiers - doesn't change the zoning @@ -64,6 +71,10 @@ struct vm_area_struct; #define GFP_DMA __GFP_DMA +/* 4GB DMA on some platforms */ +#define GFP_DMA32 __GFP_DMA32 + + #define gfp_zone(mask) ((__force int)((mask) & (__force gfp_t)GFP_ZONEMASK)) /* diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f5fa3082fd6..da7a829f856 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -71,10 +71,11 @@ struct per_cpu_pageset { #endif #define ZONE_DMA 0 -#define ZONE_NORMAL 1 -#define ZONE_HIGHMEM 2 +#define ZONE_DMA32 1 +#define ZONE_NORMAL 2 +#define ZONE_HIGHMEM 3 -#define MAX_NR_ZONES 3 /* Sync this with ZONES_SHIFT */ +#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ #define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ @@ -108,9 +109,10 @@ struct per_cpu_pageset { /* * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. On a PC we have 3 zones: + * into multiple physical zones. On a PC we have 4 zones: * * ZONE_DMA < 16 MB ISA DMA capable memory + * ZONE_DMA32 0 MB Empty * ZONE_NORMAL 16-896 MB direct mapped by the kernel * ZONE_HIGHMEM > 896 MB only page cache and user processes */ @@ -455,10 +457,10 @@ extern struct pglist_data contig_page_data; #if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) /* - * with 32 bit page->flags field, we reserve 8 bits for node/zone info. - * there are 3 zones (2 bits) and this leaves 8-2=6 bits for nodes. + * with 32 bit page->flags field, we reserve 9 bits for node/zone info. + * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes. */ -#define FLAGS_RESERVED 8 +#define FLAGS_RESERVED 9 #elif BITS_PER_LONG == 64 /* -- cgit v1.2.3 From 979edfadbae2286eec5b46143c00e81bca96498e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Adjust, correct, and complete the HPET definitions for x86-64. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/hpet.h | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/hpet.h b/include/asm-x86_64/hpet.h index a3877f57099..c20c28f5c7a 100644 --- a/include/asm-x86_64/hpet.h +++ b/include/asm-x86_64/hpet.h @@ -14,18 +14,18 @@ #define HPET_CFG 0x010 #define HPET_STATUS 0x020 #define HPET_COUNTER 0x0f0 -#define HPET_T0_CFG 0x100 -#define HPET_T0_CMP 0x108 -#define HPET_T0_ROUTE 0x110 -#define HPET_T1_CFG 0x120 -#define HPET_T1_CMP 0x128 -#define HPET_T1_ROUTE 0x130 -#define HPET_T2_CFG 0x140 -#define HPET_T2_CMP 0x148 -#define HPET_T2_ROUTE 0x150 +#define HPET_Tn_OFFSET 0x20 +#define HPET_Tn_CFG(n) (0x100 + (n) * HPET_Tn_OFFSET) +#define HPET_Tn_ROUTE(n) (0x104 + (n) * HPET_Tn_OFFSET) +#define HPET_Tn_CMP(n) (0x108 + (n) * HPET_Tn_OFFSET) +#define HPET_T0_CFG HPET_Tn_CFG(0) +#define HPET_T0_CMP HPET_Tn_CMP(0) +#define HPET_T1_CFG HPET_Tn_CFG(1) +#define HPET_T1_CMP HPET_Tn_CMP(1) #define HPET_ID_VENDOR 0xffff0000 #define HPET_ID_LEGSUP 0x00008000 +#define HPET_ID_64BIT 0x00002000 #define HPET_ID_NUMBER 0x00001f00 #define HPET_ID_REV 0x000000ff #define HPET_ID_NUMBER_SHIFT 8 @@ -38,11 +38,18 @@ #define HPET_LEGACY_8254 2 #define HPET_LEGACY_RTC 8 -#define HPET_TN_ENABLE 0x004 -#define HPET_TN_PERIODIC 0x008 -#define HPET_TN_PERIODIC_CAP 0x010 -#define HPET_TN_SETVAL 0x040 -#define HPET_TN_32BIT 0x100 +#define HPET_TN_LEVEL 0x0002 +#define HPET_TN_ENABLE 0x0004 +#define HPET_TN_PERIODIC 0x0008 +#define HPET_TN_PERIODIC_CAP 0x0010 +#define HPET_TN_64BIT_CAP 0x0020 +#define HPET_TN_SETVAL 0x0040 +#define HPET_TN_32BIT 0x0100 +#define HPET_TN_ROUTE 0x3e00 +#define HPET_TN_FSB 0x4000 +#define HPET_TN_FSB_CAP 0x8000 + +#define HPET_TN_ROUTE_SHIFT 9 extern int is_hpet_enabled(void); extern int hpet_rtc_timer_init(void); -- cgit v1.2.3 From 89b831ef8bf5cfbb357dbc0a2e07700d7f20eec5 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Support for AMD specific MCE Threshold. MC4_MISC - DRAM Errors Threshold Register realized under AMD K8 Rev F. This register is used to count correctable and uncorrectable ECC errors that occur during DRAM read operations. The user may interface through sysfs files in order to change the threshold configuration. bank%d/error_count - reads current error count, write to clear. bank%d/interrupt_enable - set/clear interrupt enable. bank%d/threshold_limit - read/write the threshold limit. APIC vector 0xF9 in hw_irq.h. 5 software defined bank ids in mce.h. new apic.c function to setup threshold apic lvt. defaults to interrupt off, count enabled, and threshold limit max. sysfs interface created on /sys/devices/system/threshold. AK: added some ifdefs to make it compile on UP Signed-off-by: Jacob Shin Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/apic.h | 2 ++ include/asm-x86_64/hw_irq.h | 2 +- include/asm-x86_64/mce.h | 10 ++++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/apic.h b/include/asm-x86_64/apic.h index 6c5d5ca8383..5647b7de174 100644 --- a/include/asm-x86_64/apic.h +++ b/include/asm-x86_64/apic.h @@ -111,6 +111,8 @@ extern unsigned int nmi_watchdog; extern int disable_timer_pin_1; +extern void setup_threshold_lvt(unsigned long lvt_off); + #endif /* CONFIG_X86_LOCAL_APIC */ extern unsigned boot_cpu_id; diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index dc97668ea0f..c14a8c7267a 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h @@ -55,7 +55,7 @@ struct hw_interrupt_type; #define CALL_FUNCTION_VECTOR 0xfc #define KDB_VECTOR 0xfb /* reserved for KDB */ #define THERMAL_APIC_VECTOR 0xfa -/* 0xf9 free */ +#define THRESHOLD_APIC_VECTOR 0xf9 #define INVALIDATE_TLB_VECTOR_END 0xf8 #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f8 used for TLB flush */ diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h index 869249db679..5d298b799a9 100644 --- a/include/asm-x86_64/mce.h +++ b/include/asm-x86_64/mce.h @@ -67,6 +67,8 @@ struct mce_log { /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 +#define MCE_THRESHOLD_BASE MCE_EXTENDED_BANK + 1 /* MCE_AMD */ +#define MCE_THRESHOLD_DRAM_ECC MCE_THRESHOLD_BASE + 4 void mce_log(struct mce *m); #ifdef CONFIG_X86_MCE_INTEL @@ -77,4 +79,12 @@ static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) } #endif +#ifdef CONFIG_X86_MCE_AMD +void mce_amd_feature_init(struct cpuinfo_x86 *c); +#else +static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) +{ +} +#endif + #endif -- cgit v1.2.3 From 6004e1b7effcbb385a6b7c790e4b8008682cf679 Mon Sep 17 00:00:00 2001 From: James Cleverdon Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] i386/x86-64: Share interrupt vectors when there is a large number of interrupt sources Here's a patch that builds on Natalie Protasevich's IRQ compression patch and tries to work for MPS boots as well as ACPI. It is meant for a 4-node IBM x460 NUMA box, which was dying because it had interrupt pins with GSI numbers > NR_IRQS and thus overflowed irq_desc. The problem is that this system has 270 GSIs (which are 1:1 mapped with I/O APIC RTEs) and an 8-node box would have 540. This is much bigger than NR_IRQS (224 for both i386 and x86_64). Also, there aren't enough vectors to go around. There are about 190 usable vectors, not counting the reserved ones and the unused vectors at 0x20 to 0x2F. So, my patch attempts to compress the GSI range and share vectors by sharing IRQs. Cc: "Protasevich, Natalie" Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/desc.h | 3 +++ include/asm-x86_64/mpspec.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/desc.h b/include/asm-x86_64/desc.h index 68ac3c62fe3..1a3d380f9d5 100644 --- a/include/asm-x86_64/desc.h +++ b/include/asm-x86_64/desc.h @@ -98,16 +98,19 @@ static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsig static inline void set_intr_gate(int nr, void *func) { + BUG_ON((unsigned)nr > 0xFF); _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0); } static inline void set_intr_gate_ist(int nr, void *func, unsigned ist) { + BUG_ON((unsigned)nr > 0xFF); _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist); } static inline void set_system_gate(int nr, void *func) { + BUG_ON((unsigned)nr > 0xFF); _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0); } diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h index f267e10c023..6b375384f5c 100644 --- a/include/asm-x86_64/mpspec.h +++ b/include/asm-x86_64/mpspec.h @@ -157,7 +157,8 @@ struct mpc_config_lintsrc */ #define MAX_MP_BUSSES 256 -#define MAX_IRQ_SOURCES 256 +/* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. */ +#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) enum mp_bustype { MP_BUS_ISA = 1, MP_BUS_EISA, -- cgit v1.2.3 From 1dff7f3db5f045ccbfeca5bb00b0958a78501557 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Fix up outdated pfn_to_page comment pfn_to_page really requires pfn_valid to be true now, no question. Some people stumbled over it, but it was misleading and wrong. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/mmzone.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index b40c661f111..8d48dc7e43a 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -41,9 +41,7 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) #define pfn_to_nid(pfn) phys_to_nid((unsigned long)(pfn) << PAGE_SHIFT) #define kvaddr_to_nid(kaddr) phys_to_nid(__pa(kaddr)) -/* AK: this currently doesn't deal with invalid addresses. We'll see - if the 2.5 kernel doesn't pass them - (2.4 used to). */ +/* Requires pfn_valid(pfn) to be true */ #define pfn_to_page(pfn) ({ \ int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); \ ((pfn) - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; \ -- cgit v1.2.3 From 07808b74e7dab1aa385e698795875337d72daf7d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Remove obsolete ARCH_HAS_ATOMIC_UNSIGNED and page_flags_t Has been introduced for x86-64 at some point to save memory in struct page, but has been obsolete for some time. Just remove it. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/linux/mm.h | 10 ++-------- include/linux/mmzone.h | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5c1fb0a2e80..23fad4dae23 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -206,12 +206,6 @@ struct vm_operations_struct { struct mmu_gather; struct inode; -#ifdef ARCH_HAS_ATOMIC_UNSIGNED -typedef unsigned page_flags_t; -#else -typedef unsigned long page_flags_t; -#endif - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -219,7 +213,7 @@ typedef unsigned long page_flags_t; * a page. */ struct page { - page_flags_t flags; /* Atomic flags, some possibly + unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ atomic_t _count; /* Usage count, see below. */ atomic_t _mapcount; /* Count of ptes mapped in mms, @@ -435,7 +429,7 @@ static inline void put_page(struct page *page) #endif /* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */ -#define SECTIONS_PGOFF ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH) +#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH) #define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH) #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index da7a829f856..57fc99c67c3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -455,7 +455,7 @@ extern struct pglist_data contig_page_data; #include #endif -#if BITS_PER_LONG == 32 || defined(ARCH_HAS_ATOMIC_UNSIGNED) +#if BITS_PER_LONG == 32 /* * with 32 bit page->flags field, we reserve 9 bits for node/zone info. * there are 4 zones (3 bits) and this leaves 9-3=6 bits for nodes. -- cgit v1.2.3 From 69d81fcde7797342417591ba7affb372b9c86eae Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Speed up numa_node_id by putting it directly into the PDA Not go from the CPU number to an mapping array. Mode number is often used now in fast paths. This also adds a generic numa_node_id to all the topology includes Suggested by Eric Dumazet Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/numa.h | 2 ++ include/asm-x86_64/pda.h | 1 + include/asm-x86_64/topology.h | 2 ++ include/linux/mmzone.h | 2 ++ 4 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/numa.h b/include/asm-x86_64/numa.h index bcf55c3f7f7..d51e56fdc3d 100644 --- a/include/asm-x86_64/numa.h +++ b/include/asm-x86_64/numa.h @@ -17,6 +17,8 @@ extern void numa_add_cpu(int cpu); extern void numa_init_array(void); extern int numa_off; +extern void numa_set_node(int cpu, int node); + extern unsigned char apicid_to_node[256]; #define NUMA_NO_NODE 0xff diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h index bbf89aa8a1a..8733ccfa442 100644 --- a/include/asm-x86_64/pda.h +++ b/include/asm-x86_64/pda.h @@ -15,6 +15,7 @@ struct x8664_pda { int irqcount; /* Irq nesting counter. Starts with -1 */ int cpunumber; /* Logical CPU number */ char *irqstackptr; /* top of irqstack */ + int nodenumber; /* number of current node */ unsigned int __softirq_pending; unsigned int __nmi_count; /* number of NMI on this CPUs */ struct mm_struct *active_mm; diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h index 1c603cd7e4d..d39ebd5263e 100644 --- a/include/asm-x86_64/topology.h +++ b/include/asm-x86_64/topology.h @@ -28,6 +28,8 @@ extern int __node_distance(int, int); #define pcibus_to_node(bus) ((long)(bus->sysdata)) #define pcibus_to_cpumask(bus) node_to_cpumask(pcibus_to_node(bus)); +#define numa_node_id() read_pda(nodenumber) + /* sched_domains SD_NODE_INIT for x86_64 machines */ #define SD_NODE_INIT (struct sched_domain) { \ .span = CPU_MASK_NONE, \ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 57fc99c67c3..f3cffc354de 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -435,7 +435,9 @@ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, struct file *, #include /* Returns the number of the current Node. */ +#ifndef numa_node_id #define numa_node_id() (cpu_to_node(raw_smp_processor_id())) +#endif #ifndef CONFIG_NEED_MULTIPLE_NODES -- cgit v1.2.3 From f6c2e3330d3fdd5474bc3756da46fca889a30e33 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Unmap NULL during early bootup We should zap the low mappings, as soon as possible, so that we can catch kernel bugs more effectively. Previously early boot had NULL mapped and didn't trap on NULL references. This patch introduces boot_level4_pgt, which will always have low identity addresses mapped. Druing boot, all the processors will use this as their level4 pgt. On BP, we will switch to init_level4_pgt as soon as we enter C code and zap the low mappings as soon as we are done with the usage of identity low mapped addresses. On AP's we will zap the low mappings as soon as we jump to C code. Signed-off-by: Suresh Siddha Signed-off-by: Ashok Raj Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/pgtable.h | 1 + include/asm-x86_64/proto.h | 2 ++ include/asm-x86_64/smp.h | 1 - 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 7a07196a720..a204efb553d 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -16,6 +16,7 @@ extern pud_t level3_physmem_pgt[512]; extern pud_t level3_ident_pgt[512]; extern pmd_t level2_kernel_pgt[512]; extern pgd_t init_level4_pgt[]; +extern pgd_t boot_level4_pgt[]; extern unsigned long __supported_pte_mask; #define swapper_pg_dir init_level4_pgt diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index c251152a065..34501086afe 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -11,6 +11,8 @@ struct pt_regs; extern void start_kernel(void); extern void pda_init(int); +extern void zap_low_mappings(int cpu); + extern void early_idt_handler(void); extern void mcheck_init(struct cpuinfo_x86 *c); diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index c57ce407134..592161e979e 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -47,7 +47,6 @@ extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); -extern void zap_low_mappings(void); void smp_stop_cpu(void); extern int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int retry, int wait); -- cgit v1.2.3 From 6b75aeedde1e8a8513393d3c1367bf81bc5b0c67 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Don't apply __PHYSICAL_MASK to page frame numbers It is for physical addresses, not for PFNs. Pointed out by Tejun Heo. Cc: htejun@gmail.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/page.h | 2 +- include/asm-x86_64/pgtable.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index e5ab4d231f2..06e489f3247 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h @@ -11,7 +11,7 @@ #define PAGE_SIZE (1UL << PAGE_SHIFT) #endif #define PAGE_MASK (~(PAGE_SIZE-1)) -#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT)) +#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK) #define THREAD_ORDER 1 #ifdef __ASSEMBLY__ diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index a204efb553d..f8e87a57f3a 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -246,7 +246,7 @@ static inline unsigned long pud_bad(pud_t pud) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) /* FIXME: is this right? */ #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pte_pfn(x) ((pte_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) +#define pte_pfn(x) ((pte_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot) { @@ -353,7 +353,7 @@ static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address) #define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0) #define pmd_bad(x) ((pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE ) #define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot))) -#define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK) +#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT) #define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) #define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE }) -- cgit v1.2.3 From 2bc0414ee04fd8bb798760801f5d7476dff44241 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Only use asm/sections.h to declare section symbols Adding __initdata_* to asm-generic/sections.h Replaces a lot of open coded externs in arch/x86_64/* I had to change __bss_end to __bss_stop to match the other architectures. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-generic/sections.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 886dbd11689..0b49f9e070f 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -13,5 +13,6 @@ extern char _eextratext[] __attribute__((weak)); extern char _end[]; extern char __per_cpu_start[], __per_cpu_end[]; extern char __kprobes_text_start[], __kprobes_text_end[]; +extern char __initdata_begin[], __initdata_end[]; #endif /* _ASM_GENERIC_SECTIONS_H_ */ -- cgit v1.2.3 From 485832a5d928facd82f1525270d9f048da2063a1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Use int operations in spinlocks to support more than 128 CPUs spinning. Pointed out by Eric Dumazet Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/spinlock.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/spinlock.h b/include/asm-x86_64/spinlock.h index 69636831ad2..fe484a699cc 100644 --- a/include/asm-x86_64/spinlock.h +++ b/include/asm-x86_64/spinlock.h @@ -18,22 +18,22 @@ */ #define __raw_spin_is_locked(x) \ - (*(volatile signed char *)(&(x)->slock) <= 0) + (*(volatile signed int *)(&(x)->slock) <= 0) #define __raw_spin_lock_string \ "\n1:\t" \ - "lock ; decb %0\n\t" \ + "lock ; decl %0\n\t" \ "js 2f\n" \ LOCK_SECTION_START("") \ "2:\t" \ "rep;nop\n\t" \ - "cmpb $0,%0\n\t" \ + "cmpl $0,%0\n\t" \ "jle 2b\n\t" \ "jmp 1b\n" \ LOCK_SECTION_END #define __raw_spin_unlock_string \ - "movb $1,%0" \ + "movl $1,%0" \ :"=m" (lock->slock) : : "memory" static inline void __raw_spin_lock(raw_spinlock_t *lock) @@ -47,10 +47,10 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) static inline int __raw_spin_trylock(raw_spinlock_t *lock) { - char oldval; + int oldval; __asm__ __volatile__( - "xchgb %b0,%1" + "xchgl %0,%1" :"=q" (oldval), "=m" (lock->slock) :"0" (0) : "memory"); -- cgit v1.2.3 From 420f8f68c9c5148dddf946bebdbc7eacde2172cb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: New heuristics to find out hotpluggable CPUs. With a NR_CPUS==128 kernel with CPU hotplug enabled we would waste 4MB on per CPU data of all possible CPUs. The reason was that HOTPLUG always set up possible map to NR_CPUS cpus and then we need to allocate that much (each per CPU data is roughly ~32k now) The underlying problem is that ACPI didn't tell us how many hotplug CPUs the platform supports. So the old code just assumed all, which would lead to this memory wastage. This implements some new heuristics: - If the BIOS specified disabled CPUs in the ACPI/mptables assume they can be enabled later (this is bending the ACPI specification a bit, but seems like a obvious extension) - The user can overwrite it with a new additionals_cpus=NUM option - Otherwise use half of the available CPUs or 2, whatever is more. Cc: ashok.raj@intel.com Cc: len.brown@intel.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/smp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h index 592161e979e..cf8f969f902 100644 --- a/include/asm-x86_64/smp.h +++ b/include/asm-x86_64/smp.h @@ -81,6 +81,8 @@ extern int safe_smp_processor_id(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void prefill_possible_map(void); +extern unsigned num_processors; +extern unsigned disabled_cpus; #endif /* !ASSEMBLY */ -- cgit v1.2.3 From ea0be473a1f0ee89024a24d8ea4b05fbf6efcee3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Allow modular build of ia32 aout loader Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/ia32.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-x86_64/ia32.h b/include/asm-x86_64/ia32.h index 6efa00fe4e7..c7bc9c0525b 100644 --- a/include/asm-x86_64/ia32.h +++ b/include/asm-x86_64/ia32.h @@ -165,6 +165,11 @@ struct siginfo_t; int do_get_thread_area(struct thread_struct *t, struct user_desc __user *info); int do_set_thread_area(struct thread_struct *t, struct user_desc __user *info); int ia32_child_tls(struct task_struct *p, struct pt_regs *childregs); + +struct linux_binprm; +extern int ia32_setup_arg_pages(struct linux_binprm *bprm, + unsigned long stack_top, int exec_stack); + #endif #endif /* !CONFIG_IA32_SUPPORT */ -- cgit v1.2.3 From a88cde13bae3fffd6ecc812bdd02c91eafb6073e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Formatting fixes for arch/x86_64/kernel/process.c No functional changes. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/msr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/msr.h b/include/asm-x86_64/msr.h index 5a7fe3c6c3d..24dc39651bc 100644 --- a/include/asm-x86_64/msr.h +++ b/include/asm-x86_64/msr.h @@ -19,7 +19,7 @@ : "=a" (a__), "=d" (b__) \ : "c" (msr)); \ val = a__ | (b__<<32); \ -} while(0); +} while(0) #define wrmsr(msr,val1,val2) \ __asm__ __volatile__("wrmsr" \ -- cgit v1.2.3 From e90f22edf432512219cc2952f5811961abbd164f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Fix NUMA node lookup debug code which had bitrotted Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/mmzone.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index 8d48dc7e43a..69baaa8a3ce 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h @@ -17,16 +17,15 @@ /* Simple perfect hash to map physical addresses to node numbers */ extern int memnode_shift; extern u8 memnodemap[NODEMAPSIZE]; -extern int maxnode; extern struct pglist_data *node_data[]; static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) { - int nid; + unsigned nid; VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE); nid = memnodemap[addr >> memnode_shift]; - VIRTUAL_BUG_ON(nid > maxnode); + VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); return nid; } -- cgit v1.2.3 From 94605eff572b727aaad9b4b29bc358b919096503 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86-64/i386: Intel HT, Multi core detection fixes Fields obtained through cpuid vector 0x1(ebx[16:23]) and vector 0x4(eax[14:25], eax[26:31]) indicate the maximum values and might not always be the same as what is available and what OS sees. So make sure "siblings" and "cpu cores" values in /proc/cpuinfo reflect the values as seen by OS instead of what cpuid instruction says. This will also fix the buggy BIOS cases (for example where cpuid on a single core cpu says there are "2" siblings, even when HT is disabled in the BIOS. http://bugzilla.kernel.org/show_bug.cgi?id=4359) Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-i386/processor.h | 4 +++- include/asm-x86_64/processor.h | 4 +++- include/linux/bitops.h | 10 ++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 0a4ec764377..9cd4a05234a 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -65,7 +65,9 @@ struct cpuinfo_x86 { int f00f_bug; int coma_bug; unsigned long loops_per_jiffy; - unsigned char x86_num_cores; + unsigned char x86_max_cores; /* cpuid returned max cores value */ + unsigned char booted_cores; /* number of cores as seen by OS */ + unsigned char apicid; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index 03837d34fba..4861246548f 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -61,10 +61,12 @@ struct cpuinfo_x86 { int x86_cache_alignment; int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ __u8 x86_virt_bits, x86_phys_bits; - __u8 x86_num_cores; + __u8 x86_max_cores; /* cpuid returned max cores value */ __u32 x86_power; __u32 extended_cpuid_level; /* Max extended CPUID function supported */ unsigned long loops_per_jiffy; + __u8 apicid; + __u8 booted_cores; /* number of cores as seen by OS */ } ____cacheline_aligned; #define X86_VENDOR_INTEL 0 diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cb3c3ef50f5..38c2fb7ebe0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -84,6 +84,16 @@ static __inline__ int get_bitmask_order(unsigned int count) return order; /* We could be slightly more clever with -1 here... */ } +static __inline__ int get_count_order(unsigned int count) +{ + int order; + + order = fls(count) - 1; + if (count & (count - 1)) + order++; + return order; +} + /* * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word -- cgit v1.2.3 From a6f5deb2be4c82f24fefadcbf7e448f540c05ae6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Reduce number of retries for reset through keyboard controller Old code could retry for 10 seconds worst time. Only try it for one second now. Suggested by Yinghai Lu Cc: Yinghai.Lu@amd.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-i386/mach-default/mach_reboot.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-i386/mach-default/mach_reboot.h b/include/asm-i386/mach-default/mach_reboot.h index 06ae4d81ba6..a955e57ad01 100644 --- a/include/asm-i386/mach-default/mach_reboot.h +++ b/include/asm-i386/mach-default/mach_reboot.h @@ -19,7 +19,7 @@ static inline void kb_wait(void) static inline void mach_reboot(void) { int i; - for (i = 0; i < 100; i++) { + for (i = 0; i < 10; i++) { kb_wait(); udelay(50); outb(0x60, 0x64); /* write Controller Command Byte */ -- cgit v1.2.3 From 8e0d4f4e9132ae6e353f9cf27261627bcc7c65cc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Remove asm-x86_64/rwsem.h Not needed since x86-64 always uses the spinlock based rwsems. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/rwsem.h | 283 --------------------------------------------- 1 file changed, 283 deletions(-) delete mode 100644 include/asm-x86_64/rwsem.h (limited to 'include') diff --git a/include/asm-x86_64/rwsem.h b/include/asm-x86_64/rwsem.h deleted file mode 100644 index 46077e9c191..00000000000 --- a/include/asm-x86_64/rwsem.h +++ /dev/null @@ -1,283 +0,0 @@ -/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for x86_64+ - * - * Written by David Howells (dhowells@redhat.com). - * Ported by Andi Kleen to x86-64. - * - * Derived from asm-i386/semaphore.h and asm-i386/rwsem.h - * - * - * The MSW of the count is the negated number of active writers and waiting - * lockers, and the LSW is the total number of active locks - * - * The lock count is initialized to 0 (no active and no waiting lockers). - * - * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an - * uncontended lock. This can be determined because XADD returns the old value. - * Readers increment by 1 and see a positive value when uncontended, negative - * if there are writers (and maybe) readers waiting (in which case it goes to - * sleep). - * - * The value of WAITING_BIAS supports up to 32766 waiting processes. This can - * be extended to 65534 by manually checking the whole MSW rather than relying - * on the S flag. - * - * The value of ACTIVE_BIAS supports up to 65535 active processes. - * - * This should be totally fair - if anything is waiting, a process that wants a - * lock will go to the back of the queue. When the currently active lock is - * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consecutive readers at the - * front, then they'll all be woken up, but no other readers will be. - */ - -#ifndef _X8664_RWSEM_H -#define _X8664_RWSEM_H - -#ifndef _LINUX_RWSEM_H -#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" -#endif - -#ifdef __KERNEL__ - -#include -#include - -struct rwsem_waiter; - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -/* - * the semaphore definition - */ -struct rw_semaphore { - signed int count; -#define RWSEM_UNLOCKED_VALUE 0x00000000 -#define RWSEM_ACTIVE_BIAS 0x00000001 -#define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS (-0x00010000) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; - struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif -}; - -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - -#define __RWSEM_INITIALIZER(name) \ -{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif -} - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "# beginning down_read\n\t" -LOCK_PREFIX " incl (%%rdi)\n\t" /* adds 0x00000001, returns the old value */ - " js 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" - LOCK_SECTION_START("") \ - "2:\n\t" - " call rwsem_down_read_failed_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END \ - "# ending down_read\n\t" - : "+m"(sem->count) - : "D"(sem) - : "memory", "cc"); -} - - -/* - * trylock for reading -- returns 1 if successful, 0 if contention - */ -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - __s32 result, tmp; - __asm__ __volatile__( - "# beginning __down_read_trylock\n\t" - " movl %0,%1\n\t" - "1:\n\t" - " movl %1,%2\n\t" - " addl %3,%2\n\t" - " jle 2f\n\t" -LOCK_PREFIX " cmpxchgl %2,%0\n\t" - " jnz 1b\n\t" - "2:\n\t" - "# ending __down_read_trylock\n\t" - : "+m"(sem->count), "=&a"(result), "=&r"(tmp) - : "i"(RWSEM_ACTIVE_READ_BIAS) - : "memory", "cc"); - return result>=0 ? 1 : 0; -} - - -/* - * lock for writing - */ -static inline void __down_write(struct rw_semaphore *sem) -{ - int tmp; - - tmp = RWSEM_ACTIVE_WRITE_BIAS; - __asm__ __volatile__( - "# beginning down_write\n\t" -LOCK_PREFIX " xaddl %0,(%%rdi)\n\t" /* subtract 0x0000ffff, returns the old value */ - " testl %0,%0\n\t" /* was the count 0 before? */ - " jnz 2f\n\t" /* jump if we weren't granted the lock */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " call rwsem_down_write_failed_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END - "# ending down_write" - : "=&r" (tmp) - : "0"(tmp), "D"(sem) - : "memory", "cc"); -} - -/* - * trylock for writing -- returns 1 if successful, 0 if contention - */ -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - signed long ret = cmpxchg(&sem->count, - RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - if (ret == RWSEM_UNLOCKED_VALUE) - return 1; - return 0; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; - __asm__ __volatile__( - "# beginning __up_read\n\t" -LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* subtracts 1, returns the old value */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " decw %w[tmp]\n\t" /* do nothing if still outstanding active readers */ - " jnz 1b\n\t" - " call rwsem_wake_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END - "# ending __up_read\n" - : "+m"(sem->count), [tmp] "+r" (tmp) - : "D"(sem) - : "memory", "cc"); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - unsigned tmp; - __asm__ __volatile__( - "# beginning __up_write\n\t" - " movl %[bias],%[tmp]\n\t" -LOCK_PREFIX " xaddl %[tmp],(%%rdi)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ - " jnz 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " decw %w[tmp]\n\t" /* did the active count reduce to 0? */ - " jnz 1b\n\t" /* jump back if not */ - " call rwsem_wake_thunk\n\t" - " jmp 1b\n" - LOCK_SECTION_END - "# ending __up_write\n" - : "+m"(sem->count), [tmp] "=r" (tmp) - : "D"(sem), [bias] "i"(-RWSEM_ACTIVE_WRITE_BIAS) - : "memory", "cc"); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - __asm__ __volatile__( - "# beginning __downgrade_write\n\t" -LOCK_PREFIX " addl %[bias],(%%rdi)\n\t" /* transitions 0xZZZZ0001 -> 0xYYYY0001 */ - " js 2f\n\t" /* jump if the lock is being waited upon */ - "1:\n\t" - LOCK_SECTION_START("") - "2:\n\t" - " call rwsem_downgrade_thunk\n" - " jmp 1b\n" - LOCK_SECTION_END - "# ending __downgrade_write\n" - : "=m"(sem->count) - : "D"(sem), [bias] "i"(-RWSEM_WAITING_BIAS), "m"(sem->count) - : "memory", "cc"); -} - -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) -{ - __asm__ __volatile__( -LOCK_PREFIX "addl %1,%0" - :"=m"(sem->count) - :"ir"(delta), "m"(sem->count)); -} - -/* - * implement exchange and add functionality - */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) -{ - int tmp = delta; - - __asm__ __volatile__( -LOCK_PREFIX "xaddl %0,(%2)" - : "=r"(tmp), "=m"(sem->count) - : "r"(sem), "m"(sem->count), "0" (tmp) - : "memory"); - - return tmp+delta; -} - -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - -#endif /* __KERNEL__ */ -#endif /* _X8664_RWSEM_H */ -- cgit v1.2.3 From bf0f2e23834e2bf7d64b467ef07095b1c7e2c04b Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Set ____cacheline_maxaligned_in_smp alignment to 128 bytes The current value was correct before the introduction of Intel EM64T support - but now L1_CACHE_SHIFT_MAX can be less than L1_CACHE_SHIFT, which _is_ funny! Between the few users of ____cacheline_maxaligned_in_smp, we also have (for example) rcu_ctrlblk, and struct zone, with zone->{lru_,}lock. I.e. we have a lot of excess cacheline bouncing on them. No correctness issues, obviously. So this could even be merged for 2.6.14 (I'm not a fan of this idea, though). CC: Andi Kleen Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/cache.h b/include/asm-x86_64/cache.h index eda62bae124..33e53424128 100644 --- a/include/asm-x86_64/cache.h +++ b/include/asm-x86_64/cache.h @@ -9,6 +9,6 @@ /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) -#define L1_CACHE_SHIFT_MAX 6 /* largest L1 which this arch supports */ +#define L1_CACHE_SHIFT_MAX 7 /* largest L1 which this arch supports */ #endif -- cgit v1.2.3 From efbbdce94f6ea54cf06d9a06e4c95f6874ad64a8 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Use common sys_time64 Keeping this function does not makes sense because it's a copied (and buggy) copy of sys_time. The only difference is that now.tv_sec (which is a time_t, i.e. a 64-bit long) is copied (and truncated) into a int (32-bit). The prototype is the same (they both take a long __user *), so let's drop this and redirect it to sys_time (and make sure it exists by defining __ARCH_WANT_SYS_TIME). Only disadvantage is that the sys_stime definition is also compiled (may be fixed if needed by adding a separate __ARCH_WANT_SYS_STIME macro, and defining it for all arch's defining __ARCH_WANT_SYS_TIME except x86_64). Acked-by: Andi Kleen Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/unistd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h index 3c494b65d33..2c42150bce0 100644 --- a/include/asm-x86_64/unistd.h +++ b/include/asm-x86_64/unistd.h @@ -462,7 +462,7 @@ __SYSCALL(__NR_fremovexattr, sys_fremovexattr) #define __NR_tkill 200 __SYSCALL(__NR_tkill, sys_tkill) #define __NR_time 201 -__SYSCALL(__NR_time, sys_time64) +__SYSCALL(__NR_time, sys_time) #define __NR_futex 202 __SYSCALL(__NR_futex, sys_futex) #define __NR_sched_setaffinity 203 @@ -608,6 +608,7 @@ do { \ #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_COMPAT_SYS_TIME #endif -- cgit v1.2.3 From 8893166ff8694f36655009aa9bf8e7f2e1c9339f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Increase the maximum number of local APICs to the maximum This is needed for large multinode IBM systems which have a sparse APIC space in clustered mode, fully covering the available 8 bits. The previous kernels would limit the local APIC number to 127, which caused it to reject some of the CPUs at boot. I increased the maximum and shrunk the apic_version array a bit to make up for that (the version is only 8 bit, so don't need an full int to store) Cc: Chris McDermott Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- include/asm-x86_64/mpspec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h index 6b375384f5c..6f8a17d105a 100644 --- a/include/asm-x86_64/mpspec.h +++ b/include/asm-x86_64/mpspec.h @@ -16,7 +16,7 @@ /* * A maximum of 255 APICs with the current APIC ID architecture. */ -#define MAX_APICS 128 +#define MAX_APICS 255 struct intel_mp_floating { @@ -173,7 +173,7 @@ extern int smp_found_config; extern void find_smp_config (void); extern void get_smp_config (void); extern int nr_ioapics; -extern int apic_version [MAX_APICS]; +extern unsigned char apic_version [MAX_APICS]; extern int mp_irq_entries; extern struct mpc_config_intsrc mp_irqs [MAX_IRQ_SOURCES]; extern int mpc_default_type; -- cgit v1.2.3