From d86bb0dac792c6a9c92944b6db2687980c808094 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 25 Jul 2008 16:48:57 +0200 Subject: x86: convert init_64.c from round_up to roundup Signed-off-by: Joerg Roedel Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ec37121f670..e4805771b5b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -258,7 +258,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) void __init cleanup_highmap(void) { unsigned long vaddr = __START_KERNEL_map; - unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1; + unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; pmd_t *pmd = level2_kernel_pgt; pmd_t *last_pmd = pmd + PTRS_PER_PMD; @@ -474,14 +474,14 @@ static void __init find_early_table_space(unsigned long end) unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); if (direct_gbpages) { unsigned long extra; extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; } else pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); if (cpu_has_pse) { unsigned long extra; @@ -489,7 +489,7 @@ static void __init find_early_table_space(unsigned long end) ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); /* * RED-PEN putting page tables only on node 0 could -- cgit v1.2.3 From bd220a24a9263eaa70d5e6821383085950b5a13c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 5 Sep 2008 00:58:28 -0700 Subject: x86: move nonx_setup etc from common.c to init_64.c like 32 bit put it in init_32.c Signed-off-by: Yinghai Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d3746efb060..9c750d6307b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -88,6 +88,60 @@ early_param("gbpages", parse_direct_gbpages_on); int after_bootmem; +unsigned long __supported_pte_mask __read_mostly = ~0UL; +EXPORT_SYMBOL_GPL(__supported_pte_mask); + +static int do_not_nx __cpuinitdata; + +/* noexec=on|off +Control non executable mappings for 64bit processes. + +on Enable(default) +off Disable +*/ +static int __init nonx_setup(char *str) +{ + if (!str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; + do_not_nx = 0; + } else if (!strncmp(str, "off", 3)) { + do_not_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +} +early_param("noexec", nonx_setup); + +void __cpuinit check_efer(void) +{ + unsigned long efer; + + rdmsrl(MSR_EFER, efer); + if (!(efer & EFER_NX) || do_not_nx) + __supported_pte_mask &= ~_PAGE_NX; +} + +int force_personality32; + +/* noexec32=on|off +Control non executable heap for 32bit processes. +To control the stack too use noexec=off + +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC +*/ +static int __init nonx32_setup(char *str) +{ + if (!strcmp(str, "on")) + force_personality32 &= ~READ_IMPLIES_EXEC; + else if (!strcmp(str, "off")) + force_personality32 |= READ_IMPLIES_EXEC; + return 1; +} +__setup("noexec32=", nonx32_setup); + /* * NOTE: This function is marked __ref because it calls __init function * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. -- cgit v1.2.3 From deed05b7c017e49473e8c386efba196410fd18b3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 5 Sep 2008 10:23:26 +0200 Subject: x86, init_64.c: cleanup Clean up comments. Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9c750d6307b..1f6806b62eb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -93,12 +93,13 @@ EXPORT_SYMBOL_GPL(__supported_pte_mask); static int do_not_nx __cpuinitdata; -/* noexec=on|off -Control non executable mappings for 64bit processes. - -on Enable(default) -off Disable -*/ +/* + * noexec=on|off + * Control non-executable mappings for 64-bit processes. + * + * on Enable (default) + * off Disable + */ static int __init nonx_setup(char *str) { if (!str) @@ -125,13 +126,14 @@ void __cpuinit check_efer(void) int force_personality32; -/* noexec32=on|off -Control non executable heap for 32bit processes. -To control the stack too use noexec=off - -on PROT_READ does not imply PROT_EXEC for 32bit processes (default) -off PROT_READ implies PROT_EXEC -*/ +/* + * noexec32=on|off + * Control non executable heap for 32bit processes. + * To control the stack too use noexec=off + * + * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default) + * off PROT_READ implies PROT_EXEC + */ static int __init nonx32_setup(char *str) { if (!strcmp(str, "on")) -- cgit v1.2.3 From bb577f980ef35e2b0d00aeed566724e5032aa5eb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sun, 7 Sep 2008 01:51:33 -0700 Subject: x86: add periodic corruption check Perodically check for corruption in low phusical memory. Don't bother checking at fault time, since it won't show anything useful. Signed-off-by: Hugh Dickins Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d3746efb060..f4db5276fa2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -769,6 +769,8 @@ void __init mem_init(void) { long codesize, reservedpages, datasize, initsize; + start_periodic_check_for_corruption(); + pci_iommu_alloc(); /* clear_bss() already clear the empty_zero_page */ -- cgit v1.2.3 From a2699e477b8e6b17d4da64916f766dd5a2576c9c Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:38 -0700 Subject: x86, cpa: make the kernel physical mapping initialization a two pass sequence In the first pass, kernel physical mapping will be setup using large or small pages but uses the same PTE attributes as that of the early PTE attributes setup by early boot code in head_[32|64].S After flushing TLB's, we go through the second pass, which setups the direct mapped PTE's with the appropriate attributes (like NX, GLOBAL etc) which are runtime detectable. This two pass mechanism conforms to the TLB app note which says: "Software should not write to a paging-structure entry in a way that would change, for any linear address, both the page size and either the page frame or attributes." Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 99 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 84 insertions(+), 15 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d3746efb060..1ba945eb628 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -270,6 +270,8 @@ static __ref void unmap_low_page(void *adr) early_iounmap(adr, PAGE_SIZE); } +static int physical_mapping_iter; + static unsigned long __meminit phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) { @@ -290,16 +292,19 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) } if (pte_val(*pte)) - continue; + goto repeat_set_pte; if (0) printk(" pte=%p addr=%lx pte=%016lx\n", pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); + pages++; +repeat_set_pte: set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; - pages++; } - update_page_count(PG_LEVEL_4K, pages); + + if (physical_mapping_iter == 1) + update_page_count(PG_LEVEL_4K, pages); return last_map_addr; } @@ -318,7 +323,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, { unsigned long pages = 0; unsigned long last_map_addr = end; - unsigned long start = address; int i = pmd_index(address); @@ -341,15 +345,14 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, last_map_addr = phys_pte_update(pmd, address, end); spin_unlock(&init_mm.page_table_lock); + continue; } - /* Count entries we're using from level2_ident_pgt */ - if (start == 0) - pages++; - continue; + goto repeat_set_pte; } if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); @@ -366,7 +369,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); spin_unlock(&init_mm.page_table_lock); } - update_page_count(PG_LEVEL_2M, pages); + if (physical_mapping_iter == 1) + update_page_count(PG_LEVEL_2M, pages); return last_map_addr; } @@ -405,14 +409,18 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, } if (pud_val(*pud)) { - if (!pud_large(*pud)) + if (!pud_large(*pud)) { last_map_addr = phys_pmd_update(pud, addr, end, page_size_mask); - continue; + continue; + } + + goto repeat_set_pte; } if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); @@ -430,7 +438,9 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, spin_unlock(&init_mm.page_table_lock); } __flush_tlb_all(); - update_page_count(PG_LEVEL_1G, pages); + + if (physical_mapping_iter == 1) + update_page_count(PG_LEVEL_1G, pages); return last_map_addr; } @@ -494,15 +504,54 @@ static void __init init_gbpages(void) direct_gbpages = 0; } +static int is_kernel(unsigned long pfn) +{ + unsigned long pg_addresss = pfn << PAGE_SHIFT; + + if (pg_addresss >= (unsigned long) __pa(_text) && + pg_addresss <= (unsigned long) __pa(_end)) + return 1; + + return 0; +} + static unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask) { - unsigned long next, last_map_addr = end; + unsigned long next, last_map_addr; + u64 cached_supported_pte_mask = __supported_pte_mask; + unsigned long cache_start = start; + unsigned long cache_end = end; + + /* + * First iteration will setup identity mapping using large/small pages + * based on page_size_mask, with other attributes same as set by + * the early code in head_64.S + * + * Second iteration will setup the appropriate attributes + * as desired for the kernel identity mapping. + * + * This two pass mechanism conforms to the TLB app note which says: + * + * "Software should not write to a paging-structure entry in a way + * that would change, for any linear address, both the page size + * and either the page frame or attributes." + * + * For now, only difference between very early PTE attributes used in + * head_64.S and here is _PAGE_NX. + */ + BUILD_BUG_ON((__PAGE_KERNEL_LARGE & ~__PAGE_KERNEL_IDENT_LARGE_EXEC) + != _PAGE_NX); + __supported_pte_mask &= ~(_PAGE_NX); + physical_mapping_iter = 1; - start = (unsigned long)__va(start); - end = (unsigned long)__va(end); +repeat: + last_map_addr = cache_end; + + start = (unsigned long)__va(cache_start); + end = (unsigned long)__va(cache_end); for (; start < end; start = next) { pgd_t *pgd = pgd_offset_k(start); @@ -514,11 +563,21 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, next = end; if (pgd_val(*pgd)) { + /* + * Static identity mappings will be overwritten + * with run-time mappings. For example, this allows + * the static 0-1GB identity mapping to be mapped + * non-executable with this. + */ + if (is_kernel(pte_pfn(*((pte_t *) pgd)))) + goto realloc; + last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end), page_size_mask); continue; } +realloc: pud = alloc_low_page(&pud_phys); last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), page_size_mask); @@ -528,6 +587,16 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, pgd_populate(&init_mm, pgd, __va(pud_phys)); spin_unlock(&init_mm.page_table_lock); } + __flush_tlb_all(); + + if (physical_mapping_iter == 1) { + physical_mapping_iter = 2; + /* + * Second iteration will set the actual desired PTE attributes. + */ + __supported_pte_mask = cached_supported_pte_mask; + goto repeat; + } return last_map_addr; } -- cgit v1.2.3 From 0b8fdcbcd287a1fbe66817491e6149841ae25705 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 23 Sep 2008 14:00:39 -0700 Subject: x86, cpa: dont use large pages for kernel identity mapping with DEBUG_PAGEALLOC Don't use large pages for kernel identity mapping with DEBUG_PAGEALLOC. This will remove the need to split the large page for the allocated kernel page in the interrupt context. This will simplify cpa code(as we don't do the split any more from the interrupt context). cpa code simplication in the subsequent patches. Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1ba945eb628..9d7587ac1eb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -456,13 +456,14 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, return phys_pud_init(pud, addr, end, page_size_mask); } -static void __init find_early_table_space(unsigned long end) +static void __init find_early_table_space(unsigned long end, int use_pse, + int use_gbpages) { unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); - if (direct_gbpages) { + if (use_gbpages) { unsigned long extra; extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; @@ -470,7 +471,7 @@ static void __init find_early_table_space(unsigned long end) pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); - if (cpu_has_pse) { + if (use_pse) { unsigned long extra; extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -640,6 +641,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, struct map_range mr[NR_RANGE_MR]; int nr_range, i; + int use_pse, use_gbpages; printk(KERN_INFO "init_memory_mapping\n"); @@ -653,9 +655,21 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, if (!after_bootmem) init_gbpages(); - if (direct_gbpages) +#ifdef CONFIG_DEBUG_PAGEALLOC + /* + * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. + * This will simplify cpa(), which otherwise needs to support splitting + * large pages into small in interrupt context, etc. + */ + use_pse = use_gbpages = 0; +#else + use_pse = cpu_has_pse; + use_gbpages = direct_gbpages; +#endif + + if (use_gbpages) page_size_mask |= 1 << PG_LEVEL_1G; - if (cpu_has_pse) + if (use_pse) page_size_mask |= 1 << PG_LEVEL_2M; memset(mr, 0, sizeof(mr)); @@ -716,7 +730,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, (mr[i].page_size_mask & (1< Date: Tue, 23 Sep 2008 14:00:41 -0700 Subject: x86, cpa: remove cpa pool code Interrupt context no longer splits large page in cpa(). So we can do away with cpa memory pool code. Signed-off-by: Suresh Siddha Cc: Suresh Siddha Cc: arjan@linux.intel.com Cc: venkatesh.pallipadi@intel.com Cc: jeremy@goop.org Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9d7587ac1eb..f54a4d97530 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -889,8 +889,6 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10); - - cpa_init(); } void free_init_pages(char *what, unsigned long begin, unsigned long end) -- cgit v1.2.3 From 28dd033f43ca957cd751e02652b36c6fa364ca18 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 29 Sep 2008 12:13:26 -0700 Subject: x86: fix pagetable init 64-bit breakage Fix _end alignment check - can trigger a crash if _end happens to be on a page boundary. Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index f54a4d97530..6116ff0d741 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -510,7 +510,7 @@ static int is_kernel(unsigned long pfn) unsigned long pg_addresss = pfn << PAGE_SHIFT; if (pg_addresss >= (unsigned long) __pa(_text) && - pg_addresss <= (unsigned long) __pa(_end)) + pg_addresss < (unsigned long) __pa(_end)) return 1; return 0; -- cgit v1.2.3 From b27a43c1e90582facad44de67d02bc9e9f900289 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 7 Oct 2008 13:58:46 -0700 Subject: x86, cpa: make the kernel physical mapping initialization a two pass sequence, fix Jeremy Fitzhardinge wrote: > I'd noticed that current tip/master hasn't been booting under Xen, and I > just got around to bisecting it down to this change. > > commit 065ae73c5462d42e9761afb76f2b52965ff45bd6 > Author: Suresh Siddha > > x86, cpa: make the kernel physical mapping initialization a two pass sequence > > This patch is causing Xen to fail various pagetable updates because it > ends up remapping pagetables to RW, which Xen explicitly prohibits (as > that would allow guests to make arbitrary changes to pagetables, rather > than have them mediated by the hypervisor). Instead of making init a two pass sequence, to satisfy the Intel's TLB Application note (developer.intel.com/design/processor/applnots/317080.pdf Section 6 page 26), we preserve the original page permissions when fragmenting the large mappings and don't touch the existing memory mapping (which satisfies Xen's requirements). Only open issue is: on a native linux kernel, we will go back to mapping the first 0-1GB kernel identity mapping as executable (because of the static mapping setup in head_64.S). We can fix this in a different patch if needed. Signed-off-by: Suresh Siddha Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 149 +++++++++++++++++++++----------------------------- 1 file changed, 61 insertions(+), 88 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6116ff0d741..8c7eae490a2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -270,10 +270,9 @@ static __ref void unmap_low_page(void *adr) early_iounmap(adr, PAGE_SIZE); } -static int physical_mapping_iter; - static unsigned long __meminit -phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) +phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, + pgprot_t prot) { unsigned pages = 0; unsigned long last_map_addr = end; @@ -291,35 +290,40 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) break; } + /* + * We will re-use the existing mapping. + * Xen for example has some special requirements, like mapping + * pagetable pages as RO. So assume someone who pre-setup + * these mappings are more intelligent. + */ if (pte_val(*pte)) - goto repeat_set_pte; + continue; if (0) printk(" pte=%p addr=%lx pte=%016lx\n", pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); pages++; -repeat_set_pte: - set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); + set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, prot)); last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; } - if (physical_mapping_iter == 1) - update_page_count(PG_LEVEL_4K, pages); + update_page_count(PG_LEVEL_4K, pages); return last_map_addr; } static unsigned long __meminit -phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) +phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end, + pgprot_t prot) { pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); - return phys_pte_init(pte, address, end); + return phys_pte_init(pte, address, end, prot); } static unsigned long __meminit phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { unsigned long pages = 0; unsigned long last_map_addr = end; @@ -330,6 +334,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, unsigned long pte_phys; pmd_t *pmd = pmd_page + pmd_index(address); pte_t *pte; + pgprot_t new_prot = prot; if (address >= end) { if (!after_bootmem) { @@ -343,45 +348,58 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pte_update(pmd, address, - end); + end, prot); spin_unlock(&init_mm.page_table_lock); continue; } - goto repeat_set_pte; + /* + * If we are ok with PG_LEVEL_2M mapping, then we will + * use the existing mapping, + * + * Otherwise, we will split the large page mapping but + * use the same existing protection bits except for + * large page, so that we don't violate Intel's TLB + * Application note (317080) which says, while changing + * the page sizes, new and old translations should + * not differ with respect to page frame and + * attributes. + */ + if (page_size_mask & (1 << PG_LEVEL_2M)) + continue; + new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); } if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + pfn_pte(address >> PAGE_SHIFT, + __pgprot(pgprot_val(prot) | _PAGE_PSE))); spin_unlock(&init_mm.page_table_lock); last_map_addr = (address & PMD_MASK) + PMD_SIZE; continue; } pte = alloc_low_page(&pte_phys); - last_map_addr = phys_pte_init(pte, address, end); + last_map_addr = phys_pte_init(pte, address, end, new_prot); unmap_low_page(pte); spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); spin_unlock(&init_mm.page_table_lock); } - if (physical_mapping_iter == 1) - update_page_count(PG_LEVEL_2M, pages); + update_page_count(PG_LEVEL_2M, pages); return last_map_addr; } static unsigned long __meminit phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, - unsigned long page_size_mask) + unsigned long page_size_mask, pgprot_t prot) { pmd_t *pmd = pmd_offset(pud, 0); unsigned long last_map_addr; - last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); + last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot); __flush_tlb_all(); return last_map_addr; } @@ -398,6 +416,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, unsigned long pmd_phys; pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; + pgprot_t prot = PAGE_KERNEL; if (addr >= end) break; @@ -411,16 +430,28 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (pud_val(*pud)) { if (!pud_large(*pud)) { last_map_addr = phys_pmd_update(pud, addr, end, - page_size_mask); + page_size_mask, prot); continue; } - - goto repeat_set_pte; + /* + * If we are ok with PG_LEVEL_1G mapping, then we will + * use the existing mapping. + * + * Otherwise, we will split the gbpage mapping but use + * the same existing protection bits except for large + * page, so that we don't violate Intel's TLB + * Application note (317080) which says, while changing + * the page sizes, new and old translations should + * not differ with respect to page frame and + * attributes. + */ + if (page_size_mask & (1 << PG_LEVEL_1G)) + continue; + prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); } if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); @@ -430,7 +461,8 @@ repeat_set_pte: } pmd = alloc_low_page(&pmd_phys); - last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); + last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, + prot); unmap_low_page(pmd); spin_lock(&init_mm.page_table_lock); @@ -439,8 +471,7 @@ repeat_set_pte: } __flush_tlb_all(); - if (physical_mapping_iter == 1) - update_page_count(PG_LEVEL_1G, pages); + update_page_count(PG_LEVEL_1G, pages); return last_map_addr; } @@ -505,54 +536,15 @@ static void __init init_gbpages(void) direct_gbpages = 0; } -static int is_kernel(unsigned long pfn) -{ - unsigned long pg_addresss = pfn << PAGE_SHIFT; - - if (pg_addresss >= (unsigned long) __pa(_text) && - pg_addresss < (unsigned long) __pa(_end)) - return 1; - - return 0; -} - static unsigned long __init kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask) { - unsigned long next, last_map_addr; - u64 cached_supported_pte_mask = __supported_pte_mask; - unsigned long cache_start = start; - unsigned long cache_end = end; - - /* - * First iteration will setup identity mapping using large/small pages - * based on page_size_mask, with other attributes same as set by - * the early code in head_64.S - * - * Second iteration will setup the appropriate attributes - * as desired for the kernel identity mapping. - * - * This two pass mechanism conforms to the TLB app note which says: - * - * "Software should not write to a paging-structure entry in a way - * that would change, for any linear address, both the page size - * and either the page frame or attributes." - * - * For now, only difference between very early PTE attributes used in - * head_64.S and here is _PAGE_NX. - */ - BUILD_BUG_ON((__PAGE_KERNEL_LARGE & ~__PAGE_KERNEL_IDENT_LARGE_EXEC) - != _PAGE_NX); - __supported_pte_mask &= ~(_PAGE_NX); - physical_mapping_iter = 1; + unsigned long next, last_map_addr = end; -repeat: - last_map_addr = cache_end; - - start = (unsigned long)__va(cache_start); - end = (unsigned long)__va(cache_end); + start = (unsigned long)__va(start); + end = (unsigned long)__va(end); for (; start < end; start = next) { pgd_t *pgd = pgd_offset_k(start); @@ -564,21 +556,11 @@ repeat: next = end; if (pgd_val(*pgd)) { - /* - * Static identity mappings will be overwritten - * with run-time mappings. For example, this allows - * the static 0-1GB identity mapping to be mapped - * non-executable with this. - */ - if (is_kernel(pte_pfn(*((pte_t *) pgd)))) - goto realloc; - last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end), page_size_mask); continue; } -realloc: pud = alloc_low_page(&pud_phys); last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), page_size_mask); @@ -590,15 +572,6 @@ realloc: } __flush_tlb_all(); - if (physical_mapping_iter == 1) { - physical_mapping_iter = 2; - /* - * Second iteration will set the actual desired PTE attributes. - */ - __supported_pte_mask = cached_supported_pte_mask; - goto repeat; - } - return last_map_addr; } -- cgit v1.2.3 From 46eaa6702016e3ac9a188172a2c309d6ca1be1cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 12 Oct 2008 15:06:29 +0200 Subject: x86: memory corruption check - cleanup Move the prototypes from the generic kernel.h header to the more appropriate include/asm-x86/bios_ebda.h header file. Also, remove the check from the power management code - this is a pure x86 matter for now. Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d84d3e91d34..3e10054c573 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -31,6 +31,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From be43d72835ba610e4af274f2d123b26f66f4f7ed Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 15:21:13 -0700 Subject: x86: add _PAGE_IOMAP pte flag for IO mappings Use one of the software-defined PTE bits to indicate that a mapping is intended for an IO address. On native hardware this is irrelevent, since a physical address is a physical address. But in a virtual environment, physical addresses are also virtualized, so there needs to be some way to distinguish between pseudo-physical addresses and actual hardware addresses; _PAGE_IOMAP indicates this intent. By default, __supported_pte_mask masks out _PAGE_IOMAP, so it doesn't even appear in the final pagetable. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3e10054c573..dec5c775e92 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -89,7 +89,7 @@ early_param("gbpages", parse_direct_gbpages_on); int after_bootmem; -unsigned long __supported_pte_mask __read_mostly = ~0UL; +pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); static int do_not_nx __cpuinitdata; -- cgit v1.2.3 From 1494177942b23b7094ca291d37e6f6263fa60fdd Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 15:21:15 -0700 Subject: x86: add early_memremap() early_ioremap() is also used to map normal memory when constructing the linear memory mapping. However, since we sometimes need to be able to distinguish between actual IO mappings and normal memory mappings, add a early_memremap() call, which maps with PAGE_KERNEL (as opposed to PAGE_KERNEL_IO for early_ioremap()), and use it when constructing pagetables. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dec5c775e92..5beb8968345 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -313,7 +313,7 @@ static __ref void *alloc_low_page(unsigned long *phys) if (pfn >= table_top) panic("alloc_low_page: ran out of memory"); - adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); + adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); memset(adr, 0, PAGE_SIZE); *phys = pfn * PAGE_SIZE; return adr; -- cgit v1.2.3 From a32ad4626776f09b30ef98a872a5f6fb64fe6607 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sun, 7 Sep 2008 15:21:17 -0700 Subject: x86-64: don't check for map replacement The check prevents flags on mappings from being changed, which is not desireable. There's no need to check for replacing a mapping, and x86-32 does not do this check. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5beb8968345..7c8bb46e83e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -196,9 +196,6 @@ set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) } pte = pte_offset_kernel(pmd, vaddr); - if (!pte_none(*pte) && pte_val(new_pte) && - pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask)) - pte_ERROR(*pte); set_pte(pte, new_pte); /* -- cgit v1.2.3 From 5e72d9e4850c91b6a0f06fa803f7393b55a38aa8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 12 Sep 2008 15:43:04 +0100 Subject: x86-64: fix combining of regions in init_memory_mapping() When nr_range gets decremented, the same slot must be considered for coalescing with its new successor again. The issue is apparently pretty benign to native code, but surfaces as a boot time crash in our forward ported Xen tree (where the page table setup overall works differently than in native). Signed-off-by: Jan Beulich Acked-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm/init_64.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 7c8bb46e83e..b8e461d4941 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -746,7 +746,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, old_start = mr[i].start; memmove(&mr[i], &mr[i+1], (nr_range - 1 - i) * sizeof (struct map_range)); - mr[i].start = old_start; + mr[i--].start = old_start; nr_range--; } -- cgit v1.2.3