Diffstat (limited to 'arch/x86_64/mm')
-rw-r--r--  arch/x86_64/mm/fault.c        79
-rw-r--r--  arch/x86_64/mm/init.c         38
-rw-r--r--  arch/x86_64/mm/k8topology.c    2
-rw-r--r--  arch/x86_64/mm/numa.c         39
-rw-r--r--  arch/x86_64/mm/srat.c          8
5 files changed, 118 insertions, 48 deletions
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 2e7c3c8ffe0..316c53de47b 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -264,6 +264,8 @@ static int vmalloc_fault(unsigned long address)
return -1;
if (pgd_none(*pgd))
set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
/* Below here mismatches are bugs because these lower tables
are shared */
@@ -312,21 +314,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
unsigned long flags;
siginfo_t info;
+ tsk = current;
+ mm = tsk->mm;
+ prefetchw(&mm->mmap_sem);
+
/* get the address */
__asm__("movq %%cr2,%0":"=r" (address));
- if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
- SIGSEGV) == NOTIFY_STOP)
- return;
-
- if (likely(regs->eflags & X86_EFLAGS_IF))
- local_irq_enable();
- if (unlikely(page_fault_trace))
- printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
- regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
-
- tsk = current;
- mm = tsk->mm;
info.si_code = SEGV_MAPERR;
@@ -351,10 +345,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
*/
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
((address >= VMALLOC_START && address < VMALLOC_END))) {
- if (vmalloc_fault(address) < 0)
- goto bad_area_nosemaphore;
- return;
+ if (vmalloc_fault(address) >= 0)
+ return;
}
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
/*
* Don't take the mm semaphore here. If we fixup a prefetch
* fault we could otherwise deadlock.
@@ -362,6 +358,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
goto bad_area_nosemaphore;
}
+ if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
+ SIGSEGV) == NOTIFY_STOP)
+ return;
+
+ if (likely(regs->eflags & X86_EFLAGS_IF))
+ local_irq_enable();
+
+ if (unlikely(page_fault_trace))
+ printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n",
+ regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code);
+
if (unlikely(error_code & PF_RSVD))
pgtable_bad(address, regs, error_code);
@@ -571,6 +578,48 @@ do_sigbus:
return;
}
+DEFINE_SPINLOCK(pgd_lock);
+struct page *pgd_list;
+
+void vmalloc_sync_all(void)
+{
+ /* Note that races in the updates of insync and start aren't
+ problematic:
+ insync can only get set bits added, and updates to start are only
+ improving performance (without affecting correctness if undone). */
+ static DECLARE_BITMAP(insync, PTRS_PER_PGD);
+ static unsigned long start = VMALLOC_START & PGDIR_MASK;
+ unsigned long address;
+
+ for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
+ if (!test_bit(pgd_index(address), insync)) {
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+ spin_lock(&pgd_lock);
+ for (page = pgd_list; page;
+ page = (struct page *)page->index) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
+ }
+ spin_unlock(&pgd_lock);
+ set_bit(pgd_index(address), insync);
+ }
+ if (address == start)
+ start = address + PGDIR_SIZE;
+ }
+ /* Check that there is no need to do the same for the modules area. */
+ BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+ BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
+ (__START_KERNEL & PGDIR_MASK)));
+}
+
static int __init enable_pagefaulttrace(char *str)
{
page_fault_trace = 1;
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 40ed13d263c..e5f7f1c3446 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -72,7 +72,7 @@ void show_mem(void)
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
- for_each_pgdat(pgdat) {
+ for_each_online_pgdat(pgdat) {
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pfn_to_page(pgdat->node_start_pfn + i);
total++;
@@ -94,7 +94,7 @@ void show_mem(void)
int after_bootmem;
-static void *spp_getpage(void)
+static __init void *spp_getpage(void)
{
void *ptr;
if (after_bootmem)
@@ -108,7 +108,7 @@ static void *spp_getpage(void)
return ptr;
}
-static void set_pte_phys(unsigned long vaddr,
+static __init void set_pte_phys(unsigned long vaddr,
unsigned long phys, pgprot_t prot)
{
pgd_t *pgd;
@@ -157,7 +157,8 @@ static void set_pte_phys(unsigned long vaddr,
}
/* NOTE: this is meant to be run only at boot */
-void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __init
+__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
{
unsigned long address = __fix_to_virt(idx);
@@ -225,6 +226,33 @@ static __meminit void unmap_low_page(int i)
ti->allocated = 0;
}
+/* Must run before zap_low_mappings */
+__init void *early_ioremap(unsigned long addr, unsigned long size)
+{
+ unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
+
+ /* actually usually some more */
+ if (size >= LARGE_PAGE_SIZE) {
+ printk("SMBIOS area too long %lu\n", size);
+ return NULL;
+ }
+ set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+ map += LARGE_PAGE_SIZE;
+ set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
+ __flush_tlb();
+ return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
+}
+
+/* To avoid virtual aliases later */
+__init void early_iounmap(void *addr, unsigned long size)
+{
+ if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
+ printk("early_iounmap: bad address %p\n", addr);
+ set_pmd(temp_mappings[0].pmd, __pmd(0));
+ set_pmd(temp_mappings[1].pmd, __pmd(0));
+ __flush_tlb();
+}
+
static void __meminit
phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
{
@@ -344,7 +372,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end)
pud_t *pud;
if (after_bootmem)
- pud = pud_offset_k(pgd, __PAGE_OFFSET);
+ pud = pud_offset_k(pgd, start & PGDIR_MASK);
else
pud = alloc_low_page(&map, &pud_phys);
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index dd60e71fdba..7c45c2d2b8b 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -43,7 +43,7 @@ static __init int find_northbridge(void)
int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
unsigned long prevbase;
- struct node nodes[8];
+ struct bootnode nodes[8];
int nodeid, i, nb;
unsigned char nodeids[8];
int found = 0;
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 22e51beee8d..4be82d6e2b4 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -25,8 +25,7 @@
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
bootmem_data_t plat_node_bdata[MAX_NUMNODES];
-int memnode_shift;
-u8 memnodemap[NODEMAPSIZE];
+struct memnode memnode;
unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
[0 ... NR_CPUS-1] = NUMA_NO_NODE
@@ -47,7 +46,7 @@ int numa_off __initdata;
* -1 if node overlap or lost ram (shift too big)
*/
static int __init
-populate_memnodemap(const struct node *nodes, int numnodes, int shift)
+populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift)
{
int i;
int res = -1;
@@ -74,7 +73,7 @@ populate_memnodemap(const struct node *nodes, int numnodes, int shift)
return res;
}
-int __init compute_hash_shift(struct node *nodes, int numnodes)
+int __init compute_hash_shift(struct bootnode *nodes, int numnodes)
{
int shift = 20;
@@ -149,7 +148,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en
/* Initialize final allocator for a zone */
void __init setup_node_zones(int nodeid)
{
- unsigned long start_pfn, end_pfn;
+ unsigned long start_pfn, end_pfn, memmapsize, limit;
unsigned long zones[MAX_NR_ZONES];
unsigned long holes[MAX_NR_ZONES];
@@ -159,6 +158,16 @@ void __init setup_node_zones(int nodeid)
Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n",
nodeid, start_pfn, end_pfn);
+ /* Try to allocate mem_map at end to not fill up precious <4GB
+ memory. */
+ memmapsize = sizeof(struct page) * (end_pfn-start_pfn);
+ limit = end_pfn << PAGE_SHIFT;
+ NODE_DATA(nodeid)->node_mem_map =
+ __alloc_bootmem_core(NODE_DATA(nodeid)->bdata,
+ memmapsize, SMP_CACHE_BYTES,
+ round_down(limit - memmapsize, PAGE_SIZE),
+ limit);
+
size_zones(zones, holes, start_pfn, end_pfn);
free_area_init_node(nodeid, NODE_DATA(nodeid), zones,
start_pfn, holes);
@@ -191,7 +200,7 @@ int numa_fake __initdata = 0;
static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn)
{
int i;
- struct node nodes[MAX_NUMNODES];
+ struct bootnode nodes[MAX_NUMNODES];
unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
/* Kludge needed for the hash function */
@@ -357,8 +366,7 @@ void __init init_cpu_to_node(void)
EXPORT_SYMBOL(cpu_to_node);
EXPORT_SYMBOL(node_to_cpumask);
-EXPORT_SYMBOL(memnode_shift);
-EXPORT_SYMBOL(memnodemap);
+EXPORT_SYMBOL(memnode);
EXPORT_SYMBOL(node_data);
#ifdef CONFIG_DISCONTIGMEM
@@ -369,21 +377,6 @@ EXPORT_SYMBOL(node_data);
* Should do that.
*/
-/* Requires pfn_valid(pfn) to be true */
-struct page *pfn_to_page(unsigned long pfn)
-{
- int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT);
- return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map;
-}
-EXPORT_SYMBOL(pfn_to_page);
-
-unsigned long page_to_pfn(struct page *page)
-{
- return (long)(((page) - page_zone(page)->zone_mem_map) +
- page_zone(page)->zone_start_pfn);
-}
-EXPORT_SYMBOL(page_to_pfn);
-
int pfn_valid(unsigned long pfn)
{
unsigned nid;
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 482c2576736..2eb879590dc 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -23,7 +23,7 @@ static struct acpi_table_slit *acpi_slit;
static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
-static struct node nodes[MAX_NUMNODES] __initdata;
+static struct bootnode nodes[MAX_NUMNODES] __initdata;
static u8 pxm2node[256] = { [0 ... 255] = 0xff };
/* Too small nodes confuse the VM badly. Usually they result
@@ -57,7 +57,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end)
{
int i;
for_each_node_mask(i, nodes_parsed) {
- struct node *nd = &nodes[i];
+ struct bootnode *nd = &nodes[i];
if (nd->start == nd->end)
continue;
if (nd->end > start && nd->start < end)
@@ -70,7 +70,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end)
static __init void cutoff_node(int i, unsigned long start, unsigned long end)
{
- struct node *nd = &nodes[i];
+ struct bootnode *nd = &nodes[i];
if (nd->start < start) {
nd->start = start;
if (nd->end < nd->start)
@@ -159,7 +159,7 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa)
void __init
acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma)
{
- struct node *nd;
+ struct bootnode *nd;
unsigned long start, end;
int node, pxm;
int i;