aboutsummaryrefslogtreecommitdiff
path: root/arch/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig9
-rw-r--r--arch/x86_64/defconfig21
-rw-r--r--arch/x86_64/ia32/Makefile3
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c17
-rw-r--r--arch/x86_64/ia32/mmap32.c78
-rw-r--r--arch/x86_64/kernel/apic.c5
-rw-r--r--arch/x86_64/kernel/asm-offsets.c5
-rw-r--r--arch/x86_64/kernel/entry.S12
-rw-r--r--arch/x86_64/kernel/head.S108
-rw-r--r--arch/x86_64/kernel/setup64.c2
-rw-r--r--arch/x86_64/mm/Makefile2
-rw-r--r--arch/x86_64/mm/init.c180
-rw-r--r--arch/x86_64/mm/mmap.c30
13 files changed, 315 insertions, 157 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 2efc4be2270..2f9deca31cc 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -305,7 +305,11 @@ config ARCH_DISCONTIGMEM_DEFAULT
config ARCH_SPARSEMEM_ENABLE
def_bool y
- depends on NUMA
+ depends on (NUMA || EXPERIMENTAL)
+
+config ARCH_MEMORY_PROBE
+ def_bool y
+ depends on MEMORY_HOTPLUG
config ARCH_FLATMEM_ENABLE
def_bool y
@@ -315,6 +319,7 @@ source "mm/Kconfig"
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool y
+ depends on NUMA
config NR_CPUS
int "Maximum number of CPUs (2-256)"
@@ -350,7 +355,7 @@ config HPET_TIMER
<http://www.intel.com/hardwaredesign/hpetspec.htm>.
config X86_PM_TIMER
- bool "PM timer"
+ bool "PM timer" if EMBEDDED
depends on ACPI
default y
help
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 054dcd8a5e9..5231fe83ea4 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.15-git7
-# Wed Jan 11 11:57:36 2006
+# Linux kernel version: 2.6.15-git12
+# Mon Jan 16 13:09:08 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
@@ -319,6 +319,11 @@ CONFIG_IPV6=y
# CONFIG_ATALK is not set
# CONFIG_X25 is not set
# CONFIG_LAPB is not set
+
+#
+# TIPC Configuration (EXPERIMENTAL)
+#
+# CONFIG_TIPC is not set
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
@@ -537,8 +542,7 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y
# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
-CONFIG_SCSI_QLA2XXX=y
-# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set
+# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_LPFC is not set
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
@@ -805,6 +809,7 @@ CONFIG_SOFT_WATCHDOG=y
# CONFIG_W83877F_WDT is not set
# CONFIG_W83977F_WDT is not set
# CONFIG_MACHZ_WDT is not set
+# CONFIG_SBC_EPX_C3_WATCHDOG is not set
#
# PCI-based Watchdog Cards
@@ -850,6 +855,12 @@ CONFIG_HPET_MMAP=y
# CONFIG_I2C is not set
#
+# SPI support
+#
+# CONFIG_SPI is not set
+# CONFIG_SPI_MASTER is not set
+
+#
# Dallas's 1-wire bus
#
# CONFIG_W1 is not set
@@ -992,6 +1003,7 @@ CONFIG_USB_STORAGE=y
#
CONFIG_USB_HID=y
CONFIG_USB_HIDINPUT=y
+# CONFIG_USB_HIDINPUT_POWERBOOK is not set
# CONFIG_HID_FF is not set
# CONFIG_USB_HIDDEV is not set
# CONFIG_USB_AIPTEK is not set
@@ -1276,6 +1288,7 @@ CONFIG_DETECT_SOFTLOCKUP=y
CONFIG_DEBUG_FS=y
# CONFIG_DEBUG_VM is not set
# CONFIG_FRAME_POINTER is not set
+# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_INIT_DEBUG=y
# CONFIG_DEBUG_RODATA is not set
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile
index 051608d5592..929e6b0771f 100644
--- a/arch/x86_64/ia32/Makefile
+++ b/arch/x86_64/ia32/Makefile
@@ -3,7 +3,8 @@
#
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \
- ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o
+ ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \
+ mmap32.o
sysv-$(CONFIG_SYSVIPC) := ipc32.o
obj-$(CONFIG_IA32_EMULATION) += $(sysv-y)
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 029bddab045..572b3b28772 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -293,8 +293,6 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int
} while(0)
-#define elf_map elf32_map
-
#include <linux/module.h>
MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries.");
@@ -390,21 +388,6 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
}
EXPORT_SYMBOL(ia32_setup_arg_pages);
-static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
-{
- unsigned long map_addr;
- struct task_struct *me = current;
-
- down_write(&me->mm->mmap_sem);
- map_addr = do_mmap(filep, ELF_PAGESTART(addr),
- eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot,
- type,
- eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr));
- up_write(&me->mm->mmap_sem);
- return(map_addr);
-}
-
#ifdef CONFIG_SYSCTL
/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>
diff --git a/arch/x86_64/ia32/mmap32.c b/arch/x86_64/ia32/mmap32.c
new file mode 100644
index 00000000000..079f4132575
--- /dev/null
+++ b/arch/x86_64/ia32/mmap32.c
@@ -0,0 +1,78 @@
+/*
+ * linux/arch/x86_64/ia32/mm/mmap.c
+ *
+ * flexible mmap layout support
+ *
+ * Based on the i386 version which was
+ *
+ * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ *
+ * Started by Ingo Molnar <mingo@elte.hu>
+ */
+
+#include <linux/personality.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+
+/*
+ * Top of mmap area (just below the process stack).
+ *
+ * Leave an at least ~128 MB hole.
+ */
+#define MIN_GAP (128*1024*1024)
+#define MAX_GAP (TASK_SIZE/6*5)
+
+static inline unsigned long mmap_base(struct mm_struct *mm)
+{
+ unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur;
+ unsigned long random_factor = 0;
+
+ if (current->flags & PF_RANDOMIZE)
+ random_factor = get_random_int() % (1024*1024);
+
+ if (gap < MIN_GAP)
+ gap = MIN_GAP;
+ else if (gap > MAX_GAP)
+ gap = MAX_GAP;
+
+ return PAGE_ALIGN(TASK_SIZE - gap - random_factor);
+}
+
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void ia32_pick_mmap_layout(struct mm_struct *mm)
+{
+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (sysctl_legacy_va_layout ||
+ (current->personality & ADDR_COMPAT_LAYOUT) ||
+ current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) {
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ mm->mmap_base = mmap_base(mm);
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ }
+}
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 8fdd089fd17..5d3c5b07b8d 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -499,13 +499,10 @@ static int lapic_resume(struct sys_device *dev)
if (!apic_pm_state.active)
return 0;
- /* XXX: Pavel needs this for S3 resume, but can't explain why */
- set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
-
local_irq_save(flags);
rdmsr(MSR_IA32_APICBASE, l, h);
l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
wrmsr(MSR_IA32_APICBASE, l, h);
apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
apic_write(APIC_ID, apic_pm_state.apic_id);
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c
index cfb4f9cebea..38834bbbae1 100644
--- a/arch/x86_64/kernel/asm-offsets.c
+++ b/arch/x86_64/kernel/asm-offsets.c
@@ -43,6 +43,7 @@ int main(void)
ENTRY(irqcount);
ENTRY(cpunumber);
ENTRY(irqstackptr);
+ ENTRY(data_offset);
BLANK();
#undef ENTRY
#ifdef CONFIG_IA32_EMULATION
@@ -66,8 +67,6 @@ int main(void)
DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
DEFINE(pbe_next, offsetof(struct pbe, next));
BLANK();
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
- DEFINE(DEBUG_IST, DEBUG_STACK);
-#endif
+ DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
return 0;
}
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 632fc0f59fc..dbdba56e8fa 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -41,6 +41,7 @@
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/hw_irq.h>
+#include <asm/page.h>
.code64
@@ -674,9 +675,6 @@ ENTRY(spurious_interrupt)
/* error code is on the stack already */
/* handle NMI like exceptions that can happen everywhere */
-#ifndef DEBUG_IST
-# define DEBUG_IST 0
-#endif
.macro paranoidentry sym, ist=0
SAVE_ALL
cld
@@ -695,11 +693,11 @@ ENTRY(spurious_interrupt)
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
.if \ist
- subq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
call \sym
.if \ist
- addq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
+ addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
.endif
cli
.endm
@@ -918,7 +916,7 @@ KPROBE_ENTRY(debug)
INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_debug, DEBUG_IST
+ paranoidentry do_debug, DEBUG_STACK
jmp paranoid_exit
CFI_ENDPROC
.previous .text
@@ -976,7 +974,7 @@ KPROBE_ENTRY(int3)
INTR_FRAME
pushq $0
CFI_ADJUST_CFA_OFFSET 8
- paranoidentry do_int3, DEBUG_IST
+ paranoidentry do_int3, DEBUG_STACK
jmp paranoid_exit
CFI_ENDPROC
.previous .text
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 38fc3d5112e..692c737fedd 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -241,104 +241,70 @@ ljumpvector:
ENTRY(stext)
ENTRY(_stext)
-.org 0x1000
-ENTRY(init_level4_pgt)
+ $page = 0
+#define NEXT_PAGE(name) \
+ $page = $page + 1; \
+ .org $page * 0x1000; \
+ phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \
+ENTRY(name)
+
+NEXT_PAGE(init_level4_pgt)
/* This gets initialized in x86_64_start_kernel */
.fill 512,8,0
-.org 0x2000
-ENTRY(level3_ident_pgt)
- .quad 0x0000000000004007 + __PHYSICAL_START
+NEXT_PAGE(level3_ident_pgt)
+ .quad phys_level2_ident_pgt | 0x007
.fill 511,8,0
-.org 0x3000
-ENTRY(level3_kernel_pgt)
+NEXT_PAGE(level3_kernel_pgt)
.fill 510,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt */
+ .quad phys_level2_kernel_pgt | 0x007
.fill 1,8,0
-.org 0x4000
-ENTRY(level2_ident_pgt)
+NEXT_PAGE(level2_ident_pgt)
/* 40MB for bootup. */
- .quad 0x0000000000000083
- .quad 0x0000000000200083
- .quad 0x0000000000400083
- .quad 0x0000000000600083
- .quad 0x0000000000800083
- .quad 0x0000000000A00083
- .quad 0x0000000000C00083
- .quad 0x0000000000E00083
- .quad 0x0000000001000083
- .quad 0x0000000001200083
- .quad 0x0000000001400083
- .quad 0x0000000001600083
- .quad 0x0000000001800083
- .quad 0x0000000001A00083
- .quad 0x0000000001C00083
- .quad 0x0000000001E00083
- .quad 0x0000000002000083
- .quad 0x0000000002200083
- .quad 0x0000000002400083
- .quad 0x0000000002600083
+ i = 0
+ .rept 20
+ .quad i << 21 | 0x083
+ i = i + 1
+ .endr
/* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
.globl temp_boot_pmds
temp_boot_pmds:
.fill 492,8,0
-.org 0x5000
-ENTRY(level2_kernel_pgt)
+NEXT_PAGE(level2_kernel_pgt)
/* 40MB kernel mapping. The kernel code cannot be bigger than that.
When you change this change KERNEL_TEXT_SIZE in page.h too. */
/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
- .quad 0x0000000000000183
- .quad 0x0000000000200183
- .quad 0x0000000000400183
- .quad 0x0000000000600183
- .quad 0x0000000000800183
- .quad 0x0000000000A00183
- .quad 0x0000000000C00183
- .quad 0x0000000000E00183
- .quad 0x0000000001000183
- .quad 0x0000000001200183
- .quad 0x0000000001400183
- .quad 0x0000000001600183
- .quad 0x0000000001800183
- .quad 0x0000000001A00183
- .quad 0x0000000001C00183
- .quad 0x0000000001E00183
- .quad 0x0000000002000183
- .quad 0x0000000002200183
- .quad 0x0000000002400183
- .quad 0x0000000002600183
+ i = 0
+ .rept 20
+ .quad i << 21 | 0x183
+ i = i + 1
+ .endr
/* Module mapping starts here */
.fill 492,8,0
-.org 0x6000
-ENTRY(empty_zero_page)
-
-.org 0x7000
-ENTRY(empty_bad_page)
+NEXT_PAGE(empty_zero_page)
-.org 0x8000
-ENTRY(empty_bad_pte_table)
+NEXT_PAGE(level3_physmem_pgt)
+ .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */
+ .fill 511,8,0
-.org 0x9000
-ENTRY(empty_bad_pmd_table)
+#undef NEXT_PAGE
-.org 0xa000
-ENTRY(level3_physmem_pgt)
- .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */
+ .data
- .org 0xb000
#ifdef CONFIG_ACPI_SLEEP
+ .align PAGE_SIZE
ENTRY(wakeup_level4_pgt)
- .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */
+ .quad phys_level3_ident_pgt | 0x007
.fill 255,8,0
- .quad 0x000000000000a007 + __PHYSICAL_START
+ .quad phys_level3_physmem_pgt | 0x007
.fill 254,8,0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
+ .quad phys_level3_kernel_pgt | 0x007
#endif
#ifndef CONFIG_HOTPLUG_CPU
@@ -352,12 +318,12 @@ ENTRY(wakeup_level4_pgt)
*/
.align PAGE_SIZE
ENTRY(boot_level4_pgt)
- .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */
+ .quad phys_level3_ident_pgt | 0x007
.fill 255,8,0
- .quad 0x000000000000a007 + __PHYSICAL_START
+ .quad phys_level3_physmem_pgt | 0x007
.fill 254,8,0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
- .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
+ .quad phys_level3_kernel_pgt | 0x007
.data
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index 6eff51e9400..8ac4db09610 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -38,7 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
unsigned long __supported_pte_mask __read_mostly = ~0UL;
-static int do_not_nx __initdata = 0;
+static int do_not_nx __cpuinitdata = 0;
/* noexec=on|off
Control non executable mappings for 64bit processes.
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile
index 1d232a87f11..d25ac86fe27 100644
--- a/arch/x86_64/mm/Makefile
+++ b/arch/x86_64/mm/Makefile
@@ -2,7 +2,7 @@
# Makefile for the linux x86_64-specific parts of the memory manager.
#
-obj-y := init.o fault.o ioremap.o extable.o pageattr.o
+obj-y := init.o fault.o ioremap.o extable.o pageattr.o mmap.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_K8_NUMA) += k8topology.o
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index eca60125efc..7af1742aa95 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -24,6 +24,8 @@
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include <linux/memory_hotplug.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -180,13 +182,19 @@ static struct temp_map {
{}
};
-static __init void *alloc_low_page(int *index, unsigned long *phys)
+static __meminit void *alloc_low_page(int *index, unsigned long *phys)
{
struct temp_map *ti;
int i;
unsigned long pfn = table_end++, paddr;
void *adr;
+ if (after_bootmem) {
+ adr = (void *)get_zeroed_page(GFP_ATOMIC);
+ *phys = __pa(adr);
+ return adr;
+ }
+
if (pfn >= end_pfn)
panic("alloc_low_page: ran out of memory");
for (i = 0; temp_mappings[i].allocated; i++) {
@@ -199,55 +207,86 @@ static __init void *alloc_low_page(int *index, unsigned long *phys)
ti->allocated = 1;
__flush_tlb();
adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK);
+ memset(adr, 0, PAGE_SIZE);
*index = i;
*phys = pfn * PAGE_SIZE;
return adr;
}
-static __init void unmap_low_page(int i)
+static __meminit void unmap_low_page(int i)
{
- struct temp_map *ti = &temp_mappings[i];
+ struct temp_map *ti;
+
+ if (after_bootmem)
+ return;
+
+ ti = &temp_mappings[i];
set_pmd(ti->pmd, __pmd(0));
ti->allocated = 0;
}
-static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+static void __meminit
+phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
+{
+ int i;
+
+ for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) {
+ unsigned long entry;
+
+ if (address > end) {
+ for (; i < PTRS_PER_PMD; i++, pmd++)
+ set_pmd(pmd, __pmd(0));
+ break;
+ }
+ entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+ entry &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(entry));
+ }
+}
+
+static void __meminit
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+{
+ pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
+
+ if (pmd_none(*pmd)) {
+ spin_lock(&init_mm.page_table_lock);
+ phys_pmd_init(pmd, address, end);
+ spin_unlock(&init_mm.page_table_lock);
+ __flush_tlb_all();
+ }
+}
+
+static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
{
- long i, j;
+ long i = pud_index(address);
- i = pud_index(address);
pud = pud + i;
+
+ if (after_bootmem && pud_val(*pud)) {
+ phys_pmd_update(pud, address, end);
+ return;
+ }
+
for (; i < PTRS_PER_PUD; pud++, i++) {
int map;
unsigned long paddr, pmd_phys;
pmd_t *pmd;
- paddr = address + i*PUD_SIZE;
- if (paddr >= end) {
- for (; i < PTRS_PER_PUD; i++, pud++)
- set_pud(pud, __pud(0));
+ paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
+ if (paddr >= end)
break;
- }
- if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
+ if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) {
set_pud(pud, __pud(0));
continue;
}
pmd = alloc_low_page(&map, &pmd_phys);
+ spin_lock(&init_mm.page_table_lock);
set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
- unsigned long pe;
-
- if (paddr >= end) {
- for (; j < PTRS_PER_PMD; j++, pmd++)
- set_pmd(pmd, __pmd(0));
- break;
- }
- pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr;
- pe &= __supported_pte_mask;
- set_pmd(pmd, __pmd(pe));
- }
+ phys_pmd_init(pmd, paddr, end);
+ spin_unlock(&init_mm.page_table_lock);
unmap_low_page(map);
}
__flush_tlb();
@@ -262,30 +301,25 @@ static void __init find_early_table_space(unsigned long end)
tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
- /* Put page tables beyond the DMA zones if possible.
- RED-PEN might be better to spread them out more over
- memory to avoid hotspots */
- if (end > MAX_DMA32_PFN<<PAGE_SHIFT)
- start = MAX_DMA32_PFN << PAGE_SHIFT;
- else if (end > MAX_DMA_PFN << PAGE_SHIFT)
- start = MAX_DMA_PFN << PAGE_SHIFT;
- else
- start = 0x8000;
-
- table_start = find_e820_area(start, end, tables);
- if (table_start == -1)
- table_start = find_e820_area(0x8000, end, tables);
+ /* RED-PEN putting page tables only on node 0 could
+ cause a hotspot and fill up ZONE_DMA. The page tables
+ need roughly 0.5KB per GB. */
+ start = 0x8000;
+ table_start = find_e820_area(start, end, tables);
if (table_start == -1UL)
panic("Cannot find space for the kernel page tables");
table_start >>= PAGE_SHIFT;
table_end = table_start;
+
+ early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
+ end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT);
}
/* Setup the direct mapping of the physical memory at PAGE_OFFSET.
This runs before bootmem is initialized and gets pages directly from the
physical memory. To access them they are temporarily mapped. */
-void __init init_memory_mapping(unsigned long start, unsigned long end)
+void __meminit init_memory_mapping(unsigned long start, unsigned long end)
{
unsigned long next;
@@ -297,7 +331,8 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
* mapped. Unfortunately this is done currently before the nodes are
* discovered.
*/
- find_early_table_space(end);
+ if (!after_bootmem)
+ find_early_table_space(end);
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
@@ -305,20 +340,26 @@ void __init init_memory_mapping(unsigned long start, unsigned long end)
for (; start < end; start = next) {
int map;
unsigned long pud_phys;
- pud_t *pud = alloc_low_page(&map, &pud_phys);
+ pgd_t *pgd = pgd_offset_k(start);
+ pud_t *pud;
+
+ if (after_bootmem)
+ pud = pud_offset_k(pgd, __PAGE_OFFSET);
+ else
+ pud = alloc_low_page(&map, &pud_phys);
+
next = start + PGDIR_SIZE;
if (next > end)
next = end;
phys_pud_init(pud, __pa(start), __pa(next));
- set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
+ if (!after_bootmem)
+ set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
unmap_low_page(map);
}
- asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
+ if (!after_bootmem)
+ asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features));
__flush_tlb_all();
- early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end,
- table_start<<PAGE_SHIFT,
- table_end<<PAGE_SHIFT);
}
void __cpuinit zap_low_mappings(int cpu)
@@ -393,6 +434,9 @@ size_zones(unsigned long *z, unsigned long *h,
void __init paging_init(void)
{
unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+
+ memory_present(0, 0, end_pfn);
+ sparse_init();
size_zones(zones, holes, 0, end_pfn);
free_area_init_node(0, NODE_DATA(0), zones,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
@@ -433,6 +477,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
__flush_tlb_all();
}
+/*
+ * Memory hotplug specific functions
+ * These are only for non-NUMA machines right now.
+ */
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+void online_page(struct page *page)
+{
+ ClearPageReserved(page);
+ set_page_count(page, 1);
+ __free_page(page);
+ totalram_pages++;
+ num_physpages++;
+}
+
+int add_memory(u64 start, u64 size)
+{
+ struct pglist_data *pgdat = NODE_DATA(0);
+ struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ int ret;
+
+ ret = __add_pages(zone, start_pfn, nr_pages);
+ if (ret)
+ goto error;
+
+ init_memory_mapping(start, (start + size -1));
+
+ return ret;
+error:
+ printk("%s: Problem encountered in __add_pages!\n", __func__);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(add_memory);
+
+int remove_memory(u64 start, u64 size)
+{
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(remove_memory);
+
+#endif
+
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
kcore_vsyscall;
@@ -539,7 +627,7 @@ void mark_rodata_ro(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start < (unsigned long)&_end)
+ if (start >= end)
return;
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
diff --git a/arch/x86_64/mm/mmap.c b/arch/x86_64/mm/mmap.c
new file mode 100644
index 00000000000..43e9b99bdf2
--- /dev/null
+++ b/arch/x86_64/mm/mmap.c
@@ -0,0 +1,30 @@
+/* Copyright 2005 Andi Kleen, SuSE Labs.
+ * Licensed under GPL, v.2
+ */
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/random.h>
+#include <asm/ia32.h>
+
+/* Notebook: move the mmap code from sys_x86_64.c over here. */
+
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+#ifdef CONFIG_IA32_EMULATION
+ if (current_thread_info()->flags & _TIF_IA32)
+ return ia32_pick_mmap_layout(mm);
+#endif
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ if (current->flags & PF_RANDOMIZE) {
+ /* Add 28bit randomness which is about 40bits of address space
+ because mmap base has to be page aligned.
+ or ~1/128 of the total user VM
+ (total user address space is 47bits) */
+ unsigned rnd = get_random_int() & 0xfffffff;
+ mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT;
+ }
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+}
+