diff options
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig | 9 | ||||
-rw-r--r-- | arch/x86_64/defconfig | 21 | ||||
-rw-r--r-- | arch/x86_64/ia32/Makefile | 3 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_binfmt.c | 17 | ||||
-rw-r--r-- | arch/x86_64/ia32/mmap32.c | 78 | ||||
-rw-r--r-- | arch/x86_64/kernel/apic.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/asm-offsets.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/entry.S | 12 | ||||
-rw-r--r-- | arch/x86_64/kernel/head.S | 108 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup64.c | 2 | ||||
-rw-r--r-- | arch/x86_64/mm/Makefile | 2 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 180 | ||||
-rw-r--r-- | arch/x86_64/mm/mmap.c | 30 |
13 files changed, 315 insertions, 157 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 2efc4be2270..2f9deca31cc 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -305,7 +305,11 @@ config ARCH_DISCONTIGMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on NUMA + depends on (NUMA || EXPERIMENTAL) + +config ARCH_MEMORY_PROBE + def_bool y + depends on MEMORY_HOTPLUG config ARCH_FLATMEM_ENABLE def_bool y @@ -315,6 +319,7 @@ source "mm/Kconfig" config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y + depends on NUMA config NR_CPUS int "Maximum number of CPUs (2-256)" @@ -350,7 +355,7 @@ config HPET_TIMER <http://www.intel.com/hardwaredesign/hpetspec.htm>. config X86_PM_TIMER - bool "PM timer" + bool "PM timer" if EMBEDDED depends on ACPI default y help diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 054dcd8a5e9..5231fe83ea4 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.15-git7 -# Wed Jan 11 11:57:36 2006 +# Linux kernel version: 2.6.15-git12 +# Mon Jan 16 13:09:08 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -319,6 +319,11 @@ CONFIG_IPV6=y # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set + +# +# TIPC Configuration (EXPERIMENTAL) +# +# CONFIG_TIPC is not set # CONFIG_NET_DIVERT is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set @@ -537,8 +542,7 @@ CONFIG_SCSI_SATA_INTEL_COMBINED=y # CONFIG_SCSI_IPR is not set # CONFIG_SCSI_QLOGIC_FC is not set # CONFIG_SCSI_QLOGIC_1280 is not set -CONFIG_SCSI_QLA2XXX=y -# CONFIG_SCSI_QLA2XXX_EMBEDDED_FIRMWARE is not set +# CONFIG_SCSI_QLA_FC is not set # CONFIG_SCSI_LPFC is not set # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set @@ -805,6 +809,7 @@ CONFIG_SOFT_WATCHDOG=y # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set +# CONFIG_SBC_EPX_C3_WATCHDOG is not set # # PCI-based Watchdog Cards @@ -850,6 +855,12 @@ CONFIG_HPET_MMAP=y # CONFIG_I2C is not set # +# SPI support +# +# CONFIG_SPI is not set +# CONFIG_SPI_MASTER is not set + +# # Dallas's 1-wire bus # # CONFIG_W1 is not set @@ -992,6 +1003,7 @@ CONFIG_USB_STORAGE=y # CONFIG_USB_HID=y CONFIG_USB_HIDINPUT=y +# CONFIG_USB_HIDINPUT_POWERBOOK is not set # CONFIG_HID_FF is not set # CONFIG_USB_HIDDEV is not set # CONFIG_USB_AIPTEK is not set @@ -1276,6 +1288,7 @@ CONFIG_DETECT_SOFTLOCKUP=y CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set # CONFIG_FRAME_POINTER is not set +# CONFIG_FORCED_INLINING is not set # CONFIG_RCU_TORTURE_TEST is not set CONFIG_INIT_DEBUG=y # CONFIG_DEBUG_RODATA is not set diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile index 051608d5592..929e6b0771f 100644 --- a/arch/x86_64/ia32/Makefile +++ b/arch/x86_64/ia32/Makefile @@ -3,7 +3,8 @@ # obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ - ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o + ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \ + mmap32.o sysv-$(CONFIG_SYSVIPC) := ipc32.o obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 029bddab045..572b3b28772 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -293,8 +293,6 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int } while(0) -#define elf_map elf32_map - #include <linux/module.h> MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); @@ -390,21 +388,6 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, } EXPORT_SYMBOL(ia32_setup_arg_pages); -static unsigned long -elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) -{ - unsigned long map_addr; - struct task_struct *me = current; - - down_write(&me->mm->mmap_sem); - map_addr = do_mmap(filep, ELF_PAGESTART(addr), - eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, - type, - eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); - up_write(&me->mm->mmap_sem); - return(map_addr); -} - #ifdef CONFIG_SYSCTL /* Register vsyscall32 into the ABI table */ #include <linux/sysctl.h> diff --git a/arch/x86_64/ia32/mmap32.c b/arch/x86_64/ia32/mmap32.c new file mode 100644 index 00000000000..079f4132575 --- /dev/null +++ b/arch/x86_64/ia32/mmap32.c @@ -0,0 +1,78 @@ +/* + * linux/arch/x86_64/ia32/mm/mmap.c + * + * flexible mmap layout support + * + * Based on the i386 version which was + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar <mingo@elte.hu> + */ + +#include <linux/personality.h> +#include <linux/mm.h> +#include <linux/random.h> + +/* + * Top of mmap area (just below the process stack). + * + * Leave an at least ~128 MB hole. + */ +#define MIN_GAP (128*1024*1024) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline unsigned long mmap_base(struct mm_struct *mm) +{ + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + unsigned long random_factor = 0; + + if (current->flags & PF_RANDOMIZE) + random_factor = get_random_int() % (1024*1024); + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return PAGE_ALIGN(TASK_SIZE - gap - random_factor); +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void ia32_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (sysctl_legacy_va_layout || + (current->personality & ADDR_COMPAT_LAYOUT) || + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(mm); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index 8fdd089fd17..5d3c5b07b8d 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -499,13 +499,10 @@ static int lapic_resume(struct sys_device *dev) if (!apic_pm_state.active) return 0; - /* XXX: Pavel needs this for S3 resume, but can't explain why */ - set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); - local_irq_save(flags); rdmsr(MSR_IA32_APICBASE, l, h); l &= ~MSR_IA32_APICBASE_BASE; - l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; + l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; wrmsr(MSR_IA32_APICBASE, l, h); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index cfb4f9cebea..38834bbbae1 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c @@ -43,6 +43,7 @@ int main(void) ENTRY(irqcount); ENTRY(cpunumber); ENTRY(irqstackptr); + ENTRY(data_offset); BLANK(); #undef ENTRY #ifdef CONFIG_IA32_EMULATION @@ -66,8 +67,6 @@ int main(void) DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); DEFINE(pbe_next, offsetof(struct pbe, next)); BLANK(); -#if DEBUG_STKSZ > EXCEPTION_STKSZ - DEFINE(DEBUG_IST, DEBUG_STACK); -#endif + DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); return 0; } diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 632fc0f59fc..dbdba56e8fa 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -41,6 +41,7 @@ #include <asm/unistd.h> #include <asm/thread_info.h> #include <asm/hw_irq.h> +#include <asm/page.h> .code64 @@ -674,9 +675,6 @@ ENTRY(spurious_interrupt) /* error code is on the stack already */ /* handle NMI like exceptions that can happen everywhere */ -#ifndef DEBUG_IST -# define DEBUG_IST 0 -#endif .macro paranoidentry sym, ist=0 SAVE_ALL cld @@ -695,11 +693,11 @@ ENTRY(spurious_interrupt) movq ORIG_RAX(%rsp),%rsi movq $-1,ORIG_RAX(%rsp) .if \ist - subq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) .endif call \sym .if \ist - addq $EXCEPTION_STACK_SIZE, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) .endif cli .endm @@ -918,7 +916,7 @@ KPROBE_ENTRY(debug) INTR_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_debug, DEBUG_IST + paranoidentry do_debug, DEBUG_STACK jmp paranoid_exit CFI_ENDPROC .previous .text @@ -976,7 +974,7 @@ KPROBE_ENTRY(int3) INTR_FRAME pushq $0 CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_int3, DEBUG_IST + paranoidentry do_int3, DEBUG_STACK jmp paranoid_exit CFI_ENDPROC .previous .text diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 38fc3d5112e..692c737fedd 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -241,104 +241,70 @@ ljumpvector: ENTRY(stext) ENTRY(_stext) -.org 0x1000 -ENTRY(init_level4_pgt) + $page = 0 +#define NEXT_PAGE(name) \ + $page = $page + 1; \ + .org $page * 0x1000; \ + phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \ +ENTRY(name) + +NEXT_PAGE(init_level4_pgt) /* This gets initialized in x86_64_start_kernel */ .fill 512,8,0 -.org 0x2000 -ENTRY(level3_ident_pgt) - .quad 0x0000000000004007 + __PHYSICAL_START +NEXT_PAGE(level3_ident_pgt) + .quad phys_level2_ident_pgt | 0x007 .fill 511,8,0 -.org 0x3000 -ENTRY(level3_kernel_pgt) +NEXT_PAGE(level3_kernel_pgt) .fill 510,8,0 /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ - .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt */ + .quad phys_level2_kernel_pgt | 0x007 .fill 1,8,0 -.org 0x4000 -ENTRY(level2_ident_pgt) +NEXT_PAGE(level2_ident_pgt) /* 40MB for bootup. */ - .quad 0x0000000000000083 - .quad 0x0000000000200083 - .quad 0x0000000000400083 - .quad 0x0000000000600083 - .quad 0x0000000000800083 - .quad 0x0000000000A00083 - .quad 0x0000000000C00083 - .quad 0x0000000000E00083 - .quad 0x0000000001000083 - .quad 0x0000000001200083 - .quad 0x0000000001400083 - .quad 0x0000000001600083 - .quad 0x0000000001800083 - .quad 0x0000000001A00083 - .quad 0x0000000001C00083 - .quad 0x0000000001E00083 - .quad 0x0000000002000083 - .quad 0x0000000002200083 - .quad 0x0000000002400083 - .quad 0x0000000002600083 + i = 0 + .rept 20 + .quad i << 21 | 0x083 + i = i + 1 + .endr /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ .globl temp_boot_pmds temp_boot_pmds: .fill 492,8,0 -.org 0x5000 -ENTRY(level2_kernel_pgt) +NEXT_PAGE(level2_kernel_pgt) /* 40MB kernel mapping. The kernel code cannot be bigger than that. When you change this change KERNEL_TEXT_SIZE in page.h too. */ /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ - .quad 0x0000000000000183 - .quad 0x0000000000200183 - .quad 0x0000000000400183 - .quad 0x0000000000600183 - .quad 0x0000000000800183 - .quad 0x0000000000A00183 - .quad 0x0000000000C00183 - .quad 0x0000000000E00183 - .quad 0x0000000001000183 - .quad 0x0000000001200183 - .quad 0x0000000001400183 - .quad 0x0000000001600183 - .quad 0x0000000001800183 - .quad 0x0000000001A00183 - .quad 0x0000000001C00183 - .quad 0x0000000001E00183 - .quad 0x0000000002000183 - .quad 0x0000000002200183 - .quad 0x0000000002400183 - .quad 0x0000000002600183 + i = 0 + .rept 20 + .quad i << 21 | 0x183 + i = i + 1 + .endr /* Module mapping starts here */ .fill 492,8,0 -.org 0x6000 -ENTRY(empty_zero_page) - -.org 0x7000 -ENTRY(empty_bad_page) +NEXT_PAGE(empty_zero_page) -.org 0x8000 -ENTRY(empty_bad_pte_table) +NEXT_PAGE(level3_physmem_pgt) + .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ + .fill 511,8,0 -.org 0x9000 -ENTRY(empty_bad_pmd_table) +#undef NEXT_PAGE -.org 0xa000 -ENTRY(level3_physmem_pgt) - .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */ + .data - .org 0xb000 #ifdef CONFIG_ACPI_SLEEP + .align PAGE_SIZE ENTRY(wakeup_level4_pgt) - .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ + .quad phys_level3_ident_pgt | 0x007 .fill 255,8,0 - .quad 0x000000000000a007 + __PHYSICAL_START + .quad phys_level3_physmem_pgt | 0x007 .fill 254,8,0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ + .quad phys_level3_kernel_pgt | 0x007 #endif #ifndef CONFIG_HOTPLUG_CPU @@ -352,12 +318,12 @@ ENTRY(wakeup_level4_pgt) */ .align PAGE_SIZE ENTRY(boot_level4_pgt) - .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ + .quad phys_level3_ident_pgt | 0x007 .fill 255,8,0 - .quad 0x000000000000a007 + __PHYSICAL_START + .quad phys_level3_physmem_pgt | 0x007 .fill 254,8,0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ + .quad phys_level3_kernel_pgt | 0x007 .data diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 6eff51e9400..8ac4db09610 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -38,7 +38,7 @@ struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); unsigned long __supported_pte_mask __read_mostly = ~0UL; -static int do_not_nx __initdata = 0; +static int do_not_nx __cpuinitdata = 0; /* noexec=on|off Control non executable mappings for 64bit processes. diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index 1d232a87f11..d25ac86fe27 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux x86_64-specific parts of the memory manager. # -obj-y := init.o fault.o ioremap.o extable.o pageattr.o +obj-y := init.o fault.o ioremap.o extable.o pageattr.o mmap.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_K8_NUMA) += k8topology.o diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index eca60125efc..7af1742aa95 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -24,6 +24,8 @@ #include <linux/proc_fs.h> #include <linux/pci.h> #include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/memory_hotplug.h> #include <asm/processor.h> #include <asm/system.h> @@ -180,13 +182,19 @@ static struct temp_map { {} }; -static __init void *alloc_low_page(int *index, unsigned long *phys) +static __meminit void *alloc_low_page(int *index, unsigned long *phys) { struct temp_map *ti; int i; unsigned long pfn = table_end++, paddr; void *adr; + if (after_bootmem) { + adr = (void *)get_zeroed_page(GFP_ATOMIC); + *phys = __pa(adr); + return adr; + } + if (pfn >= end_pfn) panic("alloc_low_page: ran out of memory"); for (i = 0; temp_mappings[i].allocated; i++) { @@ -199,55 +207,86 @@ static __init void *alloc_low_page(int *index, unsigned long *phys) ti->allocated = 1; __flush_tlb(); adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); + memset(adr, 0, PAGE_SIZE); *index = i; *phys = pfn * PAGE_SIZE; return adr; } -static __init void unmap_low_page(int i) +static __meminit void unmap_low_page(int i) { - struct temp_map *ti = &temp_mappings[i]; + struct temp_map *ti; + + if (after_bootmem) + return; + + ti = &temp_mappings[i]; set_pmd(ti->pmd, __pmd(0)); ti->allocated = 0; } -static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) +static void __meminit +phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) +{ + int i; + + for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { + unsigned long entry; + + if (address > end) { + for (; i < PTRS_PER_PMD; i++, pmd++) + set_pmd(pmd, __pmd(0)); + break; + } + entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; + entry &= __supported_pte_mask; + set_pmd(pmd, __pmd(entry)); + } +} + +static void __meminit +phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) +{ + pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); + + if (pmd_none(*pmd)) { + spin_lock(&init_mm.page_table_lock); + phys_pmd_init(pmd, address, end); + spin_unlock(&init_mm.page_table_lock); + __flush_tlb_all(); + } +} + +static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) { - long i, j; + long i = pud_index(address); - i = pud_index(address); pud = pud + i; + + if (after_bootmem && pud_val(*pud)) { + phys_pmd_update(pud, address, end); + return; + } + for (; i < PTRS_PER_PUD; pud++, i++) { int map; unsigned long paddr, pmd_phys; pmd_t *pmd; - paddr = address + i*PUD_SIZE; - if (paddr >= end) { - for (; i < PTRS_PER_PUD; i++, pud++) - set_pud(pud, __pud(0)); + paddr = (address & PGDIR_MASK) + i*PUD_SIZE; + if (paddr >= end) break; - } - if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) { + if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) { set_pud(pud, __pud(0)); continue; } pmd = alloc_low_page(&map, &pmd_phys); + spin_lock(&init_mm.page_table_lock); set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); - for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { - unsigned long pe; - - if (paddr >= end) { - for (; j < PTRS_PER_PMD; j++, pmd++) - set_pmd(pmd, __pmd(0)); - break; - } - pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr; - pe &= __supported_pte_mask; - set_pmd(pmd, __pmd(pe)); - } + phys_pmd_init(pmd, paddr, end); + spin_unlock(&init_mm.page_table_lock); unmap_low_page(map); } __flush_tlb(); @@ -262,30 +301,25 @@ static void __init find_early_table_space(unsigned long end) tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + round_up(pmds * sizeof(pmd_t), PAGE_SIZE); - /* Put page tables beyond the DMA zones if possible. - RED-PEN might be better to spread them out more over - memory to avoid hotspots */ - if (end > MAX_DMA32_PFN<<PAGE_SHIFT) - start = MAX_DMA32_PFN << PAGE_SHIFT; - else if (end > MAX_DMA_PFN << PAGE_SHIFT) - start = MAX_DMA_PFN << PAGE_SHIFT; - else - start = 0x8000; - - table_start = find_e820_area(start, end, tables); - if (table_start == -1) - table_start = find_e820_area(0x8000, end, tables); + /* RED-PEN putting page tables only on node 0 could + cause a hotspot and fill up ZONE_DMA. The page tables + need roughly 0.5KB per GB. */ + start = 0x8000; + table_start = find_e820_area(start, end, tables); if (table_start == -1UL) panic("Cannot find space for the kernel page tables"); table_start >>= PAGE_SHIFT; table_end = table_start; + + early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", + end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); } /* Setup the direct mapping of the physical memory at PAGE_OFFSET. This runs before bootmem is initialized and gets pages directly from the physical memory. To access them they are temporarily mapped. */ -void __init init_memory_mapping(unsigned long start, unsigned long end) +void __meminit init_memory_mapping(unsigned long start, unsigned long end) { unsigned long next; @@ -297,7 +331,8 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) * mapped. Unfortunately this is done currently before the nodes are * discovered. */ - find_early_table_space(end); + if (!after_bootmem) + find_early_table_space(end); start = (unsigned long)__va(start); end = (unsigned long)__va(end); @@ -305,20 +340,26 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) for (; start < end; start = next) { int map; unsigned long pud_phys; - pud_t *pud = alloc_low_page(&map, &pud_phys); + pgd_t *pgd = pgd_offset_k(start); + pud_t *pud; + + if (after_bootmem) + pud = pud_offset_k(pgd, __PAGE_OFFSET); + else + pud = alloc_low_page(&map, &pud_phys); + next = start + PGDIR_SIZE; if (next > end) next = end; phys_pud_init(pud, __pa(start), __pa(next)); - set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); + if (!after_bootmem) + set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); unmap_low_page(map); } - asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); + if (!after_bootmem) + asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); __flush_tlb_all(); - early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, - table_start<<PAGE_SHIFT, - table_end<<PAGE_SHIFT); } void __cpuinit zap_low_mappings(int cpu) @@ -393,6 +434,9 @@ size_zones(unsigned long *z, unsigned long *h, void __init paging_init(void) { unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; + + memory_present(0, 0, end_pfn); + sparse_init(); size_zones(zones, holes, 0, end_pfn); free_area_init_node(0, NODE_DATA(0), zones, __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); @@ -433,6 +477,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) __flush_tlb_all(); } +/* + * Memory hotplug specific functions + * These are only for non-NUMA machines right now. + */ +#ifdef CONFIG_MEMORY_HOTPLUG + +void online_page(struct page *page) +{ + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + totalram_pages++; + num_physpages++; +} + +int add_memory(u64 start, u64 size) +{ + struct pglist_data *pgdat = NODE_DATA(0); + struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + + ret = __add_pages(zone, start_pfn, nr_pages); + if (ret) + goto error; + + init_memory_mapping(start, (start + size -1)); + + return ret; +error: + printk("%s: Problem encountered in __add_pages!\n", __func__); + return ret; +} +EXPORT_SYMBOL_GPL(add_memory); + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +EXPORT_SYMBOL_GPL(remove_memory); + +#endif + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; @@ -539,7 +627,7 @@ void mark_rodata_ro(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - if (start < (unsigned long)&_end) + if (start >= end) return; printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); for (; start < end; start += PAGE_SIZE) { diff --git a/arch/x86_64/mm/mmap.c b/arch/x86_64/mm/mmap.c new file mode 100644 index 00000000000..43e9b99bdf2 --- /dev/null +++ b/arch/x86_64/mm/mmap.c @@ -0,0 +1,30 @@ +/* Copyright 2005 Andi Kleen, SuSE Labs. + * Licensed under GPL, v.2 + */ +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/random.h> +#include <asm/ia32.h> + +/* Notebook: move the mmap code from sys_x86_64.c over here. */ + +void arch_pick_mmap_layout(struct mm_struct *mm) +{ +#ifdef CONFIG_IA32_EMULATION + if (current_thread_info()->flags & _TIF_IA32) + return ia32_pick_mmap_layout(mm); +#endif + mm->mmap_base = TASK_UNMAPPED_BASE; + if (current->flags & PF_RANDOMIZE) { + /* Add 28bit randomness which is about 40bits of address space + because mmap base has to be page aligned. + or ~1/128 of the total user VM + (total user address space is 47bits) */ + unsigned rnd = get_random_int() & 0xfffffff; + mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT; + } + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; +} + |