From 4f1933620f57145212cdbb1ac6ce099eeeb21c5a Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Sun, 5 Mar 2006 17:14:10 -0500 Subject: kbuild: change kbuild to not rely on incorrect GNU make behavior The kbuild system takes advantage of an incorrect behavior in GNU make. Once this behavior is fixed, all files in the kernel rebuild every time, even if nothing has changed. This patch ensures kbuild works with both the incorrect and correct behaviors of GNU make. For more details on the incorrect behavior, see: http://lists.gnu.org/archive/html/bug-make/2006-03/msg00003.html Changes in this patch: - Keep all targets that are to be marked .PHONY in a variable, PHONY. - Add .PHONY: $(PHONY) to mark them properly. - Remove any $(PHONY) files from the $? list when determining whether targets are up-to-date or not. Signed-off-by: Paul Smith Signed-off-by: Sam Ravnborg --- arch/x86_64/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index d7fd46479c5..7405dfd6522 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -67,8 +67,8 @@ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ boot := arch/x86_64/boot -.PHONY: bzImage bzlilo install archmrproper \ - fdimage fdimage144 fdimage288 archclean +PHONY += bzImage bzlilo install archmrproper \ + fdimage fdimage144 fdimage288 archclean #Default target when executing "make" all: bzImage -- cgit v1.2.3 From 68ed0040a8c9d06b73cda322a1f740749bd6e41a Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Wed, 22 Mar 2006 00:07:38 -0800 Subject: [PATCH] x86: mark cyc2ns_scale readmostly This variable is rarely written to. Mark the variable accordingly. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 3080f84bf7b..ee5ce3d3cbc 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -477,7 +477,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) return IRQ_HANDLED; } -static unsigned int cyc2ns_scale; +static unsigned int cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) -- cgit v1.2.3 From f4a641d66c6e135dcfc861521e8008faed2411e1 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Mar 2006 00:07:46 -0800 Subject: [PATCH] multiple exports of strpbrk Sam's tree includes a new check, which found that we're exporting strpbrk() multiple times. It seems that the convention is that this is exported from the arch files, so remove the lib/string.c export.
Cc: Sam Ravnborg Cc: Yoshinori Sato Cc: David Howells Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/x8664_ksyms.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 3496abc8d37..c9dc7e46731 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -124,6 +124,7 @@ extern void * __memcpy(void *,const void *,__kernel_size_t); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strpbrk); EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); -- cgit v1.2.3 From 20aaffd6a6492a5f7975dd7be967cdf0e4d978c4 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 22 Mar 2006 00:08:32 -0800 Subject: [PATCH] x86_64: pageattr use single list Use page->lru.next to implement the singly linked list of pages rather than the struct deferred_page which needs to be allocated and freed for each page. Signed-off-by: Nick Piggin Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/pageattr.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 35f1f1aab06..efe5af14c7d 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -77,26 +77,12 @@ static inline void flush_map(unsigned long address) on_each_cpu(flush_kernel_map, (void *)address, 1, 1); } -struct deferred_page { - struct deferred_page *next; - struct page *fpage; - unsigned long address; -}; -static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */ - -static inline void save_page(unsigned long address, struct page *fpage) +static struct page *deferred_pages; /* protected by init_mm.mmap_sem */ + +static inline void save_page(struct page *fpage) { - struct deferred_page *df; - df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL); - if (!df) { - flush_map(address); - __free_page(fpage); - } else { - df->next = df_list; - df->fpage = fpage; - df->address = address; - df_list = df; - } + fpage->lru.next = (struct list_head *)deferred_pages; + deferred_pages = fpage; } /* @@ -163,7 +149,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, switch (page_count(kpte_page)) { case 1: - save_page(address, kpte_page); + save_page(kpte_page); revert_page(address, ref_prot); break; case 0: @@ -220,17 +206,17 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) void global_flush_tlb(void) { - struct deferred_page *df, *next_df; + struct page *dpage; down_read(&init_mm.mmap_sem); - df = xchg(&df_list, NULL); + dpage = xchg(&deferred_pages, NULL); up_read(&init_mm.mmap_sem); - flush_map((df && !df->next) ? df->address : 0); - for (; df; df = next_df) { - next_df = df->next; - if (df->fpage) - __free_page(df->fpage); - kfree(df); + + flush_map((dpage && !dpage->lru.next) ? 
(unsigned long)page_address(dpage) : 0); + while (dpage) { + struct page *tmp = dpage; + dpage = (struct page *)dpage->lru.next; + __free_page(tmp); } } -- cgit v1.2.3 From 4fa4f53bf92139595cae6f1a3d972fc0a3451d29 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 22 Mar 2006 00:08:33 -0800 Subject: [PATCH] x86_64: pageattr remove __put_page Remove page_count and __put_page from x86-64 pageattr Signed-off-by: Nick Piggin Acked-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/pageattr.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index efe5af14c7d..531ad21447b 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -45,6 +45,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, pte_t *pbase; if (!base) return NULL; + /* + * page_private is used to track the number of entries in + * the page table page have non standard attributes. + */ + SetPagePrivate(base); + page_private(base) = 0; + address = __pa(address); addr = address & LARGE_PAGE_MASK; pbase = (pte_t *)page_address(base); @@ -124,8 +131,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, set_pte(kpte, pfn_pte(pfn, prot)); } else { /* - * split_large_page will take the reference for this change_page_attr - * on the split page. + * split_large_page will take the reference for this + * change_page_attr on the split page. */ struct page *split; @@ -137,23 +144,20 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, set_pte(kpte,mk_pte(split, ref_prot2)); kpte_page = split; } - get_page(kpte_page); + page_private(kpte_page)++; } else if ((kpte_flags & _PAGE_PSE) == 0) { set_pte(kpte, pfn_pte(pfn, ref_prot)); - __put_page(kpte_page); + BUG_ON(page_private(kpte_page) == 0); + page_private(kpte_page)--; } else BUG(); /* on x86-64 the direct mapping set at boot is not using 4k pages */ BUG_ON(PageReserved(kpte_page)); - switch (page_count(kpte_page)) { - case 1: + if (page_private(kpte_page) == 0) { save_page(kpte_page); revert_page(address, ref_prot); - break; - case 0: - BUG(); /* memleak and failed 2M page regeneration */ } return 0; } @@ -216,6 +220,7 @@ void global_flush_tlb(void) while (dpage) { struct page *tmp = dpage; dpage = (struct page *)dpage->lru.next; + ClearPagePrivate(tmp); __free_page(tmp); } } -- cgit v1.2.3 From 7835e98b2e3c66dba79cb0ff8ebb90a2fe030c29 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 22 Mar 2006 00:08:40 -0800 Subject: [PATCH] remove set_page_count() outside mm/ set_page_count usage outside mm/ is limited to setting the refcount to 1. Remove set_page_count from outside mm/, and replace those users with init_page_count() and set_page_refcounted(). This allows more debug checking, and tighter control on how code is allowed to play around with page->_count. 
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 7af1742aa95..40ed13d263c 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -486,7 +486,7 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) void online_page(struct page *page) { ClearPageReserved(page); - set_page_count(page, 1); + init_page_count(page); __free_page(page); totalram_pages++; num_physpages++; @@ -592,7 +592,7 @@ void free_initmem(void) addr = (unsigned long)(&__init_begin); for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); - set_page_count(virt_to_page(addr), 1); + init_page_count(virt_to_page(addr)); memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE); free_page(addr); totalram_pages++; @@ -632,7 +632,7 @@ void free_initrd_mem(unsigned long start, unsigned long end) printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); for (; start < end; start += PAGE_SIZE) { ClearPageReserved(virt_to_page(start)); - set_page_count(virt_to_page(start), 1); + init_page_count(virt_to_page(start)); free_page(start); totalram_pages++; } -- cgit v1.2.3 From 99b7de33477882b86d54ce8ecbf90147f9d106d7 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 23 Mar 2006 02:59:41 -0800 Subject: [PATCH] x86: early printk handling fixes The history is that -mm kernels do not work for me for a few months already. The things started from crashing somewhere after starting init, and for the last month - no boot at all, just "Uncompressing... OK, booting kernel", and silence. Early console didn't work too. With the latest releases this degraded into an infinite stream of the "Unknown interrupt or fault" messages. So today my patience ran out and I started to think how can I collect at least some info for the bug-report. Attached is the patch that allows to gather some valuable debug info on the problem by making an early console more usable. I can't properly test the patch, as the kernel still doesn't boot, so I'll explain it in details in a hope someone else can justify the intrusive changes. arch_hooks.h: added prototypes for setup_early_printk() and early_printk(). setup.c: killed wrong setup_early_printk() prototype. Moved setup_early_printk() a bit earlier, as it was not "early enough" to cover the bug I was fighting with. early_printk.c: made it to start printing from the bottom of the screen, otherwise the messages interfere with the ones of the boot-loader, so you can't read them.
Signed-off-by: Stas Sergeev Cc: Andi Kleen Cc: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/early_printk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 6dffb498ccd..6fcdcb80b07 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -21,7 +21,7 @@ #define MAX_XPOS max_xpos static int max_ypos = 25, max_xpos = 80; -static int current_ypos = 1, current_xpos = 0; +static int current_ypos = 25, current_xpos = 0; static void early_vga_write(struct console *con, const char *str, unsigned n) { @@ -244,6 +244,7 @@ int __init setup_early_printk(char *opt) && SCREEN_INFO.orig_video_isVGA == 1) { max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; + current_ypos = max_ypos; early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); -- cgit v1.2.3 From 98e7d9b0529b6a348f2978a926806930a9902067 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:42 -0800 Subject: [PATCH] x86: start early_printk at sensible screen row Use boot info to start early_printk() at the current row on VGA console, as left by the boot loader. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Cc: Stas Sergeev Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/early_printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 6fcdcb80b07..484025f4f08 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -244,7 +244,7 @@ int __init setup_early_printk(char *opt) && SCREEN_INFO.orig_video_isVGA == 1) { max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; - current_ypos = max_ypos; + current_ypos = SCREEN_INFO.orig_y; early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); -- cgit v1.2.3 From ffa930ef55ae82c09e30b2a0c4ce5d7fdea041e2 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Mar 2006 02:59:43 -0800 Subject: [PATCH] x86: early_printk(): remove MAX_YPOS and MAX_XPOS macros Expand out these fairly pointless macros. 
Cc: Chuck Ebbert <76306.1226@compuserve.com> Cc: Stas Sergeev Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/early_printk.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 484025f4f08..a8a6aa70d69 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -17,9 +17,6 @@ #define VGABASE ((void __iomem *)0xffffffff800b8000UL) #endif -#define MAX_YPOS max_ypos -#define MAX_XPOS max_xpos - static int max_ypos = 25, max_xpos = 80; static int current_ypos = 25, current_xpos = 0; @@ -29,26 +26,26 @@ static void early_vga_write(struct console *con, const char *str, unsigned n) int i, k, j; while ((c = *str++) != '\0' && n-- > 0) { - if (current_ypos >= MAX_YPOS) { + if (current_ypos >= max_ypos) { /* scroll 1 line up */ - for (k = 1, j = 0; k < MAX_YPOS; k++, j++) { - for (i = 0; i < MAX_XPOS; i++) { - writew(readw(VGABASE + 2*(MAX_XPOS*k + i)), - VGABASE + 2*(MAX_XPOS*j + i)); + for (k = 1, j = 0; k < max_ypos; k++, j++) { + for (i = 0; i < max_xpos; i++) { + writew(readw(VGABASE+2*(max_xpos*k+i)), + VGABASE + 2*(max_xpos*j + i)); } } - for (i = 0; i < MAX_XPOS; i++) - writew(0x720, VGABASE + 2*(MAX_XPOS*j + i)); - current_ypos = MAX_YPOS-1; + for (i = 0; i < max_xpos; i++) + writew(0x720, VGABASE + 2*(max_xpos*j + i)); + current_ypos = max_ypos-1; } if (c == '\n') { current_xpos = 0; current_ypos++; } else if (c != '\r') { writew(((0x7 << 8) | (unsigned short) c), - VGABASE + 2*(MAX_XPOS*current_ypos + + VGABASE + 2*(max_xpos*current_ypos + current_xpos++)); - if (current_xpos >= MAX_XPOS) { + if (current_xpos >= max_xpos) { current_xpos = 0; current_ypos++; } -- cgit v1.2.3 From fc558a7496bfab3d29a68953b07a95883fdcfbb1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 Mar 2006 03:00:05 -0800 Subject: [PATCH] swsusp: finally solve mysqld problem This patch from Pavel moves userland freeze signals handling into more logical place. It now hits even with mysqld running. Signed-off-by: Rafael J. Wysocki Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/signal.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 5876df116c9..e5f5ce7909a 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c @@ -443,9 +443,6 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) if (!user_mode(regs)) return 1; - if (try_to_freeze()) - goto no_signal; - if (!oldset) oldset = ¤t->blocked; @@ -463,7 +460,6 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) return handle_signal(signr, &info, &ka, oldset, regs); } - no_signal: /* Did we come from a system call? */ if ((long)regs->orig_rax >= 0) { /* Restart the system call - no handlers present */ -- cgit v1.2.3 From 7a7d1cf95408863a657035701606b13644c9f55e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:35 -0800 Subject: [PATCH] sem2mutex: kprobes Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. 
Signed-off-by: Ingo Molnar Acked-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 8b866a8572c..14f0ced613b 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -222,9 +222,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); + mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); + mutex_unlock(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) -- cgit v1.2.3 From 394e3902c55e667945f6f1c2bdbc59842cce70f7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Mar 2006 03:01:05 -0800 Subject: [PATCH] more for_each_cpu() conversions When we stop allocating percpu memory for not-possible CPUs we must not touch the percpu data for not-possible CPUs at all. The correct way of doing this is to test cpu_possible() or to use for_each_cpu(). This patch is a kernel-wide sweep of all instances of NR_CPUS. I found very few instances of this bug, if any. But the patch converts lots of open-coded test to use the preferred helper macros. Cc: Mikael Starvik Cc: David Howells Acked-by: Kyle McMartin Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. Miller" Cc: William Lee Irwin III Cc: Andi Kleen Cc: Christian Zankel Cc: Philippe Elie Cc: Nathan Scott Cc: Jens Axboe Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/irq.c | 21 ++++++++------------- arch/x86_64/kernel/nmi.c | 4 +--- 2 files changed, 9 insertions(+), 16 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 30d2a1e545f..d8bd0b345b1 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -38,9 +38,8 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; jtypename); @@ -68,15 +65,13 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); + for_each_online_cpu(j) + seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); seq_putc(p, '\n'); #endif seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 5bf17e41cd2..66c009e10ba 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -162,9 +162,7 @@ int __init check_nmi_watchdog (void) local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) - continue; + for_each_online_cpu(cpu) { if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { endflag = 1; printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", -- cgit v1.2.3 From 92c05fc1a32e5ccef5e0e8201f32dcdab041524c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 23 Mar 2006 14:35:12 -0800 Subject: [PATCH] PCI: Give PCI config access initialization a 
defined ordering I moved it to a separate function which is safer. This avoids problems with the linker reordering them and the less useful PCI config space access methods taking priority over the better ones. Fixes some problems with broken MMCONFIG Cc: Dave Hansen Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- arch/x86_64/pci/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index a8f75a2a0f6..a3f6ad57017 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile @@ -7,7 +7,7 @@ CFLAGS += -Iarch/i386/pci obj-y := i386.o obj-$(CONFIG_PCI_DIRECT)+= direct.o -obj-y += fixup.o +obj-y += fixup.o init.o obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o common.o # mmconfig has a 64bit special @@ -22,3 +22,4 @@ irq-y += ../../i386/pci/irq.o common-y += ../../i386/pci/common.o fixup-y += ../../i386/pci/fixup.o i386-y += ../../i386/pci/i386.o +init-y += ../../i386/pci/init.o -- cgit v1.2.3 From 3d1712c91df01d2573b934e972e231e8edb102c7 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Fri, 24 Mar 2006 03:15:11 -0800 Subject: [PATCH] x86_64: {set,clear,test}_bit() related cleanup and pci_mmcfg_init() fix While working on these patch set, I found several possible cleanup on x86-64 and ia64. akpm: I stole this from Andi's queue. Not only does it clean up bitops. It also unrelatedly changes the prototype of pci_mmcfg_init() and removes its arch_initcall(). It seems that the wrong two patches got joined together, but this is the one which has been tested. This patch fixes the current x86_64 build error (the pci_mmcfg_init() declaration in arch/i386/pci/pci.h disagrees with the definition in arch/x86_64/pci/mmconfig.c) This also means that x86_64's pci_mmcfg_init() gets called in the same (new) manner as x86's: from arch/i386/pci/init.c:pci_access_init(), rather than via initcall. The bitops cleanups came along for free. All this worked OK in -mm testing (since 2.6.16-rc4-mm1) because x86_64 was tested with both patches applied. 
Signed-off-by: Akinobu Mita Signed-off-by: Andi Kleen Cc: Con Kolivas Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mce.c | 3 +-- arch/x86_64/kernel/setup.c | 3 +-- arch/x86_64/pci/mmconfig.c | 18 +++++++----------- 3 files changed, 9 insertions(+), 15 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index b8b9529fa89..04282ef9fbd 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -139,8 +139,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start) static int mce_available(struct cpuinfo_x86 *c) { - return test_bit(X86_FEATURE_MCE, &c->x86_capability) && - test_bit(X86_FEATURE_MCA, &c->x86_capability); + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); } static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index aa55e3cec66..f227d0c23dc 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -1344,8 +1344,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) { int i; for ( i = 0 ; i < 32*NCAPINTS ; i++ ) - if ( test_bit(i, &c->x86_capability) && - x86_cap_flags[i] != NULL ) + if (cpu_has(c, i) && x86_cap_flags[i] != NULL) seq_printf(m, " %s", x86_cap_flags[i]); } diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index 18f371fe37f..e616500207e 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -55,7 +55,7 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus) static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { char __iomem *addr; - if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) + if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), fallback_slots)) return NULL; addr = get_virt(seg, bus); if (!addr) @@ -143,29 +143,29 @@ static __init void unreachable_devices(void) continue; addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); if (addr == NULL|| readl(addr) != val1) { - set_bit(i, &fallback_slots); + set_bit(i, fallback_slots); } } } -static int __init pci_mmcfg_init(void) +void __init pci_mmcfg_init(void) { int i; if ((pci_probe & PCI_PROBE_MMCONF) == 0) - return 0; + return; acpi_table_parse(ACPI_MCFG, acpi_parse_mcfg); if ((pci_mmcfg_config_num == 0) || (pci_mmcfg_config == NULL) || (pci_mmcfg_config[0].base_address == 0)) - return 0; + return; /* RED-PEN i386 doesn't do _nocache right now */ pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); if (pci_mmcfg_virt == NULL) { printk("PCI: Can not allocate memory for mmconfig structures\n"); - return 0; + return; } for (i = 0; i < pci_mmcfg_config_num; ++i) { pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; @@ -173,7 +173,7 @@ static int __init pci_mmcfg_init(void) if (!pci_mmcfg_virt[i].virt) { printk("PCI: Cannot map mmconfig aperture for segment %d\n", pci_mmcfg_config[i].pci_segment_group_number); - return 0; + return; } printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address); } @@ -182,8 +182,4 @@ static int __init pci_mmcfg_init(void) raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; - - return 0; } - -arch_initcall(pci_mmcfg_init); -- cgit v1.2.3 From cdb0452789d365695b5b173542af9c7e3d24f185 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 24 Mar 2006 03:15:57 -0800 Subject: [PATCH] kill include/linux/platform.h, default_idle() cleanup 
include/linux/platform.h contained nothing that was actually used except the default_idle() prototype, and is therefore removed by this patch. This patch does the following with the platform specific default_idle() functions on different architectures: - remove the unused function: - parisc - sparc64 - make the needlessly global function static: - arm - h8300 - m68k - m68knommu - s390 - v850 - x86_64 - add a prototype in asm/system.h: - cris - i386 - ia64 Signed-off-by: Adrian Bunk Acked-by: Patrick Mochel Acked-by: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 22a05dec81a..80a8f307917 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -114,7 +114,7 @@ void exit_idle(void) * We use this if we don't have any better * idle routine.. */ -void default_idle(void) +static void default_idle(void) { local_irq_enable(); -- cgit v1.2.3 From a94ddf3ab866df9c187c9d8b3870b7ec38c6f7ad Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 24 Mar 2006 03:18:05 -0800 Subject: [PATCH] early_printk: cleanup trailiing whitespace Remove all trailing tabs and spaces. No other changes. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/early_printk.c | 90 +++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index a8a6aa70d69..13af920b659 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c @@ -60,7 +60,7 @@ static struct console early_vga_console = { .index = -1, }; -/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ +/* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ static int early_serial_base = 0x3f8; /* ttyS0 */ @@ -80,30 +80,30 @@ static int early_serial_base = 0x3f8; /* ttyS0 */ #define DLL 0 /* Divisor Latch Low */ #define DLH 1 /* Divisor latch High */ -static int early_serial_putc(unsigned char ch) -{ - unsigned timeout = 0xffff; - while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) +static int early_serial_putc(unsigned char ch) +{ + unsigned timeout = 0xffff; + while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout) cpu_relax(); outb(ch, early_serial_base + TXR); return timeout ? 
0 : -1; -} +} static void early_serial_write(struct console *con, const char *s, unsigned n) { - while (*s && n-- > 0) { - early_serial_putc(*s); - if (*s == '\n') - early_serial_putc('\r'); - s++; - } -} + while (*s && n-- > 0) { + early_serial_putc(*s); + if (*s == '\n') + early_serial_putc('\r'); + s++; + } +} #define DEFAULT_BAUD 9600 static __init void early_serial_init(char *s) { - unsigned char c; + unsigned char c; unsigned divisor; unsigned baud = DEFAULT_BAUD; char *e; @@ -112,7 +112,7 @@ static __init void early_serial_init(char *s) ++s; if (*s) { - unsigned port; + unsigned port; if (!strncmp(s,"0x",2)) { early_serial_base = simple_strtoul(s, &e, 16); } else { @@ -136,16 +136,16 @@ static __init void early_serial_init(char *s) outb(0x3, early_serial_base + MCR); /* DTR + RTS */ if (*s) { - baud = simple_strtoul(s, &e, 0); - if (baud == 0 || s == e) + baud = simple_strtoul(s, &e, 0); + if (baud == 0 || s == e) baud = DEFAULT_BAUD; - } - - divisor = 115200 / baud; - c = inb(early_serial_base + LCR); - outb(c | DLAB, early_serial_base + LCR); - outb(divisor & 0xff, early_serial_base + DLL); - outb((divisor >> 8) & 0xff, early_serial_base + DLH); + } + + divisor = 115200 / baud; + c = inb(early_serial_base + LCR); + outb(c | DLAB, early_serial_base + LCR); + outb(divisor & 0xff, early_serial_base + DLL); + outb((divisor >> 8) & 0xff, early_serial_base + DLH); outb(c & ~DLAB, early_serial_base + LCR); } @@ -202,68 +202,68 @@ struct console *early_console = &early_vga_console; static int early_console_initialized = 0; void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; +{ + char buf[512]; + int n; va_list ap; - va_start(ap,fmt); + va_start(ap,fmt); n = vscnprintf(buf,512,fmt,ap); early_console->write(early_console,buf,n); - va_end(ap); -} + va_end(ap); +} static int __initdata keep_early; -int __init setup_early_printk(char *opt) -{ +int __init setup_early_printk(char *opt) +{ char *space; - char buf[256]; + char buf[256]; if (early_console_initialized) return -1; - strlcpy(buf,opt,sizeof(buf)); - space = strchr(buf, ' '); + strlcpy(buf,opt,sizeof(buf)); + space = strchr(buf, ' '); if (space) - *space = 0; + *space = 0; if (strstr(buf,"keep")) - keep_early = 1; + keep_early = 1; - if (!strncmp(buf, "serial", 6)) { + if (!strncmp(buf, "serial", 6)) { early_serial_init(buf + 6); early_console = &early_serial_console; - } else if (!strncmp(buf, "ttyS", 4)) { + } else if (!strncmp(buf, "ttyS", 4)) { early_serial_init(buf); - early_console = &early_serial_console; + early_console = &early_serial_console; } else if (!strncmp(buf, "vga", 3) && SCREEN_INFO.orig_video_isVGA == 1) { max_xpos = SCREEN_INFO.orig_video_cols; max_ypos = SCREEN_INFO.orig_video_lines; current_ypos = SCREEN_INFO.orig_y; - early_console = &early_vga_console; + early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { simnow_init(buf + 6); early_console = &simnow_console; keep_early = 1; } early_console_initialized = 1; - register_console(early_console); + register_console(early_console); return 0; } void __init disable_early_printk(void) -{ +{ if (!early_console_initialized || !early_console) return; if (!keep_early) { printk("disabling early console\n"); unregister_console(early_console); early_console_initialized = 0; - } else { + } else { printk("keeping early console\n"); } -} +} __setup("earlyprintk=", setup_early_printk); -- cgit v1.2.3 From c08b8a49100715b20e6f7c997e992428b5e06078 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Mar 2006 03:06:33 -0800 Subject: 
[PATCH] sys_alarm() unsigned signed conversion fixup alarm() calls the kernel with an unsigend int timeout in seconds. The value is stored in the tv_sec field of a struct timeval to setup the itimer. The tv_sec field of struct timeval is of type long, which causes the tv_sec value to be negative on 32 bit machines if seconds > INT_MAX. Before the hrtimer merge (pre 2.6.16) such a negative value was converted to the maximum jiffies timeout by the timeval_to_jiffies conversion. It's not clear whether this was intended or just happened to be done by the timeval_to_jiffies code. hrtimers expect a timeval in canonical form and treat a negative timeout as already expired. This breaks the legitimate usage of alarm() with a timeout value > INT_MAX seconds. For 32 bit machines it is therefor necessary to limit the internal seconds value to avoid API breakage. Instead of doing this in all implementations of sys_alarm the duplicated sys_alarm code is moved into a common function in itimer.c Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/sys_ia32.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 2bc55af9541..2b2d029f477 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -430,24 +430,12 @@ put_tv32(struct compat_timeval __user *o, struct timeval *i) return err; } -extern int do_setitimer(int which, struct itimerval *, struct itimerval *); +extern unsigned int alarm_setitimer(unsigned int seconds); asmlinkage long sys32_alarm(unsigned int seconds) { - struct itimerval it_new, it_old; - unsigned int oldalarm; - - it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0; - it_new.it_value.tv_sec = seconds; - it_new.it_value.tv_usec = 0; - do_setitimer(ITIMER_REAL, &it_new, &it_old); - oldalarm = it_old.it_value.tv_sec; - /* ehhh.. We can't return 0 if we have an alarm pending.. */ - /* And we'd better return too much than too little anyway */ - if (it_old.it_value.tv_usec) - oldalarm++; - return oldalarm; + return alarm_setitimer(seconds); } /* Translations due to time_t size differences. 
Which affects all -- cgit v1.2.3 From af8fc1f528fd744e0b92cdb981eec0c8841f6f61 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:00 +0100 Subject: [PATCH] x86_64: Update defconfig Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/defconfig | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index ce4de61ed85..566ecc97ee5 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.16-rc3-git9 -# Sat Feb 18 00:27:03 2006 +# Linux kernel version: 2.6.16-git9 +# Sat Mar 25 15:18:40 2006 # CONFIG_X86_64=y CONFIG_64BIT=y @@ -38,6 +38,7 @@ CONFIG_SYSCTL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y # CONFIG_CPUSETS is not set +# CONFIG_RELAY is not set CONFIG_INITRAMFS_SOURCE="" CONFIG_UID16=y CONFIG_VM86=y @@ -79,6 +80,7 @@ CONFIG_STOP_MACHINE=y # Block layer # CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set # # IO Schedulers @@ -139,7 +141,6 @@ CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y CONFIG_NR_CPUS=32 CONFIG_HOTPLUG_CPU=y CONFIG_HPET_TIMER=y -CONFIG_X86_PM_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_GART_IOMMU=y CONFIG_SWIOTLB=y @@ -148,12 +149,13 @@ CONFIG_X86_MCE_INTEL=y CONFIG_X86_MCE_AMD=y # CONFIG_KEXEC is not set # CONFIG_CRASH_DUMP is not set -CONFIG_PHYSICAL_START=0x100000 +CONFIG_PHYSICAL_START=0x200000 CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set CONFIG_HZ_250=y # CONFIG_HZ_1000 is not set CONFIG_HZ=250 +# CONFIG_REORDER is not set CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y CONFIG_ISA_DMA_API=y @@ -189,12 +191,14 @@ CONFIG_ACPI_NUMA=y # CONFIG_ACPI_ASUS is not set # CONFIG_ACPI_IBM is not set CONFIG_ACPI_TOSHIBA=y -CONFIG_ACPI_BLACKLIST_YEAR=2001 +CONFIG_ACPI_BLACKLIST_YEAR=0 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y CONFIG_ACPI_SYSTEM=y +CONFIG_X86_PM_TIMER=y CONFIG_ACPI_CONTAINER=y +CONFIG_ACPI_HOTPLUG_MEMORY=y # # CPU Frequency scaling @@ -232,10 +236,8 @@ CONFIG_X86_ACPI_CPUFREQ_PROC_INTF=y CONFIG_PCI=y CONFIG_PCI_DIRECT=y CONFIG_PCI_MMCONFIG=y -CONFIG_UNORDERED_IO=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_MSI=y -# CONFIG_PCI_LEGACY_PROC is not set # CONFIG_PCI_DEBUG is not set # @@ -294,6 +296,7 @@ CONFIG_INET_TCP_DIAG=y CONFIG_TCP_CONG_BIC=y CONFIG_IPV6=y # CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set # CONFIG_INET6_AH is not set # CONFIG_INET6_ESP is not set # CONFIG_INET6_IPCOMP is not set @@ -701,6 +704,7 @@ CONFIG_S2IO=m # Wireless LAN (non-hamradio) # # CONFIG_NET_RADIO is not set +# CONFIG_NET_WIRELESS_RTNETLINK is not set # # Wan interfaces @@ -861,6 +865,8 @@ CONFIG_RTC=y CONFIG_AGP=y CONFIG_AGP_AMD64=y CONFIG_AGP_INTEL=y +# CONFIG_AGP_SIS is not set +# CONFIG_AGP_VIA is not set # CONFIG_DRM is not set # CONFIG_MWAVE is not set CONFIG_RAW_DRIVER=y @@ -906,10 +912,6 @@ CONFIG_HWMON=y # # CONFIG_IBM_ASM is not set -# -# Multimedia Capabilities Port drivers -# - # # Multimedia devices # @@ -974,6 +976,7 @@ CONFIG_SOUND_ICH=y # CONFIG_USB_ARCH_HAS_HCD=y CONFIG_USB_ARCH_HAS_OHCI=y +CONFIG_USB_ARCH_HAS_EHCI=y CONFIG_USB=y # CONFIG_USB_DEBUG is not set @@ -1002,7 +1005,6 @@ CONFIG_USB_UHCI_HCD=y # # USB Device Class drivers # -# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set # CONFIG_USB_ACM is not set CONFIG_USB_PRINTER=y @@ -1121,11 +1123,7 @@ CONFIG_USB_MON=y # CONFIG_INFINIBAND is not set # -# SN Devices -# - -# -# EDAC - error detection and reporting (RAS) +# EDAC - error 
detection and reporting (RAS) (EXPERIMENTAL) # # CONFIG_EDAC is not set @@ -1198,7 +1196,6 @@ CONFIG_TMPFS=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_RAMFS=y -CONFIG_RELAYFS_FS=y # CONFIG_CONFIGFS_FS is not set # @@ -1321,6 +1318,7 @@ CONFIG_DETECT_SOFTLOCKUP=y CONFIG_DEBUG_FS=y # CONFIG_DEBUG_VM is not set # CONFIG_FRAME_POINTER is not set +# CONFIG_UNWIND_INFO is not set # CONFIG_FORCED_INLINING is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_DEBUG_RODATA is not set -- cgit v1.2.3 From c7ea1a96ec007ba761c9d5d11d788cd8fdd5c8b6 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:29:03 +0100 Subject: [PATCH] x86_64: Use correct PUD for memory hotadd Memory >39bits has a different PUD. Cc: "Tolentino, Matthew E" Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 40ed13d263c..675a4569133 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -344,7 +344,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) pud_t *pud; if (after_bootmem) - pud = pud_offset_k(pgd, __PAGE_OFFSET); + pud = pud_offset_k(pgd, start & PGDIR_MASK); else pud = alloc_low_page(&map, &pud_phys); -- cgit v1.2.3 From dca99a38bccceda9e079d4c95abefbd9028605fe Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:06 +0100 Subject: [PATCH] x86-64: Use -mtune=generic for generic kernels The upcomming gcc 4.2 got a new option -mtune=generic to tune code for both common AMD and Intel CPUs. Use this option when available for generic kernels. On x86-64 it is used with CONFIG_GENERIC_CPU. On i386 it is enabled with CONFIG_X86_GENERIC. It won't affect the base line CPU support in any ways and also not the minimum supported CPU. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 7405dfd6522..f5e48ba9bec 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -29,6 +29,7 @@ CHECKFLAGS += -D__x86_64__ -m64 cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) +cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) CFLAGS += $(cflags-y) CFLAGS += -m64 -- cgit v1.2.3 From abe059e7590fd4475285f2d037c70dec712a4572 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:12 +0100 Subject: [PATCH] x86_64: Rename struct node in x86-64 NUMA code to struct bootnode It conflicts with the struct node in node.h Actually the x86-64 version was there first, but .. 
Suggested by Jan Beulich Cc: jbeulich@novell.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/mm/k8topology.c | 2 +- arch/x86_64/mm/numa.c | 6 +++--- arch/x86_64/mm/srat.c | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index dd60e71fdba..7c45c2d2b8b 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c @@ -43,7 +43,7 @@ static __init int find_northbridge(void) int __init k8_scan_nodes(unsigned long start, unsigned long end) { unsigned long prevbase; - struct node nodes[8]; + struct bootnode nodes[8]; int nodeid, i, nb; unsigned char nodeids[8]; int found = 0; diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 22e51beee8d..e4b62753a19 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -47,7 +47,7 @@ int numa_off __initdata; * -1 if node overlap or lost ram (shift too big) */ static int __init -populate_memnodemap(const struct node *nodes, int numnodes, int shift) +populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift) { int i; int res = -1; @@ -74,7 +74,7 @@ populate_memnodemap(const struct node *nodes, int numnodes, int shift) return res; } -int __init compute_hash_shift(struct node *nodes, int numnodes) +int __init compute_hash_shift(struct bootnode *nodes, int numnodes) { int shift = 20; @@ -191,7 +191,7 @@ int numa_fake __initdata = 0; static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) { int i; - struct node nodes[MAX_NUMNODES]; + struct bootnode nodes[MAX_NUMNODES]; unsigned long sz = ((end_pfn - start_pfn)<start == nd->end) continue; if (nd->end > start && nd->start < end) @@ -70,7 +70,7 @@ static __init int conflicting_nodes(unsigned long start, unsigned long end) static __init void cutoff_node(int i, unsigned long start, unsigned long end) { - struct node *nd = &nodes[i]; + struct bootnode *nd = &nodes[i]; if (nd->start < start) { nd->start = start; if (nd->end < nd->start) @@ -159,7 +159,7 @@ acpi_numa_processor_affinity_init(struct acpi_table_processor_affinity *pa) void __init acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) { - struct node *nd; + struct bootnode *nd; unsigned long start, end; int node, pxm; int i; -- cgit v1.2.3 From 3240114d23793384fa9c3c53f391f672d1c22d2e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:29:16 +0100 Subject: [PATCH] x86_64: cpu_pda array to macro followup correction Fix one place where the previous change of cpu_pda from being an array to being a macro was not properly carried out. 
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 70f1bb808a2..349d55c2ac4 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -248,7 +248,7 @@ void __cpuinit cpu_init (void) switch (v + 1) { #if DEBUG_STKSZ > EXCEPTION_STKSZ case DEBUG_STACK: - cpu_pda[cpu].debugstack = (unsigned long)estacks; + cpu_pda(cpu)->debugstack = (unsigned long)estacks; estacks += DEBUG_STKSZ; break; #endif -- cgit v1.2.3 From 893efca9279d7a78bae6532de0524e53370819d5 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:29:19 +0100 Subject: [PATCH] x86_64: disallow multi-byte hardware execution breakpoints While AMD formally permits multi-byte execution breakpoints, Intel disallows 8-byte as much as 2- or 4-byte ones. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/ptrace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index 53205622351..d44b2c1e63a 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c @@ -420,9 +420,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case offsetof(struct user, u_debugreg[7]): /* See arch/i386/kernel/ptrace.c for an explanation of * this awkward check.*/ - data &= ~DR_CONTROL_RESERVED; - for(i=0; i<4; i++) - if ((0x5454 >> ((data >> (16 + 4*i)) & 0xf)) & 1) + data &= ~DR_CONTROL_RESERVED; + for(i=0; i<4; i++) + if ((0x5554 >> ((data >> (16 + 4*i)) & 0xf)) & 1) break; if (i == 4) { child->thread.debugreg7 = data; -- cgit v1.2.3 From 2b514e74f4e59e3b8e54891580fef2c9ff6c7bd0 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:29:22 +0100 Subject: [PATCH] x86_64: eliminate set_debug() For consistency and to have only a single place of definition, replace set_debug() uses with set_debugreg(), and eliminate the definition of the former. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/process.c | 2 +- arch/x86_64/kernel/setup64.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 80a8f307917..0cb93abbf56 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -508,7 +508,7 @@ out: /* * This special macro can be used to load a debugging register */ -#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r) +#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r) /* * switch_to(x,y) should switch tasks from x to y. 
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 349d55c2ac4..044587812b0 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -281,12 +281,12 @@ void __cpuinit cpu_init (void) * Clear all 6 debug registers: */ - set_debug(0UL, 0); - set_debug(0UL, 1); - set_debug(0UL, 2); - set_debug(0UL, 3); - set_debug(0UL, 6); - set_debug(0UL, 7); + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); + set_debugreg(0UL, 6); + set_debugreg(0UL, 7); fpu_init(); } -- cgit v1.2.3 From 45948d7720e8bd0104696087c6305fdd90e3b298 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:29:25 +0100 Subject: [PATCH] x86_64: save FPU context slightly later Touching of the floating point state in a kernel debugger must be NMI-safe, specifically math_state_restore() must be able to deal with being called out of an NMI context. In order to do that reliably, the context switch code must take care to not leave a window open where the current task's TS_USEDFPU flag and CR0.TS could get out of sync. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/process.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 0cb93abbf56..81111835722 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -527,8 +527,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(init_tss, cpu); - unlazy_fpu(prev_p); - /* * Reload esp0, LDT and the page table pointer: */ @@ -586,11 +584,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) } /* - * Switch the PDA context. + * Switch the PDA and FPU contexts. */ prev->userrsp = read_pda(oldrsp); write_pda(oldrsp, next->userrsp); write_pda(pcurrent, next_p); + /* This must be here to ensure both math_state_restore() and + kernel_fpu_begin() work consistently. */ + unlazy_fpu(prev_p); write_pda(kernelstack, task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); -- cgit v1.2.3 From 51f62e186b78f8743246a349b09be370c8735479 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Mar 2006 16:29:28 +0100 Subject: [PATCH] x86_64: cleanup allocating logical cpu numbers in x86_64 Minor cleanup to lend better for physical CPU hotplug. Earlier way of using num_processors as index doesnt fit if CPUs come and go. This makes the code little bit better to read, and helps physical hotplug use the same functions as boot. Reserving CPU0 for BSP is too late to be done in smp_prepare_boot_cpu(). 
Since logical assignments from MADT is already done via setup_arch()->acpi_boot_init()->parse lapic Signed-off-by: Ashok Raj Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/mpparse.c | 19 ++++++++----------- arch/x86_64/kernel/setup.c | 6 ++++++ 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 9013a90b5c2..b17cf3eba35 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -106,11 +106,11 @@ static int __init mpf_checksum(unsigned char *mp, int len) return sum & 0xFF; } -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info (struct mpc_config_processor *m) { int cpu; unsigned char ver; - static int found_bsp=0; + cpumask_t tmp_map; if (!(m->mpc_cpuflag & CPU_ENABLED)) { disabled_cpus++; @@ -133,8 +133,10 @@ static void __init MP_processor_info (struct mpc_config_processor *m) return; } - cpu = num_processors++; - + num_processors++; + cpus_complement(tmp_map, cpu_present_map); + cpu = first_cpu(tmp_map); + #if MAX_APICS < 255 if ((int)m->mpc_apicid > MAX_APICS) { printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", @@ -160,12 +162,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) * entry is BSP, and so on. */ cpu = 0; - - bios_cpu_apicid[0] = m->mpc_apicid; - x86_cpu_to_apicid[0] = m->mpc_apicid; - found_bsp = 1; - } else - cpu = num_processors - found_bsp; + } bios_cpu_apicid[cpu] = m->mpc_apicid; x86_cpu_to_apicid[cpu] = m->mpc_apicid; @@ -691,7 +688,7 @@ void __init mp_register_lapic_address ( } -void __init mp_register_lapic ( +void __cpuinit mp_register_lapic ( u8 id, u8 enabled) { diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index f227d0c23dc..50cd2bbd75a 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -708,6 +708,12 @@ void __init setup_arch(char **cmdline_p) check_ioapic(); + /* + * set this early, so we dont allocate cpu0 + * if MADT list doesnt list BSP first + * mpparse.c/MP_processor_info() allocates logical cpu numbers. + */ + cpu_set(0, cpu_present_map); #ifdef CONFIG_ACPI /* * Read APIC and some other early information from ACPI tables. -- cgit v1.2.3 From 9ede6b0945223d1e353501f41b988a3db56b4e27 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:31 +0100 Subject: [PATCH] x86_64: Don't need to read PIT in timer handler when PM timer is used The PM timer path through main_timer_handler doesn't need the delay variable because it figures it out in a different way. Don't try to read it from the PIT. With stopped PIT timer it is even useless. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index ee5ce3d3cbc..86039baaaf1 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -354,7 +354,7 @@ void main_timer_handler(struct pt_regs *regs) { static unsigned long rtc_update = 0; unsigned long tsc; - int delay, offset = 0, lost = 0; + int delay = 0, offset = 0, lost = 0; /* * Here we are in the timer irq handler. 
We have irqs locally disabled (so we @@ -375,7 +375,7 @@ void main_timer_handler(struct pt_regs *regs) */ offset = hpet_readl(HPET_T0_CMP) - hpet_tick; delay = hpet_readl(HPET_COUNTER) - offset; - } else { + } else if (!pmtmr_ioport) { spin_lock(&i8253_lock); outb_p(0x00, 0x43); delay = inb_p(0x40); -- cgit v1.2.3 From 77d910f557c44db0fa72ee856c41cdd56972cfca Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:34 +0100 Subject: [PATCH] x86_64: Report local APIC ID when initializing CPU Makes some debugging easier. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 50cd2bbd75a..6ca07eaa26d 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -878,8 +878,8 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) } numa_set_node(cpu, node); - printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", - cpu, c->x86_max_cores, node, cpu_core_id[cpu]); + printk(KERN_INFO "CPU %d/%x(%d) -> Node %d -> Core %d\n", + cpu, apicid, c->x86_max_cores, node, cpu_core_id[cpu]); #endif #endif } -- cgit v1.2.3 From 85f9eebccde51e24896f31383f5b70776362e1a6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:37 +0100 Subject: [PATCH] x86_64: Use cpu_relax in poll loop in GART IOMMU The code waits for the GART to clear the TLB flush bit. Use cpu_relax in this time to allow hypervisors to yield the CPU in this time. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pci-gart.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 0c3f052ba6c..ebd125e968b 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -148,9 +148,12 @@ static void flush_gart(struct device *dev) if (!northbridges[i]) continue; /* Make sure the hardware actually executed the flush. */ - do { + for (;;) { pci_read_config_dword(northbridges[i], 0x9c, &w); - } while (w & 1); + if (!(w & 1)) + break; + cpu_relax(); + } } if (!flushed) printk("nothing to flush?\n"); -- cgit v1.2.3 From 8c914cb704a11460eec7ed2a572bb5e9bd513d24 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:29:40 +0100 Subject: [PATCH] x86_64: actively synchronize vmalloc area when registering certain callbacks While the modular aspect of the respective i386 patch doesn't apply to x86-64 (as the top level page directory entry is shared between modules and the base kernel), handlers registered with register_die_notifier() are still under similar constraints for touching ioremap()ed or vmalloc()ed memory. The likelihood of this problem becoming visible is of course significantly lower, as the assigned virtual addresses would have to cross a 2**39 byte boundary. This is because the callback gets invoked (a) in the page fault path before the top level page table propagation gets carried out (hence a fault to propagate the top level page table entry/entries mapping to module's code/data would nest infinitly) and (b) in the NMI path, where nested faults must absolutely not happen, since otherwise the IRET from the nested fault re-enables NMIs, potentially resulting in nested NMI occurences. 
Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/nmi.c | 1 + arch/x86_64/kernel/traps.c | 2 ++ arch/x86_64/mm/fault.c | 73 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 63 insertions(+), 13 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 66c009e10ba..d9e4067faf0 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -534,6 +534,7 @@ asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { + vmalloc_sync_all(); rcu_assign_pointer(nmi_callback, callback); } diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 28d50dc540e..b25bc904d42 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -78,6 +78,8 @@ int register_die_notifier(struct notifier_block *nb) { int err = 0; unsigned long flags; + + vmalloc_sync_all(); spin_lock_irqsave(&die_notifier_lock, flags); err = notifier_chain_register(&die_chain, nb); spin_unlock_irqrestore(&die_notifier_lock, flags); diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 2e7c3c8ffe0..de91e17daf6 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -264,6 +264,8 @@ static int vmalloc_fault(unsigned long address) return -1; if (pgd_none(*pgd)) set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); /* Below here mismatches are bugs because these lower tables are shared */ @@ -314,16 +316,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, /* get the address */ __asm__("movq %%cr2,%0":"=r" (address)); - if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, - SIGSEGV) == NOTIFY_STOP) - return; - - if (likely(regs->eflags & X86_EFLAGS_IF)) - local_irq_enable(); - - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); tsk = current; mm = tsk->mm; @@ -351,10 +343,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, */ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && ((address >= VMALLOC_START && address < VMALLOC_END))) { - if (vmalloc_fault(address) < 0) - goto bad_area_nosemaphore; - return; + if (vmalloc_fault(address) >= 0) + return; } + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + SIGSEGV) == NOTIFY_STOP) + return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. @@ -362,6 +356,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, goto bad_area_nosemaphore; } + if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, + SIGSEGV) == NOTIFY_STOP) + return; + + if (likely(regs->eflags & X86_EFLAGS_IF)) + local_irq_enable(); + + if (unlikely(page_fault_trace)) + printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", + regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); + if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -571,6 +576,48 @@ do_sigbus: return; } +DEFINE_SPINLOCK(pgd_lock); +struct page *pgd_list; + +void vmalloc_sync_all(void) +{ + /* Note that races in the updates of insync and start aren't + problematic: + insync can only get set bits added, and updates to start are only + improving performance (without affecting correctness if undone). 
*/ + static DECLARE_BITMAP(insync, PTRS_PER_PGD); + static unsigned long start = VMALLOC_START & PGDIR_MASK; + unsigned long address; + + for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { + if (!test_bit(pgd_index(address), insync)) { + const pgd_t *pgd_ref = pgd_offset_k(address); + struct page *page; + + if (pgd_none(*pgd_ref)) + continue; + spin_lock(&pgd_lock); + for (page = pgd_list; page; + page = (struct page *)page->index) { + pgd_t *pgd; + pgd = (pgd_t *)page_address(page) + pgd_index(address); + if (pgd_none(*pgd)) + set_pgd(pgd, *pgd_ref); + else + BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref)); + } + spin_unlock(&pgd_lock); + set_bit(pgd_index(address), insync); + } + if (address == start) + start = address + PGDIR_SIZE; + } + /* Check that there is no need to do the same for the modules area. */ + BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); + BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == + (__START_KERNEL & PGDIR_MASK))); +} + static int __init enable_pagefaulttrace(char *str) { page_fault_trace = 1; -- cgit v1.2.3 From 86ebcea899ff01274c1e8e15bf1d1f1cf5fac471 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:29:43 +0100 Subject: [PATCH] x86_64: remove dead do_softirq_thunk Appearantly a left-over... Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/x8664_ksyms.c | 3 --- arch/x86_64/lib/thunk.S | 1 - 2 files changed, 4 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index c9dc7e46731..365e5fbae9a 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -151,9 +151,6 @@ EXPORT_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(smp_num_siblings); #endif -extern void do_softirq_thunk(void); -EXPORT_SYMBOL(do_softirq_thunk); - #ifdef CONFIG_BUG EXPORT_SYMBOL(out_of_line_bug); #endif diff --git a/arch/x86_64/lib/thunk.S b/arch/x86_64/lib/thunk.S index acc1e2ca7ed..e49af0032e9 100644 --- a/arch/x86_64/lib/thunk.S +++ b/arch/x86_64/lib/thunk.S @@ -42,7 +42,6 @@ thunk rwsem_wake_thunk,rwsem_wake thunk rwsem_downgrade_thunk,rwsem_downgrade_wake #endif - thunk do_softirq_thunk,do_softirq thunk __down_failed,__down thunk_retrax __down_failed_interruptible,__down_interruptible -- cgit v1.2.3 From 681558fdb5848f0a6dc248108f0f7323f7380857 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:46 +0100 Subject: [PATCH] x86_64: Check that early arguments are words on their own We've always had the problem that arguments only did a prefix match, which resulted e.g. in noapic and noapictimer getting confused. Fix the early argument parsing code to always check that arguments are whole words (except for those that take additional arguments of course) I factored out the checking code for that while also makes the code easier to maintain. 
Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 47 ++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 6ca07eaa26d..01e2de4d254 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -270,6 +271,13 @@ static void __init probe_roms(void) } } +/* Check for full argument with no trailing characters */ +static int fullarg(char *p, char *arg) +{ + int l = strlen(arg); + return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l])); +} + static __init void parse_cmdline_early (char ** cmdline_p) { char c = ' ', *to = command_line, *from = COMMAND_LINE; @@ -293,10 +301,10 @@ static __init void parse_cmdline_early (char ** cmdline_p) #endif #ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter init */ - if (!memcmp(from, "acpi=off", 8)) + if (fullarg(from,"acpi=off")) disable_acpi(); - if (!memcmp(from, "acpi=force", 10)) { + if (fullarg(from, "acpi=force")) { /* add later when we do DMI horrors: */ acpi_force = 1; acpi_disabled = 0; @@ -304,52 +312,47 @@ static __init void parse_cmdline_early (char ** cmdline_p) /* acpi=ht just means: do ACPI MADT parsing at bootup, but don't enable the full ACPI interpreter */ - if (!memcmp(from, "acpi=ht", 7)) { + if (fullarg(from, "acpi=ht")) { if (!acpi_force) disable_acpi(); acpi_ht = 1; } - else if (!memcmp(from, "pci=noacpi", 10)) + else if (fullarg(from, "pci=noacpi")) acpi_disable_pci(); - else if (!memcmp(from, "acpi=noirq", 10)) + else if (fullarg(from, "acpi=noirq")) acpi_noirq_set(); - else if (!memcmp(from, "acpi_sci=edge", 13)) + else if (fullarg(from, "acpi_sci=edge")) acpi_sci_flags.trigger = 1; - else if (!memcmp(from, "acpi_sci=level", 14)) + else if (fullarg(from, "acpi_sci=level")) acpi_sci_flags.trigger = 3; - else if (!memcmp(from, "acpi_sci=high", 13)) + else if (fullarg(from, "acpi_sci=high")) acpi_sci_flags.polarity = 1; - else if (!memcmp(from, "acpi_sci=low", 12)) + else if (fullarg(from, "acpi_sci=low")) acpi_sci_flags.polarity = 3; /* acpi=strict disables out-of-spec workarounds */ - else if (!memcmp(from, "acpi=strict", 11)) { + else if (fullarg(from, "acpi=strict")) { acpi_strict = 1; } #ifdef CONFIG_X86_IO_APIC - else if (!memcmp(from, "acpi_skip_timer_override", 24)) + else if (fullarg(from, "acpi_skip_timer_override")) acpi_skip_timer_override = 1; #endif #endif - if (!memcmp(from, "disable_timer_pin_1", 19)) + if (fullarg(from, "disable_timer_pin_1")) disable_timer_pin_1 = 1; - if (!memcmp(from, "enable_timer_pin_1", 18)) + if (fullarg(from, "enable_timer_pin_1")) disable_timer_pin_1 = -1; - if (!memcmp(from, "nolapic", 7) || - !memcmp(from, "disableapic", 11)) + if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) disable_apic = 1; - /* Don't confuse with noapictimer */ - if (!memcmp(from, "noapic", 6) && - (from[6] == ' ' || from[6] == 0)) + if (fullarg(from, "noapic")) skip_ioapic_setup = 1; - /* Make sure to not confuse with apic= */ - if (!memcmp(from, "apic", 4) && - (from[4] == ' ' || from[4] == 0)) { + if (fullarg(from,"apic")) { skip_ioapic_setup = 0; ioapic_force = 1; } @@ -388,7 +391,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) iommu_setup(from+6); } - if (!memcmp(from,"oops=panic", 10)) + if (fullarg(from,"oops=panic")) panic_on_oops = 1; if (!memcmp(from, "noexec=", 7)) -- cgit v1.2.3 
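The behavioural difference is easiest to see in isolation. A minimal user-space sketch (the helper mirrors the fullarg() added above; the command-line string is just an example):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

static int fullarg(const char *p, const char *arg)
{
        size_t l = strlen(arg);
        /* match only when nothing, or whitespace, follows the argument */
        return !memcmp(p, arg, l) && (p[l] == 0 || isspace((unsigned char)p[l]));
}

int main(void)
{
        const char *from = "noapictimer quiet";

        /* the old prefix match mistakes "noapictimer" for "noapic" ... */
        printf("prefix match: %d\n", !memcmp(from, "noapic", 6));
        /* ... the whole-word check does not */
        printf("fullarg:      %d\n", fullarg(from, "noapic"));
        return 0;
}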
From 2b692a872c21849edb0a398937e31991526a9216 Mon Sep 17 00:00:00 2001 From: Roberto Nibali Date: Sat, 25 Mar 2006 16:29:55 +0100 Subject: [PATCH] x86_64: Clean up white space in traps.c Attached is a small code style cleanup patch that resulted from my skimming through the arch/x86_64/kernel/traps.c code to figure out what went haywire. Signed-off-by: Roberto Nibali Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/traps.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index b25bc904d42..05dbbf40ccf 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -124,7 +124,7 @@ int printk_address(unsigned long address) if (!modname) modname = delim = ""; return printk("<%016lx>{%s%s%s%s%+ld}", - address,delim,modname,delim,symname,offset); + address, delim, modname, delim, symname, offset); } #else int printk_address(unsigned long address) @@ -336,13 +336,12 @@ void show_registers(struct pt_regs *regs) show_stack(NULL, (unsigned long*)rsp); printk("\nCode: "); - if(regs->rip < PAGE_OFFSET) + if (regs->rip < PAGE_OFFSET) goto bad; - for(i=0;i<20;i++) - { + for (i=0; i<20; i++) { unsigned char c; - if(__get_user(c, &((unsigned char*)regs->rip)[i])) { + if (__get_user(c, &((unsigned char*)regs->rip)[i])) { bad: printk(" Bad RIP value."); break; @@ -481,7 +480,7 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, printk(KERN_INFO "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, str, - regs->rip,regs->rsp,error_code); + regs->rip, regs->rsp, error_code); if (info) force_sig_info(signr, info, tsk); @@ -495,9 +494,9 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->rip); - if (fixup) { + if (fixup) regs->rip = fixup->fixup; - } else + else die(str, regs, error_code); return; } @@ -570,7 +569,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, printk(KERN_INFO "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", tsk->comm, tsk->pid, - regs->rip,regs->rsp,error_code); + regs->rip, regs->rsp, error_code); force_sig(SIGSEGV, tsk); return; -- cgit v1.2.3 From 1f50249e940baa7133e0bdb32cd564bb3ba28456 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:29:58 +0100 Subject: [PATCH] x86_64: Make pfn_valid work early in boot It needs num_physpages, so initialize it early. It's later overwritten again. 
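For context, a rough model of why the early assignment matters (an assumption about the flat-memory configuration of this era, where pfn_valid() reduces to a bounds check against num_physpages; the numbers below are invented):

#include <stdio.h>

static unsigned long num_physpages;     /* zero until setup_arch() fills it in */

static int pfn_valid(unsigned long pfn)
{
        return pfn < num_physpages;     /* flat-memory definition, roughly */
}

int main(void)
{
        unsigned long pfn = 0x10000;    /* hypothetical page frame number */

        printf("before init: %d\n", pfn_valid(pfn));  /* 0 - every pfn looks invalid */
        num_physpages = 0x40000;        /* end_pfn, as the patch now sets early */
        printf("after init:  %d\n", pfn_valid(pfn));  /* 1 */
        return 0;
}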
Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 01e2de4d254..ad3b7fc24a0 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -614,6 +614,7 @@ void __init setup_arch(char **cmdline_p) * we are rounding upwards: */ end_pfn = e820_end_of_ram(); + num_physpages = end_pfn; /* for pfn_valid */ check_efer(); -- cgit v1.2.3 From e57113bc1ff591005ec0b0fb4885d97c01de73d8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 25 Mar 2006 16:30:01 +0100 Subject: [PATCH] x86_64: miscellaneous cleanup - adjust limits of GDT/IDT pseudo-descriptors (some were off by one) - move empty_zero_page into .bss.page_aligned - move cpu_gdt_table into .data.page_aligned - move idt_table into .bss - align inital_code and init_rsp - eliminate pointless (re-)declaration of idt_table in traps.c Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/head.S | 25 ++++++++++++++----------- arch/x86_64/kernel/setup64.c | 2 +- arch/x86_64/kernel/traps.c | 2 -- 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 02fc7fa0ea2..0cddcc03842 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -192,7 +192,8 @@ startup_64: movq initial_code(%rip),%rax jmp *%rax - /* SMP bootup changes these two */ + /* SMP bootup changes these two */ + .align 8 .globl initial_code initial_code: .quad x86_64_start_kernel @@ -237,7 +238,7 @@ ENTRY(no_long_mode) .org 0xf00 .globl pGDT32 pGDT32: - .word gdt_end-cpu_gdt_table + .word gdt_end-cpu_gdt_table-1 .long cpu_gdt_table-__START_KERNEL_map .org 0xf10 @@ -293,8 +294,6 @@ NEXT_PAGE(level2_kernel_pgt) /* Module mapping starts here */ .fill 492,8,0 -NEXT_PAGE(empty_zero_page) - NEXT_PAGE(level3_physmem_pgt) .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ .fill 511,8,0 @@ -337,7 +336,7 @@ ENTRY(boot_level4_pgt) .align 16 .globl cpu_gdt_descr cpu_gdt_descr: - .word gdt_end-cpu_gdt_table + .word gdt_end-cpu_gdt_table-1 gdt: .quad cpu_gdt_table #ifdef CONFIG_SMP @@ -352,7 +351,8 @@ gdt: * Also sysret mandates a special GDT layout */ -.align PAGE_SIZE + .section .data.page_aligned, "aw" + .align PAGE_SIZE /* The TLS descriptors are currently at a different place compared to i386. Hopefully nobody expects them at a fixed place (Wine?) 
*/ @@ -378,9 +378,12 @@ gdt_end: /* zero the remaining page */ .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 -ENTRY(idt_table) - .rept 256 - .quad 0 - .quad 0 - .endr + .section .bss, "aw", @nobits + .align L1_CACHE_BYTES +ENTRY(idt_table) + .skip 256 * 16 + .section .bss.page_aligned, "aw", @nobits + .align PAGE_SIZE +ENTRY(empty_zero_page) + .skip PAGE_SIZE diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 044587812b0..356b462cb0e 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -33,7 +33,7 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; -struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; +struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 05dbbf40ccf..7b148309c52 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -47,8 +47,6 @@ #include #include -extern struct gate_struct idt_table[256]; - asmlinkage void divide_error(void); asmlinkage void debug(void); asmlinkage void nmi(void); -- cgit v1.2.3 From 60c1bc82d9d42486e2b96de668b0b49fadaffd12 Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Sat, 25 Mar 2006 16:30:04 +0100 Subject: [PATCH] x86_64: to use lapic ids instead of initial apic ids phys_proc_id[] on AMD boxes is right now populated with the initial apic id, obtained by the cpuid instruction. But, the initial apic id need not be the local apic id on clustered APIC systems (see comment at x86_64/kernel/genapic_cluster.c, line 110). On vSMPowered with AMD CPUs the cpu_to_node will turn out to be incorrect (as apicid_to_node[] is indexed by the initial apic id rather than the local apic id). On vSMPowered boxes with Intel CPUs this is working correctly as phys_proc_id[] is initialized correctly in detect_ht(). This fixes AMD boot path according to specification, to use the correct routines for local apic id and socket ids. We use hard_smp_processor_id() to read the local apic id, and phys_pkg_id() to determine socket id for phys_proc_id[] Patch tested on Tyan multicore boxes as well as vSMPowered boxes. 
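The core/socket split the patch relies on can be sketched on its own (values are invented; in the kernel the id comes from hard_smp_processor_id() and the shift is done by phys_pkg_id(bits)):

#include <stdio.h>

int main(void)
{
        unsigned apicid = 0x13;  /* hypothetical local APIC id        */
        unsigned bits   = 1;     /* dual-core: one bit of core index  */

        unsigned core_id = apicid & ((1u << bits) - 1);  /* low bits: core within socket */
        unsigned pkg_id  = apicid >> bits;               /* high bits: socket (package)  */

        printf("APIC id 0x%x -> core %u, package %u\n", apicid, core_id, pkg_id);
        return 0;
}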
Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index ad3b7fc24a0..49f285871df 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -846,7 +846,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) unsigned bits; #ifdef CONFIG_NUMA int node = 0; - unsigned apicid = phys_proc_id[cpu]; + unsigned apicid = hard_smp_processor_id(); #endif bits = 0; @@ -856,7 +856,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) /* Low order bits define the core id (index of core in socket) */ cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1); /* Convert the APIC ID into the socket ID */ - phys_proc_id[cpu] >>= bits; + phys_proc_id[cpu] = phys_pkg_id(bits); #ifdef CONFIG_NUMA node = phys_proc_id[cpu]; -- cgit v1.2.3 From a9ba9a3b3897561d01e04cd21433746df46548c0 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 25 Mar 2006 16:30:10 +0100 Subject: [PATCH] x86_64: prefetch the mmap_sem in the fault path In a micro-benchmark that stresses the pagefault path, the down_read_trylock on the mmap_sem showed up quite high on the profile. Turns out this lock is bouncing between cpus quite a bit and thus is cache-cold a lot. This patch prefetches the lock (for write) as early as possible (and before some other somewhat expensive operations). With this patch, the down_read_trylock basically fell out of the top of profile. Signed-off-by: Arjan van de Ven Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/mm/fault.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index de91e17daf6..316c53de47b 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c @@ -314,11 +314,13 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, unsigned long flags; siginfo_t info; + tsk = current; + mm = tsk->mm; + prefetchw(&mm->mmap_sem); + /* get the address */ __asm__("movq %%cr2,%0":"=r" (address)); - tsk = current; - mm = tsk->mm; info.si_code = SEGV_MAPERR; -- cgit v1.2.3 From 04103609322daca31197068e310ba8037ecd3470 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:13 +0100 Subject: [PATCH] x86_64: Move kernel to 2MB As suggested by Andi (and Alan), move the default kernel location from 1Mb to 2Mb, to align to the start of a TLB entry. Signed-off-by: Arjan van de Ven Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index e18eb79bf85..9fad9df8662 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -429,10 +429,10 @@ config CRASH_DUMP config PHYSICAL_START hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) default "0x1000000" if CRASH_DUMP - default "0x100000" + default "0x200000" help This gives the physical address where the kernel is loaded. Normally - for regular kernels this value is 0x100000 (1MB). But in the case + for regular kernels this value is 0x200000 (2MB). But in the case of kexec on panic the fail safe kernel needs to run at a different address than the panic-ed kernel. 
This option is used to set the load address for kernels used to capture crash dump on being kexec'ed -- cgit v1.2.3 From e6fc99c6aba0350a3c4c0206b7047d4893491485 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 25 Mar 2006 16:30:16 +0100 Subject: [PATCH] x86_64: s/Overwrite/Override/ in arch/x86-64 s/Overwrite/Override/ Signed-off-by: Dave Jones Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/ia32_binfmt.c | 2 +- arch/x86_64/kernel/io_apic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 572b3b28772..e776139afb2 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -58,7 +58,7 @@ struct elf_phdr; #define USE_ELF_CORE_DUMP 1 -/* Overwrite elfcore.h */ +/* Override elfcore.h */ #define _LINUX_ELFCORE_H 1 typedef unsigned int elf_greg_t; diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index ffed464e6b1..f264d9dc97f 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -310,7 +310,7 @@ void __init check_ioapic(void) force_iommu) && !iommu_aperture_allowed) { printk(KERN_INFO - "Looks like a VIA chipset. Disabling IOMMU. Overwrite with \"iommu=allowed\"\n"); + "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n"); iommu_aperture_disabled = 1; } #endif -- cgit v1.2.3 From f2d3efedbecc04dc348d723e4c90b46731b3bb48 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:22 +0100 Subject: [PATCH] x86_64: Implement early DMI scanning There are more and more cases where we need to know DMI information early to work around bugs. i386 already had early DMI scanning, but x86-64 didn't. Implement this now. This required some cleanup in the i386 code. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 16 +++++++++------- arch/x86_64/mm/init.c | 27 +++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 49f285871df..8acf5313e83 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -68,6 +68,7 @@ #include #include #include +#include /* * Machine setup.. 
@@ -92,6 +93,12 @@ int bootloader_type; unsigned long saved_video_mode; +/* + * Early DMI memory + */ +int dmi_alloc_index; +char dmi_alloc_data[DMI_MAX_DATA]; + /* * Setup options */ @@ -620,6 +627,8 @@ void __init setup_arch(char **cmdline_p) init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); + dmi_scan_machine(); + zap_low_mappings(0); #ifdef CONFIG_ACPI @@ -1412,10 +1421,3 @@ struct seq_operations cpuinfo_op = { .show = show_cpuinfo, }; -static int __init run_dmi_scan(void) -{ - dmi_scan_machine(); - return 0; -} -core_initcall(run_dmi_scan); - diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 675a4569133..54c7f5975b4 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -225,6 +225,33 @@ static __meminit void unmap_low_page(int i) ti->allocated = 0; } +/* Must run before zap_low_mappings */ +__init void *early_ioremap(unsigned long addr, unsigned long size) +{ + unsigned long map = round_down(addr, LARGE_PAGE_SIZE); + + /* actually usually some more */ + if (size >= LARGE_PAGE_SIZE) { + printk("SMBIOS area too long %lu\n", size); + return NULL; + } + set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); + map += LARGE_PAGE_SIZE; + set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); + __flush_tlb(); + return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); +} + +/* To avoid virtual aliases later */ +__init void early_iounmap(void *addr, unsigned long size) +{ + if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) + printk("early_iounmap: bad address %p\n", addr); + set_pmd(temp_mappings[0].pmd, __pmd(0)); + set_pmd(temp_mappings[1].pmd, __pmd(0)); + __flush_tlb(); +} + static void __meminit phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) { -- cgit v1.2.3 From 5f44a669805ab2c18b347436ec5333173b821268 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:25 +0100 Subject: [PATCH] x86_64: Add __init to fixmap functions that are only called during boot Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/mm/init.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 54c7f5975b4..b0441562544 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -94,7 +94,7 @@ void show_mem(void) int after_bootmem; -static void *spp_getpage(void) +static __init void *spp_getpage(void) { void *ptr; if (after_bootmem) @@ -108,7 +108,7 @@ static void *spp_getpage(void) return ptr; } -static void set_pte_phys(unsigned long vaddr, +static __init void set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) { pgd_t *pgd; @@ -157,7 +157,8 @@ static void set_pte_phys(unsigned long vaddr, } /* NOTE: this is meant to be run only at boot */ -void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) +void __init +__set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) { unsigned long address = __fix_to_virt(idx); -- cgit v1.2.3 From eaeae0cc985fa1df753da7edc8d02635cbc1ba39 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 25 Mar 2006 16:30:28 +0100 Subject: [PATCH] x86_64: Patch to make the head.S-must-be-first-in-vmlinux order explicit This patch puts the code from head.S in a special .bootstrap.text section. 
I'm working on a patch to reorder the functions in the kernel (I'll post that later), but for x86-64 at least the kernel bootstrap requires that the head.S functions are on the very first page/pages of the kernel text. This is understandable since the bootstrap is complex enough already and not a problem at all, it just means they aren't allowed to be reordered. This patch puts these special functions into a separate section to document this, and to guarantee this in the light of possibly reordering the rest later. (So this patch doesn't fix a bug per se, but makes things more robust by making the order of these functions explicit) Signed-off-by: Arjan van de Ven Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/head.S | 1 + arch/x86_64/kernel/vmlinux.lds.S | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 0cddcc03842..6df05e6034f 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S @@ -26,6 +26,7 @@ */ .text + .section .bootstrap.text .code32 .globl startup_32 /* %bx: 1 if coming from smp trampoline on secondary cpu */ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 74db0062d4a..029be20acce 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -20,6 +20,7 @@ SECTIONS phys_startup_64 = startup_64 - LOAD_OFFSET; _text = .; /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + *(.bootstrap.text) *(.text) SCHED_TEXT LOCK_TEXT -- cgit v1.2.3 From 6954bee829a037e4a3e06f7ecd7fc0466fadde41 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:31 +0100 Subject: [PATCH] x86_64: Handle years beyond 2100 ACPIv2 has an official but optional way to get a date >2100. Use it. But all the platforms I tested didn't seem to support it. But anyways the x86-64 kernel should be ready for the 22nd century now. Actually i shouldn't care about this because I will be dead by then @) Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 86039baaaf1..2f7c2120657 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -517,6 +517,7 @@ static unsigned long get_cmos_time(void) unsigned int timeout = 1000000, year, mon, day, hour, min, sec; unsigned char uip = 0, this = 0; unsigned long flags; + unsigned extyear = 0; /* * The Linux interpretation of the CMOS clock register contents: When the @@ -545,6 +546,11 @@ static unsigned long get_cmos_time(void) mon = CMOS_READ(RTC_MONTH); year = CMOS_READ(RTC_YEAR); +#ifdef CONFIG_ACPI + if (acpi_fadt.revision >= FADT2_REVISION_ID && acpi_fadt.century) + extyear = CMOS_READ(acpi_fadt.century); +#endif + spin_unlock_irqrestore(&rtc_lock, flags); /* @@ -559,11 +565,17 @@ static unsigned long get_cmos_time(void) BCD_TO_BIN(mon); BCD_TO_BIN(year); - /* - * x86-64 systems only exists since 2002. - * This will work up to Dec 31, 2100 - */ - year += 2000; + if (extyear) { + BCD_TO_BIN(extyear); + year += extyear; + printk(KERN_INFO "Extended CMOS year: %d\n", extyear); + } else { + /* + * x86-64 systems only exists since 2002. 
+ * This will work up to Dec 31, 2100 + */ + year += 2000; + } return mktime(year, mon, day, hour, min, sec); } -- cgit v1.2.3 From 7351c0bfe825db2239b835c771c95affe006f51c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:34 +0100 Subject: [PATCH] x86_64: Fix formatting in time.c Only white space changes, code should be identical Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 94 +++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 49 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 2f7c2120657..bb6cb83450b 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -88,7 +88,8 @@ static inline unsigned int do_gettimeoffset_tsc(void) unsigned long t; unsigned long x; t = get_cycles_sync(); - if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ + if (t < vxtime.last_tsc) + t = vxtime.last_tsc; /* hack */ x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; return x; } @@ -178,8 +179,9 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - /* Assume the lock function has either no stack frame or only a single word. - This checks if the address on the stack looks like a kernel text address. + /* Assume the lock function has either no stack frame or only a single + word. This checks if the address on the stack looks like a kernel + text address. There is a small window for false hits, but in that case the tick is just accounted to the spinlock function. Better would be to write these functions in assembler again @@ -293,8 +295,7 @@ unsigned long long monotonic_clock(void) this_offset = hpet_readl(HPET_COUNTER); } while (read_seqretry(&xtime_lock, seq)); offset = (this_offset - last_offset); - offset *=(NSEC_PER_SEC/HZ)/hpet_tick; - return base + offset; + offset *= (NSEC_PER_SEC/HZ) / hpet_tick; } else { do { seq = read_seqbegin(&xtime_lock); @@ -303,50 +304,46 @@ unsigned long long monotonic_clock(void) base = monotonic_base; } while (read_seqretry(&xtime_lock, seq)); this_offset = get_cycles_sync(); - offset = (this_offset - last_offset)*1000/cpu_khz; - return base + offset; + offset = (this_offset - last_offset)*1000 / cpu_khz; } + return base + offset; } EXPORT_SYMBOL(monotonic_clock); static noinline void handle_lost_ticks(int lost, struct pt_regs *regs) { - static long lost_count; - static int warned; - - if (report_lost_ticks) { - printk(KERN_WARNING "time.c: Lost %d timer " - "tick(s)! ", lost); - print_symbol("rip %s)\n", regs->rip); - } - - if (lost_count == 1000 && !warned) { - printk(KERN_WARNING - "warning: many lost ticks.\n" - KERN_WARNING "Your time source seems to be instable or " + static long lost_count; + static int warned; + if (report_lost_ticks) { + printk(KERN_WARNING "time.c: Lost %d timer tick(s)! ", lost); + print_symbol("rip %s)\n", regs->rip); + } + + if (lost_count == 1000 && !warned) { + printk(KERN_WARNING "warning: many lost ticks.\n" + KERN_WARNING "Your time source seems to be instable or " "some driver is hogging interupts\n"); - print_symbol("rip %s\n", regs->rip); - if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { - printk(KERN_WARNING "Falling back to HPET\n"); - if (hpet_use_timer) - vxtime.last = hpet_readl(HPET_T0_CMP) - hpet_tick; - else - vxtime.last = hpet_readl(HPET_COUNTER); - vxtime.mode = VXTIME_HPET; - do_gettimeoffset = do_gettimeoffset_hpet; - } - /* else should fall back to PIT, but code missing. 
*/ - warned = 1; - } else - lost_count++; + print_symbol("rip %s\n", regs->rip); + if (vxtime.mode == VXTIME_TSC && vxtime.hpet_address) { + printk(KERN_WARNING "Falling back to HPET\n"); + if (hpet_use_timer) + vxtime.last = hpet_readl(HPET_T0_CMP) - + hpet_tick; + else + vxtime.last = hpet_readl(HPET_COUNTER); + vxtime.mode = VXTIME_HPET; + do_gettimeoffset = do_gettimeoffset_hpet; + } + /* else should fall back to PIT, but code missing. */ + warned = 1; + } else + lost_count++; #ifdef CONFIG_CPU_FREQ - /* In some cases the CPU can change frequency without us noticing - (like going into thermal throttle) - Give cpufreq a change to catch up. */ - if ((lost_count+1) % 25 == 0) { - cpufreq_delayed_get(); - } + /* In some cases the CPU can change frequency without us noticing + Give cpufreq a change to catch up. */ + if ((lost_count+1) % 25 == 0) + cpufreq_delayed_get(); #endif } @@ -556,7 +553,7 @@ static unsigned long get_cmos_time(void) /* * We know that x86-64 always uses BCD format, no need to check the * config register. - */ + */ BCD_TO_BIN(sec); BCD_TO_BIN(min); @@ -618,7 +615,8 @@ static void cpufreq_delayed_get(void) cpufreq_delayed_issched = 1; if (!warned) { warned = 1; - printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n"); + printk(KERN_DEBUG + "Losing some ticks... checking if CPU frequency changed.\n"); } schedule_work(&cpufreq_delayed_get_work); } @@ -641,9 +639,9 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, lpj = &dummy; if (!(freq->flags & CPUFREQ_CONST_LOOPS)) #ifdef CONFIG_SMP - lpj = &cpu_data[freq->cpu].loops_per_jiffy; + lpj = &cpu_data[freq->cpu].loops_per_jiffy; #else - lpj = &boot_cpu_data.loops_per_jiffy; + lpj = &boot_cpu_data.loops_per_jiffy; #endif if (!ref_freq) { @@ -780,9 +778,8 @@ static __init int late_hpet_init(void) int i; hpet = (struct hpet *) fix_to_virt(FIX_HPET_BASE); - - for (i = 2, timer = &hpet->hpet_timers[2]; i < ntimer; - timer++, i++) + timer = &hpet->hpet_timers[2]; + for (i = 2; i < ntimer; timer++, i++) hd.hd_irq[i] = (timer->hpet_config & Tn_INT_ROUTE_CNF_MASK) >> Tn_INT_ROUTE_CNF_SHIFT; @@ -939,8 +936,7 @@ void __init time_init(void) -xtime.tv_sec, -xtime.tv_nsec); if (!hpet_init()) - vxtime_hz = (1000000000000000L + hpet_period / 2) / - hpet_period; + vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; else vxtime.hpet_address = 0; -- cgit v1.2.3 From 28456edeff2ef9273c55cd12e4b193208717d4cd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:37 +0100 Subject: [PATCH] x86_64: Reename CMOS update warning Was disabled due to an old bug, long gone. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index bb6cb83450b..7f58fa68249 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -244,17 +244,10 @@ static void set_rtc_mmss(unsigned long nowtime) real_minutes += 30; /* correct for half hour time zone */ real_minutes %= 60; -#if 0 - /* AMD 8111 is a really bad time keeper and hits this regularly. - It probably was an attempt to avoid screwing up DST, but ignore - that for now. 
*/ if (abs(real_minutes - cmos_minutes) >= 30) { printk(KERN_WARNING "time.c: can't update CMOS clock " "from %d to %d\n", cmos_minutes, real_minutes); - } else -#endif - - { + } else { BIN_TO_BCD(real_seconds); BIN_TO_BCD(real_minutes); CMOS_WRITE(real_seconds, RTC_SECONDS); -- cgit v1.2.3 From 3056d6be19b50cbd3f0290e816d702cc3e54a6f3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:43 +0100 Subject: [PATCH] x86_64: Don't invoke OOM killer during dma_alloc_coherent() There is a fallback logic, so it's better to not use the OOM killer in the allocations. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pci-dma.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 4ed391edd47..03c9eeedb0f 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c @@ -73,6 +73,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_mask == 0) dma_mask = 0xffffffff; + /* Don't invoke OOM killer */ + gfp |= __GFP_NORETRY; + /* Kludge to make it bug-to-bug compatible with i386. i386 uses the normal dma_mask for alloc_coherent. */ dma_mask &= *dev->dma_mask; -- cgit v1.2.3 From 9b2a13b963dece8d45e07692b7872ae5a075ca2a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:46 +0100 Subject: [PATCH] x86_64: Always use IO-APIC routing for timer. I tested it on a couple of chipsets and it worked everywhere so it should be ok as default for now. So far I haven't done the great purge of the useless old check_timer code yet though. Can be overwritten with enable_8254_timer in the worst case Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index f264d9dc97f..db7db1e3650 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -50,7 +50,7 @@ static int no_timer_check; int disable_timer_pin_1 __initdata; -int timer_over_8254 __initdata = 1; +int timer_over_8254 __initdata = 0; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -- cgit v1.2.3 From 4bdc3b7f1b730c07f5a6ccca77ee68e044036ffc Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 25 Mar 2006 16:30:49 +0100 Subject: [PATCH] x86_64: Basic reorder infrastructure This patch puts the infrastructure in place to allow for a reordering of functions based inside the vmlinux. The general idea is that it is possible to put all "common" functions into the first 2Mb of the code, so that they are covered by one TLB entry. This as opposed to the current situation where a typical vmlinux covers about 3.5Mb (on x86-64) and thus 2 TLB entries. This is done by enabling the -ffunction-sections flag in gcc, which puts each function in its own ELF section, so that the linker can then order them in a way defined by the linker script. As per previous discussions, Linus said he wanted a "static" list for this, eg a list provided by the kernel tarbal, so that most people have the same ordering at least. A script is provided to create this list based on readprofile(1) output. The included list is provisional, and entirely biased on my own testbox and me running a few kernel compiles and some other things. 
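What -ffunction-sections buys can be shown with a toy translation unit (not kernel code; the function names are invented). Compiled with "gcc -ffunction-sections -c demo.c", each function gets its own .text.<name> section, which is exactly what the *(.text.<name>) patterns in the functionlist select from the linker script:

/* demo.c: build with "gcc -ffunction-sections -c demo.c",
 * then "objdump -h demo.o" shows one section per function. */
int hot_path(int x)
{
        return x + 1;           /* placed in section .text.hot_path  */
}

int cold_path(int x)
{
        return x - 1;           /* placed in section .text.cold_path */
}

int demo(int x)
{
        /* a linker script line such as *(.text.hot_path) can now pull
         * hot_path to the front of .text, independent of source order */
        return hot_path(x) + cold_path(x);
}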
I think that to get to a better list we need to invite people to submit their own profiles, and somehow add those all up and base the final list on that. I'm willing to do that effort if this is ends up being the prefered approach. Such an effort probably needs to be repeated like once a year or so to adopt to the changing nature of the kernel. Made it a CONFIG with default n because it increases link times dramatically. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 8 + arch/x86_64/Makefile | 1 + arch/x86_64/kernel/functionlist | 1286 ++++++++++++++++++++++++++++++++++++++ arch/x86_64/kernel/vmlinux.lds.S | 5 + 4 files changed, 1300 insertions(+) create mode 100644 arch/x86_64/kernel/functionlist (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 9fad9df8662..24a3016c079 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -464,6 +464,14 @@ config SECCOMP source kernel/Kconfig.hz +config REORDER + bool "Function reordering" + default n + help + This option enables the toolchain to reorder functions for a more + optimal TLB usage. If you have pretty much any version of binutils, + this can increase your kernel build time by roughly one minute. + endmenu # diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index f5e48ba9bec..0fbc0283609 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -36,6 +36,7 @@ CFLAGS += -m64 CFLAGS += -mno-red-zone CFLAGS += -mcmodel=kernel CFLAGS += -pipe +cflags-$(CONFIG_REORDER) += -ffunction-sections # this makes reading assembly source easier, but produces worse code # actually it makes the kernel smaller too. CFLAGS += -fno-reorder-blocks diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist new file mode 100644 index 00000000000..2bcebdc3eed --- /dev/null +++ b/arch/x86_64/kernel/functionlist @@ -0,0 +1,1286 @@ +*(.text.flush_thread) +*(.text.check_poison_obj) +*(.text.copy_page) +*(.text.__set_personality) +*(.text.gart_map_sg) +*(.text.kmem_cache_free) +*(.text.find_get_page) +*(.text._raw_spin_lock) +*(.text.ide_outb) +*(.text.unmap_vmas) +*(.text.copy_page_range) +*(.text.kprobe_handler) +*(.text.__handle_mm_fault) +*(.text.__d_lookup) +*(.text.copy_user_generic) +*(.text.__link_path_walk) +*(.text.get_page_from_freelist) +*(.text.kmem_cache_alloc) +*(.text.drive_cmd_intr) +*(.text.ia32_setup_sigcontext) +*(.text.huge_pte_offset) +*(.text.do_page_fault) +*(.text.page_remove_rmap) +*(.text.release_pages) +*(.text.ide_end_request) +*(.text.__mutex_lock_slowpath) +*(.text.__find_get_block) +*(.text.kfree) +*(.text.vfs_read) +*(.text._raw_spin_unlock) +*(.text.free_hot_cold_page) +*(.text.fget_light) +*(.text.schedule) +*(.text.memcmp) +*(.text.touch_atime) +*(.text.__might_sleep) +*(.text.__down_read_trylock) +*(.text.arch_pick_mmap_layout) +*(.text.find_vma) +*(.text.__make_request) +*(.text.do_generic_mapping_read) +*(.text.mutex_lock_interruptible) +*(.text.__generic_file_aio_read) +*(.text._atomic_dec_and_lock) +*(.text.__wake_up_bit) +*(.text.add_to_page_cache) +*(.text.cache_alloc_debugcheck_after) +*(.text.vm_normal_page) +*(.text.mutex_debug_check_no_locks_freed) +*(.text.net_rx_action) +*(.text.__find_first_zero_bit) +*(.text.put_page) +*(.text._raw_read_lock) +*(.text.__delay) +*(.text.dnotify_parent) +*(.text.do_path_lookup) +*(.text.do_sync_read) +*(.text.do_lookup) +*(.text.bit_waitqueue) +*(.text.file_read_actor) +*(.text.strncpy_from_user) +*(.text.__pagevec_lru_add_active) +*(.text.fget) +*(.text.dput) 
+*(.text.__strnlen_user) +*(.text.inotify_inode_queue_event) +*(.text.rw_verify_area) +*(.text.ide_intr) +*(.text.inotify_dentry_parent_queue_event) +*(.text.permission) +*(.text.memscan) +*(.text.hpet_rtc_interrupt) +*(.text.do_mmap_pgoff) +*(.text.current_fs_time) +*(.text.vfs_getattr) +*(.text.kmem_flagcheck) +*(.text.mark_page_accessed) +*(.text.free_pages_and_swap_cache) +*(.text.generic_fillattr) +*(.text.__block_prepare_write) +*(.text.__set_page_dirty_nobuffers) +*(.text.link_path_walk) +*(.text.find_get_pages_tag) +*(.text.ide_do_request) +*(.text.__alloc_pages) +*(.text.generic_permission) +*(.text.mod_page_state_offset) +*(.text.free_pgd_range) +*(.text.generic_file_buffered_write) +*(.text.number) +*(.text.ide_do_rw_disk) +*(.text.__brelse) +*(.text.__mod_page_state_offset) +*(.text.rotate_reclaimable_page) +*(.text.find_vma_prepare) +*(.text.find_vma_prev) +*(.text.lru_cache_add_active) +*(.text.__kmalloc_track_caller) +*(.text.smp_invalidate_interrupt) +*(.text.handle_IRQ_event) +*(.text.__find_get_block_slow) +*(.text.do_wp_page) +*(.text.do_select) +*(.text.set_user_nice) +*(.text.sys_read) +*(.text.do_munmap) +*(.text.csum_partial) +*(.text.__do_softirq) +*(.text.may_open) +*(.text.getname) +*(.text.get_empty_filp) +*(.text.__fput) +*(.text.remove_mapping) +*(.text.filp_ctor) +*(.text.poison_obj) +*(.text.unmap_region) +*(.text.test_set_page_writeback) +*(.text.__do_page_cache_readahead) +*(.text.sock_def_readable) +*(.text.ide_outl) +*(.text.shrink_zone) +*(.text.rb_insert_color) +*(.text.get_request) +*(.text.sys_pread64) +*(.text.spin_bug) +*(.text.ide_outsl) +*(.text.mask_and_ack_8259A) +*(.text.filemap_nopage) +*(.text.page_add_file_rmap) +*(.text.find_lock_page) +*(.text.tcp_poll) +*(.text.__mark_inode_dirty) +*(.text.file_ra_state_init) +*(.text.generic_file_llseek) +*(.text.__pagevec_lru_add) +*(.text.page_cache_readahead) +*(.text.n_tty_receive_buf) +*(.text.zonelist_policy) +*(.text.vma_adjust) +*(.text.test_clear_page_dirty) +*(.text.sync_buffer) +*(.text.do_exit) +*(.text.__bitmap_weight) +*(.text.alloc_pages_current) +*(.text.get_unused_fd) +*(.text.zone_watermark_ok) +*(.text.cpuset_update_task_memory_state) +*(.text.__bitmap_empty) +*(.text.sys_munmap) +*(.text.__inode_dir_notify) +*(.text.__generic_file_aio_write_nolock) +*(.text.__pte_alloc) +*(.text.sys_select) +*(.text.vm_acct_memory) +*(.text.vfs_write) +*(.text.__lru_add_drain) +*(.text.prio_tree_insert) +*(.text.generic_file_aio_read) +*(.text.vma_merge) +*(.text.block_write_full_page) +*(.text.__page_set_anon_rmap) +*(.text.apic_timer_interrupt) +*(.text.release_console_sem) +*(.text.sys_write) +*(.text.sys_brk) +*(.text.dup_mm) +*(.text.read_current_timer) +*(.text.ll_rw_block) +*(.text.blk_rq_map_sg) +*(.text.dbg_userword) +*(.text.__block_commit_write) +*(.text.cache_grow) +*(.text.copy_strings) +*(.text.release_task) +*(.text.do_sync_write) +*(.text.unlock_page) +*(.text.load_elf_binary) +*(.text.__follow_mount) +*(.text.__getblk) +*(.text.do_sys_open) +*(.text.current_kernel_time) +*(.text.call_rcu) +*(.text.write_chan) +*(.text.vsnprintf) +*(.text.dummy_inode_setsecurity) +*(.text.submit_bh) +*(.text.poll_freewait) +*(.text.bio_alloc_bioset) +*(.text.skb_clone) +*(.text.page_waitqueue) +*(.text.__mutex_lock_interruptible_slowpath) +*(.text.get_index) +*(.text.csum_partial_copy_generic) +*(.text.bad_range) +*(.text.remove_vma) +*(.text.cp_new_stat) +*(.text.alloc_arraycache) +*(.text.test_clear_page_writeback) +*(.text.strsep) +*(.text.open_namei) +*(.text._raw_read_unlock) 
+*(.text.get_vma_policy) +*(.text.__down_write_trylock) +*(.text.find_get_pages) +*(.text.tcp_rcv_established) +*(.text.generic_make_request) +*(.text.__block_write_full_page) +*(.text.cfq_set_request) +*(.text.sys_inotify_init) +*(.text.split_vma) +*(.text.__mod_timer) +*(.text.get_options) +*(.text.vma_link) +*(.text.mpage_writepages) +*(.text.truncate_complete_page) +*(.text.tcp_recvmsg) +*(.text.sigprocmask) +*(.text.filemap_populate) +*(.text.sys_close) +*(.text.inotify_dev_queue_event) +*(.text.do_task_stat) +*(.text.__dentry_open) +*(.text.unlink_file_vma) +*(.text.__pollwait) +*(.text.packet_rcv_spkt) +*(.text.drop_buffers) +*(.text.free_pgtables) +*(.text.generic_file_direct_write) +*(.text.copy_process) +*(.text.netif_receive_skb) +*(.text.dnotify_flush) +*(.text.print_bad_pte) +*(.text.anon_vma_unlink) +*(.text.sys_mprotect) +*(.text.sync_sb_inodes) +*(.text.find_inode_fast) +*(.text.dummy_inode_readlink) +*(.text.putname) +*(.text.init_smp_flush) +*(.text.dbg_redzone2) +*(.text.sk_run_filter) +*(.text.may_expand_vm) +*(.text.generic_file_aio_write) +*(.text.find_next_zero_bit) +*(.text.file_kill) +*(.text.audit_getname) +*(.text.arch_unmap_area_topdown) +*(.text.alloc_page_vma) +*(.text.tcp_transmit_skb) +*(.text.rb_next) +*(.text.dbg_redzone1) +*(.text.generic_file_mmap) +*(.text.vfs_fstat) +*(.text.sys_time) +*(.text.page_lock_anon_vma) +*(.text.get_unmapped_area) +*(.text.remote_llseek) +*(.text.__up_read) +*(.text.fd_install) +*(.text.eventpoll_init_file) +*(.text.dma_alloc_coherent) +*(.text.create_empty_buffers) +*(.text.__mutex_unlock_slowpath) +*(.text.dup_fd) +*(.text.d_alloc) +*(.text.tty_ldisc_try) +*(.text.sys_stime) +*(.text.__rb_rotate_right) +*(.text.d_validate) +*(.text.rb_erase) +*(.text.path_release) +*(.text.memmove) +*(.text.invalidate_complete_page) +*(.text.clear_inode) +*(.text.cache_estimate) +*(.text.alloc_buffer_head) +*(.text.smp_call_function_interrupt) +*(.text.flush_tlb_others) +*(.text.file_move) +*(.text.balance_dirty_pages_ratelimited) +*(.text.vma_prio_tree_add) +*(.text.timespec_trunc) +*(.text.mempool_alloc) +*(.text.iget_locked) +*(.text.d_alloc_root) +*(.text.cpuset_populate_dir) +*(.text.anon_vma_prepare) +*(.text.sys_newstat) +*(.text.alloc_page_interleave) +*(.text.__path_lookup_intent_open) +*(.text.__pagevec_free) +*(.text.inode_init_once) +*(.text.free_vfsmnt) +*(.text.__user_walk_fd) +*(.text.cfq_idle_slice_timer) +*(.text.sys_mmap) +*(.text.sys_llseek) +*(.text.prio_tree_remove) +*(.text.filp_close) +*(.text.file_permission) +*(.text.vma_prio_tree_remove) +*(.text.tcp_ack) +*(.text.nameidata_to_filp) +*(.text.sys_lseek) +*(.text.percpu_counter_mod) +*(.text.igrab) +*(.text.__bread) +*(.text.alloc_inode) +*(.text.filldir) +*(.text.__rb_rotate_left) +*(.text.irq_affinity_write_proc) +*(.text.init_request_from_bio) +*(.text.find_or_create_page) +*(.text.tty_poll) +*(.text.tcp_sendmsg) +*(.text.ide_wait_stat) +*(.text.free_buffer_head) +*(.text.flush_signal_handlers) +*(.text.tcp_v4_rcv) +*(.text.nr_blockdev_pages) +*(.text.locks_remove_flock) +*(.text.__iowrite32_copy) +*(.text.do_filp_open) +*(.text.try_to_release_page) +*(.text.page_add_new_anon_rmap) +*(.text.kmem_cache_size) +*(.text.eth_type_trans) +*(.text.try_to_free_buffers) +*(.text.schedule_tail) +*(.text.proc_lookup) +*(.text.no_llseek) +*(.text.kfree_skbmem) +*(.text.do_wait) +*(.text.do_mpage_readpage) +*(.text.vfs_stat_fd) +*(.text.tty_write) +*(.text.705) +*(.text.sync_page) +*(.text.__remove_shared_vm_struct) +*(.text.__kfree_skb) +*(.text.sock_poll) 
+*(.text.get_request_wait) +*(.text.do_sigaction) +*(.text.do_brk) +*(.text.tcp_event_data_recv) +*(.text.read_chan) +*(.text.pipe_writev) +*(.text.__emul_lookup_dentry) +*(.text.rtc_get_rtc_time) +*(.text.print_objinfo) +*(.text.file_update_time) +*(.text.do_signal) +*(.text.disable_8259A_irq) +*(.text.blk_queue_bounce) +*(.text.__anon_vma_link) +*(.text.__vma_link) +*(.text.vfs_rename) +*(.text.sys_newlstat) +*(.text.sys_newfstat) +*(.text.sys_mknod) +*(.text.__show_regs) +*(.text.iput) +*(.text.get_signal_to_deliver) +*(.text.flush_tlb_page) +*(.text.debug_mutex_wake_waiter) +*(.text.copy_thread) +*(.text.clear_page_dirty_for_io) +*(.text.buffer_io_error) +*(.text.vfs_permission) +*(.text.truncate_inode_pages_range) +*(.text.sys_recvfrom) +*(.text.remove_suid) +*(.text.mark_buffer_dirty) +*(.text.local_bh_enable) +*(.text.get_zeroed_page) +*(.text.get_vmalloc_info) +*(.text.flush_old_exec) +*(.text.dummy_inode_permission) +*(.text.__bio_add_page) +*(.text.prio_tree_replace) +*(.text.notify_change) +*(.text.mntput_no_expire) +*(.text.fput) +*(.text.__end_that_request_first) +*(.text.wake_up_bit) +*(.text.unuse_mm) +*(.text.skb_release_data) +*(.text.shrink_icache_memory) +*(.text.sched_balance_self) +*(.text.__pmd_alloc) +*(.text.pipe_poll) +*(.text.normal_poll) +*(.text.__free_pages) +*(.text.follow_mount) +*(.text.cdrom_start_packet_command) +*(.text.blk_recount_segments) +*(.text.bio_put) +*(.text.__alloc_skb) +*(.text.__wake_up) +*(.text.vm_stat_account) +*(.text.sys_fcntl) +*(.text.sys_fadvise64) +*(.text._raw_write_unlock) +*(.text.__pud_alloc) +*(.text.alloc_page_buffers) +*(.text.vfs_llseek) +*(.text.sockfd_lookup) +*(.text._raw_write_lock) +*(.text.put_compound_page) +*(.text.prune_dcache) +*(.text.pipe_readv) +*(.text.mempool_free) +*(.text.make_ahead_window) +*(.text.lru_add_drain) +*(.text.constant_test_bit) +*(.text.__clear_user) +*(.text.arch_unmap_area) +*(.text.anon_vma_link) +*(.text.sys_chroot) +*(.text.setup_arg_pages) +*(.text.radix_tree_preload) +*(.text.init_rwsem) +*(.text.generic_osync_inode) +*(.text.generic_delete_inode) +*(.text.do_sys_poll) +*(.text.dev_queue_xmit) +*(.text.default_llseek) +*(.text.__writeback_single_inode) +*(.text.vfs_ioctl) +*(.text.__up_write) +*(.text.unix_poll) +*(.text.sys_rt_sigprocmask) +*(.text.sock_recvmsg) +*(.text.recalc_bh_state) +*(.text.__put_unused_fd) +*(.text.process_backlog) +*(.text.locks_remove_posix) +*(.text.lease_modify) +*(.text.expand_files) +*(.text.end_buffer_read_nobh) +*(.text.d_splice_alias) +*(.text.debug_mutex_init_waiter) +*(.text.copy_from_user) +*(.text.cap_vm_enough_memory) +*(.text.show_vfsmnt) +*(.text.release_sock) +*(.text.pfifo_fast_enqueue) +*(.text.half_md4_transform) +*(.text.fs_may_remount_ro) +*(.text.do_fork) +*(.text.copy_hugetlb_page_range) +*(.text.cache_free_debugcheck) +*(.text.__tcp_select_window) +*(.text.task_handoff_register) +*(.text.sys_open) +*(.text.strlcpy) +*(.text.skb_copy_datagram_iovec) +*(.text.set_up_list3s) +*(.text.release_open_intent) +*(.text.qdisc_restart) +*(.text.n_tty_chars_in_buffer) +*(.text.inode_change_ok) +*(.text.__downgrade_write) +*(.text.debug_mutex_unlock) +*(.text.add_timer_randomness) +*(.text.sock_common_recvmsg) +*(.text.set_bh_page) +*(.text.printk_lock) +*(.text.path_release_on_umount) +*(.text.ip_output) +*(.text.ide_build_dmatable) +*(.text.__get_user_8) +*(.text.end_buffer_read_sync) +*(.text.__d_path) +*(.text.d_move) +*(.text.del_timer) +*(.text.constant_test_bit) +*(.text.blockable_page_cache_readahead) +*(.text.tty_read) +*(.text.sys_readlink) 
+*(.text.sys_faccessat) +*(.text.read_swap_cache_async) +*(.text.pty_write_room) +*(.text.page_address_in_vma) +*(.text.kthread) +*(.text.cfq_exit_io_context) +*(.text.__tcp_push_pending_frames) +*(.text.sys_pipe) +*(.text.submit_bio) +*(.text.pid_revalidate) +*(.text.page_referenced_file) +*(.text.lock_sock) +*(.text.get_page_state_node) +*(.text.generic_block_bmap) +*(.text.do_setitimer) +*(.text.dev_queue_xmit_nit) +*(.text.copy_from_read_buf) +*(.text.__const_udelay) +*(.text.console_conditional_schedule) +*(.text.wake_up_new_task) +*(.text.wait_for_completion_interruptible) +*(.text.tcp_rcv_rtt_update) +*(.text.sys_mlockall) +*(.text.set_fs_altroot) +*(.text.schedule_timeout) +*(.text.nr_free_pagecache_pages) +*(.text.nf_iterate) +*(.text.mapping_tagged) +*(.text.ip_queue_xmit) +*(.text.ip_local_deliver) +*(.text.follow_page) +*(.text.elf_map) +*(.text.dummy_file_permission) +*(.text.dispose_list) +*(.text.dentry_open) +*(.text.dentry_iput) +*(.text.bio_alloc) +*(.text.alloc_skb_from_cache) +*(.text.wait_on_page_bit) +*(.text.vfs_readdir) +*(.text.vfs_lstat) +*(.text.seq_escape) +*(.text.__posix_lock_file) +*(.text.mm_release) +*(.text.kref_put) +*(.text.ip_rcv) +*(.text.__iget) +*(.text.free_pages) +*(.text.find_mergeable_anon_vma) +*(.text.find_extend_vma) +*(.text.dummy_inode_listsecurity) +*(.text.bio_add_page) +*(.text.__vm_enough_memory) +*(.text.vfs_stat) +*(.text.tty_paranoia_check) +*(.text.tcp_read_sock) +*(.text.tcp_data_queue) +*(.text.sys_uname) +*(.text.sys_renameat) +*(.text.__strncpy_from_user) +*(.text.__mutex_init) +*(.text.__lookup_hash) +*(.text.kref_get) +*(.text.ip_route_input) +*(.text.__insert_inode_hash) +*(.text.do_sock_write) +*(.text.blk_done_softirq) +*(.text.__wake_up_sync) +*(.text.__vma_link_rb) +*(.text.tty_ioctl) +*(.text.tracesys) +*(.text.sys_getdents) +*(.text.sys_dup) +*(.text.stub_execve) +*(.text.sha_transform) +*(.text.radix_tree_tag_clear) +*(.text.put_unused_fd) +*(.text.put_files_struct) +*(.text.mpage_readpages) +*(.text.may_delete) +*(.text.kmem_cache_create) +*(.text.ip_mc_output) +*(.text.interleave_nodes) +*(.text.groups_search) +*(.text.generic_drop_inode) +*(.text.generic_commit_write) +*(.text.fcntl_setlk) +*(.text.exit_mmap) +*(.text.end_page_writeback) +*(.text.__d_rehash) +*(.text.debug_mutex_free_waiter) +*(.text.csum_ipv6_magic) +*(.text.count) +*(.text.cleanup_rbuf) +*(.text.check_spinlock_acquired_node) +*(.text.can_vma_merge_after) +*(.text.bio_endio) +*(.text.alloc_pidmap) +*(.text.write_ldt) +*(.text.vmtruncate_range) +*(.text.vfs_create) +*(.text.__user_walk) +*(.text.update_send_head) +*(.text.unmap_underlying_metadata) +*(.text.tty_ldisc_deref) +*(.text.tcp_setsockopt) +*(.text.tcp_send_ack) +*(.text.sys_pause) +*(.text.sys_gettimeofday) +*(.text.sync_dirty_buffer) +*(.text.strncmp) +*(.text.release_posix_timer) +*(.text.proc_file_read) +*(.text.prepare_to_wait) +*(.text.locks_mandatory_locked) +*(.text.interruptible_sleep_on_timeout) +*(.text.inode_sub_bytes) +*(.text.in_group_p) +*(.text.hrtimer_try_to_cancel) +*(.text.filldir64) +*(.text.fasync_helper) +*(.text.dummy_sb_pivotroot) +*(.text.d_lookup) +*(.text.d_instantiate) +*(.text.__d_find_alias) +*(.text.cpu_idle_wait) +*(.text.cond_resched_lock) +*(.text.chown_common) +*(.text.blk_congestion_wait) +*(.text.activate_page) +*(.text.unlock_buffer) +*(.text.tty_wakeup) +*(.text.tcp_v4_do_rcv) +*(.text.tcp_current_mss) +*(.text.sys_openat) +*(.text.sys_fchdir) +*(.text.strnlen_user) +*(.text.strnlen) +*(.text.strchr) +*(.text.sock_common_getsockopt) 
+*(.text.skb_checksum) +*(.text.remove_wait_queue) +*(.text.rb_replace_node) +*(.text.radix_tree_node_ctor) +*(.text.pty_chars_in_buffer) +*(.text.profile_hit) +*(.text.prio_tree_left) +*(.text.pgd_clear_bad) +*(.text.pfifo_fast_dequeue) +*(.text.page_referenced) +*(.text.open_exec) +*(.text.mmput) +*(.text.mm_init) +*(.text.__ide_dma_off_quietly) +*(.text.ide_dma_intr) +*(.text.hrtimer_start) +*(.text.get_io_context) +*(.text.__get_free_pages) +*(.text.find_first_zero_bit) +*(.text.file_free_rcu) +*(.text.dummy_socket_sendmsg) +*(.text.do_unlinkat) +*(.text.do_arch_prctl) +*(.text.destroy_inode) +*(.text.can_vma_merge_before) +*(.text.block_sync_page) +*(.text.block_prepare_write) +*(.text.bio_init) +*(.text.arch_ptrace) +*(.text.wake_up_inode) +*(.text.wait_on_retry_sync_kiocb) +*(.text.vma_prio_tree_next) +*(.text.tcp_rcv_space_adjust) +*(.text.__tcp_ack_snd_check) +*(.text.sys_utime) +*(.text.sys_recvmsg) +*(.text.sys_mremap) +*(.text.sys_bdflush) +*(.text.sleep_on) +*(.text.set_page_dirty_lock) +*(.text.seq_path) +*(.text.schedule_timeout_interruptible) +*(.text.sched_fork) +*(.text.rt_run_flush) +*(.text.profile_munmap) +*(.text.prepare_binprm) +*(.text.__pagevec_release_nonlru) +*(.text.m_show) +*(.text.lookup_mnt) +*(.text.__lookup_mnt) +*(.text.lock_timer_base) +*(.text.is_subdir) +*(.text.invalidate_bh_lru) +*(.text.init_buffer_head) +*(.text.ifind_fast) +*(.text.ide_dma_start) +*(.text.__get_page_state) +*(.text.flock_to_posix_lock) +*(.text.__find_symbol) +*(.text.do_futex) +*(.text.do_execve) +*(.text.dirty_writeback_centisecs_handler) +*(.text.dev_watchdog) +*(.text.can_share_swap_page) +*(.text.blkdev_put) +*(.text.bio_get_nr_vecs) +*(.text.xfrm_compile_policy) +*(.text.vma_prio_tree_insert) +*(.text.vfs_lstat_fd) +*(.text.__user_path_lookup_open) +*(.text.thread_return) +*(.text.tcp_send_delayed_ack) +*(.text.sock_def_error_report) +*(.text.shrink_slab) +*(.text.serial_out) +*(.text.seq_read) +*(.text.secure_ip_id) +*(.text.search_binary_handler) +*(.text.proc_pid_unhash) +*(.text.pagevec_lookup) +*(.text.new_inode) +*(.text.memcpy_toiovec) +*(.text.locks_free_lock) +*(.text.__lock_page) +*(.text.__lock_buffer) +*(.text.load_module) +*(.text.is_bad_inode) +*(.text.invalidate_inode_buffers) +*(.text.insert_vm_struct) +*(.text.inode_setattr) +*(.text.inode_add_bytes) +*(.text.ide_read_24) +*(.text.ide_get_error_location) +*(.text.ide_do_drive_cmd) +*(.text.get_locked_pte) +*(.text.get_filesystem_list) +*(.text.generic_file_open) +*(.text.follow_down) +*(.text.find_next_bit) +*(.text.__find_first_bit) +*(.text.exit_mm) +*(.text.exec_keys) +*(.text.end_buffer_write_sync) +*(.text.end_bio_bh_io_sync) +*(.text.dummy_socket_shutdown) +*(.text.d_rehash) +*(.text.d_path) +*(.text.do_ioctl) +*(.text.dget_locked) +*(.text.copy_thread_group_keys) +*(.text.cdrom_end_request) +*(.text.cap_bprm_apply_creds) +*(.text.blk_rq_bio_prep) +*(.text.__bitmap_intersects) +*(.text.bio_phys_segments) +*(.text.bio_free) +*(.text.arch_get_unmapped_area_topdown) +*(.text.writeback_in_progress) +*(.text.vfs_follow_link) +*(.text.tcp_rcv_state_process) +*(.text.tcp_check_space) +*(.text.sys_stat) +*(.text.sys_rt_sigreturn) +*(.text.sys_rt_sigaction) +*(.text.sys_remap_file_pages) +*(.text.sys_pwrite64) +*(.text.sys_fchownat) +*(.text.sys_fchmodat) +*(.text.strncat) +*(.text.strlcat) +*(.text.strcmp) +*(.text.steal_locks) +*(.text.sock_create) +*(.text.sk_stream_rfree) +*(.text.sk_stream_mem_schedule) +*(.text.skip_atoi) +*(.text.sk_alloc) +*(.text.show_stat) +*(.text.set_fs_pwd) +*(.text.set_binfmt) 
+*(.text.pty_unthrottle) +*(.text.proc_symlink) +*(.text.pipe_release) +*(.text.pageout) +*(.text.n_tty_write_wakeup) +*(.text.n_tty_ioctl) +*(.text.nr_free_zone_pages) +*(.text.migration_thread) +*(.text.mempool_free_slab) +*(.text.meminfo_read_proc) +*(.text.max_sane_readahead) +*(.text.lru_cache_add) +*(.text.kill_fasync) +*(.text.kernel_read) +*(.text.invalidate_mapping_pages) +*(.text.inode_has_buffers) +*(.text.init_once) +*(.text.inet_sendmsg) +*(.text.idedisk_issue_flush) +*(.text.generic_file_write) +*(.text.free_more_memory) +*(.text.__free_fdtable) +*(.text.filp_dtor) +*(.text.exit_sem) +*(.text.exit_itimers) +*(.text.error_interrupt) +*(.text.end_buffer_async_write) +*(.text.eligible_child) +*(.text.elf_map) +*(.text.dump_task_regs) +*(.text.dummy_task_setscheduler) +*(.text.dummy_socket_accept) +*(.text.dummy_file_free_security) +*(.text.__down_read) +*(.text.do_sock_read) +*(.text.do_sigaltstack) +*(.text.do_mremap) +*(.text.current_io_context) +*(.text.cpu_swap_callback) +*(.text.copy_vma) +*(.text.cap_bprm_set_security) +*(.text.blk_insert_request) +*(.text.bio_map_kern_endio) +*(.text.bio_hw_segments) +*(.text.bictcp_cong_avoid) +*(.text.add_interrupt_randomness) +*(.text.wait_for_completion) +*(.text.version_read_proc) +*(.text.unix_write_space) +*(.text.tty_ldisc_ref_wait) +*(.text.tty_ldisc_put) +*(.text.try_to_wake_up) +*(.text.tcp_v4_tw_remember_stamp) +*(.text.tcp_try_undo_dsack) +*(.text.tcp_may_send_now) +*(.text.sys_waitid) +*(.text.sys_sched_getparam) +*(.text.sys_getppid) +*(.text.sys_getcwd) +*(.text.sys_dup2) +*(.text.sys_chmod) +*(.text.sys_chdir) +*(.text.sprintf) +*(.text.sock_wfree) +*(.text.sock_aio_write) +*(.text.skb_drop_fraglist) +*(.text.skb_dequeue) +*(.text.set_close_on_exec) +*(.text.set_brk) +*(.text.seq_puts) +*(.text.SELECT_DRIVE) +*(.text.sched_exec) +*(.text.return_EIO) +*(.text.remove_from_page_cache) +*(.text.rcu_start_batch) +*(.text.__put_task_struct) +*(.text.proc_pid_readdir) +*(.text.proc_get_inode) +*(.text.prepare_to_wait_exclusive) +*(.text.pipe_wait) +*(.text.pipe_new) +*(.text.pdflush_operation) +*(.text.__pagevec_release) +*(.text.pagevec_lookup_tag) +*(.text.packet_rcv) +*(.text.n_tty_set_room) +*(.text.nr_free_pages) +*(.text.__net_timestamp) +*(.text.mpage_end_io_read) +*(.text.mod_timer) +*(.text.__memcpy) +*(.text.mb_cache_shrink_fn) +*(.text.lock_rename) +*(.text.kstrdup) +*(.text.is_ignored) +*(.text.int_very_careful) +*(.text.inotify_inode_is_dead) +*(.text.inotify_get_cookie) +*(.text.inode_get_bytes) +*(.text.init_timer) +*(.text.init_dev) +*(.text.inet_getname) +*(.text.ide_map_sg) +*(.text.__ide_dma_end) +*(.text.hrtimer_get_remaining) +*(.text.get_task_mm) +*(.text.get_random_int) +*(.text.free_pipe_info) +*(.text.filemap_write_and_wait_range) +*(.text.exit_thread) +*(.text.enter_idle) +*(.text.end_that_request_first) +*(.text.end_8259A_irq) +*(.text.dummy_file_alloc_security) +*(.text.do_group_exit) +*(.text.debug_mutex_init) +*(.text.cpuset_exit) +*(.text.cpu_idle) +*(.text.copy_semundo) +*(.text.copy_files) +*(.text.chrdev_open) +*(.text.cdrom_transfer_packet_command) +*(.text.cdrom_mode_sense) +*(.text.blk_phys_contig_segment) +*(.text.blk_get_queue) +*(.text.bio_split) +*(.text.audit_alloc) +*(.text.anon_pipe_buf_release) +*(.text.add_wait_queue_exclusive) +*(.text.add_wait_queue) +*(.text.acct_process) +*(.text.account) +*(.text.zeromap_page_range) +*(.text.yield) +*(.text.writeback_acquire) +*(.text.worker_thread) +*(.text.wait_on_page_writeback_range) +*(.text.__wait_on_buffer) +*(.text.vscnprintf) 
+*(.text.vmalloc_to_pfn) +*(.text.vgacon_save_screen) +*(.text.vfs_unlink) +*(.text.vfs_rmdir) +*(.text.unregister_md_personality) +*(.text.unlock_new_inode) +*(.text.unix_stream_sendmsg) +*(.text.unix_stream_recvmsg) +*(.text.unhash_process) +*(.text.udp_v4_lookup_longway) +*(.text.tty_ldisc_flush) +*(.text.tty_ldisc_enable) +*(.text.tty_hung_up_p) +*(.text.tty_buffer_free_all) +*(.text.tso_fragment) +*(.text.try_to_del_timer_sync) +*(.text.tcp_v4_err) +*(.text.tcp_unhash) +*(.text.tcp_seq_next) +*(.text.tcp_select_initial_window) +*(.text.tcp_sacktag_write_queue) +*(.text.tcp_cwnd_validate) +*(.text.sys_vhangup) +*(.text.sys_uselib) +*(.text.sys_symlink) +*(.text.sys_signal) +*(.text.sys_poll) +*(.text.sys_mount) +*(.text.sys_kill) +*(.text.sys_ioctl) +*(.text.sys_inotify_add_watch) +*(.text.sys_getuid) +*(.text.sys_getrlimit) +*(.text.sys_getitimer) +*(.text.sys_getgroups) +*(.text.sys_ftruncate) +*(.text.sysfs_lookup) +*(.text.sys_exit_group) +*(.text.stub_fork) +*(.text.sscanf) +*(.text.sock_map_fd) +*(.text.sock_get_timestamp) +*(.text.__sock_create) +*(.text.smp_call_function_single) +*(.text.sk_stop_timer) +*(.text.skb_copy_and_csum_datagram) +*(.text.__skb_checksum_complete) +*(.text.single_next) +*(.text.sigqueue_alloc) +*(.text.shrink_dcache_parent) +*(.text.select_idle_routine) +*(.text.run_workqueue) +*(.text.run_local_timers) +*(.text.remove_inode_hash) +*(.text.remove_dquot_ref) +*(.text.register_binfmt) +*(.text.read_cache_pages) +*(.text.rb_last) +*(.text.pty_open) +*(.text.proc_root_readdir) +*(.text.proc_pid_flush) +*(.text.proc_pident_lookup) +*(.text.proc_fill_super) +*(.text.proc_exe_link) +*(.text.posix_locks_deadlock) +*(.text.pipe_iov_copy_from_user) +*(.text.opost) +*(.text.nf_register_hook) +*(.text.netif_rx_ni) +*(.text.m_start) +*(.text.mpage_writepage) +*(.text.mm_alloc) +*(.text.memory_open) +*(.text.mark_buffer_async_write) +*(.text.lru_add_drain_all) +*(.text.locks_init_lock) +*(.text.locks_delete_lock) +*(.text.lock_hrtimer_base) +*(.text.load_script) +*(.text.__kill_fasync) +*(.text.ip_mc_sf_allow) +*(.text.__ioremap) +*(.text.int_with_check) +*(.text.int_sqrt) +*(.text.install_thread_keyring) +*(.text.init_page_buffers) +*(.text.inet_sock_destruct) +*(.text.idle_notifier_register) +*(.text.ide_execute_command) +*(.text.ide_end_drive_cmd) +*(.text.__ide_dma_host_on) +*(.text.hrtimer_run_queues) +*(.text.hpet_mask_rtc_irq_bit) +*(.text.__get_zone_counts) +*(.text.get_zone_counts) +*(.text.get_write_access) +*(.text.get_fs_struct) +*(.text.get_dirty_limits) +*(.text.generic_readlink) +*(.text.free_hot_page) +*(.text.finish_wait) +*(.text.find_inode) +*(.text.find_first_bit) +*(.text.__filemap_fdatawrite_range) +*(.text.__filemap_copy_from_user_iovec) +*(.text.exit_aio) +*(.text.elv_set_request) +*(.text.elv_former_request) +*(.text.dup_namespace) +*(.text.dupfd) +*(.text.dummy_socket_getsockopt) +*(.text.dummy_sb_post_mountroot) +*(.text.dummy_quotactl) +*(.text.dummy_inode_rename) +*(.text.__do_SAK) +*(.text.do_pipe) +*(.text.do_fsync) +*(.text.d_instantiate_unique) +*(.text.d_find_alias) +*(.text.deny_write_access) +*(.text.dentry_unhash) +*(.text.d_delete) +*(.text.datagram_poll) +*(.text.cpuset_fork) +*(.text.cpuid_read) +*(.text.copy_namespace) +*(.text.cond_resched) +*(.text.check_version) +*(.text.__change_page_attr) +*(.text.cfq_slab_kill) +*(.text.cfq_completed_request) +*(.text.cdrom_pc_intr) +*(.text.cdrom_decode_status) +*(.text.cap_capset_check) +*(.text.blk_put_request) +*(.text.bio_fs_destructor) +*(.text.bictcp_min_cwnd) 
+*(.text.alloc_chrdev_region) +*(.text.add_element) +*(.text.acct_update_integrals) +*(.text.write_boundary_block) +*(.text.writeback_release) +*(.text.writeback_inodes) +*(.text.wake_up_state) +*(.text.__wake_up_locked) +*(.text.wake_futex) +*(.text.wait_task_inactive) +*(.text.__wait_on_freeing_inode) +*(.text.wait_noreap_copyout) +*(.text.vmstat_start) +*(.text.vgacon_do_font_op) +*(.text.vfs_readv) +*(.text.vfs_quota_sync) +*(.text.update_queue) +*(.text.unshare_files) +*(.text.unmap_vm_area) +*(.text.unix_socketpair) +*(.text.unix_release_sock) +*(.text.unix_detach_fds) +*(.text.unix_create1) +*(.text.unix_bind) +*(.text.udp_sendmsg) +*(.text.udp_rcv) +*(.text.udp_queue_rcv_skb) +*(.text.uart_write) +*(.text.uart_startup) +*(.text.uart_open) +*(.text.tty_vhangup) +*(.text.tty_termios_baud_rate) +*(.text.tty_release) +*(.text.tty_ldisc_ref) +*(.text.throttle_vm_writeout) +*(.text.058) +*(.text.tcp_xmit_probe_skb) +*(.text.tcp_v4_send_check) +*(.text.tcp_v4_destroy_sock) +*(.text.tcp_sync_mss) +*(.text.tcp_snd_test) +*(.text.tcp_slow_start) +*(.text.tcp_send_fin) +*(.text.tcp_rtt_estimator) +*(.text.tcp_parse_options) +*(.text.tcp_ioctl) +*(.text.tcp_init_tso_segs) +*(.text.tcp_init_cwnd) +*(.text.tcp_getsockopt) +*(.text.tcp_fin) +*(.text.tcp_connect) +*(.text.tcp_cong_avoid) +*(.text.__tcp_checksum_complete_user) +*(.text.task_dumpable) +*(.text.sys_wait4) +*(.text.sys_utimes) +*(.text.sys_symlinkat) +*(.text.sys_socketpair) +*(.text.sys_rmdir) +*(.text.sys_readahead) +*(.text.sys_nanosleep) +*(.text.sys_linkat) +*(.text.sys_fstat) +*(.text.sysfs_readdir) +*(.text.sys_execve) +*(.text.sysenter_tracesys) +*(.text.sys_chown) +*(.text.stub_clone) +*(.text.strrchr) +*(.text.strncpy) +*(.text.stopmachine_set_state) +*(.text.sock_sendmsg) +*(.text.sock_release) +*(.text.sock_fasync) +*(.text.sock_close) +*(.text.sk_stream_write_space) +*(.text.sk_reset_timer) +*(.text.skb_split) +*(.text.skb_recv_datagram) +*(.text.skb_queue_tail) +*(.text.sk_attach_filter) +*(.text.si_swapinfo) +*(.text.simple_strtoll) +*(.text.set_termios) +*(.text.set_task_comm) +*(.text.set_shrinker) +*(.text.set_normalized_timespec) +*(.text.set_brk) +*(.text.serial_in) +*(.text.seq_printf) +*(.text.secure_dccp_sequence_number) +*(.text.rwlock_bug) +*(.text.rt_hash_code) +*(.text.__rta_fill) +*(.text.__request_resource) +*(.text.relocate_new_kernel) +*(.text.release_thread) +*(.text.release_mem) +*(.text.rb_prev) +*(.text.rb_first) +*(.text.random_poll) +*(.text.__put_super_and_need_restart) +*(.text.pty_write) +*(.text.ptrace_stop) +*(.text.proc_self_readlink) +*(.text.proc_root_lookup) +*(.text.proc_root_link) +*(.text.proc_pid_make_inode) +*(.text.proc_pid_attr_write) +*(.text.proc_lookupfd) +*(.text.proc_delete_inode) +*(.text.posix_same_owner) +*(.text.posix_block_lock) +*(.text.poll_initwait) +*(.text.pipe_write) +*(.text.pipe_read_fasync) +*(.text.pipe_ioctl) +*(.text.pdflush) +*(.text.pci_user_read_config_dword) +*(.text.page_readlink) +*(.text.null_lseek) +*(.text.nf_hook_slow) +*(.text.netlink_sock_destruct) +*(.text.netlink_broadcast) +*(.text.neigh_resolve_output) +*(.text.name_to_int) +*(.text.mwait_idle) +*(.text.mutex_trylock) +*(.text.mutex_debug_check_no_locks_held) +*(.text.m_stop) +*(.text.mpage_end_io_write) +*(.text.mpage_alloc) +*(.text.move_page_tables) +*(.text.mounts_open) +*(.text.__memset) +*(.text.memcpy_fromiovec) +*(.text.make_8259A_irq) +*(.text.lookup_user_key_possessed) +*(.text.lookup_create) +*(.text.locks_insert_lock) +*(.text.locks_alloc_lock) +*(.text.kthread_should_stop) 
+*(.text.kswapd) +*(.text.kobject_uevent) +*(.text.kobject_get_path) +*(.text.kobject_get) +*(.text.klist_children_put) +*(.text.__ip_route_output_key) +*(.text.ip_flush_pending_frames) +*(.text.ip_compute_csum) +*(.text.ip_append_data) +*(.text.ioc_set_batching) +*(.text.invalidate_inode_pages) +*(.text.__invalidate_device) +*(.text.install_arg_page) +*(.text.in_sched_functions) +*(.text.inotify_unmount_inodes) +*(.text.init_once) +*(.text.init_cdrom_command) +*(.text.inet_stream_connect) +*(.text.inet_sk_rebuild_header) +*(.text.inet_csk_addr2sockaddr) +*(.text.inet_create) +*(.text.ifind) +*(.text.ide_setup_dma) +*(.text.ide_outsw) +*(.text.ide_fixstring) +*(.text.ide_dma_setup) +*(.text.ide_cdrom_packet) +*(.text.ide_cd_put) +*(.text.ide_build_sglist) +*(.text.i8259A_shutdown) +*(.text.hung_up_tty_ioctl) +*(.text.hrtimer_nanosleep) +*(.text.hrtimer_init) +*(.text.hrtimer_cancel) +*(.text.hash_futex) +*(.text.group_send_sig_info) +*(.text.grab_cache_page_nowait) +*(.text.get_wchan) +*(.text.get_stack) +*(.text.get_page_state) +*(.text.getnstimeofday) +*(.text.get_node) +*(.text.get_kprobe) +*(.text.generic_unplug_device) +*(.text.free_task) +*(.text.frag_show) +*(.text.find_next_zero_string) +*(.text.filp_open) +*(.text.fillonedir) +*(.text.exit_io_context) +*(.text.exit_idle) +*(.text.exact_lock) +*(.text.eth_header) +*(.text.dummy_unregister_security) +*(.text.dummy_socket_post_create) +*(.text.dummy_socket_listen) +*(.text.dummy_quota_on) +*(.text.dummy_inode_follow_link) +*(.text.dummy_file_receive) +*(.text.dummy_file_mprotect) +*(.text.dummy_file_lock) +*(.text.dummy_file_ioctl) +*(.text.dummy_bprm_post_apply_creds) +*(.text.do_writepages) +*(.text.__down_interruptible) +*(.text.do_notify_resume) +*(.text.do_acct_process) +*(.text.del_timer_sync) +*(.text.default_rebuild_header) +*(.text.d_callback) +*(.text.dcache_readdir) +*(.text.ctrl_dumpfamily) +*(.text.cpuset_rmdir) +*(.text.copy_strings_kernel) +*(.text.con_write_room) +*(.text.complete_all) +*(.text.collect_sigign_sigcatch) +*(.text.clear_user) +*(.text.check_unthrottle) +*(.text.cdrom_release) +*(.text.cdrom_newpc_intr) +*(.text.cdrom_ioctl) +*(.text.cdrom_check_status) +*(.text.cdev_put) +*(.text.cdev_add) +*(.text.cap_ptrace) +*(.text.cap_bprm_secureexec) +*(.text.cache_alloc_refill) +*(.text.bmap) +*(.text.blk_run_queue) +*(.text.blk_queue_dma_alignment) +*(.text.blk_ordered_req_seq) +*(.text.blk_backing_dev_unplug) +*(.text.__bitmap_subset) +*(.text.__bitmap_and) +*(.text.bio_unmap_user) +*(.text.__bforget) +*(.text.bd_forget) +*(.text.bad_pipe_w) +*(.text.bad_get_user) +*(.text.audit_free) +*(.text.anon_vma_ctor) +*(.text.anon_pipe_buf_map) +*(.text.alloc_sock_iocb) +*(.text.alloc_fdset) +*(.text.aio_kick_handler) +*(.text.__add_entropy_words) +*(.text.add_disk_randomness) diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 029be20acce..39ff0708f80 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -20,7 +20,12 @@ SECTIONS phys_startup_64 = startup_64 - LOAD_OFFSET; _text = .; /* Text and read-only data */ .text : AT(ADDR(.text) - LOAD_OFFSET) { + /* First the code that has to be first for bootstrapping */ *(.bootstrap.text) + /* Then all the functions that are "hot" in profiles, to group them + onto the same hugetlb entry */ + #include "functionlist" + /* Then the rest */ *(.text) SCHED_TEXT LOCK_TEXT -- cgit v1.2.3 From 5b922cd429675059f7a7798f7a0b3898c38dc070 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Sat, 25 
Mar 2006 16:30:55 +0100 Subject: [PATCH] x86_64: fix orphaned bits of timer init messages When x86_64 timer init messages were changed to use apic verbosity levels, two messages were missed and one got the wrong level. This causes the last word of a suppressed message to print on a line by itself. Fix that so either the entire message prints or none of it does. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/io_apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index db7db1e3650..77b4c608cca 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1848,7 +1848,7 @@ static inline void check_timer(void) */ setup_ExtINT_IRQ0_pin(apic2, pin2, vector); if (timer_irq_works()) { - printk("works.\n"); + apic_printk(APIC_VERBOSE," works.\n"); nmi_watchdog_default(); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); @@ -1860,7 +1860,7 @@ static inline void check_timer(void) */ clear_IO_APIC_pin(apic2, pin2); } - printk(" failed.\n"); + apic_printk(APIC_VERBOSE," failed.\n"); if (nmi_watchdog == NMI_IO_APIC) { printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); @@ -1875,7 +1875,7 @@ static inline void check_timer(void) enable_8259A_irq(0); if (timer_irq_works()) { - apic_printk(APIC_QUIET, " works.\n"); + apic_printk(APIC_VERBOSE," works.\n"); return; } apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); -- cgit v1.2.3 From 01d4bed417b5943577e9290fbf672ea9a449dc46 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:58 +0100 Subject: [PATCH] x86_64: Limit max number of CPUs to 255 Because 256 causes overflows in some code that stores them in 8 bit fields and the x86 APIC architecture cannot handle more than 255 anyways. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 24a3016c079..6fc63145719 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -323,7 +323,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID config NR_CPUS int "Maximum number of CPUs (2-256)" - range 2 256 + range 2 255 depends on SMP default "8" help -- cgit v1.2.3 From 7682968b7d4d42bb076051b962c3926b4c98539a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:04 +0100 Subject: [PATCH] x86_64: Change default setting for noexec32 to match i386 kernel This means i386 processes compiled with a recent compiler will get non executable heap by default now. This is the same default as a 32bit PAE kernel would use on a NX enabled CPU. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 356b462cb0e..eabdb63fec3 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c @@ -59,7 +59,7 @@ int __init nonx_setup(char *str) } __setup("noexec=", nonx_setup); /* parsed early actually */ -int force_personality32 = READ_IMPLIES_EXEC; +int force_personality32 = 0; /* noexec32=on|off Control non executable heap for 32bit processes. 
-- cgit v1.2.3 From 267b48014a5c0c2ae90b04dad5d95ceb903365a6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:10 +0100 Subject: [PATCH] x86_64: Try to allocate node memmap near the end of node This fixes problems with very large nodes (over 128GB) filling up all of the first 4GB with their mem_map and not leaving enough space for the swiotlb. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/mm/numa.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index e4b62753a19..07471a3eb19 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -149,7 +149,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en /* Initialize final allocator for a zone */ void __init setup_node_zones(int nodeid) { - unsigned long start_pfn, end_pfn; + unsigned long start_pfn, end_pfn, memmapsize, limit; unsigned long zones[MAX_NR_ZONES]; unsigned long holes[MAX_NR_ZONES]; @@ -159,6 +159,16 @@ void __init setup_node_zones(int nodeid) Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); + /* Try to allocate mem_map at end to not fill up precious <4GB + memory. */ + memmapsize = sizeof(struct page) * (end_pfn-start_pfn); + limit = end_pfn << PAGE_SHIFT; + NODE_DATA(nodeid)->node_mem_map = + __alloc_bootmem_core(NODE_DATA(nodeid)->bdata, + memmapsize, SMP_CACHE_BYTES, + round_down(limit - memmapsize, PAGE_SIZE), + limit); + size_zones(zones, holes, start_pfn, end_pfn); free_area_init_node(nodeid, NODE_DATA(nodeid), zones, start_pfn, holes); -- cgit v1.2.3 From b1fc513d81b0f50f9543428ce95ec63ae70ab184 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sat, 25 Mar 2006 16:31:13 +0100 Subject: [PATCH] x86_64: Use cpumask bitops for cpu_vm_mask cpu_vm_mask is of type cpumask_t, so use the proper bitops. Signed-off-by: Brian Gerst Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/smp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 19ef012b1f1..4a6628b14d9 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -75,7 +75,7 @@ static inline void leave_mm(int cpu) { if (read_pda(mmu_state) == TLBSTATE_OK) BUG(); - clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask); + cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); load_cr3(swapper_pg_dir); } @@ -85,7 +85,7 @@ static inline void leave_mm(int cpu) * [cpu0: the cpu that switches] * 1) switch_mm() either 1a) or 1b) * 1a) thread switch to a different mm - * 1a1) clear_bit(cpu, &old_mm->cpu_vm_mask); + * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); * Stop ipi delivery for the old mm. This is not synchronized with * the other cpus, but smp_invalidate_interrupt ignore flush ipis * for the wrong mm, and in the worst case we perform a superfluous @@ -95,7 +95,7 @@ static inline void leave_mm(int cpu) * was in lazy tlb mode. * 1a3) update cpu active_mm * Now cpu0 accepts tlb flushes for the new mm. - * 1a4) set_bit(cpu, &new_mm->cpu_vm_mask); + * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); * Now the other cpus will send tlb flush ipis. * 1a4) change cr3. 
* 1b) thread switch without mm change -- cgit v1.2.3 From da7ed9f98f6f3f18664f8ab24303f9428b9d78f8 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sat, 25 Mar 2006 16:31:16 +0100 Subject: [PATCH] x86_64: timer interrupt lockup due to pending interrupt o check_timer() routine fails while second kernel is booting after a crash on an opetron box. Problem happens because timer vector (0x31) seems to be locked. o After a system crash, it is not safe to service interrupts any more, hence interrupts are disabled. This leads to pending interrupts at LAPIC. LAPIC sends these interrupts to the CPU during early boot of second kernel. Other pending interrupts are discarded saying unexpected trap but timer interrupt is serviced and CPU does not issue an LAPIC EOI because it think this interrupt came from i8259 and sends ack to 8259. This leads to vector 0x31 locking as LAPIC does not clear respective ISR and keeps on waiting for EOI. o This patch issues extra EOI for the pending interrupts who have ISR set. o Though today only timer seems to be the special case because in early boot it thinks interrupts are coming from i8259 and uses mask_and_ack_8259A() as ack handler and does not issue LAPIC EOI. But probably doing it in generic manner for all vectors makes sense. Signed-off-by: Vivek Goyal Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/apic.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index e5b14c57eaa..d54620147e8 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c @@ -342,6 +342,7 @@ void __init init_bsp_APIC(void) void __cpuinit setup_local_APIC (void) { unsigned int value, maxlvt; + int i, j; value = apic_read(APIC_LVR); @@ -370,6 +371,25 @@ void __cpuinit setup_local_APIC (void) value &= ~APIC_TPRI_MASK; apic_write(APIC_TASKPRI, value); + /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1< Date: Sat, 25 Mar 2006 16:31:19 +0100 Subject: [PATCH] x86_64: free_bootmem_node needs __pa in allocate_aperture free_bootmem_node expects a physical address to be passed in, but __alloc_bootmem_node returns a virtual one. That address needs to be translated to physical. 
Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/aperture.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index a0f955b9995..15b45582101 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -60,7 +60,7 @@ static u32 __init allocate_aperture(void) printk("Cannot allocate aperture memory hole (%p,%uK)\n", p, aper_size>>10); if (p) - free_bootmem_node(nd0, (unsigned long)p, aper_size); + free_bootmem_node(nd0, __pa(p), aper_size); return 0; } printk("Mapping aperture over %d KB of RAM @ %lx\n", -- cgit v1.2.3 From 9d95dd849ccc43c4b21504e1829b5bed68cdb1bc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:22 +0100 Subject: [PATCH] i386/x86-64: List Intel LaGrange AKA SMX in /proc/cpuinfo Spec just got published so we know the CPUID bit. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 8acf5313e83..db70090a08f 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -1280,7 +1280,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -- cgit v1.2.3 From ba22f13563de5773701fc318ccaaa37b1fb6d294 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:31 +0100 Subject: [PATCH] x86_64: Remove CONFIG_UNORDERED_IO It was a failed experiment - all benchmarks done with it on both AMD and Intel showed it was a loss. That was probably because the store buffers of the CPUs for write combining traffic weren't large enough. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 6fc63145719..31bab721cb7 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -520,16 +520,6 @@ config PCI_MMCONFIG bool "Support mmconfig PCI config space access" depends on PCI && ACPI -config UNORDERED_IO - bool "Unordered IO mapping access" - depends on EXPERIMENTAL - help - Use unordered stores to access IO memory mappings in device drivers. - Still very experimental. When a driver works on IA64/ppc64/pa-risc it should - work with this option, but it makes the drivers behave differently - from i386. Requires that the driver writer used memory barriers - properly. - source "drivers/pci/pcie/Kconfig" source "drivers/pci/Kconfig" -- cgit v1.2.3 From 5d05f4de414c98348219b633401ad9c9a5348a8b Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Sat, 25 Mar 2006 16:31:34 +0100 Subject: [PATCH] x86_64: Make GART_IOMMU kconfig help text more specific (trivial) Have the GART_IOMMU help text specify that this is the hardware IOMMU in amd64 processors. This will be significant if/when other IOMMUs are added to the x86-64 architecture. :-) Also, note that the previous help text stated that IOMMU was needed for >3GB memory instead of >4GB. This is fixed in the newer version. 
Signed-off-by: Jon Mason Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 31bab721cb7..6420baeb8c1 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -364,13 +364,15 @@ config GART_IOMMU select SWIOTLB depends on PCI help - Support the IOMMU. Needed to run systems with more than 3GB of memory - properly with 32-bit PCI devices that do not support DAC (Double Address - Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. - Normally the kernel will take the right choice by itself. - This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU - and a software emulation used on other systems. - If unsure, say Y. + Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors + and for the bounce buffering software IOMMU. + Needed to run systems with more than 3GB of memory properly with + 32-bit PCI devices that do not support DAC (Double Address Cycle). + The IOMMU can be turned off at runtime with the iommu=off parameter. + Normally the kernel will take the right choice by itself. + This option includes a driver for the AMD Opteron/Athlon64 IOMMU + northbridge and a software emulation used on other systems without + hardware IOMMU. If unsure, say Y. # need this always enabled with GART_IOMMU for the VIA workaround config SWIOTLB -- cgit v1.2.3 From 9c01dda02f417270e733e38effe1084c77ae107d Mon Sep 17 00:00:00 2001 From: Navin Boppuri Date: Sat, 25 Mar 2006 16:31:40 +0100 Subject: [PATCH] x86_64: Search K8 devices on more devices. arch/x86_64/kernel/aperture.c: The search for the AGP bridge has been extended to search all 256 buses instead of just the first 32. This is required since on some systems, the bridge may be located on a bus much farther than the first 32. By searching all 256 buses, we guarantee that the search succeeds on such systems. arch/x86_64/kernel/pci-gart.c: The search for the Northbridge is not limited to just bus 0 anymore. This is required because on certain systems, we may not find one on bus 0.
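For context on the 32 vs. 256 numbers: the discovery loop in the aperture.c hunk below walks the standard PCI type 1 configuration mechanism, whose address word reserves 8 bits for the bus number (0-255), 5 for the device and 3 for the function. A small user-space sketch of that encoding; the bit layout is standard PCI, while the helper name and the sample values are made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* PCI type 1 (0xCF8/0xCFC) configuration address layout. */
static uint32_t pci_conf1_addr(unsigned bus, unsigned dev,
			       unsigned func, unsigned reg)
{
	return 0x80000000u |		/* enable bit                  */
	       (bus  << 16) |		/* bus number: 0-255 (8 bits)  */
	       (dev  << 11) |		/* device:     0-31  (5 bits)  */
	       (func <<  8) |		/* function:   0-7   (3 bits)  */
	       (reg  & 0xfcu);		/* dword-aligned register      */
}

int main(void)
{
	/* Last addressable function on the last bus, register 0x08. */
	printf("0x%08x\n", (unsigned)pci_conf1_addr(255, 31, 7, 0x08));
	return 0;
}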
Signed-off-by: Navin Boppuri Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/aperture.c | 2 +- arch/x86_64/kernel/pci-gart.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 15b45582101..fffd6b0a2fa 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -161,7 +161,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) int num, slot, func; /* Poor man's PCI discovery */ - for (num = 0; num < 32; num++) { + for (num = 0; num < 256; num++) { for (slot = 0; slot < 32; slot++) { for (func = 0; func < 8; func++) { u32 class, cap; diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index ebd125e968b..a6c01e12126 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -65,9 +65,7 @@ static u32 gart_unmapped_entry; #define for_all_nb(dev) \ dev = NULL; \ - while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL)\ - if (dev->bus->number == 0 && \ - (PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31)) + while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL) static struct pci_dev *northbridges[MAX_NB]; static u32 northbridge_flush_word[MAX_NB]; -- cgit v1.2.3 From 40caa884650fc6931cf55918dbf7496c49b3ddf8 Mon Sep 17 00:00:00 2001 From: Kevin Winchester Date: Sat, 25 Mar 2006 16:31:43 +0100 Subject: [PATCH] x86_64: Eliminate register_die_notifier symbol exported register_die_notifier is exported twice, once in traps.c and once in x8664_ksyms.c. This results in a warning on build. Signed-off-by: Kevin Winchester Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/x8664_ksyms.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 365e5fbae9a..d96a9348e5a 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c @@ -144,7 +144,6 @@ EXPORT_SYMBOL(rwsem_down_write_failed_thunk); EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(die_chain); -EXPORT_SYMBOL(register_die_notifier); #ifdef CONFIG_SMP EXPORT_SYMBOL(cpu_sibling_map); -- cgit v1.2.3 From dcf36bfa5de6d4e37878d4c98b6986fee4eb8b4c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 25 Mar 2006 16:31:46 +0100 Subject: [PATCH] x86_64: group memnodemap and memnodeshift in a memnode structure pfn_to_page() and others need to access both memnode_shift and the very first bytes of memnodemap[]. If we force memnode_shift to be just before the memnodemap array, we can reduce the memory footprint to one cache line instead of two for most setups. This patch introduce a 'memnode' structure where shift and map[] are carefully placed. Signed-off-by: Eric Dumazet Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/mm/numa.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 07471a3eb19..63c72641b73 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -25,8 +25,7 @@ struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; bootmem_data_t plat_node_bdata[MAX_NUMNODES]; -int memnode_shift; -u8 memnodemap[NODEMAPSIZE]; +struct memnode memnode; unsigned char cpu_to_node[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = NUMA_NO_NODE @@ -367,8 +366,7 @@ void __init init_cpu_to_node(void) EXPORT_SYMBOL(cpu_to_node); EXPORT_SYMBOL(node_to_cpumask); -EXPORT_SYMBOL(memnode_shift); -EXPORT_SYMBOL(memnodemap); +EXPORT_SYMBOL(memnode); EXPORT_SYMBOL(node_data); #ifdef CONFIG_DISCONTIGMEM -- cgit v1.2.3 From 0085979006fd55ec7c2f721bdaa9af130a08d62a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:49 +0100 Subject: [PATCH] x86_64: Remove bogus special case in AMD core parsing. No need to restrict to power of two here. TBD needs more double checking Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index db70090a08f..a57eec8311a 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -946,8 +946,6 @@ static int __init init_amd(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000008) { c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_max_cores & (c->x86_max_cores - 1)) - c->x86_max_cores = 1; amd_detect_cmp(c); } -- cgit v1.2.3 From 3076a492a5e8dd624f237886646b35d12193502d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:55 +0100 Subject: [PATCH] x86_64: Report SIGSEGV for IRET faults tcsh is not happy with the -9999 error code. Suggested by Ernie Petrides Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/entry.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7c10e9009d6..8538bfea30e 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S @@ -553,7 +553,7 @@ iret_label: /* force a signal here? this matches i386 behaviour */ /* running with kernel gs */ bad_iret: - movq $-9999,%rdi /* better code? */ + movq $11,%rdi /* SIGSEGV */ sti jmp do_exit .previous -- cgit v1.2.3 From c36cd16f78a5dd740a619ef8445e35a73484d58b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:32:04 +0100 Subject: [PATCH] x86_64: Add cpu_relax() to busy loops in PM timer code Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/pmtimer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/pmtimer.c b/arch/x86_64/kernel/pmtimer.c index 5c51d10408a..ee5ee4891f3 100644 --- a/arch/x86_64/kernel/pmtimer.c +++ b/arch/x86_64/kernel/pmtimer.c @@ -86,7 +86,7 @@ static unsigned pmtimer_wait_tick(void) for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK; a == b; b = inl(pmtmr_ioport) & ACPI_PM_MASK) - ; + cpu_relax(); return b; } @@ -97,6 +97,7 @@ void pmtimer_wait(unsigned us) a = pmtimer_wait_tick(); do { b = inl(pmtmr_ioport); + cpu_relax(); } while (cyc2us(b - a) < us); } -- cgit v1.2.3 From 841b8a46bffec232377d2de157f971e812be4fe4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sun, 26 Mar 2006 01:36:59 -0800 Subject: [PATCH] x86: "make isoimage" support; FDINITRD= support; minor cleanups Add a "make isoimage" to i386 and x86-64, which allows the automatic creation of a bootable CD image. It also adds an option FDINITRD= to include an initrd of the user's choice in generated floppy- or CD boot images. Finally, some minor cleanups of the image generation code. Signed-off-by: H. 
Peter Anvin Cc: Andi Kleen Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/Makefile | 17 +++++++++++------ arch/x86_64/boot/Makefile | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 10 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 0fbc0283609..585fd4a559c 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile @@ -70,7 +70,7 @@ drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/ boot := arch/x86_64/boot PHONY += bzImage bzlilo install archmrproper \ - fdimage fdimage144 fdimage288 archclean + fdimage fdimage144 fdimage288 isoimage archclean #Default target when executing "make" all: bzImage @@ -87,7 +87,7 @@ bzlilo: vmlinux bzdisk: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk -fdimage fdimage144 fdimage288: vmlinux +fdimage fdimage144 fdimage288 isoimage: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ install: @@ -99,11 +99,16 @@ archclean: define archhelp echo '* bzImage - Compressed kernel image (arch/$(ARCH)/boot/bzImage)' echo ' install - Install kernel using' - echo ' (your) ~/bin/installkernel or' - echo ' (distribution) /sbin/installkernel or' - echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' (your) ~/bin/installkernel or' + echo ' (distribution) /sbin/installkernel or' + echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' bzdisk - Create a boot floppy in /dev/fd0' + echo ' fdimage - Create a boot floppy image' + echo ' isoimage - Create a boot CD-ROM image' endef -CLEAN_FILES += arch/$(ARCH)/boot/fdimage arch/$(ARCH)/boot/mtools.conf +CLEAN_FILES += arch/$(ARCH)/boot/fdimage \ + arch/$(ARCH)/boot/image.iso \ + arch/$(ARCH)/boot/mtools.conf diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile index 29f8396ed15..43ee6c50c27 100644 --- a/arch/x86_64/boot/Makefile +++ b/arch/x86_64/boot/Makefile @@ -60,8 +60,12 @@ $(obj)/setup $(obj)/bootsect: %: %.o FORCE $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@ -# Set this if you want to pass append arguments to the zdisk/fdimage kernel +# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel FDARGS = +# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel +FDINITRD = + +image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ @@ -70,8 +74,11 @@ $(obj)/mtools.conf: $(src)/mtools.conf.in zdisk: $(BOOTIMAGE) $(obj)/mtools.conf MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync syslinux /dev/fd0 ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - a:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync # These require being root or having syslinux 2.02 or higher installed @@ -79,18 +86,39 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync syslinux $(obj)/fdimage ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - v:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync fdimage288: 
$(BOOTIMAGE) $(obj)/mtools.conf dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync syslinux $(obj)/fdimage ; sync - echo 'default linux $(FDARGS)' | \ + echo '$(image_cmdline)' | \ MTOOLSRC=$(obj)/mtools.conf mcopy - w:syslinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ + fi MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync +isoimage: $(BOOTIMAGE) + -rm -rf $(obj)/isoimage + mkdir $(obj)/isoimage + cp `echo /usr/lib*/syslinux/isolinux.bin | awk '{ print $1; }'` \ + $(obj)/isoimage + cp $(BOOTIMAGE) $(obj)/isoimage/linux + echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ + fi + mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \ + -no-emul-boot -boot-load-size 4 -boot-info-table \ + $(obj)/isoimage + rm -rf $(obj)/isoimage + zlilo: $(BOOTIMAGE) if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi -- cgit v1.2.3 From 88959ea968709c35e8b979ac9f5a398fa748091a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 26 Mar 2006 01:37:27 -0800 Subject: [PATCH] create struct compat_timex and use it everywhere We had a copy of the compatibility version of struct timex in each 64 bit architecture. This patch just creates a global one and replaces all the usages of the old ones. Signed-off-by: Stephen Rothwell Cc: Arnd Bergmann Acked-by: Kyle McMartin Acked-by: Tony Luck Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/sys_ia32.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 2b2d029f477..b13121e451a 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -769,30 +769,17 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) /* Handle adjtimex compatibility. 
*/ -struct timex32 { - u32 modes; - s32 offset, freq, maxerror, esterror; - s32 status, constant, precision, tolerance; - struct compat_timeval time; - s32 tick; - s32 ppsfreq, jitter, shift, stabil; - s32 jitcnt, calcnt, errcnt, stbcnt; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; - s32 :32; s32 :32; s32 :32; s32 :32; -}; - extern int do_adjtimex(struct timex *); asmlinkage long -sys32_adjtimex(struct timex32 __user *utp) +sys32_adjtimex(struct compat_timex __user *utp) { struct timex txc; int ret; memset(&txc, 0, sizeof(struct timex)); - if (!access_ok(VERIFY_READ, utp, sizeof(struct timex32)) || + if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) || __get_user(txc.modes, &utp->modes) || __get_user(txc.offset, &utp->offset) || __get_user(txc.freq, &utp->freq) || @@ -817,7 +804,7 @@ sys32_adjtimex(struct timex32 __user *utp) ret = do_adjtimex(&txc); - if (!access_ok(VERIFY_WRITE, utp, sizeof(struct timex32)) || + if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) || __put_user(txc.modes, &utp->modes) || __put_user(txc.offset, &utp->offset) || __put_user(txc.freq, &utp->freq) || -- cgit v1.2.3 From 3158e9411a66fb98d495ac441c242264f31aaf3e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sun, 26 Mar 2006 01:37:29 -0800 Subject: [PATCH] consolidate sys32/compat_adjtimex Create compat_sys_adjtimex and use it an all appropriate places. Signed-off-by: Stephen Rothwell Cc: Arnd Bergmann Acked-by: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/ia32entry.S | 2 +- arch/x86_64/ia32/sys_ia32.c | 64 -------------------------------------------- 2 files changed, 1 insertion(+), 65 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 00dee176c08..7549a4389fb 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -501,7 +501,7 @@ ia32_sys_call_table: .quad sys_setdomainname .quad sys_uname .quad sys_modify_ldt - .quad sys32_adjtimex + .quad compat_sys_adjtimex .quad sys32_mprotect /* 125 */ .quad compat_sys_sigprocmask .quad quiet_ni_syscall /* create_module */ diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index b13121e451a..f182b20858e 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -767,69 +766,6 @@ sys32_sendfile(int out_fd, int in_fd, compat_off_t __user *offset, s32 count) return ret; } -/* Handle adjtimex compatibility. 
*/ - -extern int do_adjtimex(struct timex *); - -asmlinkage long -sys32_adjtimex(struct compat_timex __user *utp) -{ - struct timex txc; - int ret; - - memset(&txc, 0, sizeof(struct timex)); - - if (!access_ok(VERIFY_READ, utp, sizeof(struct compat_timex)) || - __get_user(txc.modes, &utp->modes) || - __get_user(txc.offset, &utp->offset) || - __get_user(txc.freq, &utp->freq) || - __get_user(txc.maxerror, &utp->maxerror) || - __get_user(txc.esterror, &utp->esterror) || - __get_user(txc.status, &utp->status) || - __get_user(txc.constant, &utp->constant) || - __get_user(txc.precision, &utp->precision) || - __get_user(txc.tolerance, &utp->tolerance) || - __get_user(txc.time.tv_sec, &utp->time.tv_sec) || - __get_user(txc.time.tv_usec, &utp->time.tv_usec) || - __get_user(txc.tick, &utp->tick) || - __get_user(txc.ppsfreq, &utp->ppsfreq) || - __get_user(txc.jitter, &utp->jitter) || - __get_user(txc.shift, &utp->shift) || - __get_user(txc.stabil, &utp->stabil) || - __get_user(txc.jitcnt, &utp->jitcnt) || - __get_user(txc.calcnt, &utp->calcnt) || - __get_user(txc.errcnt, &utp->errcnt) || - __get_user(txc.stbcnt, &utp->stbcnt)) - return -EFAULT; - - ret = do_adjtimex(&txc); - - if (!access_ok(VERIFY_WRITE, utp, sizeof(struct compat_timex)) || - __put_user(txc.modes, &utp->modes) || - __put_user(txc.offset, &utp->offset) || - __put_user(txc.freq, &utp->freq) || - __put_user(txc.maxerror, &utp->maxerror) || - __put_user(txc.esterror, &utp->esterror) || - __put_user(txc.status, &utp->status) || - __put_user(txc.constant, &utp->constant) || - __put_user(txc.precision, &utp->precision) || - __put_user(txc.tolerance, &utp->tolerance) || - __put_user(txc.time.tv_sec, &utp->time.tv_sec) || - __put_user(txc.time.tv_usec, &utp->time.tv_usec) || - __put_user(txc.tick, &utp->tick) || - __put_user(txc.ppsfreq, &utp->ppsfreq) || - __put_user(txc.jitter, &utp->jitter) || - __put_user(txc.shift, &utp->shift) || - __put_user(txc.stabil, &utp->stabil) || - __put_user(txc.jitcnt, &utp->jitcnt) || - __put_user(txc.calcnt, &utp->calcnt) || - __put_user(txc.errcnt, &utp->errcnt) || - __put_user(txc.stbcnt, &utp->stbcnt)) - ret = -EFAULT; - - return ret; -} - asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) -- cgit v1.2.3 From c6fd91f0bdcd294a0ae0ba2b2a7f7456ef4b7144 Mon Sep 17 00:00:00 2001 From: bibo mao Date: Sun, 26 Mar 2006 01:38:20 -0800 Subject: [PATCH] kretprobe instance recycled by parent process When kretprobe probes the schedule() function, if the probed process exits then schedule() will never return, so some kretprobe instances will never be recycled. In this patch the parent process will recycle retprobe instances of the probed function and there will be no memory leak of kretprobe instances. 
Signed-off-by: bibo mao Cc: Masami Hiramatsu Cc: Prasanna S Panchamukhi Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/process.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 81111835722..0370720515f 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -35,8 +35,8 @@ #include #include #include -#include #include +#include #include #include @@ -353,13 +353,6 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(me); - if (me->thread.io_bitmap_ptr) { struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); -- cgit v1.2.3 From 2326c77017c79fd6d55c69d8a49a57a252921bcd Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Sun, 26 Mar 2006 01:38:21 -0800 Subject: [PATCH] kprobe handler: discard user space trap Currently kprobe handler traps only happen in kernel space, so function kprobe_exceptions_notify should skip traps which happen in user space. This patch modifies this, and it is based on 2.6.16-rc4. Signed-off-by: bibo mao Cc: Ananth N Mavinakayanahalli Cc: "Keshavamurthy, Anil S" Cc: Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 14f0ced613b..218e015c319 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -601,6 +601,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) -- cgit v1.2.3 From c28f896634f2c931a298490deab3861ab117716a Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Sun, 26 Mar 2006 01:38:23 -0800 Subject: [PATCH] kprobes: fix broken fault handling for x86_64 Provide proper kprobes fault handling, if a user-specified pre/post handlers tries to access user address space, through copy_from_user(), get_user() etc. The user-specified fault handler gets called only if the fault occurs while executing user-specified handlers. In such a case user-specified handler is allowed to fix it first, later if the user-specifed fault handler does not fix it, we try to fix it by calling fix_exception(). The user-specified handler will not be called if the fault happens when single stepping the original instruction, instead we reset the current probe and allow the system page fault handler to fix it up. 
Signed-off-by: Prasanna S Panchamukhi Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/kprobes.c | 62 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 7 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index 218e015c319..accbff3fec4 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -37,10 +37,12 @@ #include #include #include +#include #include #include #include +#include void jprobe_return_end(void); static void __kprobes arch_copy_kprobe(struct kprobe *p); @@ -578,16 +580,62 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + const struct exception_table_entry *fixup; - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs, kcb); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the rip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->rip = (unsigned long)cur->addr; regs->eflags |= kcb->kprobe_old_rflags; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + fixup = search_exception_tables(regs->rip); + if (fixup) { + regs->rip = fixup->fixup; + return 1; + } + + /* + * fixup() could not handle it, + * Let do_page_fault() fix it. 
+ */ + break; + default: + break; } return 0; } -- cgit v1.2.3 From f33e2fbacce8008984db99c45120db31081577c5 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 26 Mar 2006 01:39:42 -0800 Subject: [PATCH] bitops: x86_64: use generic bitops - remove sched_find_first_bit() - remove generic_hweight{64,32,16,8}() - remove ext2_{set,clear,test,find_first_zero,find_next_zero}_bit() - remove minix_{test,set,test_and_clear,test,find_first_zero}_bit() Signed-off-by: Akinobu Mita Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 6420baeb8c1..45efe0ca88f 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -45,6 +45,10 @@ config RWSEM_GENERIC_SPINLOCK config RWSEM_XCHGADD_ALGORITHM bool +config GENERIC_HWEIGHT + bool + default y + config GENERIC_CALIBRATE_DELAY bool default y -- cgit v1.2.3 From 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Mon, 27 Mar 2006 01:15:22 -0800 Subject: [PATCH] sched: new sched domain for representing multi-core Add a new sched domain for representing multi-core with shared caches between cores. Consider a dual package system, each package containing two cores and with last level cache shared between cores with in a package. If there are two runnable processes, with this appended patch those two processes will be scheduled on different packages. On such systems, with this patch we have observed 8% perf improvement with specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2 users). This new domain will come into play only on multi-core systems with shared caches. On other systems, this sched domain will be removed by domain degeneration code. This new domain can be also used for implementing power savings policy (see OLS 2005 CMP kernel scheduler paper for more details.. I will post another patch for power savings policy soon) Most of the arch/* file changes are for cpu_coregroup_map() implementation. Signed-off-by: Suresh Siddha Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 9 +++++++++ arch/x86_64/kernel/setup.c | 3 ++- arch/x86_64/kernel/smpboot.c | 24 ++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 45efe0ca88f..1cb4aa241c8 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -250,6 +250,15 @@ config SCHED_SMT cost of slightly increased overhead in some places. If unsure say N here. +config SCHED_MC + bool "Multi-core scheduler support" + depends on SMP + default y + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. 
+ source "kernel/Kconfig.preempt" config NUMA diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index a57eec8311a..d1f3e9272c0 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -962,7 +962,6 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id(0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -1171,6 +1170,8 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[2] = cpuid_edx(0x80860001); } + c->apicid = phys_pkg_id(0); + /* * Vendor-specific initialization. In this section we * canonicalize the feature flags, meaning if there are diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 66e98659d07..ea48fa63807 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -68,6 +68,9 @@ u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; /* core ID of each logical CPU */ u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; +/* Last level cache ID of each logical CPU */ +u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* Bitmask of currently online CPUs */ cpumask_t cpu_online_map __read_mostly; @@ -445,6 +448,18 @@ void __cpuinit smp_callin(void) cpu_set(cpuid, cpu_callin_map); } +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -463,12 +478,16 @@ static inline void set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -476,6 +495,11 @@ static inline void set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); -- cgit v1.2.3 From dc8ecb43701a78bd3c38e7fed1d1c76840579450 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:34 -0800 Subject: [PATCH] unify pfn_to_page: x86_64 pfn_to_page x86_64 can use generic funcs. For DISCONTIGMEM, CONFIG_OUT_OF_LINE_PFN_TO_PAGE is selected. 
Signed-off-by: KAMEZAWA Hiroyuki Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig | 4 ++++ arch/x86_64/mm/numa.c | 15 --------------- 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 1cb4aa241c8..4310b4a311a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -334,6 +334,10 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool y depends on NUMA +config OUT_OF_LINE_PFN_TO_PAGE + def_bool y + depends on DISCONTIGMEM + config NR_CPUS int "Maximum number of CPUs (2-256)" range 2 255 diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 63c72641b73..4be82d6e2b4 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -377,21 +377,6 @@ EXPORT_SYMBOL(node_data); * Should do that. */ -/* Requires pfn_valid(pfn) to be true */ -struct page *pfn_to_page(unsigned long pfn) -{ - int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); - return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; -} -EXPORT_SYMBOL(pfn_to_page); - -unsigned long page_to_pfn(struct page *page) -{ - return (long)(((page) - page_zone(page)->zone_mem_map) + - page_zone(page)->zone_start_pfn); -} -EXPORT_SYMBOL(page_to_pfn); - int pfn_valid(unsigned long pfn) { unsigned nid; -- cgit v1.2.3 From ec936fc563715a9e2b2e363eb060655b49529325 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 27 Mar 2006 01:15:59 -0800 Subject: [PATCH] for_each_online_pgdat: renaming for_each_pgdat Replace for_each_pgdat() with for_each_online_pgdat(). Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index b0441562544..e5f7f1c3446 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -72,7 +72,7 @@ void show_mem(void) show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); - for_each_pgdat(pgdat) { + for_each_online_pgdat(pgdat) { for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pfn_to_page(pgdat->node_start_pfn + i); total++; -- cgit v1.2.3 From 8fdd6c6df7889dc89df3d9fe0f5bbe6733e39f48 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:26 -0800 Subject: [PATCH] lightweight robust futexes: x86_64 x86_64: add the futex_atomic_cmpxchg_inuser() assembly implementation, and wire up the new syscalls. 
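The interesting kernel-side piece is an atomic compare-and-exchange on a user-space futex word. Below is a rough userspace illustration of the underlying x86-64 primitive only; the real futex_atomic_cmpxchg_inuser() additionally operates on a user pointer with pagefault fixups via the exception table, which is not modelled here, and the helper name is invented.

/*
 * The x86-64 `lock cmpxchg` primitive: atomically set *addr to newval
 * only if it still contains oldval, and return whatever value was
 * found there.  Fault handling on the user address is deliberately
 * left out of this sketch.
 */
#include <stdio.h>

static int cmpxchg32(int *addr, int oldval, int newval)
{
    int prev;

    asm volatile("lock; cmpxchgl %2, %1"
                 : "=a" (prev), "+m" (*addr)
                 : "r" (newval), "0" (oldval)
                 : "memory");
    return prev;    /* equals oldval iff the exchange happened */
}

int main(void)
{
    int futex_word = 0;
    int prev;

    prev = cmpxchg32(&futex_word, 0, 42);   /* succeeds: 0 -> 42 */
    printf("prev=%d word=%d\n", prev, futex_word);

    prev = cmpxchg32(&futex_word, 0, 7);    /* fails: word is 42, not 0 */
    printf("prev=%d word=%d\n", prev, futex_word);
    return 0;
}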
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven Acked-by: Ulrich Drepper Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/ia32entry.S | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 7549a4389fb..35b2faccdc6 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -688,6 +688,8 @@ ia32_sys_call_table: .quad sys_ni_syscall /* pselect6 for now */ .quad sys_ni_syscall /* ppoll for now */ .quad sys_unshare /* 310 */ + .quad compat_sys_set_robust_list + .quad compat_sys_get_robust_list ia32_syscall_end: .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 .quad ni_syscall -- cgit v1.2.3 From e041c683412d5bf44dc2b109053e3b837b71742d Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 27 Mar 2006 01:16:30 -0800 Subject: [PATCH] Notifier chain update: API changes The kernel's implementation of notifier chains is unsafe. There is no protection against entries being added to or removed from a chain while the chain is in use. The issues were discussed in this thread: http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2 We noticed that notifier chains in the kernel fall into two basic usage classes: "Blocking" chains are always called from a process context and the callout routines are allowed to sleep; "Atomic" chains can be called from an atomic context and the callout routines are not allowed to sleep. We decided to codify this distinction and make it part of the API. Therefore this set of patches introduces three new, parallel APIs: one for blocking notifiers, one for atomic notifiers, and one for "raw" notifiers (which is really just the old API under a new name). New kinds of data structures are used for the heads of the chains, and new routines are defined for registration, unregistration, and calling a chain. The three APIs are explained in include/linux/notifier.h and their implementation is in kernel/sys.c. With atomic and blocking chains, the implementation guarantees that the chain links will not be corrupted and that chain callers will not get messed up by entries being added or removed. For raw chains the implementation provides no guarantees at all; users of this API must provide their own protections. (The idea was that situations may come up where the assumptions of the atomic and blocking APIs are not appropriate, so it should be possible for users to handle these things in their own way.) There are some limitations, which should not be too hard to live with. For atomic/blocking chains, registration and unregistration must always be done in a process context since the chain is protected by a mutex/rwsem. Also, a callout routine for a non-raw chain must not try to register or unregister entries on its own chain. (This did happen in a couple of places and the code had to be changed to avoid it.) Since atomic chains may be called from within an NMI handler, they cannot use spinlocks for synchronization. Instead we use RCU. The overhead falls almost entirely in the unregister routine, which is okay since unregistration is much less frequent than calling a chain. Here is the list of chains that we adjusted and their classifications. None of them use the raw API, so for the moment it is only a placeholder.
ATOMIC CHAINS
-------------
arch/i386/kernel/traps.c: i386die_chain
arch/ia64/kernel/traps.c: ia64die_chain
arch/powerpc/kernel/traps.c: powerpc_die_chain
arch/sparc64/kernel/traps.c: sparc64die_chain
arch/x86_64/kernel/traps.c: die_chain
drivers/char/ipmi/ipmi_si_intf.c: xaction_notifier_list
kernel/panic.c: panic_notifier_list
kernel/profile.c: task_free_notifier
net/bluetooth/hci_core.c: hci_notifier
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_chain
net/ipv4/netfilter/ip_conntrack_core.c: ip_conntrack_expect_chain
net/ipv6/addrconf.c: inet6addr_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_chain
net/netfilter/nf_conntrack_core.c: nf_conntrack_expect_chain
net/netlink/af_netlink.c: netlink_chain

BLOCKING CHAINS
---------------
arch/powerpc/platforms/pseries/reconfig.c: pSeries_reconfig_chain
arch/s390/kernel/process.c: idle_chain
arch/x86_64/kernel/process.c: idle_notifier
drivers/base/memory.c: memory_chain
drivers/cpufreq/cpufreq.c: cpufreq_policy_notifier_list
drivers/cpufreq/cpufreq.c: cpufreq_transition_notifier_list
drivers/macintosh/adb.c: adb_client_list
drivers/macintosh/via-pmu.c: sleep_notifier_list
drivers/macintosh/via-pmu68k.c: sleep_notifier_list
drivers/macintosh/windfarm_core.c: wf_client_list
drivers/usb/core/notify.c: usb_notifier_list
drivers/video/fbmem.c: fb_notifier_list
kernel/cpu.c: cpu_chain
kernel/module.c: module_notify_list
kernel/profile.c: munmap_notifier
kernel/profile.c: task_exit_notifier
kernel/sys.c: reboot_notifier_list
net/core/dev.c: netdev_chain
net/decnet/dn_dev.c: dnaddr_chain
net/ipv4/devinet.c: inetaddr_chain

It's possible that some of these classifications are wrong. If they are, please let us know or submit a patch to fix them.

Note that any chain that gets called very frequently should be atomic, because the rwsem read-locking used for blocking chains is very likely to incur cache misses on SMP systems. (However, if the chain's callout routines may sleep then the chain cannot be atomic.)

The patch set was written by Alan Stern and Chandra Seetharaman, incorporating material written by Keith Owens and suggestions from Paul McKenney and Andrew Morton.
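For reference, this is roughly what a client of the new atomic API looks like; the x86_64 conversions below do exactly this for the idle and die chains. The chain, event value and module wrapper are invented for this sketch, and only the notifier declarations and calls themselves come from the new API.

/* Minimal sketch of an atomic notifier chain user (kernel context;
 * the chain name and event number are made up for this example). */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>

/* One line replaces the old notifier_block pointer plus private spinlock. */
static ATOMIC_NOTIFIER_HEAD(demo_chain);

static int demo_callback(struct notifier_block *nb, unsigned long event,
                         void *data)
{
    /* Callouts on an atomic chain must not sleep. */
    printk(KERN_INFO "demo_chain: event %lu\n", event);
    return NOTIFY_OK;
}

static struct notifier_block demo_nb = {
    .notifier_call = demo_callback,
};

static int __init demo_init(void)
{
    /* Register/unregister from process context, per the rules above. */
    return atomic_notifier_chain_register(&demo_chain, &demo_nb);
}

static void __exit demo_exit(void)
{
    atomic_notifier_chain_unregister(&demo_chain, &demo_nb);
}

/* Whoever owns the chain fires it, even from atomic context, with:
 *     atomic_notifier_call_chain(&demo_chain, 1, NULL);
 */
module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");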
[jes@sgi.com: restructure the notifier chain initialization macros] Signed-off-by: Alan Stern Signed-off-by: Chandra Seetharaman Signed-off-by: Jes Sorensen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/process.c | 17 +++++------------ arch/x86_64/kernel/traps.c | 18 +++++++++--------- 2 files changed, 14 insertions(+), 21 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 0370720515f..70dd8e5c688 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -66,24 +66,17 @@ EXPORT_SYMBOL(boot_option_idle_override); void (*pm_idle)(void); static DEFINE_PER_CPU(unsigned int, cpu_idle_state); -static struct notifier_block *idle_notifier; -static DEFINE_SPINLOCK(idle_notifier_lock); +static ATOMIC_NOTIFIER_HEAD(idle_notifier); void idle_notifier_register(struct notifier_block *n) { - unsigned long flags; - spin_lock_irqsave(&idle_notifier_lock, flags); - notifier_chain_register(&idle_notifier, n); - spin_unlock_irqrestore(&idle_notifier_lock, flags); + atomic_notifier_chain_register(&idle_notifier, n); } EXPORT_SYMBOL_GPL(idle_notifier_register); void idle_notifier_unregister(struct notifier_block *n) { - unsigned long flags; - spin_lock_irqsave(&idle_notifier_lock, flags); - notifier_chain_unregister(&idle_notifier, n); - spin_unlock_irqrestore(&idle_notifier_lock, flags); + atomic_notifier_chain_unregister(&idle_notifier, n); } EXPORT_SYMBOL(idle_notifier_unregister); @@ -93,13 +86,13 @@ static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; void enter_idle(void) { __get_cpu_var(idle_state) = CPU_IDLE; - notifier_call_chain(&idle_notifier, IDLE_START, NULL); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); } static void __exit_idle(void) { __get_cpu_var(idle_state) = CPU_NOT_IDLE; - notifier_call_chain(&idle_notifier, IDLE_END, NULL); + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } /* Called from interrupts to signify idle end */ diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 7b148309c52..edaa9fe654d 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c @@ -69,20 +69,20 @@ asmlinkage void alignment_check(void); asmlinkage void machine_check(void); asmlinkage void spurious_interrupt_bug(void); -struct notifier_block *die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - vmalloc_sync_all(); - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + return atomic_notifier_chain_register(&die_chain, nb); +} +EXPORT_SYMBOL(register_die_notifier); + +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&die_chain, nb); } +EXPORT_SYMBOL(unregister_die_notifier); static inline void conditional_sti(struct pt_regs *regs) { -- cgit v1.2.3 From 641f71f5f6ed251959ef8f88b1d0edc6ef7a4632 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Tue, 28 Mar 2006 01:56:01 -0800 Subject: [PATCH] RTC: Remove RTC UIP synchronization on x86_64 Signed-off-by: Matt Mackall Cc: Andi Kleen Cc: Alessandro Zummo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 41 ++++++++++++----------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) (limited to 'arch/x86_64') diff --git 
a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 7f58fa68249..473b514b66e 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -504,42 +504,25 @@ unsigned long long sched_clock(void) static unsigned long get_cmos_time(void) { - unsigned int timeout = 1000000, year, mon, day, hour, min, sec; - unsigned char uip = 0, this = 0; + unsigned int year, mon, day, hour, min, sec; unsigned long flags; unsigned extyear = 0; -/* - * The Linux interpretation of the CMOS clock register contents: When the - * Update-In-Progress (UIP) flag goes from 1 to 0, the RTC registers show the - * second which has precisely just started. Waiting for this can take up to 1 - * second, we timeout approximately after 2.4 seconds on a machine with - * standard 8.3 MHz ISA bus. - */ - spin_lock_irqsave(&rtc_lock, flags); - while (timeout && (!uip || this)) { - uip |= this; - this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; - timeout--; - } - - /* - * Here we are safe to assume the registers won't change for a whole - * second, so we just go ahead and read them. - */ - sec = CMOS_READ(RTC_SECONDS); - min = CMOS_READ(RTC_MINUTES); - hour = CMOS_READ(RTC_HOURS); - day = CMOS_READ(RTC_DAY_OF_MONTH); - mon = CMOS_READ(RTC_MONTH); - year = CMOS_READ(RTC_YEAR); - + do { + sec = CMOS_READ(RTC_SECONDS); + min = CMOS_READ(RTC_MINUTES); + hour = CMOS_READ(RTC_HOURS); + day = CMOS_READ(RTC_DAY_OF_MONTH); + mon = CMOS_READ(RTC_MONTH); + year = CMOS_READ(RTC_YEAR); #ifdef CONFIG_ACPI - if (acpi_fadt.revision >= FADT2_REVISION_ID && acpi_fadt.century) - extyear = CMOS_READ(acpi_fadt.century); + if (acpi_fadt.revision >= FADT2_REVISION_ID && + acpi_fadt.century) + extyear = CMOS_READ(acpi_fadt.century); #endif + } while (sec != CMOS_READ(RTC_SECONDS)); spin_unlock_irqrestore(&rtc_lock, flags); -- cgit v1.2.3
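The rewritten loop above replaces the UIP handshake with a consistency check: read every field, then re-read the seconds register and retry if it ticked in between. Below is a small userspace model of that retry pattern; the "registers" are simulated from time(), and the register names and offsets are illustrative rather than the real CMOS layout.

/*
 * "Read until stable" snapshot of a multi-register clock: grab all the
 * fields, then re-read the seconds register; if it changed, a tick
 * landed mid-snapshot and the values may be torn, so try again.
 */
#include <stdio.h>
#include <time.h>

enum { RTC_SECONDS, RTC_MINUTES, RTC_HOURS, RTC_DAY, RTC_MONTH, RTC_YEAR };

/* Stand-in for CMOS_READ(): each call reflects the clock right now. */
static int cmos_read(int reg)
{
    struct tm tm;
    time_t now = time(NULL);

    gmtime_r(&now, &tm);
    switch (reg) {
    case RTC_SECONDS: return tm.tm_sec;
    case RTC_MINUTES: return tm.tm_min;
    case RTC_HOURS:   return tm.tm_hour;
    case RTC_DAY:     return tm.tm_mday;
    case RTC_MONTH:   return tm.tm_mon + 1;
    default:          return tm.tm_year % 100;
    }
}

int main(void)
{
    int sec, min, hour, day, mon, year;

    do {
        sec  = cmos_read(RTC_SECONDS);
        min  = cmos_read(RTC_MINUTES);
        hour = cmos_read(RTC_HOURS);
        day  = cmos_read(RTC_DAY);
        mon  = cmos_read(RTC_MONTH);
        year = cmos_read(RTC_YEAR);
    } while (sec != cmos_read(RTC_SECONDS));    /* ticked mid-read: retry */

    printf("%02d:%02d:%02d %02d/%02d/%02d UTC\n",
           hour, min, sec, day, mon, year);
    return 0;
}

In practice the body runs once, and at most one retry is needed when a second boundary happens to fall inside the read, instead of potentially spinning for up to a second waiting for UIP to clear.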