From c4bce90ea2069e5a87beac806de3090ab32128d5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Feb 2006 21:57:54 -0800 Subject: [SPARC64]: Deal with PTE layout differences in SUN4V. Yes, you heard it right, they changed the PTE layout for SUN4V. Ho hum... This is the simple and inefficient way to support this. It'll get optimized, don't worry. Signed-off-by: David S. Miller --- arch/sparc64/mm/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/sparc64/mm/fault.c') diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 6f0539aa44d..439a53c1e56 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -137,7 +137,7 @@ static unsigned int get_user_insn(unsigned long tpc) if (!pte_present(pte)) goto out; - pa = (pte_val(pte) & _PAGE_PADDR); + pa = (pte_pfn(pte) << PAGE_SHIFT); pa += (tpc & ~PAGE_MASK); /* Use phys bypass so we don't pollute dtlb/dcache. */ -- cgit v1.2.3 From bf941d6cd62aa2022f0887e25e3d02c389b0bf9b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 13 Feb 2006 18:07:45 -0800 Subject: [SPARC64]: Log faulting vaddr when bogus kernel PC detected. Signed-off-by: David S. Miller --- arch/sparc64/mm/fault.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/sparc64/mm/fault.c') diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index 439a53c1e56..b97bd054aad 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -91,12 +91,13 @@ static void __kprobes unhandled_fault(unsigned long address, die_if_kernel("Oops", regs); } -static void bad_kernel_pc(struct pt_regs *regs) +static void bad_kernel_pc(struct pt_regs *regs, unsigned long vaddr) { unsigned long *ksp; printk(KERN_CRIT "OOPS: Bogus kernel PC [%016lx] in fault handler\n", regs->tpc); + printk(KERN_CRIT "OOPS: Fault was to vaddr[%lx]\n", vaddr); __asm__("mov %%sp, %0" : "=r" (ksp)); show_stack(current, ksp); unhandled_fault(regs->tpc, current, regs); @@ -280,7 +281,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) (tpc >= MODULES_VADDR && tpc < MODULES_END)) { /* Valid, no problems... */ } else { - bad_kernel_pc(regs); + bad_kernel_pc(regs, address); return; } } -- cgit v1.2.3 From 7a1ac5264108fc3ed22d17a3cdd76212ed1666d1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 16 Mar 2006 02:02:32 -0800 Subject: [SPARC64]: Fix and re-enable dynamic TSB sizing. This is good for up to %50 performance improvement of some test cases. The problem has been the race conditions, and hopefully I've plugged them all up here. 1) There was a serious race in switch_mm() wrt. lazy TLB switching to and from kernel threads. We could erroneously skip a tsb_context_switch() and thus use a stale TSB across a TSB grow event. There is a big comment now in that function describing exactly how it can happen. 2) All code paths that do something with the TSB need to be guarded with the mm->context.lock spinlock. This makes page table flushing paths properly synchronize with both TSB growing and TLB context changes. 3) TSB growing events are moved to the end of successful fault processing. Previously it was in update_mmu_cache() but that is deadlock prone. At the end of do_sparc64_fault() we hold no spinlocks that could deadlock the TSB grow sequence. We also have dropped the address space semaphore. While we're here, add prefetching to the copy_tsb() routine and put it in assembler into the tsb.S file. This piece of code is quite time critical. There are some small negative side effects to this code which can be improved upon. In particular we grab the mm->context.lock even for the tsb insert done by update_mmu_cache() now and that's a bit excessive. We can get rid of that locking, and the same lock taking in flush_tsb_user(), by disabling PSTATE_IE around the whole operation including the capturing of the tsb pointer and tsb_nentries value. That would work because anyone growing the TSB won't free up the old TSB until all cpus respond to the TSB change cross call. I'm not quite so confident in that optimization to put it in right now, but eventually we might be able to and the description is here for reference. This code seems very solid now. It passes several parallel GCC bootstrap builds, and our favorite "nut cruncher" stress test which is a full "make -j8192" build of a "make allmodconfig" kernel. That puts about 256 processes on each cpu's run queue, makes lots of process cpu migrations occur, causes lots of page table and TLB flushing activity, incurs many context version number changes, and it swaps the machine real far out to disk even though there is 16GB of ram on this test system. :-) Signed-off-by: David S. Miller --- arch/sparc64/mm/fault.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/sparc64/mm/fault.c') diff --git a/arch/sparc64/mm/fault.c b/arch/sparc64/mm/fault.c index b97bd054aad..63b6cc0cd5d 100644 --- a/arch/sparc64/mm/fault.c +++ b/arch/sparc64/mm/fault.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * To debug kernel to catch accesses to certain virtual/physical addresses. @@ -258,7 +259,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) struct vm_area_struct *vma; unsigned int insn = 0; int si_code, fault_code; - unsigned long address; + unsigned long address, mm_rss; fault_code = get_thread_fault_code(); @@ -407,6 +408,11 @@ good_area: } up_read(&mm->mmap_sem); + + mm_rss = get_mm_rss(mm); + if (unlikely(mm_rss >= mm->context.tsb_rss_limit)) + tsb_grow(mm, mm_rss); + return; /* -- cgit v1.2.3