60 files changed, 390 insertions, 1544 deletions
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index c7481d59b6d..6d5251254f6 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -371,6 +371,8 @@ show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		i = node_spanned_pages(nid);
 		while (i-- > 0) {
 			struct page *page = nid_page_nr(nid, i);
@@ -384,6 +386,7 @@ show_mem(void)
 			else
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
 	}
 	printk("%ld pages of RAM\n",total);
 	printk("%ld free pages\n",free);
diff --git a/arch/alpha/mm/remap.c b/arch/alpha/mm/remap.c
index 19817ad3d89..a78356c3ead 100644
--- a/arch/alpha/mm/remap.c
+++ b/arch/alpha/mm/remap.c
@@ -2,7 +2,6 @@
 #include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 
-/* called with the page_table_lock held */
 static inline void 
 remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, 
 	       unsigned long phys_addr, unsigned long flags)
@@ -31,7 +30,6 @@ remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
 	} while (address && (address < end));
 }
 
-/* called with the page_table_lock held */
 static inline int 
 remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, 
 	       unsigned long phys_addr, unsigned long flags)
@@ -46,7 +44,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, 
@@ -70,7 +68,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -84,7 +81,6 @@ __alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	return error;
 }
 
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index a94d75fef59..a917e3dd366 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -139,93 +139,33 @@ struct iwmmxt_sigframe {
 	unsigned long	storage[0x98/4];
 };
 
-static int page_present(struct mm_struct *mm, void __user *uptr, int wr)
-{
-	unsigned long addr = (unsigned long)uptr;
-	pgd_t *pgd = pgd_offset(mm, addr);
-	if (pgd_present(*pgd)) {
-		pmd_t *pmd = pmd_offset(pgd, addr);
-		if (pmd_present(*pmd)) {
-			pte_t *pte = pte_offset_map(pmd, addr);
-			return (pte_present(*pte) && (!wr || pte_write(*pte)));
-		}
-	}
-	return 0;
-}
-
-static int copy_locked(void __user *uptr, void *kptr, size_t size, int write,
-		       void (*copyfn)(void *, void __user *))
-{
-	unsigned char v, __user *userptr = uptr;
-	int err = 0;
-
-	do {
-		struct mm_struct *mm;
-
-		if (write) {
-			__put_user_error(0, userptr, err);
-			__put_user_error(0, userptr + size - 1, err);
-		} else {
-			__get_user_error(v, userptr, err);
-			__get_user_error(v, userptr + size - 1, err);
-		}
-
-		if (err)
-			break;
-
-		mm = current->mm;
-		spin_lock(&mm->page_table_lock);
-		if (page_present(mm, userptr, write) &&
-		    page_present(mm, userptr + size - 1, write)) {
-		    	copyfn(kptr, uptr);
-		} else
-			err = 1;
-		spin_unlock(&mm->page_table_lock);
-	} while (err);
-
-	return err;
-}
-
 static int preserve_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
 	/* the iWMMXt context must be 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	__put_user_error(IWMMXT_MAGIC0, &frame->magic0, err);
-	__put_user_error(IWMMXT_MAGIC1, &frame->magic1, err);
-
-	/*
-	 * iwmmxt_task_copy() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	err |= copy_locked(&frame->storage, current_thread_info(),
-			   sizeof(frame->storage), 1, iwmmxt_task_copy);
-	return err;
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	kframe->magic0 = IWMMXT_MAGIC0;
+	kframe->magic1 = IWMMXT_MAGIC1;
+	iwmmxt_task_copy(current_thread_info(), &kframe->storage);
+	return __copy_to_user(frame, kframe, sizeof(*frame));
 }
 
 static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame)
 {
-	unsigned long magic0, magic1;
-	int err = 0;
+	char kbuf[sizeof(*frame) + 8];
+	struct iwmmxt_sigframe *kframe;
 
-	/* the iWMMXt context is 64 bit aligned */
-	WARN_ON((unsigned long)frame & 7);
-
-	/*
-	 * Validate iWMMXt context signature.
-	 * Also, iwmmxt_task_restore() doesn't check user permissions.
-	 * Let's do a dummy write on the upper boundary to ensure
-	 * access to user mem is OK all way up.
-	 */
-	__get_user_error(magic0, &frame->magic0, err);
-	__get_user_error(magic1, &frame->magic1, err);
-	if (!err && magic0 == IWMMXT_MAGIC0 && magic1 == IWMMXT_MAGIC1)
-		err = copy_locked(&frame->storage, current_thread_info(),
-				  sizeof(frame->storage), 0, iwmmxt_task_restore);
-	return err;
+	/* the iWMMXt context must be 64 bit aligned */
+	kframe = (struct iwmmxt_sigframe *)((unsigned long)(kbuf + 8) & ~7);
+	if (__copy_from_user(kframe, frame, sizeof(*frame)))
+		return -1;
+	if (kframe->magic0 != IWMMXT_MAGIC0 ||
+	    kframe->magic1 != IWMMXT_MAGIC1)
+		return -1;
+	iwmmxt_task_restore(current_thread_info(), &kframe->storage);
+	return 0;
 }
 
 #endif
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index baa09601a64..66e5a0516f2 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -483,29 +483,33 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
 		unsigned long addr = regs->ARM_r2;
 		struct mm_struct *mm = current->mm;
 		pgd_t *pgd; pmd_t *pmd; pte_t *pte;
+		spinlock_t *ptl;
 
 		regs->ARM_cpsr &= ~PSR_C_BIT;
-		spin_lock(&mm->page_table_lock);
+		down_read(&mm->mmap_sem);
 		pgd = pgd_offset(mm, addr);
 		if (!pgd_present(*pgd))
 			goto bad_access;
 		pmd = pmd_offset(pgd, addr);
 		if (!pmd_present(*pmd))
 			goto bad_access;
-		pte = pte_offset_map(pmd, addr);
-		if (!pte_present(*pte) || !pte_write(*pte))
+		pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+		if (!pte_present(*pte) || !pte_write(*pte)) {
+			pte_unmap_unlock(pte, ptl);
 			goto bad_access;
+		}
 		val = *(unsigned long *)addr;
 		val -= regs->ARM_r0;
 		if (val == 0) {
 			*(unsigned long *)addr = regs->ARM_r1;
 			regs->ARM_cpsr |= PSR_C_BIT;
 		}
-		spin_unlock(&mm->page_table_lock);
+		pte_unmap_unlock(pte, ptl);
+		up_read(&mm->mmap_sem);
 		return val;
 
 		bad_access:
-		spin_unlock(&mm->page_table_lock);
+		up_read(&mm->mmap_sem);
 		/* simulate a write access fault */
 		do_DataAbort(addr, 15 + (1 << 11), regs);
 		return -1;
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index 82f4d5e27c5..47b0b767f08 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -397,8 +397,6 @@ static int __init consistent_init(void)
 	pte_t *pte;
 	int ret = 0;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	do {
 		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
 		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -409,7 +407,7 @@ static int __init consistent_init(void)
 		}
 		WARN_ON(!pmd_none(*pmd));
 
-		pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
 		if (!pte) {
 			printk(KERN_ERR "%s: no pte tables\n", __func__);
 			ret = -ENOMEM;
@@ -419,8 +417,6 @@ static int __init consistent_init(void)
 		consistent_pte = pte;
 	} while (0);
 
-	spin_unlock(&init_mm.page_table_lock);
-
 	return ret;
 }
 
diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index be4ab3d73c9..7fc1b35a674 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -26,6 +26,11 @@ static unsigned long shared_pte_mask = L_PTE_CACHEABLE;
 /*
  * We take the easy way out of this problem - we make the
  * PTE uncacheable.  However, we leave the write buffer on.
+ *
+ * Note that the pte lock held when calling update_mmu_cache must also
+ * guard the pte (somewhere else in the same mm) that we modify here.
+ * Therefore those configurations which might call adjust_pte (those
+ * without CONFIG_CPU_CACHE_VIPT) cannot support split page_table_lock.
  */
 static int adjust_pte(struct vm_area_struct *vma, unsigned long address)
 {
@@ -127,7 +132,7 @@ void __flush_dcache_page(struct address_space *mapping, struct page *page);
  *  2. If we have multiple shared mappings of the same space in
  *     an object, we need to deal with the cache aliasing issues.
  *
- * Note that the page_table_lock will be held.
+ * Note that the pte lock will be held.
  */
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 6fb1258df1b..0f128c28fee 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -75,7 +75,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 
 	pgprot = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_WRITE | flags);
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, pgprot);
@@ -97,7 +97,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr,
 	phys_addr -= address;
 	dir = pgd_offset(&init_mm, address);
 	BUG_ON(address >= end);
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
 		if (!pmd) {
@@ -114,7 +113,6 @@ remap_area_pages(unsigned long start, unsigned long phys_addr,
 		dir++;
 	} while (address && (address < end));
 
-	spin_unlock(&init_mm.page_table_lock);
 	flush_cache_vmap(start, end);
 	return err;
 }
diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c
index 61bc2fa0511..1221fdde176 100644
--- a/arch/arm/mm/mm-armv.c
+++ b/arch/arm/mm/mm-armv.c
@@ -180,11 +180,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 
 	if (!vectors_high()) {
 		/*
-		 * This lock is here just to satisfy pmd_alloc and pte_lock
-		 */
-		spin_lock(&mm->page_table_lock);
-
-		/*
 		 * On ARM, first page must always be allocated since it
 		 * contains the machine vectors.
 		 */
@@ -201,23 +196,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 		set_pte(new_pte, *init_pte);
 		pte_unmap_nested(init_pte);
 		pte_unmap(new_pte);
-
-		spin_unlock(&mm->page_table_lock);
 	}
 
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pages((unsigned long)new_pgd, 2);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }
@@ -243,6 +229,7 @@ void free_pgd_slow(pgd_t *pgd)
 	pte = pmd_page(*pmd);
 	pmd_clear(pmd);
 	dec_page_state(nr_page_table_pages);
+	pte_lock_deinit(pte);
 	pte_free(pte);
 	pmd_free(pmd);
 free:
diff --git a/arch/arm/oprofile/backtrace.c b/arch/arm/oprofile/backtrace.c
index df35c452a8b..7c22c12618c 100644
--- a/arch/arm/oprofile/backtrace.c
+++ b/arch/arm/oprofile/backtrace.c
@@ -49,42 +49,22 @@ static struct frame_tail* kernel_backtrace(struct frame_tail *tail)
 
 static struct frame_tail* user_backtrace(struct frame_tail *tail)
 {
-	struct frame_tail buftail;
+	struct frame_tail buftail[2];
 
-	/* hardware pte might not be valid due to dirty/accessed bit emulation
-	 * so we use copy_from_user and benefit from exception fixups */
-	if (copy_from_user(&buftail, tail, sizeof(struct frame_tail)))
+	/* Also check accessibility of one struct frame_tail beyond */
+	if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+		return NULL;
+	if (__copy_from_user_inatomic(buftail, tail, sizeof(buftail)))
 		return NULL;
 
-	oprofile_add_trace(buftail.lr);
+	oprofile_add_trace(buftail[0].lr);
 
 	/* frame pointers should strictly progress back up the stack
 	 * (towards higher addresses) */
-	if (tail >= buftail.fp)
+	if (tail >= buftail[0].fp)
 		return NULL;
 
-	return buftail.fp-1;
-}
-
-/* Compare two addresses and see if they're on the same page */
-#define CMP_ADDR_EQUAL(x,y,offset) ((((unsigned long) x) >> PAGE_SHIFT) \
-	== ((((unsigned long) y) + offset) >> PAGE_SHIFT))
-
-/* check that the page(s) containing the frame tail are present */
-static int pages_present(struct frame_tail *tail)
-{
-	struct mm_struct * mm = current->mm;
-
-	if (!check_user_page_readable(mm, (unsigned long)tail))
-		return 0;
-
-	if (CMP_ADDR_EQUAL(tail, tail, 8))
-		return 1;
-
-	if (!check_user_page_readable(mm, ((unsigned long)tail) + 8))
-		return 0;
-
-	return 1;
+	return buftail[0].fp-1;
 }
 
 /*
@@ -118,7 +98,6 @@ static int valid_kernel_stack(struct frame_tail *tail, struct pt_regs *regs)
 void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
 	struct frame_tail *tail;
-	unsigned long last_address = 0;
 
 	tail = ((struct frame_tail *) regs->ARM_fp) - 1;
 
@@ -132,13 +111,6 @@ void arm_backtrace(struct pt_regs * const regs, unsigned int depth)
 		return;
 	}
 
-	while (depth-- && tail && !((unsigned long) tail & 3)) {
-		if ((!CMP_ADDR_EQUAL(last_address, tail, 0)
-			|| !CMP_ADDR_EQUAL(last_address, tail, 8))
-				&& !pages_present(tail))
-			return;
-		last_address = (unsigned long) tail;
+	while (depth-- && tail && !((unsigned long) tail & 3))
 		tail = user_backtrace(tail);
-	}
 }
-
diff --git a/arch/arm26/mm/memc.c b/arch/arm26/mm/memc.c
index 8e8a2bb2487..34def6397c3 100644
--- a/arch/arm26/mm/memc.c
+++ b/arch/arm26/mm/memc.c
@@ -79,12 +79,6 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 		goto no_pgd;
 
 	/*
-	 * This lock is here just to satisfy pmd_alloc and pte_lock
-         * FIXME: I bet we could avoid taking it pretty much altogether
-	 */
-	spin_lock(&mm->page_table_lock);
-
-	/*
 	 * On ARM, first page must always be allocated since it contains
 	 * the machine vectors.
 	 */
@@ -92,7 +86,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	if (!new_pmd)
 		goto no_pmd;
 
-	new_pte = pte_alloc_kernel(mm, new_pmd, 0);
+	new_pte = pte_alloc_map(mm, new_pmd, 0);
 	if (!new_pte)
 		goto no_pte;
 
@@ -101,6 +95,7 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	init_pte = pte_offset(init_pmd, 0);
 
 	set_pte(new_pte, *init_pte);
+	pte_unmap(new_pte);
 
 	/*
 	 * the page table entries are zeroed
@@ -112,23 +107,14 @@ pgd_t *get_pgd_slow(struct mm_struct *mm)
 	memcpy(new_pgd + FIRST_KERNEL_PGD_NR, init_pgd + FIRST_KERNEL_PGD_NR,
 		(PTRS_PER_PGD - FIRST_KERNEL_PGD_NR) * sizeof(pgd_t));
 
-	spin_unlock(&mm->page_table_lock);
-
 	/* update MEMC tables */
 	cpu_memc_update_all(new_pgd);
 	return new_pgd;
 
 no_pte:
-	spin_unlock(&mm->page_table_lock);
 	pmd_free(new_pmd);
-	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pmd:
-	spin_unlock(&mm->page_table_lock);
 	free_pgd_slow(new_pgd);
-	return NULL;
-
 no_pgd:
 	return NULL;
 }
diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c
index 8233406798d..b08a28bb58a 100644
--- a/arch/cris/arch-v32/mm/tlb.c
+++ b/arch/cris/arch-v32/mm/tlb.c
@@ -175,6 +175,8 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	return 0;
 }
 
+static DEFINE_SPINLOCK(mmu_context_lock);
+
 /* Called in schedule() just before actually doing the switch_to. */
 void
 switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -183,10 +185,10 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	int cpu = smp_processor_id();
 
 	/* Make sure there is a MMU context. */
-	spin_lock(&next->page_table_lock);
+	spin_lock(&mmu_context_lock);
 	get_mmu_context(next);
 	cpu_set(cpu, next->cpu_vm_mask);
-	spin_unlock(&next->page_table_lock);
+	spin_unlock(&mmu_context_lock);
 
 	/*
 	 * Remember the pgd for the fault handlers. Keep a seperate copy of it
diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index ebba11e270f..a92ac987758 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c
@@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, prot);
@@ -74,7 +74,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pud_t *pud;
 		pmd_t *pmd;
@@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c
index cfc4f97490c..342823aad75 100644
--- a/arch/frv/mm/dma-alloc.c
+++ b/arch/frv/mm/dma-alloc.c
@@ -55,21 +55,18 @@ static int map_page(unsigned long va, unsigned long pa, pgprot_t prot)
 	pte_t *pte;
 	int err = -ENOMEM;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	/* Use upper 10 bits of VA to index the first level map */
 	pge = pgd_offset_k(va);
 	pue = pud_offset(pge, va);
 	pme = pmd_offset(pue, va);
 
 	/* Use middle 10 bits of VA to index the second-level map */
-	pte = pte_alloc_kernel(&init_mm, pme, va);
+	pte = pte_alloc_kernel(pme, va);
 	if (pte != 0) {
 		err = 0;
 		set_pte(pte, mk_pte_phys(pa & PAGE_MASK, prot));
 	}
 
-	spin_unlock(&init_mm.page_table_lock);
 	return err;
 }
 
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 4eaec0f3525..2c67dfe5a6b 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -87,14 +87,14 @@ static inline void pgd_list_add(pgd_t *pgd)
 	if (pgd_list)
-		pgd_list->private = (unsigned long) &page->index;
+		set_page_private(pgd_list, (unsigned long)&page->index);
 	pgd_list = page;
-	page->private = (unsigned long) &pgd_list;
+	set_page_private(page, (unsigned long)&pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *next, **pprev, *page = virt_to_page(pgd);
 	next = (struct page *) page->index;
-	pprev = (struct page **) page->private;
+	pprev = (struct page **)page_private(page);
 	*pprev = next;
 	if (next)
-		next->private = (unsigned long) pprev;
+		set_page_private(next, (unsigned long)pprev);
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index 16b48500962..fc1993564f9 100644
--- a/arch/i386/kernel/vm86.c
+++ b/arch/i386/kernel/vm86.c
@@ -134,17 +134,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 	return ret;
 }
 
-static void mark_screen_rdonly(struct task_struct * tsk)
+static void mark_screen_rdonly(struct mm_struct *mm)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
-	pte_t *pte, *mapped;
+	pte_t *pte;
+	spinlock_t *ptl;
 	int i;
 
-	preempt_disable();
-	spin_lock(&tsk->mm->page_table_lock);
-	pgd = pgd_offset(tsk->mm, 0xA0000);
+	pgd = pgd_offset(mm, 0xA0000);
 	if (pgd_none_or_clear_bad(pgd))
 		goto out;
 	pud = pud_offset(pgd, 0xA0000);
@@ -153,16 +152,14 @@ static void mark_screen_rdonly(struct task_struct * tsk)
 	pmd = pmd_offset(pud, 0xA0000);
 	if (pmd_none_or_clear_bad(pmd))
 		goto out;
-	pte = mapped = pte_offset_map(pmd, 0xA0000);
+	pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
 	for (i = 0; i < 32; i++) {
 		if (pte_present(*pte))
 			set_pte(pte, pte_wrprotect(*pte));
 		pte++;
 	}
-	pte_unmap(mapped);
+	pte_unmap_unlock(pte, ptl);
 out:
-	spin_unlock(&tsk->mm->page_table_lock);
-	preempt_enable();
 	flush_tlb();
 }
 
@@ -306,7 +303,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
 
 	tsk->thread.screen_bitmap = info->screen_bitmap;
 	if (info->flags & VM86_SCREEN_BITMAP)
-		mark_screen_rdonly(tsk);
+		mark_screen_rdonly(tsk->mm);
 	__asm__ __volatile__(
 		"xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t"
 		"movl %0,%%esp\n\t"
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index 244d8ec66be..c4af9638dbf 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
 
 extern unsigned long find_max_low_pfn(void);
 extern void find_max_pfn(void);
-extern void one_highpage_init(struct page *, int, int);
+extern void add_one_highpage_init(struct page *, int, int);
 
 extern struct e820map e820;
 extern unsigned long init_pg_tables_end;
@@ -427,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro)
 			if (!pfn_valid(node_pfn))
 				continue;
 			page = pfn_to_page(node_pfn);
-			one_highpage_init(page, node_pfn, bad_ppro);
+			add_one_highpage_init(page, node_pfn, bad_ppro);
 		}
 	}
 	totalram_pages += totalhigh_pages;
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 2ebaf75f732..542d9298da5 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/efi.h>
+#include <linux/memory_hotplug.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -266,17 +267,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pkmap_page_table = pte;	
 }
 
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+void __devinit free_new_highpage(struct page *page)
+{
+	set_page_count(page, 1);
+	__free_page(page);
+	totalhigh_pages++;
+}
+
+void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
 	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
 		ClearPageReserved(page);
-		set_page_count(page, 1);
-		__free_page(page);
-		totalhigh_pages++;
+		free_new_highpage(page);
 	} else
 		SetPageReserved(page);
 }
 
+static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+{
+	free_new_highpage(page);
+	totalram_pages++;
+#ifdef CONFIG_FLATMEM
+	max_mapnr = max(pfn, max_mapnr);
+#endif
+	num_physpages++;
+	return 0;
+}
+
+/*
+ * The NUMA case is not currently handled.
+ * This assumes a single node, and that any
+ * dynamically added memory being onlined
+ * here is in HIGHMEM.
+ */
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	add_one_highpage_hotplug(page, page_to_pfn(page));
+}
+
+
 #ifdef CONFIG_NUMA
 extern void set_highmem_pages_init(int);
 #else
@@ -284,7 +314,7 @@ static void __init set_highmem_pages_init(int bad_ppro)
 {
 	int pfn;
 	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
-		one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+		add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
 	totalram_pages += totalhigh_pages;
 }
 #endif /* CONFIG_FLATMEM */
@@ -615,6 +645,28 @@ void __init mem_init(void)
 #endif
 }
 
+/*
+ * This is for the non-NUMA, single-node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int add_memory(u64 start, u64 size)
+{
+	struct pglist_data *pgdata = &contig_page_data;
+	struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+	return -EINVAL;
+}
+#endif
+
 kmem_cache_t *pgd_cache;
 kmem_cache_t *pmd_cache;
 
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index f379b8d6755..5d09de8d1c6 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
 	unsigned long pfn;
 
 	pfn = phys_addr >> PAGE_SHIFT;
-	pte = pte_alloc_kernel(&init_mm, pmd, addr);
+	pte = pte_alloc_kernel(pmd, addr);
 	if (!pte)
 		return -ENOMEM;
 	do {
@@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr,
 	flush_cache_all();
 	phys_addr -= addr;
 	pgd = pgd_offset_k(addr);
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		next = pgd_addr_end(addr, end);
 		err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags);
 		if (err)
 			break;
 	} while (pgd++, addr = next, addr != end);
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return err;
 }
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index dcdce2c6c53..9db3242103b 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -31,11 +31,13 @@ void show_mem(void)
 	pg_data_t *pgdat;
 	unsigned long i;
 	struct page_state ps;
+	unsigned long flags;
 
 	printk(KERN_INFO "Mem-info:\n");
 	show_free_areas();
 	printk(KERN_INFO "Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -48,6 +50,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk(KERN_INFO "%d pages of RAM\n", total);
 	printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
@@ -188,19 +191,19 @@ static inline void pgd_list_add(pgd_t *pgd)
 	struct page *page = virt_to_page(pgd);
 	page->index = (unsigned long)pgd_list;
 	if (pgd_list)
-		pgd_list->private = (unsigned long)&page->index;
+		set_page_private(pgd_list, (unsigned long)&page->index);
 	pgd_list = page;
-	page->private = (unsigned long)&pgd_list;
+	set_page_private(page, (unsigned long)&pgd_list);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *next, **pprev, *page = virt_to_page(pgd);
 	next = (struct page *)page->index;
-	pprev = (struct page **)page->private;
+	pprev = (struct page **)page_private(page);
 	*pprev = next;
 	if (next)
-		next->private = (unsigned long)pprev;
+		set_page_private(next, (unsigned long)pprev);
 }
 
 void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c
index 65dfd2edb67..21654be3f73 100644
--- a/arch/i386/oprofile/backtrace.c
+++ b/arch/i386/oprofile/backtrace.c
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <asm/ptrace.h>
+#include <asm/uaccess.h>
 
 struct frame_head {
 	struct frame_head * ebp;
@@ -21,26 +22,22 @@ struct frame_head {
 static struct frame_head *
 dump_backtrace(struct frame_head * head)
 {
-	oprofile_add_trace(head->ret);
+	struct frame_head bufhead[2];
 
-	/* frame pointers should strictly progress back up the stack
-	 * (towards higher addresses) */
-	if (head >= head->ebp)
+	/* Also check accessibility of one struct frame_head beyond */
+	if (!access_ok(VERIFY_READ, head, sizeof(bufhead)))
+		return NULL;
+	if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead)))
 		return NULL;
 
-	return head->ebp;
-}
-
-/* check that the page(s) containing the frame head are present */
-static int pages_present(struct frame_head * head)
-{
-	struct mm_struct * mm = current->mm;
+	oprofile_add_trace(bufhead[0].ret);
 
-	/* FIXME: only necessary once per page */
-	if (!check_user_page_readable(mm, (unsigned long)head))
-		return 0;
+	/* frame pointers should strictly progress back up the stack
+	 * (towards higher addresses) */
+	if (head >= bufhead[0].ebp)
+		return NULL;
 
-	return check_user_page_readable(mm, (unsigned long)(head + 1));
+	return bufhead[0].ebp;
 }
 
 /*
@@ -97,15 +94,6 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 		return;
 	}
 
-#ifdef CONFIG_SMP
-	if (!spin_trylock(&current->mm->page_table_lock))
-		return;
-#endif
-
-	while (depth-- && head && pages_present(head))
+	while (depth-- && head)
 		head = dump_backtrace(head);
-
-#ifdef CONFIG_SMP
-	spin_unlock(&current->mm->page_table_lock);
-#endif
 }
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index d71731ee5b6..f7dfc107cb7 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2352,7 +2352,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
 	insert_vm_struct(mm, vma);
 
 	mm->total_vm  += size >> PAGE_SHIFT;
-	vm_stat_account(vma);
+	vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
+							vma_pages(vma));
 	up_write(&task->mm->mmap_sem);
 
 	/*
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index a3788fb8480..a88cdb7232f 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -555,9 +555,13 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
-		unsigned long present = pgdat->node_present_pages;
+		unsigned long present;
+		unsigned long flags;
 		int shared = 0, cached = 0, reserved = 0;
+
 		printk("Node ID: %d\n", pgdat->node_id);
+		pgdat_resize_lock(pgdat, &flags);
+		present = pgdat->node_present_pages;
 		for(i = 0; i < pgdat->node_spanned_pages; i++) {
 			struct page *page;
 			if (pfn_valid(pgdat->node_start_pfn + i))
@@ -571,6 +575,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page)-1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 		total_present += present;
 		total_reserved += reserved;
 		total_cached += cached;
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 3c32af910d6..af7eb087dca 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -20,32 +20,6 @@
 extern void die (char *, struct pt_regs *, long);
 
 /*
- * This routine is analogous to expand_stack() but instead grows the
- * register backing store (which grows towards higher addresses).
- * Since the register backing store is access sequentially, we
- * disallow growing the RBS by more than a page at a time.  Note that
- * the VM_GROWSUP flag can be set on any VM area but that's fine
- * because the total process size is still limited by RLIMIT_STACK and
- * RLIMIT_AS.
- */
-static inline long
-expand_backing_store (struct vm_area_struct *vma, unsigned long address)
-{
-	unsigned long grow;
-
-	grow = PAGE_SIZE >> PAGE_SHIFT;
-	if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
-	    || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
-		return -ENOMEM;
-	vma->vm_end += PAGE_SIZE;
-	vma->vm_mm->total_vm += grow;
-	if (vma->vm_flags & VM_LOCKED)
-		vma->vm_mm->locked_vm += grow;
-	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
-	return 0;
-}
-
-/*
  * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
  * (inside region 5, on ia64) and that page is present.
  */
@@ -185,7 +159,13 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
 		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
 			goto bad_area;
-		if (expand_backing_store(vma, address))
+		/*
+		 * Since the register backing store is accessed sequentially,
+		 * we disallow growing it by more than a page at a time.
+		 */
+		if (address > vma->vm_end + PAGE_SIZE - sizeof(long))
+			goto bad_area;
+		if (expand_upwards(vma, address))
 			goto bad_area;
 	}
 	goto good_area;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 98246acd499..e3215ba64ff 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -158,7 +158,7 @@ ia64_init_addr_space (void)
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
 		vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
-		vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
+		vma->vm_flags = VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT;
 		down_write(&current->mm->mmap_sem);
 		if (insert_vm_struct(current->mm, vma)) {
 			up_write(&current->mm->mmap_sem);
@@ -275,26 +275,21 @@ put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
 
 	pgd = pgd_offset_k(address);		/* note: this is NOT pgd_offset()! */
 
-	spin_lock(&init_mm.page_table_lock);
 	{
 		pud = pud_alloc(&init_mm, pgd, address);
 		if (!pud)
 			goto out;
-
 		pmd = pmd_alloc(&init_mm, pud, address);
 		if (!pmd)
 			goto out;
-		pte = pte_alloc_map(&init_mm, pmd, address);
+		pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			goto out;
-		if (!pte_none(*pte)) {
-			pte_unmap(pte);
+		if (!pte_none(*pte))
 			goto out;
-		}
 		set_pte(pte, mk_pte(page, pgprot));
-		pte_unmap(pte);
 	}
-  out:	spin_unlock(&init_mm.page_table_lock);
+  out:
 	/* no need for flush_tlb */
 	return page;
 }
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index c93e0f2b5fe..c79a9b96d02 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -158,10 +158,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
 # ifdef CONFIG_SMP
 	platform_global_tlb_purge(mm, start, end, nbits);
 # else
+	preempt_disable();
 	do {
 		ia64_ptcl(start, (nbits<<2));
 		start += (1UL << nbits);
 	} while (start < end);
+	preempt_enable();
 # endif
 
 	ia64_srlz_i();			/* srlz.i implies srlz.d */
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index d9a40b1fe8b..6facf15b04f 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -48,6 +48,8 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; ++i) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -60,6 +62,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk("%d pages of RAM\n", total);
 	printk("%d pages of HIGHMEM\n",highmem);
@@ -150,10 +153,14 @@ int __init reservedpages_count(void)
 	int reservedpages, nid, i;
 
 	reservedpages = 0;
-	for_each_online_node(nid)
+	for_each_online_node(nid) {
+		unsigned long flags;
+		pgdat_resize_lock(NODE_DATA(nid), &flags);
 		for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++)
 			if (PageReserved(nid_page_nr(nid, i)))
 				reservedpages++;
+		pgdat_resize_unlock(NODE_DATA(nid), &flags);
+	}
 
 	return reservedpages;
 }
diff --git a/arch/m32r/mm/ioremap.c b/arch/m32r/mm/ioremap.c
index 70c59055c19..a151849a605 100644
--- a/arch/m32r/mm/ioremap.c
+++ b/arch/m32r/mm/ioremap.c
@@ -67,7 +67,7 @@ remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -90,7 +90,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -104,7 +103,6 @@ remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index ba960bbc8e6..1dd5d18b220 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -388,33 +388,11 @@ config AMIGA_PCMCIA
 	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
 	  600. If you intend to use pcmcia cards say Y; otherwise say N.
 
-config STRAM_SWAP
-	bool "Support for ST-RAM as swap space"
-	depends on ATARI && BROKEN
-	---help---
-	  Some Atari 68k machines (including the 520STF and 1020STE) divide
-	  their addressable memory into ST and TT sections.  The TT section
-	  (up to 512MB) is the main memory; the ST section (up to 4MB) is
-	  accessible to the built-in graphics board, runs slower, and is
-	  present mainly for backward compatibility with older machines.
-
-	  This enables support for using (parts of) ST-RAM as swap space,
-	  instead of as normal system memory. This can first enhance system
-	  performance if you have lots of alternate RAM (compared to the size
-	  of ST-RAM), because executable code always will reside in faster
-	  memory. ST-RAM will remain as ultra-fast swap space. On the other
-	  hand, it allows much improved dynamic allocations of ST-RAM buffers
-	  for device driver modules (e.g. floppy, ACSI, SLM printer, DMA
-	  sound). The probability that such allocations at module load time
-	  fail is drastically reduced.
-
 config STRAM_PROC
 	bool "ST-RAM statistics in /proc"
 	depends on ATARI
 	help
-	  Say Y here to report ST-RAM usage statistics in /proc/stram.  See
-	  the help for CONFIG_STRAM_SWAP for discussion of ST-RAM and its
-	  uses.
+	  Say Y here to report ST-RAM usage statistics in /proc/stram.
 
 config HEARTBEAT
 	bool "Use power LED as a heartbeat" if AMIGA || APOLLO || ATARI || MAC ||Q40
diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index 5a3c106b40c..22e0481a5f7 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -15,11 +15,9 @@
 #include <linux/kdev_t.h>
 #include <linux/major.h>
 #include <linux/init.h>
-#include <linux/swap.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
-#include <linux/shm.h>
 #include <linux/bootmem.h>
 #include <linux/mount.h>
 #include <linux/blkdev.h>
@@ -33,8 +31,6 @@
 #include <asm/io.h>
 #include <asm/semaphore.h>
 
-#include <linux/swapops.h>
-
 #undef DEBUG
 
 #ifdef DEBUG
@@ -49,8 +45,7 @@
 #include <linux/proc_fs.h>
 #endif
 
-/* Pre-swapping comments:
- *
+/*
  * ++roman:
  *
  * New version of ST-Ram buffer allocation. Instead of using the
@@ -75,76 +70,6 @@
  *
  */
 
-/*
- * New Nov 1997: Use ST-RAM as swap space!
- *
- * In the past, there were often problems with modules that require ST-RAM
- * buffers. Such drivers have to use __get_dma_pages(), which unfortunately
- * often isn't very successful in allocating more than 1 page :-( [1] The net
- * result was that most of the time you couldn't insmod such modules (ataflop,
- * ACSI, SCSI on Falcon, Atari internal framebuffer, not to speak of acsi_slm,
- * which needs a 1 MB buffer... :-).
- *
- * To overcome this limitation, ST-RAM can now be turned into a very
- * high-speed swap space. If a request for an ST-RAM buffer comes, the kernel
- * now tries to unswap some pages on that swap device to make some free (and
- * contiguous) space. This works much better in comparison to
- * __get_dma_pages(), since used swap pages can be selectively freed by either
- * moving them to somewhere else in swap space, or by reading them back into
- * system memory. Ok, there operation of unswapping isn't really cheap (for
- * each page, one has to go through the page tables of all processes), but it
- * doesn't happen that often (only when allocation ST-RAM, i.e. when loading a
- * module that needs ST-RAM). But it at least makes it possible to load such
- * modules!
- *
- * It could also be that overall system performance increases a bit due to
- * ST-RAM swapping, since slow ST-RAM isn't used anymore for holding data or
- * executing code in. It's then just a (very fast, compared to disk) back
- * storage for not-so-often needed data. (But this effect must be compared
- * with the loss of total memory...) Don't know if the effect is already
- * visible on a TT, where the speed difference between ST- and TT-RAM isn't
- * that dramatic, but it should on machines where TT-RAM is really much faster
- * (e.g. Afterburner).
- *
- *   [1]: __get_free_pages() does a fine job if you only want one page, but if
- * you want more (contiguous) pages, it can give you such a block only if
- * there's already a free one. The algorithm can't try to free buffers or swap
- * out something in order to make more free space, since all that page-freeing
- * mechanisms work "target-less", i.e. they just free something, but not in a
- * specific place. I.e., __get_free_pages() can't do anything to free
- * *adjacent* pages :-( This situation becomes even worse for DMA memory,
- * since the freeing algorithms are also blind to DMA capability of pages.
- */
-
-/* 1998-10-20: ++andreas
-   unswap_by_move disabled because it does not handle swapped shm pages.
-*/
-
-/* 2000-05-01: ++andreas
-   Integrated with bootmem.  Remove all traces of unswap_by_move.
-*/
-
-#ifdef CONFIG_STRAM_SWAP
-#define ALIGN_IF_SWAP(x)	PAGE_ALIGN(x)
-#else
-#define ALIGN_IF_SWAP(x)	(x)
-#endif
-
-/* get index of swap page at address 'addr' */
-#define SWAP_NR(addr)		(((addr) - swap_start) >> PAGE_SHIFT)
-
-/* get address of swap page #'nr' */
-#define SWAP_ADDR(nr)		(swap_start + ((nr) << PAGE_SHIFT))
-
-/* get number of pages for 'n' bytes (already page-aligned) */
-#define N_PAGES(n)			((n) >> PAGE_SHIFT)
-
-/* The following two numbers define the maximum fraction of ST-RAM in total
- * memory, below that the kernel would automatically use ST-RAM as swap
- * space. This decision can be overridden with stram_swap= */
-#define MAX_STRAM_FRACTION_NOM		1
-#define MAX_STRAM_FRACTION_DENOM	3
-
 /* Start and end (virtual) of ST-RAM */
 static void *stram_start, *stram_end;
 
@@ -164,10 +89,9 @@ typedef struct stram_block {
 } BLOCK;
 
 /* values for flags field */
-#define BLOCK_FREE		0x01	/* free structure in the BLOCKs pool */
+#define BLOCK_FREE	0x01	/* free structure in the BLOCKs pool */
 #define BLOCK_KMALLOCED	0x02	/* structure allocated by kmalloc() */
-#define BLOCK_GFP		0x08	/* block allocated with __get_dma_pages() */
-#define BLOCK_INSWAP	0x10	/* block allocated in swap space */
+#define BLOCK_GFP	0x08	/* block allocated with __get_dma_pages() */
 
 /* list of allocated blocks */
 static BLOCK *alloc_list;
@@ -179,60 +103,8 @@ static BLOCK *alloc_list;
 #define N_STATIC_BLOCKS	20
 static BLOCK static_blocks[N_STATIC_BLOCKS];
 
-#ifdef CONFIG_STRAM_SWAP
-/* max. number of bytes to use for swapping
- *  0 = no ST-RAM swapping
- * -1 = do swapping (to whole ST-RAM) if it's less than MAX_STRAM_FRACTION of
- *      total memory
- */
-static int max_swap_size = -1;
-
-/* start and end of swapping area */
-static void *swap_start, *swap_end;
-
-/* The ST-RAM's swap info structure */
-static struct swap_info_struct *stram_swap_info;
-
-/* The ST-RAM's swap type */
-static int stram_swap_type;
-
-/* Semaphore for get_stram_region.  */
-static DECLARE_MUTEX(stram_swap_sem);
-
-/* major and minor device number of the ST-RAM device; for the major, we use
- * the same as Amiga z2ram, which is really similar and impossible on Atari,
- * and for the minor a relatively odd number to avoid the user creating and
- * using that device. */
-#define	STRAM_MAJOR		Z2RAM_MAJOR
-#define	STRAM_MINOR		13
-
-/* Some impossible pointer value */
-#define MAGIC_FILE_P	(struct file *)0xffffdead
-
-#ifdef DO_PROC
-static unsigned stat_swap_read;
-static unsigned stat_swap_write;
-static unsigned stat_swap_force;
-#endif /* DO_PROC */
-
-#endif /* CONFIG_STRAM_SWAP */
-
 /***************************** Prototypes *****************************/
 
-#ifdef CONFIG_STRAM_SWAP
-static int swap_init(void *start_mem, void *swap_data);
-static void *get_stram_region( unsigned long n_pages );
-static void free_stram_region( unsigned long offset, unsigned long n_pages
-			       );
-static int in_some_region(void *addr);
-static unsigned long find_free_region( unsigned long n_pages, unsigned long
-				       *total_free, unsigned long
-				       *region_free );
-static void do_stram_request(request_queue_t *);
-static int stram_open( struct inode *inode, struct file *filp );
-static int stram_release( struct inode *inode, struct file *filp );
-static void reserve_region(void *start, void *end);
-#endif
 static BLOCK *add_region( void *addr, unsigned long size );
 static BLOCK *find_region( void *addr );
 static int remove_region( BLOCK *block );
@@ -279,84 +151,11 @@ void __init atari_stram_init(void)
  */
 void __init atari_stram_reserve_pages(void *start_mem)
 {
-#ifdef CONFIG_STRAM_SWAP
-	/* if max_swap_size is negative (i.e. no stram_swap= option given),
-	 * determine at run time whether to use ST-RAM swapping */
-	if (max_swap_size < 0)
-		/* Use swapping if ST-RAM doesn't make up more than MAX_STRAM_FRACTION
-		 * of total memory. In that case, the max. size is set to 16 MB,
-		 * because ST-RAM can never be bigger than that.
-		 * Also, never use swapping on a Hades, there's no separate ST-RAM in
-		 * that machine. */
-		max_swap_size =
-			(!MACH_IS_HADES &&
-			 (N_PAGES(stram_end-stram_start)*MAX_STRAM_FRACTION_DENOM <=
-			  ((unsigned long)high_memory>>PAGE_SHIFT)*MAX_STRAM_FRACTION_NOM)) ? 16*1024*1024 : 0;
-	DPRINTK( "atari_stram_reserve_pages: max_swap_size = %d\n", max_swap_size );
-#endif
-
 	/* always reserve first page of ST-RAM, the first 2 kB are
 	 * supervisor-only! */
 	if (!kernel_in_stram)
 		reserve_bootmem (0, PAGE_SIZE);
 
-#ifdef CONFIG_STRAM_SWAP
-	{
-		void *swap_data;
-
-		start_mem = (void *) PAGE_ALIGN ((unsigned long) start_mem);
-		/* determine first page to use as swap: if the kernel is
-		   in TT-RAM, this is the first page of (usable) ST-RAM;
-		   otherwise just use the end of kernel data (= start_mem) */
-		swap_start = !kernel_in_stram ? stram_start + PAGE_SIZE : start_mem;
-		/* decrement by one page, rest of kernel assumes that first swap page
-		 * is always reserved and maybe doesn't handle swp_entry == 0
-		 * correctly */
-		swap_start -= PAGE_SIZE;
-		swap_end = stram_end;
-		if (swap_end-swap_start > max_swap_size)
-			swap_end =  swap_start + max_swap_size;
-		DPRINTK( "atari_stram_reserve_pages: swapping enabled; "
-				 "swap=%p-%p\n", swap_start, swap_end);
-
-		/* reserve some amount of memory for maintainance of
-		 * swapping itself: one page for each 2048 (PAGE_SIZE/2)
-		 * swap pages. (2 bytes for each page) */
-		swap_data = start_mem;
-		start_mem += ((SWAP_NR(swap_end) + PAGE_SIZE/2 - 1)
-			      >> (PAGE_SHIFT-1)) << PAGE_SHIFT;
-		/* correct swap_start if necessary */
-		if (swap_start + PAGE_SIZE == swap_data)
-			swap_start = start_mem - PAGE_SIZE;
-
-		if (!swap_init( start_mem, swap_data )) {
-			printk( KERN_ERR "ST-RAM swap space initialization failed\n" );
-			max_swap_size = 0;
-			return;
-		}
-		/* reserve region for swapping meta-data */
-		reserve_region(swap_data, start_mem);
-		/* reserve swapping area itself */
-		reserve_region(swap_start + PAGE_SIZE, swap_end);
-
-		/*
-		 * If the whole ST-RAM is used for swapping, there are no allocatable
-		 * dma pages left. But unfortunately, some shared parts of the kernel
-		 * (particularly the SCSI mid-level) call __get_dma_pages()
-		 * unconditionally :-( These calls then fail, and scsi.c even doesn't
-		 * check for NULL return values and just crashes. The quick fix for
-		 * this (instead of doing much clean up work in the SCSI code) is to
-		 * pretend all pages are DMA-able by setting mach_max_dma_address to
-		 * ULONG_MAX. This doesn't change any functionality so far, since
-		 * get_dma_pages() shouldn't be used on Atari anyway anymore (better
-		 * use atari_stram_alloc()), and the Atari SCSI drivers don't need DMA
-		 * memory. But unfortunately there's now no kind of warning (even not
-		 * a NULL return value) if you use get_dma_pages() nevertheless :-(
-		 * You just will get non-DMA-able memory...
-		 */
-		mach_max_dma_address = 0xffffffff;
-	}
-#endif
 }
 
 void atari_stram_mem_init_hook (void)
@@ -367,7 +166,6 @@ void atari_stram_mem_init_hook (void)
 
 /*
  * This is main public interface: somehow allocate a ST-RAM block
- * There are three strategies:
  *
  *  - If we're before mem_init(), we have to make a static allocation. The
  *    region is taken in the kernel data area (if the kernel is in ST-RAM) or
@@ -375,14 +173,9 @@ void atari_stram_mem_init_hook (void)
  *    rsvd_stram_* region. The ST-RAM is somewhere in the middle of kernel
  *    address space in the latter case.
  *
- *  - If mem_init() already has been called and ST-RAM swapping is enabled,
- *    try to get the memory from the (pseudo) swap-space, either free already
- *    or by moving some other pages out of the swap.
- *
- *  - If mem_init() already has been called, and ST-RAM swapping is not
- *    enabled, the only possibility is to try with __get_dma_pages(). This has
- *    the disadvantage that it's very hard to get more than 1 page, and it is
- *    likely to fail :-(
+ *  - If mem_init() already has been called, try with __get_dma_pages().
+ *    This has the disadvantage that it's very hard to get more than 1 page,
+ *    and it is likely to fail :-(
  *
  */
 void *atari_stram_alloc(long size, const char *owner)
@@ -393,27 +186,13 @@ void *atari_stram_alloc(long size, const char *owner)
 
 	DPRINTK("atari_stram_alloc(size=%08lx,owner=%s)\n", size, owner);
 
-	size = ALIGN_IF_SWAP(size);
-	DPRINTK( "atari_stram_alloc: rounded size = %08lx\n", size );
-#ifdef CONFIG_STRAM_SWAP
-	if (max_swap_size) {
-		/* If swapping is active: make some free space in the swap
-		   "device". */
-		DPRINTK( "atari_stram_alloc: after mem_init, swapping ok, "
-				 "calling get_region\n" );
-		addr = get_stram_region( N_PAGES(size) );
-		flags = BLOCK_INSWAP;
-	}
-	else
-#endif
 	if (!mem_init_done)
 		return alloc_bootmem_low(size);
 	else {
-		/* After mem_init() and no swapping: can only resort to
-		 * __get_dma_pages() */
+		/* After mem_init(): can only resort to __get_dma_pages() */
 		addr = (void *)__get_dma_pages(GFP_KERNEL, get_order(size));
 		flags = BLOCK_GFP;
-		DPRINTK( "atari_stram_alloc: after mem_init, swapping off, "
+		DPRINTK( "atari_stram_alloc: after mem_init, "
 				 "get_pages=%p\n", addr );
 	}
 
@@ -422,12 +201,7 @@ void *atari_stram_alloc(long size, const char *owner)
 			/* out of memory for BLOCK structure :-( */
 			DPRINTK( "atari_stram_alloc: out of mem for BLOCK -- "
 					 "freeing again\n" );
-#ifdef CONFIG_STRAM_SWAP
-			if (flags == BLOCK_INSWAP)
-				free_stram_region( SWAP_NR(addr), N_PAGES(size) );
-			else
-#endif
-				free_pages((unsigned long)addr, get_order(size));
+			free_pages((unsigned long)addr, get_order(size));
 			return( NULL );
 		}
 		block->owner = owner;
@@ -451,25 +225,12 @@ void atari_stram_free( void *addr )
 	DPRINTK( "atari_stram_free: found block (%p): size=%08lx, owner=%s, "
 			 "flags=%02x\n", block, block->size, block->owner, block->flags );
 
-#ifdef CONFIG_STRAM_SWAP
-	if (!max_swap_size) {
-#endif
-		if (block->flags & BLOCK_GFP) {
-			DPRINTK("atari_stram_free: is kmalloced, order_size=%d\n",
-				get_order(block->size));
-			free_pages((unsigned long)addr, get_order(block->size));
-		}
-		else
-			goto fail;
-#ifdef CONFIG_STRAM_SWAP
-	}
-	else if (block->flags & BLOCK_INSWAP) {
-		DPRINTK( "atari_stram_free: is swap-alloced\n" );
-		free_stram_region( SWAP_NR(block->start), N_PAGES(block->size) );
-	}
-	else
+	if (!(block->flags & BLOCK_GFP))
 		goto fail;
-#endif
+
+	DPRINTK("atari_stram_free: is kmalloced, order_size=%d\n",
+		get_order(block->size));
+	free_pages((unsigned long)addr, get_order(block->size));
 	remove_region( block );
 	return;
 
@@ -478,612 +239,6 @@ void atari_stram_free( void *addr )
 			"(called from %p)\n", addr, __builtin_return_address(0) );
 }
 
-
-#ifdef CONFIG_STRAM_SWAP
-
-
-/* ------------------------------------------------------------------------ */
-/*						   Main Swapping Functions							*/
-/* ------------------------------------------------------------------------ */
-
-
-/*
- * Initialize ST-RAM swap device
- * (lots copied and modified from sys_swapon() in mm/swapfile.c)
- */
-static int __init swap_init(void *start_mem, void *swap_data)
-{
-	static struct dentry fake_dentry;
-	static struct vfsmount fake_vfsmnt;
-	struct swap_info_struct *p;
-	struct inode swap_inode;
-	unsigned int type;
-	void *addr;
-	int i, j, k, prev;
-
-	DPRINTK("swap_init(start_mem=%p, swap_data=%p)\n",
-		start_mem, swap_data);
-
-	/* need at least one page for swapping to (and this also isn't very
-	 * much... :-) */
-	if (swap_end - swap_start < 2*PAGE_SIZE) {
-		printk( KERN_WARNING "stram_swap_init: swap space too small\n" );
-		return( 0 );
-	}
-
-	/* find free slot in swap_info */
-	for( p = swap_info, type = 0; type < nr_swapfiles; type++, p++ )
-		if (!(p->flags & SWP_USED))
-			break;
-	if (type >= MAX_SWAPFILES) {
-		printk( KERN_WARNING "stram_swap_init: max. number of "
-				"swap devices exhausted\n" );
-		return( 0 );
-	}
-	if (type >= nr_swapfiles)
-		nr_swapfiles = type+1;
-
-	stram_swap_info = p;
-	stram_swap_type = type;
-
-	/* fake some dir cache entries to give us some name in /dev/swaps */
-	fake_dentry.d_parent = &fake_dentry;
-	fake_dentry.d_name.name = "stram (internal)";
-	fake_dentry.d_name.len = 16;
-	fake_vfsmnt.mnt_parent = &fake_vfsmnt;
-
-	p->flags        = SWP_USED;
-	p->swap_file    = &fake_dentry;
-	p->swap_vfsmnt  = &fake_vfsmnt;
-	p->swap_map	= swap_data;
-	p->cluster_nr   = 0;
-	p->next         = -1;
-	p->prio         = 0x7ff0;	/* a rather high priority, but not the higest
-								 * to give the user a chance to override */
-
-	/* call stram_open() directly, avoids at least the overhead in
-	 * constructing a dummy file structure... */
-	swap_inode.i_rdev = MKDEV( STRAM_MAJOR, STRAM_MINOR );
-	stram_open( &swap_inode, MAGIC_FILE_P );
-	p->max = SWAP_NR(swap_end);
-
-	/* initialize swap_map: set regions that are already allocated or belong
-	 * to kernel data space to SWAP_MAP_BAD, otherwise to free */
-	j = 0; /* # of free pages */
-	k = 0; /* # of already allocated pages (from pre-mem_init stram_alloc()) */
-	p->lowest_bit = 0;
-	p->highest_bit = 0;
-	for( i = 1, addr = SWAP_ADDR(1); i < p->max;
-		 i++, addr += PAGE_SIZE ) {
-		if (in_some_region( addr )) {
-			p->swap_map[i] = SWAP_MAP_BAD;
-			++k;
-		}
-		else if (kernel_in_stram && addr < start_mem ) {
-			p->swap_map[i] = SWAP_MAP_BAD;
-		}
-		else {
-			p->swap_map[i] = 0;
-			++j;
-			if (!p->lowest_bit) p->lowest_bit = i;
-			p->highest_bit = i;
-		}
-	}
-	/* first page always reserved (and doesn't really belong to swap space) */
-	p->swap_map[0] = SWAP_MAP_BAD;
-
-	/* now swapping to this device ok */
-	p->pages = j + k;
-	swap_list_lock();
-	nr_swap_pages += j;
-	p->flags = SWP_WRITEOK;
-
-	/* insert swap space into swap_list */
-	prev = -1;
-	for (i = swap_list.head; i >= 0; i = swap_info[i].next) {
-		if (p->prio >= swap_info[i].prio) {
-			break;
-		}
-		prev = i;
-	}
-	p->next = i;
-	if (prev < 0) {
-		swap_list.head = swap_list.next = p - swap_info;
-	} else {
-		swap_info[prev].next = p - swap_info;
-	}
-	swap_list_unlock();
-
-	printk( KERN_INFO "Using %dk (%d pages) of ST-RAM as swap space.\n",
-			p->pages << 2, p->pages );
-	return( 1 );
-}
-
-
-/*
- * The swap entry has been read in advance, and we return 1 to indicate
- * that the page has been used or is no longer needed.
- *
- * Always set the resulting pte to be nowrite (the same as COW pages
- * after one process has exited).  We don't know just how many PTEs will
- * share this swap entry, so be cautious and let do_wp_page work out
- * what to do if a write is requested later.
- */
-static inline void unswap_pte(struct vm_area_struct * vma, unsigned long
-			      address, pte_t *dir, swp_entry_t entry,
-			      struct page *page)
-{
-	pte_t pte = *dir;
-
-	if (pte_none(pte))
-		return;
-	if (pte_present(pte)) {
-		/* If this entry is swap-cached, then page must already
-                   hold the right address for any copies in physical
-                   memory */
-		if (pte_page(pte) != page)
-			return;
-		/* We will be removing the swap cache in a moment, so... */
-		set_pte(dir, pte_mkdirty(pte));
-		return;
-	}
-	if (pte_val(pte) != entry.val)
-		return;
-
-	DPRINTK("unswap_pte: replacing entry %08lx by new page %p",
-		entry.val, page);
-	set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
-	swap_free(entry);
-	get_page(page);
-	inc_mm_counter(vma->vm_mm, rss);
-}
-
-static inline void unswap_pmd(struct vm_area_struct * vma, pmd_t *dir,
-			      unsigned long address, unsigned long size,
-			      unsigned long offset, swp_entry_t entry,
-			      struct page *page)
-{
-	pte_t * pte;
-	unsigned long end;
-
-	if (pmd_none(*dir))
-		return;
-	if (pmd_bad(*dir)) {
-		pmd_ERROR(*dir);
-		pmd_clear(dir);
-		return;
-	}
-	pte = pte_offset_kernel(dir, address);
-	offset += address & PMD_MASK;
-	address &= ~PMD_MASK;
-	end = address + size;
-	if (end > PMD_SIZE)
-		end = PMD_SIZE;
-	do {
-		unswap_pte(vma, offset+address-vma->vm_start, pte, entry, page);
-		address += PAGE_SIZE;
-		pte++;
-	} while (address < end);
-}
-
-static inline void unswap_pgd(struct vm_area_struct * vma, pgd_t *dir,
-			      unsigned long address, unsigned long size,
-			      swp_entry_t entry, struct page *page)
-{
-	pmd_t * pmd;
-	unsigned long offset, end;
-
-	if (pgd_none(*dir))
-		return;
-	if (pgd_bad(*dir)) {
-		pgd_ERROR(*dir);
-		pgd_clear(dir);
-		return;
-	}
-	pmd = pmd_offset(dir, address);
-	offset = address & PGDIR_MASK;
-	address &= ~PGDIR_MASK;
-	end = address + size;
-	if (end > PGDIR_SIZE)
-		end = PGDIR_SIZE;
-	do {
-		unswap_pmd(vma, pmd, address, end - address, offset, entry,
-			   page);
-		address = (address + PMD_SIZE) & PMD_MASK;
-		pmd++;
-	} while (address < end);
-}
-
-static void unswap_vma(struct vm_area_struct * vma, pgd_t *pgdir,
-		       swp_entry_t entry, struct page *page)
-{
-	unsigned long start = vma->vm_start, end = vma->vm_end;
-
-	do {
-		unswap_pgd(vma, pgdir, start, end - start, entry, page);
-		start = (start + PGDIR_SIZE) & PGDIR_MASK;
-		pgdir++;
-	} while (start < end);
-}
-
-static void unswap_process(struct mm_struct * mm, swp_entry_t entry,
-			   struct page *page)
-{
-	struct vm_area_struct* vma;
-
-	/*
-	 * Go through process' page directory.
-	 */
-	if (!mm)
-		return;
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		pgd_t * pgd = pgd_offset(mm, vma->vm_start);
-		unswap_vma(vma, pgd, entry, page);
-	}
-}
-
-
-static int unswap_by_read(unsigned short *map, unsigned long max,
-			  unsigned long start, unsigned long n_pages)
-{
-	struct task_struct *p;
-	struct page *page;
-	swp_entry_t entry;
-	unsigned long i;
-
-	DPRINTK( "unswapping %lu..%lu by reading in\n",
-			 start, start+n_pages-1 );
-
-	for( i = start; i < start+n_pages; ++i ) {
-		if (map[i] == SWAP_MAP_BAD) {
-			printk( KERN_ERR "get_stram_region: page %lu already "
-					"reserved??\n", i );
-			continue;
-		}
-
-		if (map[i]) {
-			entry = swp_entry(stram_swap_type, i);
-			DPRINTK("unswap: map[i=%lu]=%u nr_swap=%ld\n",
-				i, map[i], nr_swap_pages);
-
-			swap_device_lock(stram_swap_info);
-			map[i]++;
-			swap_device_unlock(stram_swap_info);
-			/* Get a page for the entry, using the existing
-			   swap cache page if there is one.  Otherwise,
-			   get a clean page and read the swap into it. */
-			page = read_swap_cache_async(entry, NULL, 0);
-			if (!page) {
-				swap_free(entry);
-				return -ENOMEM;
-			}
-			read_lock(&tasklist_lock);
-			for_each_process(p)
-				unswap_process(p->mm, entry, page);
-			read_unlock(&tasklist_lock);
-			shmem_unuse(entry, page);
-			/* Now get rid of the extra reference to the
-			   temporary page we've been using. */
-			if (PageSwapCache(page))
-				delete_from_swap_cache(page);
-			__free_page(page);
-	#ifdef DO_PROC
-			stat_swap_force++;
-	#endif
-		}
-
-		DPRINTK( "unswap: map[i=%lu]=%u nr_swap=%ld\n",
-				 i, map[i], nr_swap_pages );
-		swap_list_lock();
-		swap_device_lock(stram_swap_info);
-		map[i] = SWAP_MAP_BAD;
-		if (stram_swap_info->lowest_bit == i)
-			stram_swap_info->lowest_bit++;
-		if (stram_swap_info->highest_bit == i)
-			stram_swap_info->highest_bit--;
-		--nr_swap_pages;
-		swap_device_unlock(stram_swap_info);
-		swap_list_unlock();
-	}
-
-	return 0;
-}
-
-/*
- * reserve a region in ST-RAM swap space for an allocation
- */
-static void *get_stram_region( unsigned long n_pages )
-{
-	unsigned short *map = stram_swap_info->swap_map;
-	unsigned long max = stram_swap_info->max;
-	unsigned long start, total_free, region_free;
-	int err;
-	void *ret = NULL;
-
-	DPRINTK( "get_stram_region(n_pages=%lu)\n", n_pages );
-
-	down(&stram_swap_sem);
-
-	/* disallow writing to the swap device now */
-	stram_swap_info->flags = SWP_USED;
-
-	/* find a region of n_pages pages in the swap space including as much free
-	 * pages as possible (and excluding any already-reserved pages). */
-	if (!(start = find_free_region( n_pages, &total_free, &region_free )))
-		goto end;
-	DPRINTK( "get_stram_region: region starts at %lu, has %lu free pages\n",
-			 start, region_free );
-
-	err = unswap_by_read(map, max, start, n_pages);
-	if (err)
-		goto end;
-
-	ret = SWAP_ADDR(start);
-  end:
-	/* allow using swap device again */
-	stram_swap_info->flags = SWP_WRITEOK;
-	up(&stram_swap_sem);
-	DPRINTK( "get_stram_region: returning %p\n", ret );
-	return( ret );
-}
-
-
-/*
- * free a reserved region in ST-RAM swap space
- */
-static void free_stram_region( unsigned long offset, unsigned long n_pages )
-{
-	unsigned short *map = stram_swap_info->swap_map;
-
-	DPRINTK( "free_stram_region(offset=%lu,n_pages=%lu)\n", offset, n_pages );
-
-	if (offset < 1 || offset + n_pages > stram_swap_info->max) {
-		printk( KERN_ERR "free_stram_region: Trying to free non-ST-RAM\n" );
-		return;
-	}
-
-	swap_list_lock();
-	swap_device_lock(stram_swap_info);
-	/* un-reserve the freed pages */
-	for( ; n_pages > 0; ++offset, --n_pages ) {
-		if (map[offset] != SWAP_MAP_BAD)
-			printk( KERN_ERR "free_stram_region: Swap page %lu was not "
-					"reserved\n", offset );
-		map[offset] = 0;
-	}
-
-	/* update swapping meta-data */
-	if (offset < stram_swap_info->lowest_bit)
-		stram_swap_info->lowest_bit = offset;
-	if (offset+n_pages-1 > stram_swap_info->highest_bit)
-		stram_swap_info->highest_bit = offset+n_pages-1;
-	if (stram_swap_info->prio > swap_info[swap_list.next].prio)
-		swap_list.next = swap_list.head;
-	nr_swap_pages += n_pages;
-	swap_device_unlock(stram_swap_info);
-	swap_list_unlock();
-}
-
-
-/* ------------------------------------------------------------------------ */
-/*						Utility Functions for Swapping						*/
-/* ------------------------------------------------------------------------ */
-
-
-/* is addr in some of the allocated regions? */
-static int in_some_region(void *addr)
-{
-	BLOCK *p;
-
-	for( p = alloc_list; p; p = p->next ) {
-		if (p->start <= addr && addr < p->start + p->size)
-			return( 1 );
-	}
-	return( 0 );
-}
-
-
-static unsigned long find_free_region(unsigned long n_pages,
-				      unsigned long *total_free,
-				      unsigned long *region_free)
-{
-	unsigned short *map = stram_swap_info->swap_map;
-	unsigned long max = stram_swap_info->max;
-	unsigned long head, tail, max_start;
-	long nfree, max_free;
-
-	/* first scan the swap space for a suitable place for the allocation */
-	head = 1;
-	max_start = 0;
-	max_free = -1;
-	*total_free = 0;
-
-  start_over:
-	/* increment tail until final window size reached, and count free pages */
-	nfree = 0;
-	for( tail = head; tail-head < n_pages && tail < max; ++tail ) {
-		if (map[tail] == SWAP_MAP_BAD) {
-			head = tail+1;
-			goto start_over;
-		}
-		if (!map[tail]) {
-			++nfree;
-			++*total_free;
-		}
-	}
-	if (tail-head < n_pages)
-		goto out;
-	if (nfree > max_free) {
-		max_start = head;
-		max_free  = nfree;
-		if (max_free >= n_pages)
-			/* don't need more free pages... :-) */
-			goto out;
-	}
-
-	/* now shift the window and look for the area where as much pages as
-	 * possible are free */
-	while( tail < max ) {
-		nfree -= (map[head++] == 0);
-		if (map[tail] == SWAP_MAP_BAD) {
-			head = tail+1;
-			goto start_over;
-		}
-		if (!map[tail]) {
-			++nfree;
-			++*total_free;
-		}
-		++tail;
-		if (nfree > max_free) {
-			max_start = head;
-			max_free  = nfree;
-			if (max_free >= n_pages)
-				/* don't need more free pages... :-) */
-				goto out;
-		}
-	}
-
-  out:
-	if (max_free < 0) {
-		printk( KERN_NOTICE "get_stram_region: ST-RAM too full or fragmented "
-				"-- can't allocate %lu pages\n", n_pages );
-		return( 0 );
-	}
-
-	*region_free = max_free;
-	return( max_start );
-}
-
-
-/* setup parameters from command line */
-void __init stram_swap_setup(char *str, int *ints)
-{
-	if (ints[0] >= 1)
-		max_swap_size = ((ints[1] < 0 ? 0 : ints[1]) * 1024) & PAGE_MASK;
-}
-
-
-/* ------------------------------------------------------------------------ */
-/*								ST-RAM device								*/
-/* ------------------------------------------------------------------------ */
-
-static int refcnt;
-
-static void do_stram_request(request_queue_t *q)
-{
-	struct request *req;
-
-	while ((req = elv_next_request(q)) != NULL) {
-		void *start = swap_start + (req->sector << 9);
-		unsigned long len = req->current_nr_sectors << 9;
-		if ((start + len) > swap_end) {
-			printk( KERN_ERR "stram: bad access beyond end of device: "
-					"block=%ld, count=%d\n",
-					req->sector,
-					req->current_nr_sectors );
-			end_request(req, 0);
-			continue;
-		}
-
-		if (req->cmd == READ) {
-			memcpy(req->buffer, start, len);
-#ifdef DO_PROC
-			stat_swap_read += N_PAGES(len);
-#endif
-		}
-		else {
-			memcpy(start, req->buffer, len);
-#ifdef DO_PROC
-			stat_swap_write += N_PAGES(len);
-#endif
-		}
-		end_request(req, 1);
-	}
-}
-
-
-static int stram_open( struct inode *inode, struct file *filp )
-{
-	if (filp != MAGIC_FILE_P) {
-		printk( KERN_NOTICE "Only kernel can open ST-RAM device\n" );
-		return( -EPERM );
-	}
-	if (refcnt)
-		return( -EBUSY );
-	++refcnt;
-	return( 0 );
-}
-
-static int stram_release( struct inode *inode, struct file *filp )
-{
-	if (filp != MAGIC_FILE_P) {
-		printk( KERN_NOTICE "Only kernel can close ST-RAM device\n" );
-		return( -EPERM );
-	}
-	if (refcnt > 0)
-		--refcnt;
-	return( 0 );
-}
-
-
-static struct block_device_operations stram_fops = {
-	.open =		stram_open,
-	.release =	stram_release,
-};
-
-static struct gendisk *stram_disk;
-static struct request_queue *stram_queue;
-static DEFINE_SPINLOCK(stram_lock);
-
-int __init stram_device_init(void)
-{
-	if (!MACH_IS_ATARI)
-		/* no point in initializing this, I hope */
-		return -ENXIO;
-
-	if (!max_swap_size)
-		/* swapping not enabled */
-		return -ENXIO;
-	stram_disk = alloc_disk(1);
-	if (!stram_disk)
-		return -ENOMEM;
-
-	if (register_blkdev(STRAM_MAJOR, "stram")) {
-		put_disk(stram_disk);
-		return -ENXIO;
-	}
-
-	stram_queue = blk_init_queue(do_stram_request, &stram_lock);
-	if (!stram_queue) {
-		unregister_blkdev(STRAM_MAJOR, "stram");
-		put_disk(stram_disk);
-		return -ENOMEM;
-	}
-
-	stram_disk->major = STRAM_MAJOR;
-	stram_disk->first_minor = STRAM_MINOR;
-	stram_disk->fops = &stram_fops;
-	stram_disk->queue = stram_queue;
-	sprintf(stram_disk->disk_name, "stram");
-	set_capacity(stram_disk, (swap_end - swap_start)/512);
-	add_disk(stram_disk);
-	return 0;
-}
-
-
-
-/* ------------------------------------------------------------------------ */
-/*							Misc Utility Functions							*/
-/* ------------------------------------------------------------------------ */
-
-/* reserve a range of pages */
-static void reserve_region(void *start, void *end)
-{
-	reserve_bootmem (virt_to_phys(start), end - start);
-}
-
-#endif /* CONFIG_STRAM_SWAP */
-
 
 /* ------------------------------------------------------------------------ */
 /*							  Region Management								*/
@@ -1173,50 +328,9 @@ int get_stram_list( char *buf )
 {
 	int len = 0;
 	BLOCK *p;
-#ifdef CONFIG_STRAM_SWAP
-	int i;
-	unsigned short *map = stram_swap_info->swap_map;
-	unsigned long max = stram_swap_info->max;
-	unsigned free = 0, used = 0, rsvd = 0;
-#endif
 
-#ifdef CONFIG_STRAM_SWAP
-	if (max_swap_size) {
-		for( i = 1; i < max; ++i ) {
-			if (!map[i])
-				++free;
-			else if (map[i] == SWAP_MAP_BAD)
-				++rsvd;
-			else
-				++used;
-		}
-		PRINT_PROC(
-			"Total ST-RAM:      %8u kB\n"
-			"Total ST-RAM swap: %8lu kB\n"
-			"Free swap:         %8u kB\n"
-			"Used swap:         %8u kB\n"
-			"Allocated swap:    %8u kB\n"
-			"Swap Reads:        %8u\n"
-			"Swap Writes:       %8u\n"
-			"Swap Forced Reads: %8u\n",
-			(stram_end - stram_start) >> 10,
-			(max-1) << (PAGE_SHIFT-10),
-			free << (PAGE_SHIFT-10),
-			used << (PAGE_SHIFT-10),
-			rsvd << (PAGE_SHIFT-10),
-			stat_swap_read,
-			stat_swap_write,
-			stat_swap_force );
-	}
-	else {
-#endif
-		PRINT_PROC( "ST-RAM swapping disabled\n" );
-		PRINT_PROC("Total ST-RAM:      %8u kB\n",
+	PRINT_PROC("Total ST-RAM:      %8u kB\n",
 			   (stram_end - stram_start) >> 10);
-#ifdef CONFIG_STRAM_SWAP
-	}
-#endif
-
 	PRINT_PROC( "Allocated regions:\n" );
 	for( p = alloc_list; p; p = p->next ) {
 		if (len + 50 >= PAGE_SIZE)
@@ -1227,8 +341,6 @@ int get_stram_list( char *buf )
 			   p->owner);
 		if (p->flags & BLOCK_GFP)
 			PRINT_PROC( "page-alloced)\n" );
-		else if (p->flags & BLOCK_INSWAP)
-			PRINT_PROC( "in swap)\n" );
 		else
 			PRINT_PROC( "??)\n" );
 	}
diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c
index 5dcb3fa35ea..fe2383e36b0 100644
--- a/arch/m68k/mm/kmap.c
+++ b/arch/m68k/mm/kmap.c
@@ -201,7 +201,7 @@ void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag)
 			virtaddr += PTRTREESIZE;
 			size -= PTRTREESIZE;
 		} else {
-			pte_dir = pte_alloc_kernel(&init_mm, pmd_dir, virtaddr);
+			pte_dir = pte_alloc_kernel(pmd_dir, virtaddr);
 			if (!pte_dir) {
 				printk("ioremap: no mem for pte_dir\n");
 				return NULL;
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index 32e55adfeb8..117481e8630 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -116,7 +116,7 @@ inline int dvma_map_cpu(unsigned long kaddr,
 			pte_t *pte;
 			unsigned long end3;
 
-			if((pte = pte_alloc_kernel(&init_mm, pmd, vaddr)) == NULL) {
+			if((pte = pte_alloc_kernel(pmd, vaddr)) == NULL) {
 				ret = -ENOMEM;
 				goto out;
 			}
diff --git a/arch/mips/kernel/irixelf.c b/arch/mips/kernel/irixelf.c
index 99262fe6456..7ce34d4aa22 100644
--- a/arch/mips/kernel/irixelf.c
+++ b/arch/mips/kernel/irixelf.c
@@ -697,7 +697,6 @@ static int load_irix_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	/* Do this so that we can load the interpreter, if need be.  We will
 	 * change some of these later.
 	 */
-	set_mm_counter(current->mm, rss, 0);
 	setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
 	current->mm->start_stack = bprm->p;
 
diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c
index 9c44ca70bef..3101d1db559 100644
--- a/arch/mips/mm/ioremap.c
+++ b/arch/mips/mm/ioremap.c
@@ -55,7 +55,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -77,7 +77,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pud_t *pud;
 		pmd_t *pmd;
@@ -96,7 +95,6 @@ static int remap_area_pages(unsigned long address, phys_t phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index e15f09eaed1..a065349aee3 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -270,7 +270,6 @@ void flush_dcache_page(struct page *page)
 	unsigned long offset;
 	unsigned long addr;
 	pgoff_t pgoff;
-	pte_t *pte;
 	unsigned long pfn = page_to_pfn(page);
 
 
@@ -301,21 +300,16 @@ void flush_dcache_page(struct page *page)
 		 * taking a page fault if the pte doesn't exist.
 		 * This is just for speed.  If the page translation
 		 * isn't there, there's no point exciting the
-		 * nadtlb handler into a nullification frenzy */
-
-
-  		if(!(pte = translation_exists(mpnt, addr)))
-			continue;
-
-		/* make sure we really have this page: the private
+		 * nadtlb handler into a nullification frenzy.
+		 *
+		 * Make sure we really have this page: the private
 		 * mappings may cover this area but have COW'd this
-		 * particular page */
-		if(pte_pfn(*pte) != pfn)
-  			continue;
-
-		__flush_cache_page(mpnt, addr);
-
-		break;
+		 * particular page.
+		 */
+  		if (translation_exists(mpnt, addr, pfn)) {
+			__flush_cache_page(mpnt, addr);
+			break;
+		}
 	}
 	flush_dcache_mmap_unlock(mapping);
 }
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index ae6213d7167..f94a02ef3d9 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -114,7 +114,7 @@ static inline int map_pmd_uncached(pmd_t * pmd, unsigned long vaddr,
 	if (end > PGDIR_SIZE)
 		end = PGDIR_SIZE;
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, vaddr);
+		pte_t * pte = pte_alloc_kernel(pmd, vaddr);
 		if (!pte)
 			return -ENOMEM;
 		if (map_pte_uncached(pte, orig_vaddr, end - vaddr, paddr_ptr))
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 2886ad70db4..29b998e430e 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -505,7 +505,9 @@ void show_mem(void)
 
 		for (j = node_start_pfn(i); j < node_end_pfn(i); j++) {
 			struct page *p;
+			unsigned long flags;
 
+			pgdat_resize_lock(NODE_DATA(i), &flags);
 			p = nid_page_nr(i, j) - node_start_pfn(i);
 
 			total++;
@@ -517,6 +519,7 @@ void show_mem(void)
 				free++;
 			else
 				shared += page_count(p) - 1;
+			pgdat_resize_unlock(NODE_DATA(i), &flags);
         	}
 	}
 #endif
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index f2df502cdae..5c7a1b3b932 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -52,7 +52,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(NULL, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -75,10 +75,9 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
-		pmd = pmd_alloc(dir, address);
+		pmd = pmd_alloc(&init_mm, dir, address);
 		error = -ENOMEM;
 		if (!pmd)
 			break;
@@ -89,7 +88,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c
index 0f710d2baec..685fd0defe2 100644
--- a/arch/ppc/kernel/dma-mapping.c
+++ b/arch/ppc/kernel/dma-mapping.c
@@ -335,8 +335,6 @@ static int __init dma_alloc_init(void)
 	pte_t *pte;
 	int ret = 0;
 
-	spin_lock(&init_mm.page_table_lock);
-
 	do {
 		pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
 		pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
@@ -347,7 +345,7 @@ static int __init dma_alloc_init(void)
 		}
 		WARN_ON(!pmd_none(*pmd));
 
-		pte = pte_alloc_kernel(&init_mm, pmd, CONSISTENT_BASE);
+		pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
 		if (!pte) {
 			printk(KERN_ERR "%s: no pte tables\n", __func__);
 			ret = -ENOMEM;
@@ -357,8 +355,6 @@ static int __init dma_alloc_init(void)
 		consistent_pte = pte;
 	} while (0);
 
-	spin_unlock(&init_mm.page_table_lock);
-
 	return ret;
 }
 
diff --git a/arch/ppc/mm/4xx_mmu.c b/arch/ppc/mm/4xx_mmu.c
index b7bcbc232f3..4d006aa1a0d 100644
--- a/arch/ppc/mm/4xx_mmu.c
+++ b/arch/ppc/mm/4xx_mmu.c
@@ -110,13 +110,11 @@ unsigned long __init mmu_mapin_ram(void)
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
-		spin_lock(&init_mm.page_table_lock);
 		pmdp = pmd_offset(pgd_offset_k(v), v);
 		pmd_val(*pmdp++) = val;
 		pmd_val(*pmdp++) = val;
 		pmd_val(*pmdp++) = val;
 		pmd_val(*pmdp++) = val;
-		spin_unlock(&init_mm.page_table_lock);
 
 		v += LARGE_PAGE_SIZE_16M;
 		p += LARGE_PAGE_SIZE_16M;
@@ -127,10 +125,8 @@ unsigned long __init mmu_mapin_ram(void)
 		pmd_t *pmdp;
 		unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE;
 
-		spin_lock(&init_mm.page_table_lock);
 		pmdp = pmd_offset(pgd_offset_k(v), v);
 		pmd_val(*pmdp) = val;
-		spin_unlock(&init_mm.page_table_lock);
 
 		v += LARGE_PAGE_SIZE_4M;
 		p += LARGE_PAGE_SIZE_4M;
diff --git a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
index 43505b1fc5d..6ea9185fd12 100644
--- a/arch/ppc/mm/pgtable.c
+++ b/arch/ppc/mm/pgtable.c
@@ -280,18 +280,16 @@ map_page(unsigned long va, phys_addr_t pa, int flags)
 	pte_t *pg;
 	int err = -ENOMEM;
 
-	spin_lock(&init_mm.page_table_lock);
 	/* Use upper 10 bits of VA to index the first level map */
 	pd = pmd_offset(pgd_offset_k(va), va);
 	/* Use middle 10 bits of VA to index the second-level map */
-	pg = pte_alloc_kernel(&init_mm, pd, va);
+	pg = pte_alloc_kernel(pd, va);
 	if (pg != 0) {
 		err = 0;
 		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
 		if (mem_init_done)
 			flush_HPTE(0, va, pmd_val(*pd));
 	}
-	spin_unlock(&init_mm.page_table_lock);
 	return err;
 }
 
diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c
index efa985f05ac..4aacf521e3e 100644
--- a/arch/ppc64/kernel/vdso.c
+++ b/arch/ppc64/kernel/vdso.c
@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
 		return NOPAGE_SIGBUS;
 
 	/*
-	 * Last page is systemcfg, special handling here, no get_page() a
-	 * this is a reserved page
+	 * Last page is systemcfg.
 	 */
 	if ((vma->vm_end - address) <= PAGE_SIZE)
-		return virt_to_page(systemcfg);
+		pg = virt_to_page(systemcfg);
+	else
+		pg = virt_to_page(vbase + offset);
 
-	pg = virt_to_page(vbase + offset);
 	get_page(pg);
 	DBG(" ->page count: %d\n", page_count(pg));
 
@@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
 	 * gettimeofday will be totally dead. It's fine to use that for setting
 	 * breakpoints in the vDSO code pages though
 	 */
-	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
 	vma->vm_ops = &vdso_vmops;
@@ -603,6 +603,8 @@ void __init vdso_init(void)
 		ClearPageReserved(pg);
 		get_page(pg);
 	}
+
+	get_page(virt_to_page(systemcfg));
 }
 
 int in_gate_area_no_task(unsigned long addr)
diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c
index c65b87b9275..f4ca29cf536 100644
--- a/arch/ppc64/mm/imalloc.c
+++ b/arch/ppc64/mm/imalloc.c
@@ -300,12 +300,7 @@ void im_free(void * addr)
 	for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
 		if (tmp->addr == addr) {
 			*p = tmp->next;
-
-			/* XXX: do we need the lock? */
-			spin_lock(&init_mm.page_table_lock);
 			unmap_vm_area(tmp);
-			spin_unlock(&init_mm.page_table_lock);
-
 			kfree(tmp);
 			up(&imlist_sem);
 			return;
diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c
index be64b157afc..e2bd7776622 100644
--- a/arch/ppc64/mm/init.c
+++ b/arch/ppc64/mm/init.c
@@ -104,6 +104,8 @@ void show_mem(void)
 	show_free_areas();
 	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat_page_nr(pgdat, i);
 			total++;
@@ -114,6 +116,7 @@ void show_mem(void)
 			else if (page_count(page))
 				shared += page_count(page) - 1;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 	printk("%ld pages of RAM\n", total);
 	printk("%ld reserved pages\n", reserved);
@@ -155,7 +158,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 	unsigned long vsid;
 
 	if (mem_init_done) {
-		spin_lock(&init_mm.page_table_lock);
 		pgdp = pgd_offset_k(ea);
 		pudp = pud_alloc(&init_mm, pgdp, ea);
 		if (!pudp)
@@ -163,12 +165,11 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags)
 		pmdp = pmd_alloc(&init_mm, pudp, ea);
 		if (!pmdp)
 			return -ENOMEM;
-		ptep = pte_alloc_kernel(&init_mm, pmdp, ea);
+		ptep = pte_alloc_kernel(pmdp, ea);
 		if (!ptep)
 			return -ENOMEM;
 		set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
 							  __pgprot(flags)));
-		spin_unlock(&init_mm.page_table_lock);
 	} else {
 		unsigned long va, vpn, hash, hpteg;
 
@@ -649,11 +650,14 @@ void __init mem_init(void)
 #endif
 
 	for_each_pgdat(pgdat) {
+		unsigned long flags;
+		pgdat_resize_lock(pgdat, &flags);
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			page = pgdat_page_nr(pgdat, i);
 			if (PageReserved(page))
 				reservedpages++;
 		}
+		pgdat_resize_unlock(pgdat, &flags);
 	}
 
 	codesize = (unsigned long)&_etext - (unsigned long)&_stext;
@@ -867,3 +871,80 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
 	return vma_prot;
 }
 EXPORT_SYMBOL(phys_mem_access_prot);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* Put one page into service: clear its reserved flag, free it, count it. */
+void online_page(struct page *page)
+{
+	ClearPageReserved(page);
+	free_cold_page(page);
+	totalram_pages++;
+	num_physpages++;
+}
+
+/*
+ * This works only for the non-NUMA case.  Later, we'll need a lookup
+ * to convert from real physical addresses to nid, that doesn't use
+ * pfn_to_nid().
+ */
+int __devinit add_memory(u64 start, u64 size)
+{
+	struct pglist_data *pgdata = NODE_DATA(0);
+	struct zone *zone;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long nr_pages = size >> PAGE_SHIFT;
+
+	/* this should work for most non-highmem platforms */
+	zone = pgdata->node_zones;
+
+	/* Add the pfn range to node 0's first zone and hand the
+	 * result of __add_pages() straight back to the caller. */
+	return __add_pages(zone, start_pfn, nr_pages);
+}
+
+/*
+ * Remove the range [start, start+size) unless it spans more than one
+ * zone or touches the RMO; returns -1 then, else __remove_pages() result.
+ */
+int __devinit remove_memory(u64 start, u64 size)
+{
+	struct zone *zone;
+	unsigned long start_pfn, end_pfn, nr_pages;
+
+	start_pfn = start >> PAGE_SHIFT;
+	nr_pages = size >> PAGE_SHIFT;
+	end_pfn = start_pfn + nr_pages;
+
+	printk("%s(): Attempting to remove memory in range "
+			"%lx to %lx\n", __func__, start, start+size);
+	/*
+	 * find the zone that holds the first page of the range
+	 */
+	zone = page_zone(pfn_to_page(start_pfn));
+
+	printk("%s(): memory will be removed from "
+			"the %s zone\n", __func__, zone->name);
+
+	/*
+	 * not handling removing memory ranges that
+	 * overlap multiple zones yet
+	 */
+	if (end_pfn > (zone->zone_start_pfn + zone->spanned_pages))
+		goto overlap;
+
+	/* make sure it is NOT in RMO */
+	if ((start < lmb.rmo_size) || ((start+size) < lmb.rmo_size)) {
+		printk("%s(): range to be removed must NOT be in RMO!\n",
+			__func__);
+		goto in_rmo;
+	}
+
+	return __remove_pages(zone, start_pfn, nr_pages);
+
+overlap:
+	printk("%s(): memory range to be removed overlaps "
+		"multiple zones!!!\n", __func__);
+in_rmo:
+	return -1;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c
index c6c39d868bc..0f6e9ecbefe 100644
--- a/arch/s390/mm/ioremap.c
+++ b/arch/s390/mm/ioremap.c
@@ -58,7 +58,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -80,7 +80,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -94,7 +93,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return 0;
 }
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 7abba2161da..775f86cd3fe 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -194,10 +194,13 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 			       unsigned long address)
 {
 	unsigned long addrmax = P4SEG;
-	pgd_t *dir;
+	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
+	struct mm_struct *mm;
+	spinlock_t *ptl;
+	int ret = 1;
 
 #ifdef CONFIG_SH_KGDB
 	if (kgdb_nofault && kgdb_bus_err_hook)
@@ -208,28 +211,28 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 	addrmax = P4SEG_STORE_QUE + 0x04000000;
 #endif
 
-	if (address >= P3SEG && address < addrmax)
-		dir = pgd_offset_k(address);
-	else if (address >= TASK_SIZE)
+	if (address >= P3SEG && address < addrmax) {
+		pgd = pgd_offset_k(address);
+		mm = NULL;
+	} else if (address >= TASK_SIZE)
 		return 1;
-	else if (!current->mm)
+	else if (!(mm = current->mm))
 		return 1;
 	else
-		dir = pgd_offset(current->mm, address);
+		pgd = pgd_offset(mm, address);
 
-	pmd = pmd_offset(dir, address);
-	if (pmd_none(*pmd))
-		return 1;
-	if (pmd_bad(*pmd)) {
-		pmd_ERROR(*pmd);
-		pmd_clear(pmd);
+	pmd = pmd_offset(pgd, address);
+	if (pmd_none_or_clear_bad(pmd))
 		return 1;
-	}
-	pte = pte_offset_kernel(pmd, address);
+	if (mm)
+		pte = pte_offset_map_lock(mm, pmd, address, &ptl);
+	else
+		pte = pte_offset_kernel(pmd, address);
+
 	entry = *pte;
 	if (pte_none(entry) || pte_not_present(entry)
 	    || (writeaccess && !pte_write(entry)))
-		return 1;
+		goto unlock;
 
 	if (writeaccess)
 		entry = pte_mkdirty(entry);
@@ -251,8 +254,11 @@ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
 
 	set_pte(pte, entry);
 	update_mmu_cache(NULL, address, entry);
-
-	return 0;
+	ret = 0;
+unlock:
+	if (mm)
+		pte_unmap_unlock(pte, ptl);
+	return ret;
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 95bb1a6c606..6b7a7688c98 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -54,8 +54,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
-#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
-
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t entry)
 {
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index 9f490c2742f..e794e27a72f 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -57,7 +57,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address,
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -79,7 +79,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd;
 		pmd = pmd_alloc(&init_mm, dir, address);
@@ -93,7 +92,6 @@ int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }
diff --git a/arch/sh64/mm/cache.c b/arch/sh64/mm/cache.c
index 3b87e25ea77..c0c1b21350d 100644
--- a/arch/sh64/mm/cache.c
+++ b/arch/sh64/mm/cache.c
@@ -584,32 +584,36 @@ static void sh64_dcache_purge_phy_page(unsigned long paddr)
 	}
 }
 
-static void sh64_dcache_purge_user_page(struct mm_struct *mm, unsigned long eaddr)
+static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
+				unsigned long addr, unsigned long end)
 {
 	pgd_t *pgd;
 	pmd_t *pmd;
 	pte_t *pte;
 	pte_t entry;
+	spinlock_t *ptl;
 	unsigned long paddr;
 
-	/* NOTE : all the callers of this have mm->page_table_lock held, so the
-	   following page table traversal is safe even on SMP/pre-emptible. */
-
-	if (!mm) return; /* No way to find physical address of page */
-	pgd = pgd_offset(mm, eaddr);
-	if (pgd_bad(*pgd)) return;
-
-	pmd = pmd_offset(pgd, eaddr);
-	if (pmd_none(*pmd) || pmd_bad(*pmd)) return;
-
-	pte = pte_offset_kernel(pmd, eaddr);
-	entry = *pte;
-	if (pte_none(entry) || !pte_present(entry)) return;
-
-	paddr = pte_val(entry) & PAGE_MASK;
-
-	sh64_dcache_purge_coloured_phy_page(paddr, eaddr);
-
+	if (!mm)
+		return; /* No way to find physical address of page */
+
+	pgd = pgd_offset(mm, addr);
+	if (pgd_bad(*pgd))
+		return;
+
+	pmd = pmd_offset(pgd, addr);
+	if (pmd_none(*pmd) || pmd_bad(*pmd))
+		return;
+
+	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	do {
+		entry = *pte;
+		if (pte_none(entry) || !pte_present(entry))
+			continue;
+		paddr = pte_val(entry) & PAGE_MASK;
+		sh64_dcache_purge_coloured_phy_page(paddr, addr);
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	pte_unmap_unlock(pte - 1, ptl);
 }
 /****************************************************************************/
 
@@ -668,7 +672,7 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm,
 	int n_pages;
 
 	n_pages = ((end - start) >> PAGE_SHIFT);
-	if (n_pages >= 64) {
+	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
 #if 1
 		sh64_dcache_purge_all();
 #else
@@ -707,20 +711,10 @@ static void sh64_dcache_purge_user_range(struct mm_struct *mm,
 		}
 #endif
 	} else {
-		/* 'Small' range */
-		unsigned long aligned_start;
-		unsigned long eaddr;
-		unsigned long last_page_start;
-
-		aligned_start = start & PAGE_MASK;
-		/* 'end' is 1 byte beyond the end of the range */
-		last_page_start = (end - 1) & PAGE_MASK;
-
-		eaddr = aligned_start;
-		while (eaddr <= last_page_start) {
-			sh64_dcache_purge_user_page(mm, eaddr);
-			eaddr += PAGE_SIZE;
-		}
+		/* Small range, covered by a single page table page */
+		start &= PAGE_MASK;	/* should already be so */
+		end = PAGE_ALIGN(end);	/* should already be so */
+		sh64_dcache_purge_user_pages(mm, start, end);
 	}
 	return;
 }
@@ -880,9 +874,7 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
 	   addresses from the user address space specified by mm, after writing
 	   back any dirty data.
 
-	   Note(1), 'end' is 1 byte beyond the end of the range to flush.
-
-	   Note(2), this is called with mm->page_table_lock held.*/
+	   Note, 'end' is 1 byte beyond the end of the range to flush. */
 
 	sh64_dcache_purge_user_range(mm, start, end);
 	sh64_icache_inv_user_page_range(mm, start, end);
@@ -898,7 +890,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned
 	   the I-cache must be searched too in case the page in question is
 	   both writable and being executed from (e.g. stack trampolines.)
 
-	   Note(1), this is called with mm->page_table_lock held.
+	   Note, this is called with pte lock held.
 	   */
 
 	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index dcd9c8a8baf..ed6a505b3ee 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -54,41 +54,31 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return pte;
 }
 
-#define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
-
-static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
-			 struct page *page, pte_t * page_table, int write_access)
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
+		     pte_t *ptep, pte_t entry)
 {
-	unsigned long i;
-	pte_t entry;
-
-	add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
-
-	if (write_access)
-		entry = pte_mkwrite(pte_mkdirty(mk_pte(page,
-						       vma->vm_page_prot)));
-	else
-		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
-	entry = pte_mkyoung(entry);
-	mk_pte_huge(entry);
+	int i;
 
 	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-		set_pte(page_table, entry);
-		page_table++;
-
+		set_pte_at(mm, addr, ptep, entry);
+		ptep++;
+		addr += PAGE_SIZE;
 		pte_val(entry) += PAGE_SIZE;
 	}
 }
 
-pte_t huge_ptep_get_and_clear(pte_t *ptep)
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
 {
 	pte_t entry;
+	int i;
 
 	entry = *ptep;
 
 	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-		pte_clear(pte);
-		pte++;
+		pte_clear(mm, addr, ptep);
+		addr += PAGE_SIZE;
+		ptep++;
 	}
 
 	return entry;
@@ -106,79 +96,6 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
 	return 0;
 }
 
-int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
-			    struct vm_area_struct *vma)
-{
-	pte_t *src_pte, *dst_pte, entry;
-	struct page *ptepage;
-	unsigned long addr = vma->vm_start;
-	unsigned long end = vma->vm_end;
-	int i;
-
-	while (addr < end) {
-		dst_pte = huge_pte_alloc(dst, addr);
-		if (!dst_pte)
-			goto nomem;
-		src_pte = huge_pte_offset(src, addr);
-		BUG_ON(!src_pte || pte_none(*src_pte));
-		entry = *src_pte;
-		ptepage = pte_page(entry);
-		get_page(ptepage);
-		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-			set_pte(dst_pte, entry);
-			pte_val(entry) += PAGE_SIZE;
-			dst_pte++;
-		}
-		add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
-		addr += HPAGE_SIZE;
-	}
-	return 0;
-
-nomem:
-	return -ENOMEM;
-}
-
-int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
-			struct page **pages, struct vm_area_struct **vmas,
-			unsigned long *position, int *length, int i)
-{
-	unsigned long vaddr = *position;
-	int remainder = *length;
-
-	WARN_ON(!is_vm_hugetlb_page(vma));
-
-	while (vaddr < vma->vm_end && remainder) {
-		if (pages) {
-			pte_t *pte;
-			struct page *page;
-
-			pte = huge_pte_offset(mm, vaddr);
-
-			/* hugetlb should be locked, and hence, prefaulted */
-			BUG_ON(!pte || pte_none(*pte));
-
-			page = pte_page(*pte);
-
-			WARN_ON(!PageCompound(page));
-
-			get_page(page);
-			pages[i] = page;
-		}
-
-		if (vmas)
-			vmas[i] = vma;
-
-		vaddr += PAGE_SIZE;
-		--remainder;
-		++i;
-	}
-
-	*length = remainder;
-	*position = vaddr;
-
-	return i;
-}
-
 struct page *follow_huge_addr(struct mm_struct *mm,
 			      unsigned long address, int write)
 {
@@ -195,84 +112,3 @@ struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
 {
 	return NULL;
 }
-
-void unmap_hugepage_range(struct vm_area_struct *vma,
-			  unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm = vma->vm_mm;
-	unsigned long address;
-	pte_t *pte;
-	struct page *page;
-	int i;
-
-	BUG_ON(start & (HPAGE_SIZE - 1));
-	BUG_ON(end & (HPAGE_SIZE - 1));
-
-	for (address = start; address < end; address += HPAGE_SIZE) {
-		pte = huge_pte_offset(mm, address);
-		BUG_ON(!pte);
-		if (pte_none(*pte))
-			continue;
-		page = pte_page(*pte);
-		put_page(page);
-		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-			pte_clear(mm, address+(i*PAGE_SIZE), pte);
-			pte++;
-		}
-	}
-	add_mm_counter(mm, rss, -((end - start) >> PAGE_SHIFT));
-	flush_tlb_range(vma, start, end);
-}
-
-int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
-{
-	struct mm_struct *mm = current->mm;
-	unsigned long addr;
-	int ret = 0;
-
-	BUG_ON(vma->vm_start & ~HPAGE_MASK);
-	BUG_ON(vma->vm_end & ~HPAGE_MASK);
-
-	spin_lock(&mm->page_table_lock);
-	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
-		unsigned long idx;
-		pte_t *pte = huge_pte_alloc(mm, addr);
-		struct page *page;
-
-		if (!pte) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		if (!pte_none(*pte))
-			continue;
-
-		idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
-			+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
-		page = find_get_page(mapping, idx);
-		if (!page) {
-			/* charge the fs quota first */
-			if (hugetlb_get_quota(mapping)) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			page = alloc_huge_page();
-			if (!page) {
-				hugetlb_put_quota(mapping);
-				ret = -ENOMEM;
-				goto out;
-			}
-			ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
-			if (! ret) {
-				unlock_page(page);
-			} else {
-				hugetlb_put_quota(mapping);
-				free_huge_page(page);
-				goto out;
-			}
-		}
-		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
-	}
-out:
-	spin_unlock(&mm->page_table_lock);
-	return ret;
-}
diff --git a/arch/sh64/mm/ioremap.c b/arch/sh64/mm/ioremap.c
index f4003da556b..fb1866fa2c9 100644
--- a/arch/sh64/mm/ioremap.c
+++ b/arch/sh64/mm/ioremap.c
@@ -79,7 +79,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 		BUG();
 
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -101,7 +101,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pmd_t *pmd = pmd_alloc(&init_mm, dir, address);
 		error = -ENOMEM;
@@ -115,7 +114,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return 0;
 }
diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c
index 20ccb957fb7..9604893ffdb 100644
--- a/arch/sparc/mm/generic.c
+++ b/arch/sparc/mm/generic.c
@@ -73,14 +73,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	int space = GET_IOSPACE(pfn);
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+	/* See comment in mm/memory.c remap_pfn_range */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
 	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
 	flush_cache_range(vma, beg, end);
 
-	spin_lock(&mm->page_table_lock);
 	while (from < end) {
-		pmd_t *pmd = pmd_alloc(current->mm, dir, from);
+		pmd_t *pmd = pmd_alloc(mm, dir, from);
 		error = -ENOMEM;
 		if (!pmd)
 			break;
@@ -90,7 +92,6 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 		from = (from + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	}
-	spin_unlock(&mm->page_table_lock);
 
 	flush_tlb_range(vma, beg, end);
 	return error;
diff --git a/arch/sparc64/kernel/binfmt_aout32.c b/arch/sparc64/kernel/binfmt_aout32.c
index b2854ef221d..edf52d06b28 100644
--- a/arch/sparc64/kernel/binfmt_aout32.c
+++ b/arch/sparc64/kernel/binfmt_aout32.c
@@ -241,7 +241,6 @@ static int load_aout32_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->brk = ex.a_bss +
 		(current->mm->start_brk = N_BSSADDR(ex));
 
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;
diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
index c954d91f01d..112c316e7cd 100644
--- a/arch/sparc64/mm/generic.c
+++ b/arch/sparc64/mm/generic.c
@@ -127,14 +127,16 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	int space = GET_IOSPACE(pfn);
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+	/* See comment in mm/memory.c remap_pfn_range */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
 	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
 	flush_cache_range(vma, beg, end);
 
-	spin_lock(&mm->page_table_lock);
 	while (from < end) {
-		pud_t *pud = pud_alloc(current->mm, dir, from);
+		pud_t *pud = pud_alloc(mm, dir, from);
 		error = -ENOMEM;
 		if (!pud)
 			break;
@@ -144,8 +146,7 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 		from = (from + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	}
-	flush_tlb_range(vma, beg, end);
-	spin_unlock(&mm->page_table_lock);
 
+	flush_tlb_range(vma, beg, end);
 	return error;
 }
diff --git a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
index 90ca99d0b89..8b104be4662 100644
--- a/arch/sparc64/mm/tlb.c
+++ b/arch/sparc64/mm/tlb.c
@@ -18,8 +18,7 @@
 
 /* Heavily inspired by the ppc64 code.  */
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) =
-	{ NULL, 0, 0, 0, 0, 0, { 0 }, { NULL }, };
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers) = { 0, };
 
 void flush_tlb_pending(void)
 {
@@ -72,7 +71,7 @@ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t
 
 no_cache_flush:
 
-	if (mp->tlb_frozen)
+	if (mp->fullmm)
 		return;
 
 	nr = mp->tlb_nr;
@@ -97,7 +96,7 @@ void flush_tlb_pgtables(struct mm_struct *mm, unsigned long start, unsigned long
 	unsigned long nr = mp->tlb_nr;
 	long s = start, e = end, vpte_base;
 
-	if (mp->tlb_frozen)
+	if (mp->fullmm)
 		return;
 
 	/* If start is greater than end, that is a real problem.  */
diff --git a/arch/um/include/tlb.h b/arch/um/include/tlb.h
index 45d7da6c3b2..8efc1e0f1b8 100644
--- a/arch/um/include/tlb.h
+++ b/arch/um/include/tlb.h
@@ -34,7 +34,6 @@ struct host_vm_op {
 	} u;
 };
 
-extern void mprotect_kernel_vm(int w);
 extern void force_flush_all(void);
 extern void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
                              unsigned long end_addr, int force,
diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
index 0d73ceeece7..34b54a3e213 100644
--- a/arch/um/kernel/process_kern.c
+++ b/arch/um/kernel/process_kern.c
@@ -222,6 +222,7 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
+	pte_t ptent;
 
 	if(task->mm == NULL) 
 		return(ERR_PTR(-EINVAL));
@@ -238,12 +239,13 @@ void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
 		return(ERR_PTR(-EINVAL));
 
 	pte = pte_offset_kernel(pmd, addr);
-	if(!pte_present(*pte)) 
+	ptent = *pte;
+	if(!pte_present(ptent))
 		return(ERR_PTR(-EINVAL));
 
 	if(pte_out != NULL)
-		*pte_out = *pte;
-	return((void *) (pte_val(*pte) & PAGE_MASK) + (addr & ~PAGE_MASK));
+		*pte_out = ptent;
+	return((void *) (pte_val(ptent) & PAGE_MASK) + (addr & ~PAGE_MASK));
 }
 
 char *current_cmd(void)
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 240143b616a..9e5e39cea82 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -28,7 +28,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	pmd_t *pmd;
 	pte_t *pte;
 
-	spin_lock(&mm->page_table_lock);
 	pgd = pgd_offset(mm, proc);
 	pud = pud_alloc(mm, pgd, proc);
 	if (!pud)
@@ -63,7 +62,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
 	*pte = mk_pte(virt_to_page(kernel), __pgprot(_PAGE_PRESENT));
 	*pte = pte_mkexec(*pte);
 	*pte = pte_wrprotect(*pte);
-	spin_unlock(&mm->page_table_lock);
 	return(0);
 
  out_pmd:
@@ -71,7 +69,6 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
  out_pte:
 	pmd_free(pmd);
  out:
-	spin_unlock(&mm->page_table_lock);
 	return(-ENOMEM);
 }
 
@@ -147,6 +144,7 @@ void destroy_context_skas(struct mm_struct *mm)
 
 	if(!proc_mm || !ptrace_faultinfo){
 		free_page(mmu->id.stack);
+		pte_lock_deinit(virt_to_page(mmu->last_page_table));
 		pte_free_kernel((pte_t *) mmu->last_page_table);
                 dec_page_state(nr_page_table_pages);
 #ifdef CONFIG_3_LEVEL_PGTABLES
diff --git a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c
index f1d85dbb45b..ae6217c8613 100644
--- a/arch/um/kernel/tt/tlb.c
+++ b/arch/um/kernel/tt/tlb.c
@@ -74,42 +74,6 @@ void flush_tlb_kernel_range_tt(unsigned long start, unsigned long end)
                 atomic_inc(&vmchange_seq);
 }
 
-static void protect_vm_page(unsigned long addr, int w, int must_succeed)
-{
-	int err;
-
-	err = protect_memory(addr, PAGE_SIZE, 1, w, 1, must_succeed);
-	if(err == 0) return;
-	else if((err == -EFAULT) || (err == -ENOMEM)){
-		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
-		protect_vm_page(addr, w, 1);
-	}
-	else panic("protect_vm_page : protect failed, errno = %d\n", err);
-}
-
-void mprotect_kernel_vm(int w)
-{
-	struct mm_struct *mm;
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-	unsigned long addr;
-	
-	mm = &init_mm;
-	for(addr = start_vm; addr < end_vm;){
-		pgd = pgd_offset(mm, addr);
-		pud = pud_offset(pgd, addr);
-		pmd = pmd_offset(pud, addr);
-		if(pmd_present(*pmd)){
-			pte = pte_offset_kernel(pmd, addr);
-			if(pte_present(*pte)) protect_vm_page(addr, w, 0);
-			addr += PAGE_SIZE;
-		}
-		else addr += PMD_SIZE;
-	}
-}
-
 void flush_tlb_kernel_vm_tt(void)
 {
         flush_tlb_kernel_range(start_vm, end_vm);
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 3e6780fa018..93c60f4aa47 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -314,7 +314,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 	current->mm->cached_hole_size = 0;
 
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
  	current->flags &= ~PF_FORKNOEXEC;
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index 6972df480d2..ecf7acb5db9 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -60,7 +60,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
 	if (address >= end)
 		BUG();
 	do {
-		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		pte_t * pte = pte_alloc_kernel(pmd, address);
 		if (!pte)
 			return -ENOMEM;
 		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
@@ -105,7 +105,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 	flush_cache_all();
 	if (address >= end)
 		BUG();
-	spin_lock(&init_mm.page_table_lock);
 	do {
 		pud_t *pud;
 		pud = pud_alloc(&init_mm, pgd, address);
@@ -119,7 +118,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		pgd++;
 	} while (address && (address < end));
-	spin_unlock(&init_mm.page_table_lock);
 	flush_tlb_all();
 	return error;
 }