aboutsummaryrefslogtreecommitdiff
path: root/include/asm-powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'include/asm-powerpc')
-rw-r--r--include/asm-powerpc/dma-mapping.h285
-rw-r--r--include/asm-powerpc/io.h462
-rw-r--r--include/asm-powerpc/mmu.h399
-rw-r--r--include/asm-powerpc/mmu_context.h89
-rw-r--r--include/asm-powerpc/mmzone.h50
-rw-r--r--include/asm-powerpc/pci-bridge.h153
-rw-r--r--include/asm-powerpc/pci.h247
-rw-r--r--include/asm-powerpc/pgalloc.h156
-rw-r--r--include/asm-powerpc/pgtable-4k.h91
-rw-r--r--include/asm-powerpc/pgtable-64k.h90
-rw-r--r--include/asm-powerpc/pgtable.h524
-rw-r--r--include/asm-powerpc/ppc-pci.h2
-rw-r--r--include/asm-powerpc/spinlock.h269
13 files changed, 2815 insertions, 2 deletions
diff --git a/include/asm-powerpc/dma-mapping.h b/include/asm-powerpc/dma-mapping.h
new file mode 100644
index 00000000000..59a80163f75
--- /dev/null
+++ b/include/asm-powerpc/dma-mapping.h
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2004 IBM
+ *
+ * Implements the generic device dma API for powerpc.
+ * the pci and vio busses
+ */
+#ifndef _ASM_DMA_MAPPING_H
+#define _ASM_DMA_MAPPING_H
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/cache.h>
+/* need struct page definitions */
+#include <linux/mm.h>
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+#include <asm/bug.h>
+
+#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+/*
+ * DMA-consistent mapping functions for PowerPCs that don't support
+ * cache snooping. These allocate/free a region of uncached mapped
+ * memory space for use with DMA devices. Alternatively, you could
+ * allocate the space "normally" and use the cache management functions
+ * to ensure it is consistent.
+ */
+extern void *__dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp);
+extern void __dma_free_coherent(size_t size, void *vaddr);
+extern void __dma_sync(void *vaddr, size_t size, int direction);
+extern void __dma_sync_page(struct page *page, unsigned long offset,
+ size_t size, int direction);
+
+#else /* ! CONFIG_NOT_COHERENT_CACHE */
+/*
+ * Cache coherent cores.
+ */
+
+#define __dma_alloc_coherent(gfp, size, handle) NULL
+#define __dma_free_coherent(size, addr) do { } while (0)
+#define __dma_sync(addr, size, rw) do { } while (0)
+#define __dma_sync_page(pg, off, sz, rw) do { } while (0)
+
+#endif /* ! CONFIG_NOT_COHERENT_CACHE */
+
+#ifdef CONFIG_PPC64
+
+extern int dma_supported(struct device *dev, u64 mask);
+extern int dma_set_mask(struct device *dev, u64 dma_mask);
+extern void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag);
+extern void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle);
+extern dma_addr_t dma_map_single(struct device *dev, void *cpu_addr,
+ size_t size, enum dma_data_direction direction);
+extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction);
+extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction);
+extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
+ size_t size, enum dma_data_direction direction);
+extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction);
+extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nhwentries, enum dma_data_direction direction);
+
+#else /* CONFIG_PPC64 */
+
+#define dma_supported(dev, mask) (1)
+
+static inline int dma_set_mask(struct device *dev, u64 dma_mask)
+{
+ if (!dev->dma_mask || !dma_supported(dev, mask))
+ return -EIO;
+
+ *dev->dma_mask = dma_mask;
+
+ return 0;
+}
+
+static inline void *dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t * dma_handle,
+ gfp_t gfp)
+{
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ return __dma_alloc_coherent(size, dma_handle, gfp);
+#else
+ void *ret;
+ /* ignore region specifiers */
+ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM);
+
+ if (dev == NULL || dev->coherent_dma_mask < 0xffffffff)
+ gfp |= GFP_DMA;
+
+ ret = (void *)__get_free_pages(gfp, get_order(size));
+
+ if (ret != NULL) {
+ memset(ret, 0, size);
+ *dma_handle = virt_to_bus(ret);
+ }
+
+ return ret;
+#endif
+}
+
+static inline void
+dma_free_coherent(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle)
+{
+#ifdef CONFIG_NOT_COHERENT_CACHE
+ __dma_free_coherent(size, vaddr);
+#else
+ free_pages((unsigned long)vaddr, get_order(size));
+#endif
+}
+
+static inline dma_addr_t
+dma_map_single(struct device *dev, void *ptr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+
+ __dma_sync(ptr, size, direction);
+
+ return virt_to_bus(ptr);
+}
+
+/* We do nothing. */
+#define dma_unmap_single(dev, addr, size, dir) do { } while (0)
+
+static inline dma_addr_t
+dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+
+ __dma_sync_page(page, offset, size, direction);
+
+ return page_to_bus(page) + offset;
+}
+
+/* We do nothing. */
+#define dma_unmap_page(dev, handle, size, dir) do { } while (0)
+
+static inline int
+dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ int i;
+
+ BUG_ON(direction == DMA_NONE);
+
+ for (i = 0; i < nents; i++, sg++) {
+ BUG_ON(!sg->page);
+ __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+ sg->dma_address = page_to_bus(sg->page) + sg->offset;
+ }
+
+ return nents;
+}
+
+/* We don't do anything here. */
+#define dma_unmap_sg(dev, sg, nents, dir) do { } while (0)
+
+#endif /* CONFIG_PPC64 */
+
+static inline void dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+ __dma_sync(bus_to_virt(dma_handle), size, direction);
+}
+
+static inline void dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+ __dma_sync(bus_to_virt(dma_handle), size, direction);
+}
+
+static inline void dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ int i;
+
+ BUG_ON(direction == DMA_NONE);
+
+ for (i = 0; i < nents; i++, sg++)
+ __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+}
+
+static inline void dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg, int nents,
+ enum dma_data_direction direction)
+{
+ int i;
+
+ BUG_ON(direction == DMA_NONE);
+
+ for (i = 0; i < nents; i++, sg++)
+ __dma_sync_page(sg->page, sg->offset, sg->length, direction);
+}
+
+static inline int dma_mapping_error(dma_addr_t dma_addr)
+{
+#ifdef CONFIG_PPC64
+ return (dma_addr == DMA_ERROR_CODE);
+#else
+ return 0;
+#endif
+}
+
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
+#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define dma_is_consistent(d) (0)
+#else
+#define dma_is_consistent(d) (1)
+#endif
+
+static inline int dma_get_cache_alignment(void)
+{
+#ifdef CONFIG_PPC64
+ /* no easy way to get cache size on all processors, so return
+ * the maximum possible, to be safe */
+ return (1 << L1_CACHE_SHIFT_MAX);
+#else
+ /*
+ * Each processor family will define its own L1_CACHE_SHIFT,
+ * L1_CACHE_BYTES wraps to this, so this is always safe.
+ */
+ return L1_CACHE_BYTES;
+#endif
+}
+
+static inline void dma_sync_single_range_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ /* just sync everything for now */
+ dma_sync_single_for_cpu(dev, dma_handle, offset + size, direction);
+}
+
+static inline void dma_sync_single_range_for_device(struct device *dev,
+ dma_addr_t dma_handle, unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+{
+ /* just sync everything for now */
+ dma_sync_single_for_device(dev, dma_handle, offset + size, direction);
+}
+
+static inline void dma_cache_sync(void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+ BUG_ON(direction == DMA_NONE);
+ __dma_sync(vaddr, size, (int)direction);
+}
+
+/*
+ * DMA operations are abstracted for G5 vs. i/pSeries, PCI vs. VIO
+ */
+struct dma_mapping_ops {
+ void * (*alloc_coherent)(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag);
+ void (*free_coherent)(struct device *dev, size_t size,
+ void *vaddr, dma_addr_t dma_handle);
+ dma_addr_t (*map_single)(struct device *dev, void *ptr,
+ size_t size, enum dma_data_direction direction);
+ void (*unmap_single)(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction direction);
+ int (*map_sg)(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction);
+ void (*unmap_sg)(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction);
+ int (*dma_supported)(struct device *dev, u64 mask);
+ int (*dac_dma_supported)(struct device *dev, u64 mask);
+};
+
+#endif /* _ASM_DMA_MAPPING_H */
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
new file mode 100644
index 00000000000..48938d84d05
--- /dev/null
+++ b/include/asm-powerpc/io.h
@@ -0,0 +1,462 @@
+#ifndef _ASM_POWERPC_IO_H
+#define _ASM_POWERPC_IO_H
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef CONFIG_PPC64
+#include <asm-ppc/io.h>
+#else
+
+#include <linux/compiler.h>
+#include <asm/page.h>
+#include <asm/byteorder.h>
+#ifdef CONFIG_PPC_ISERIES
+#include <asm/iseries/iseries_io.h>
+#endif
+#include <asm/synch.h>
+#include <asm/delay.h>
+
+#include <asm-generic/iomap.h>
+
+#define __ide_mm_insw(p, a, c) _insw_ns((volatile u16 __iomem *)(p), (a), (c))
+#define __ide_mm_insl(p, a, c) _insl_ns((volatile u32 __iomem *)(p), (a), (c))
+#define __ide_mm_outsw(p, a, c) _outsw_ns((volatile u16 __iomem *)(p), (a), (c))
+#define __ide_mm_outsl(p, a, c) _outsl_ns((volatile u32 __iomem *)(p), (a), (c))
+
+
+#define SIO_CONFIG_RA 0x398
+#define SIO_CONFIG_RD 0x399
+
+#define SLOW_DOWN_IO
+
+extern unsigned long isa_io_base;
+extern unsigned long pci_io_base;
+extern unsigned long io_page_mask;
+
+#define MAX_ISA_PORT 0x10000
+
+#define _IO_IS_VALID(port) ((port) >= MAX_ISA_PORT || (1 << (port>>PAGE_SHIFT)) \
+ & io_page_mask)
+
+#ifdef CONFIG_PPC_ISERIES
+/* __raw_* accessors aren't supported on iSeries */
+#define __raw_readb(addr) { BUG(); 0; }
+#define __raw_readw(addr) { BUG(); 0; }
+#define __raw_readl(addr) { BUG(); 0; }
+#define __raw_readq(addr) { BUG(); 0; }
+#define __raw_writeb(v, addr) { BUG(); 0; }
+#define __raw_writew(v, addr) { BUG(); 0; }
+#define __raw_writel(v, addr) { BUG(); 0; }
+#define __raw_writeq(v, addr) { BUG(); 0; }
+#define readb(addr) iSeries_Read_Byte(addr)
+#define readw(addr) iSeries_Read_Word(addr)
+#define readl(addr) iSeries_Read_Long(addr)
+#define writeb(data, addr) iSeries_Write_Byte((data),(addr))
+#define writew(data, addr) iSeries_Write_Word((data),(addr))
+#define writel(data, addr) iSeries_Write_Long((data),(addr))
+#define memset_io(a,b,c) iSeries_memset_io((a),(b),(c))
+#define memcpy_fromio(a,b,c) iSeries_memcpy_fromio((a), (b), (c))
+#define memcpy_toio(a,b,c) iSeries_memcpy_toio((a), (b), (c))
+
+#define inb(addr) readb(((void __iomem *)(long)(addr)))
+#define inw(addr) readw(((void __iomem *)(long)(addr)))
+#define inl(addr) readl(((void __iomem *)(long)(addr)))
+#define outb(data,addr) writeb(data,((void __iomem *)(long)(addr)))
+#define outw(data,addr) writew(data,((void __iomem *)(long)(addr)))
+#define outl(data,addr) writel(data,((void __iomem *)(long)(addr)))
+/*
+ * The *_ns versions below don't do byte-swapping.
+ * Neither do the standard versions now, these are just here
+ * for older code.
+ */
+#define insw_ns(port, buf, ns) _insw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
+#define insl_ns(port, buf, nl) _insl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
+#else
+
+static inline unsigned char __raw_readb(const volatile void __iomem *addr)
+{
+ return *(volatile unsigned char __force *)addr;
+}
+static inline unsigned short __raw_readw(const volatile void __iomem *addr)
+{
+ return *(volatile unsigned short __force *)addr;
+}
+static inline unsigned int __raw_readl(const volatile void __iomem *addr)
+{
+ return *(volatile unsigned int __force *)addr;
+}
+static inline unsigned long __raw_readq(const volatile void __iomem *addr)
+{
+ return *(volatile unsigned long __force *)addr;
+}
+static inline void __raw_writeb(unsigned char v, volatile void __iomem *addr)
+{
+ *(volatile unsigned char __force *)addr = v;
+}
+static inline void __raw_writew(unsigned short v, volatile void __iomem *addr)
+{
+ *(volatile unsigned short __force *)addr = v;
+}
+static inline void __raw_writel(unsigned int v, volatile void __iomem *addr)
+{
+ *(volatile unsigned int __force *)addr = v;
+}
+static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
+{
+ *(volatile unsigned long __force *)addr = v;
+}
+#define readb(addr) eeh_readb(addr)
+#define readw(addr) eeh_readw(addr)
+#define readl(addr) eeh_readl(addr)
+#define readq(addr) eeh_readq(addr)
+#define writeb(data, addr) eeh_writeb((data), (addr))
+#define writew(data, addr) eeh_writew((data), (addr))
+#define writel(data, addr) eeh_writel((data), (addr))
+#define writeq(data, addr) eeh_writeq((data), (addr))
+#define memset_io(a,b,c) eeh_memset_io((a),(b),(c))
+#define memcpy_fromio(a,b,c) eeh_memcpy_fromio((a),(b),(c))
+#define memcpy_toio(a,b,c) eeh_memcpy_toio((a),(b),(c))
+#define inb(port) eeh_inb((unsigned long)port)
+#define outb(val, port) eeh_outb(val, (unsigned long)port)
+#define inw(port) eeh_inw((unsigned long)port)
+#define outw(val, port) eeh_outw(val, (unsigned long)port)
+#define inl(port) eeh_inl((unsigned long)port)
+#define outl(val, port) eeh_outl(val, (unsigned long)port)
+
+/*
+ * The insw/outsw/insl/outsl macros don't do byte-swapping.
+ * They are only used in practice for transferring buffers which
+ * are arrays of bytes, and byte-swapping is not appropriate in
+ * that case. - paulus */
+#define insb(port, buf, ns) eeh_insb((port), (buf), (ns))
+#define insw(port, buf, ns) eeh_insw_ns((port), (buf), (ns))
+#define insl(port, buf, nl) eeh_insl_ns((port), (buf), (nl))
+#define insw_ns(port, buf, ns) eeh_insw_ns((port), (buf), (ns))
+#define insl_ns(port, buf, nl) eeh_insl_ns((port), (buf), (nl))
+
+#define outsb(port, buf, ns) _outsb((u8 __iomem *)((port)+pci_io_base), (buf), (ns))
+#define outsw(port, buf, ns) _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
+#define outsl(port, buf, nl) _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
+
+#endif
+
+#define readb_relaxed(addr) readb(addr)
+#define readw_relaxed(addr) readw(addr)
+#define readl_relaxed(addr) readl(addr)
+#define readq_relaxed(addr) readq(addr)
+
+extern void _insb(volatile u8 __iomem *port, void *buf, int ns);
+extern void _outsb(volatile u8 __iomem *port, const void *buf, int ns);
+extern void _insw(volatile u16 __iomem *port, void *buf, int ns);
+extern void _outsw(volatile u16 __iomem *port, const void *buf, int ns);
+extern void _insl(volatile u32 __iomem *port, void *buf, int nl);
+extern void _outsl(volatile u32 __iomem *port, const void *buf, int nl);
+extern void _insw_ns(volatile u16 __iomem *port, void *buf, int ns);
+extern void _outsw_ns(volatile u16 __iomem *port, const void *buf, int ns);
+extern void _insl_ns(volatile u32 __iomem *port, void *buf, int nl);
+extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, int nl);
+
+#define mmiowb()
+
+/*
+ * output pause versions need a delay at least for the
+ * w83c105 ide controller in a p610.
+ */
+#define inb_p(port) inb(port)
+#define outb_p(val, port) (udelay(1), outb((val), (port)))
+#define inw_p(port) inw(port)
+#define outw_p(val, port) (udelay(1), outw((val), (port)))
+#define inl_p(port) inl(port)
+#define outl_p(val, port) (udelay(1), outl((val), (port)))
+
+/*
+ * The *_ns versions below don't do byte-swapping.
+ * Neither do the standard versions now, these are just here
+ * for older code.
+ */
+#define outsw_ns(port, buf, ns) _outsw_ns((u16 __iomem *)((port)+pci_io_base), (buf), (ns))
+#define outsl_ns(port, buf, nl) _outsl_ns((u32 __iomem *)((port)+pci_io_base), (buf), (nl))
+
+
+#define IO_SPACE_LIMIT ~(0UL)
+
+
+#ifdef __KERNEL__
+extern int __ioremap_explicit(unsigned long p_addr, unsigned long v_addr,
+ unsigned long size, unsigned long flags);
+extern void __iomem *__ioremap(unsigned long address, unsigned long size,
+ unsigned long flags);
+
+/**
+ * ioremap - map bus memory into CPU space
+ * @address: bus address of the memory
+ * @size: size of the resource to map
+ *
+ * ioremap performs a platform specific sequence of operations to
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
+ * writew/writel functions and the other mmio helpers. The returned
+ * address is not guaranteed to be usable directly as a virtual
+ * address.
+ */
+extern void __iomem *ioremap(unsigned long address, unsigned long size);
+
+#define ioremap_nocache(addr, size) ioremap((addr), (size))
+extern int iounmap_explicit(volatile void __iomem *addr, unsigned long size);
+extern void iounmap(volatile void __iomem *addr);
+extern void __iomem * reserve_phb_iospace(unsigned long size);
+
+/**
+ * virt_to_phys - map virtual addresses to physical
+ * @address: address to remap
+ *
+ * The returned physical address is the physical (CPU) mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses directly mapped or allocated via kmalloc.
+ *
+ * This function does not give bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+static inline unsigned long virt_to_phys(volatile void * address)
+{
+ return __pa((unsigned long)address);
+}
+
+/**
+ * phys_to_virt - map physical address to virtual
+ * @address: address to remap
+ *
+ * The returned virtual address is a current CPU mapping for
+ * the memory address given. It is only valid to use this function on
+ * addresses that have a kernel mapping
+ *
+ * This function does not handle bus mappings for DMA transfers. In
+ * almost all conceivable cases a device driver should not be using
+ * this function
+ */
+static inline void * phys_to_virt(unsigned long address)
+{
+ return (void *)__va(address);
+}
+
+/*
+ * Change "struct page" to physical address.
+ */
+#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
+
+/* We do NOT want virtual merging, it would put too much pressure on
+ * our iommu allocator. Instead, we want drivers to be smart enough
+ * to coalesce sglists that happen to have been mapped in a contiguous
+ * way by the iommu
+ */
+#define BIO_VMERGE_BOUNDARY 0
+
+#endif /* __KERNEL__ */
+
+static inline void iosync(void)
+{
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
+
+/* Enforce in-order execution of data I/O.
+ * No distinction between read/write on PPC; use eieio for all three.
+ */
+#define iobarrier_rw() eieio()
+#define iobarrier_r() eieio()
+#define iobarrier_w() eieio()
+
+/*
+ * 8, 16 and 32 bit, big and little endian I/O operations, with barrier.
+ * These routines do not perform EEH-related I/O address translation,
+ * and should not be used directly by device drivers. Use inb/readb
+ * instead.
+ */
+static inline int in_8(const volatile unsigned char __iomem *addr)
+{
+ int ret;
+
+ __asm__ __volatile__("lbz%U1%X1 %0,%1; twi 0,%0,0; isync"
+ : "=r" (ret) : "m" (*addr));
+ return ret;
+}
+
+static inline void out_8(volatile unsigned char __iomem *addr, int val)
+{
+ __asm__ __volatile__("stb%U0%X0 %1,%0; sync"
+ : "=m" (*addr) : "r" (val));
+}
+
+static inline int in_le16(const volatile unsigned short __iomem *addr)
+{
+ int ret;
+
+ __asm__ __volatile__("lhbrx %0,0,%1; twi 0,%0,0; isync"
+ : "=r" (ret) : "r" (addr), "m" (*addr));
+ return ret;
+}
+
+static inline int in_be16(const volatile unsigned short __iomem *addr)
+{
+ int ret;
+
+ __asm__ __volatile__("lhz%U1%X1 %0,%1; twi 0,%0,0; isync"
+ : "=r" (ret) : "m" (*addr));
+ return ret;
+}
+
+static inline void out_le16(volatile unsigned short __iomem *addr, int val)
+{
+ __asm__ __volatile__("sthbrx %1,0,%2; sync"
+ : "=m" (*addr) : "r" (val), "r" (addr));
+}
+
+static inline void out_be16(volatile unsigned short __iomem *addr, int val)
+{
+ __asm__ __volatile__("sth%U0%X0 %1,%0; sync"
+ : "=m" (*addr) : "r" (val));
+}
+
+static inline unsigned in_le32(const volatile unsigned __iomem *addr)
+{
+ unsigned ret;
+
+ __asm__ __volatile__("lwbrx %0,0,%1; twi 0,%0,0; isync"
+ : "=r" (ret) : "r" (addr), "m" (*addr));
+ return ret;
+}
+
+static inline unsigned in_be32(const volatile unsigned __iomem *addr)
+{
+ unsigned ret;
+
+ __asm__ __volatile__("lwz%U1%X1 %0,%1; twi 0,%0,0; isync"
+ : "=r" (ret) : "m" (*addr));
+ return ret;
+}
+
+static inline void out_le32(volatile unsigned __iomem *addr, int val)
+{
+ __asm__ __volatile__("stwbrx %1,0,%2; sync" : "=m" (*addr)
+ : "r" (val), "r" (addr));
+}
+
+static inline void out_be32(volatile unsigned __iomem *addr, int val)
+{
+ __asm__ __volatile__("stw%U0%X0 %1,%0; sync"
+ : "=m" (*addr) : "r" (val));
+}
+
+static inline unsigned long in_le64(const volatile unsigned long __iomem *addr)
+{
+ unsigned long tmp, ret;
+
+ __asm__ __volatile__(
+ "ld %1,0(%2)\n"
+ "twi 0,%1,0\n"
+ "isync\n"
+ "rldimi %0,%1,5*8,1*8\n"
+ "rldimi %0,%1,3*8,2*8\n"
+ "rldimi %0,%1,1*8,3*8\n"
+ "rldimi %0,%1,7*8,4*8\n"
+ "rldicl %1,%1,32,0\n"
+ "rlwimi %0,%1,8,8,31\n"
+ "rlwimi %0,%1,24,16,23\n"
+ : "=r" (ret) , "=r" (tmp) : "b" (addr) , "m" (*addr));
+ return ret;
+}
+
+static inline unsigned long in_be64(const volatile unsigned long __iomem *addr)
+{
+ unsigned long ret;
+
+ __asm__ __volatile__("ld%U1%X1 %0,%1; twi 0,%0,0; isync"
+ : "=r" (ret) : "m" (*addr));
+ return ret;
+}
+
+static inline void out_le64(volatile unsigned long __iomem *addr, unsigned long val)
+{
+ unsigned long tmp;
+
+ __asm__ __volatile__(
+ "rldimi %0,%1,5*8,1*8\n"
+ "rldimi %0,%1,3*8,2*8\n"
+ "rldimi %0,%1,1*8,3*8\n"
+ "rldimi %0,%1,7*8,4*8\n"
+ "rldicl %1,%1,32,0\n"
+ "rlwimi %0,%1,8,8,31\n"
+ "rlwimi %0,%1,24,16,23\n"
+ "std %0,0(%3)\n"
+ "sync"
+ : "=&r" (tmp) , "=&r" (val) : "1" (val) , "b" (addr) , "m" (*addr));
+}
+
+static inline void out_be64(volatile unsigned long __iomem *addr, unsigned long val)
+{
+ __asm__ __volatile__("std%U0%X0 %1,%0; sync" : "=m" (*addr) : "r" (val));
+}
+
+#ifndef CONFIG_PPC_ISERIES
+#include <asm/eeh.h>
+#endif
+
+#ifdef __KERNEL__
+
+/**
+ * check_signature - find BIOS signatures
+ * @io_addr: mmio address to check
+ * @signature: signature block
+ * @length: length of signature
+ *
+ * Perform a signature comparison with the mmio address io_addr. This
+ * address should have been obtained by ioremap.
+ * Returns 1 on a match.
+ */
+static inline int check_signature(const volatile void __iomem * io_addr,
+ const unsigned char *signature, int length)
+{
+ int retval = 0;
+#ifndef CONFIG_PPC_ISERIES
+ do {
+ if (readb(io_addr) != *signature)
+ goto out;
+ io_addr++;
+ signature++;
+ length--;
+ } while (length);
+ retval = 1;
+out:
+#endif
+ return retval;
+}
+
+/* Nothing to do */
+
+#define dma_cache_inv(_start,_size) do { } while (0)
+#define dma_cache_wback(_start,_size) do { } while (0)
+#define dma_cache_wback_inv(_start,_size) do { } while (0)
+
+/* Check of existence of legacy devices */
+extern int check_legacy_ioport(unsigned long base_port);
+
+
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+#define xlate_dev_mem_ptr(p) __va(p)
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p) p
+
+#endif /* __KERNEL__ */
+
+#endif /* CONFIG_PPC64 */
+#endif /* _ASM_POWERPC_IO_H */
diff --git a/include/asm-powerpc/mmu.h b/include/asm-powerpc/mmu.h
new file mode 100644
index 00000000000..c1b4bbabbe9
--- /dev/null
+++ b/include/asm-powerpc/mmu.h
@@ -0,0 +1,399 @@
+#ifndef _ASM_POWERPC_MMU_H_
+#define _ASM_POWERPC_MMU_H_
+
+#ifndef CONFIG_PPC64
+#include <asm-ppc/mmu.h>
+#else
+
+/*
+ * PowerPC memory management structures
+ *
+ * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com>
+ * PPC64 rework.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/asm-compat.h>
+#include <asm/page.h>
+
+/*
+ * Segment table
+ */
+
+#define STE_ESID_V 0x80
+#define STE_ESID_KS 0x20
+#define STE_ESID_KP 0x10
+#define STE_ESID_N 0x08
+
+#define STE_VSID_SHIFT 12
+
+/* Location of cpu0's segment table */
+#define STAB0_PAGE 0x6
+#define STAB0_PHYS_ADDR (STAB0_PAGE<<12)
+
+#ifndef __ASSEMBLY__
+extern char initial_stab[];
+#endif /* ! __ASSEMBLY */
+
+/*
+ * SLB
+ */
+
+#define SLB_NUM_BOLTED 3
+#define SLB_CACHE_ENTRIES 8
+
+/* Bits in the SLB ESID word */
+#define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */
+
+/* Bits in the SLB VSID word */
+#define SLB_VSID_SHIFT 12
+#define SLB_VSID_B ASM_CONST(0xc000000000000000)
+#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000)
+#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000)
+#define SLB_VSID_KS ASM_CONST(0x0000000000000800)
+#define SLB_VSID_KP ASM_CONST(0x0000000000000400)
+#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */
+#define SLB_VSID_L ASM_CONST(0x0000000000000100)
+#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */
+#define SLB_VSID_LP ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000)
+#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010)
+#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020)
+#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP)
+
+#define SLB_VSID_KERNEL (SLB_VSID_KP)
+#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
+
+#define SLBIE_C (0x08000000)
+
+/*
+ * Hash table
+ */
+
+#define HPTES_PER_GROUP 8
+
+#define HPTE_V_AVPN_SHIFT 7
+#define HPTE_V_AVPN ASM_CONST(0xffffffffffffff80)
+#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
+#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & HPTE_V_AVPN))
+#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
+#define HPTE_V_LOCK ASM_CONST(0x0000000000000008)
+#define HPTE_V_LARGE ASM_CONST(0x0000000000000004)
+#define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002)
+#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
+
+#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
+#define HPTE_R_TS ASM_CONST(0x4000000000000000)
+#define HPTE_R_RPN_SHIFT 12
+#define HPTE_R_RPN ASM_CONST(0x3ffffffffffff000)
+#define HPTE_R_FLAGS ASM_CONST(0x00000000000003ff)
+#define HPTE_R_PP ASM_CONST(0x0000000000000003)
+#define HPTE_R_N ASM_CONST(0x0000000000000004)
+
+/* Values for PP (assumes Ks=0, Kp=1) */
+/* pp0 will always be 0 for linux */
+#define PP_RWXX 0 /* Supervisor read/write, User none */
+#define PP_RWRX 1 /* Supervisor read/write, User read */
+#define PP_RWRW 2 /* Supervisor read/write, User read/write */
+#define PP_RXRX 3 /* Supervisor read, User read */
+
+#ifndef __ASSEMBLY__
+
+typedef struct {
+ unsigned long v;
+ unsigned long r;
+} hpte_t;
+
+extern hpte_t *htab_address;
+extern unsigned long htab_hash_mask;
+
+/*
+ * Page size definition
+ *
+ * shift : is the "PAGE_SHIFT" value for that page size
+ * sllp : is a bit mask with the value of SLB L || LP to be or'ed
+ * directly to a slbmte "vsid" value
+ * penc : is the HPTE encoding mask for the "LP" field:
+ *
+ */
+struct mmu_psize_def
+{
+ unsigned int shift; /* number of bits */
+ unsigned int penc; /* HPTE encoding */
+ unsigned int tlbiel; /* tlbiel supported for that page size */
+ unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
+ unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
+};
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * The kernel use the constants below to index in the page sizes array.
+ * The use of fixed constants for this purpose is better for performances
+ * of the low level hash refill handlers.
+ *
+ * A non supported page size has a "shift" field set to 0
+ *
+ * Any new page size being implemented can get a new entry in here. Whether
+ * the kernel will use it or not is a different matter though. The actual page
+ * size used by hugetlbfs is not defined here and may be made variable
+ */
+
+#define MMU_PAGE_4K 0 /* 4K */
+#define MMU_PAGE_64K 1 /* 64K */
+#define MMU_PAGE_64K_AP 2 /* 64K Admixed (in a 4K segment) */
+#define MMU_PAGE_1M 3 /* 1M */
+#define MMU_PAGE_16M 4 /* 16M */
+#define MMU_PAGE_16G 5 /* 16G */
+#define MMU_PAGE_COUNT 6
+
+#ifndef __ASSEMBLY__
+
+/*
+ * The current system page sizes
+ */
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+extern int mmu_linear_psize;
+extern int mmu_virtual_psize;
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * The page size index of the huge pages for use by hugetlbfs
+ */
+extern int mmu_huge_psize;
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+/*
+ * This function sets the AVPN and L fields of the HPTE appropriately
+ * for the page size
+ */
+static inline unsigned long hpte_encode_v(unsigned long va, int psize)
+{
+ unsigned long v =
+ v = (va >> 23) & ~(mmu_psize_defs[psize].avpnm);
+ v <<= HPTE_V_AVPN_SHIFT;
+ if (psize != MMU_PAGE_4K)
+ v |= HPTE_V_LARGE;
+ return v;
+}
+
+/*
+ * This function sets the ARPN, and LP fields of the HPTE appropriately
+ * for the page size. We assume the pa is already "clean" that is properly
+ * aligned for the requested page size
+ */
+static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
+{
+ unsigned long r;
+
+ /* A 4K page needs no special encoding */
+ if (psize == MMU_PAGE_4K)
+ return pa & HPTE_R_RPN;
+ else {
+ unsigned int penc = mmu_psize_defs[psize].penc;
+ unsigned int shift = mmu_psize_defs[psize].shift;
+ return (pa & ~((1ul << shift) - 1)) | (penc << 12);
+ }
+ return r;
+}
+
+/*
+ * This hashes a virtual address for a 256Mb segment only for now
+ */
+
+static inline unsigned long hpt_hash(unsigned long va, unsigned int shift)
+{
+ return ((va >> 28) & 0x7fffffffffUL) ^ ((va & 0x0fffffffUL) >> shift);
+}
+
+extern int __hash_page_4K(unsigned long ea, unsigned long access,
+ unsigned long vsid, pte_t *ptep, unsigned long trap,
+ unsigned int local);
+extern int __hash_page_64K(unsigned long ea, unsigned long access,
+ unsigned long vsid, pte_t *ptep, unsigned long trap,
+ unsigned int local);
+struct mm_struct;
+extern int hash_huge_page(struct mm_struct *mm, unsigned long access,
+ unsigned long ea, unsigned long vsid, int local);
+
+extern void htab_finish_init(void);
+extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long mode,
+ int psize);
+
+extern void htab_initialize(void);
+extern void htab_initialize_secondary(void);
+extern void hpte_init_native(void);
+extern void hpte_init_lpar(void);
+extern void hpte_init_iSeries(void);
+extern void mm_init_ppc64(void);
+
+extern long pSeries_lpar_hpte_insert(unsigned long hpte_group,
+ unsigned long va, unsigned long prpn,
+ unsigned long rflags,
+ unsigned long vflags, int psize);
+
+extern long native_hpte_insert(unsigned long hpte_group,
+ unsigned long va, unsigned long prpn,
+ unsigned long rflags,
+ unsigned long vflags, int psize);
+
+extern long iSeries_hpte_insert(unsigned long hpte_group,
+ unsigned long va, unsigned long prpn,
+ unsigned long rflags,
+ unsigned long vflags, int psize);
+
+extern void stabs_alloc(void);
+extern void slb_initialize(void);
+extern void stab_initialize(unsigned long stab);
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * VSID allocation
+ *
+ * We first generate a 36-bit "proto-VSID". For kernel addresses this
+ * is equal to the ESID, for user addresses it is:
+ * (context << 15) | (esid & 0x7fff)
+ *
+ * The two forms are distinguishable because the top bit is 0 for user
+ * addresses, whereas the top two bits are 1 for kernel addresses.
+ * Proto-VSIDs with the top two bits equal to 0b10 are reserved for
+ * now.
+ *
+ * The proto-VSIDs are then scrambled into real VSIDs with the
+ * multiplicative hash:
+ *
+ * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
+ * where VSID_MULTIPLIER = 268435399 = 0xFFFFFC7
+ * VSID_MODULUS = 2^36-1 = 0xFFFFFFFFF
+ *
+ * This scramble is only well defined for proto-VSIDs below
+ * 0xFFFFFFFFF, so both proto-VSID and actual VSID 0xFFFFFFFFF are
+ * reserved. VSID_MULTIPLIER is prime, so in particular it is
+ * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
+ * Because the modulus is 2^n-1 we can compute it efficiently without
+ * a divide or extra multiply (see below).
+ *
+ * This scheme has several advantages over older methods:
+ *
+ * - We have VSIDs allocated for every kernel address
+ * (i.e. everything above 0xC000000000000000), except the very top
+ * segment, which simplifies several things.
+ *
+ * - We allow for 15 significant bits of ESID and 20 bits of
+ * context for user addresses. i.e. 8T (43 bits) of address space for
+ * up to 1M contexts (although the page table structure and context
+ * allocation will need changes to take advantage of this).
+ *
+ * - The scramble function gives robust scattering in the hash
+ * table (at least based on some initial results). The previous
+ * method was more susceptible to pathological cases giving excessive
+ * hash collisions.
+ */
+/*
+ * WARNING - If you change these you must make sure the asm
+ * implementations in slb_allocate (slb_low.S), do_stab_bolted
+ * (head.S) and ASM_VSID_SCRAMBLE (below) are changed accordingly.
+ *
+ * You'll also need to change the precomputed VSID values in head.S
+ * which are used by the iSeries firmware.
+ */
+
+#define VSID_MULTIPLIER ASM_CONST(200730139) /* 28-bit prime */
+#define VSID_BITS 36
+#define VSID_MODULUS ((1UL<<VSID_BITS)-1)
+
+#define CONTEXT_BITS 19
+#define USER_ESID_BITS 16
+
+#define USER_VSID_RANGE (1UL << (USER_ESID_BITS + SID_SHIFT))
+
+/*
+ * This macro generates asm code to compute the VSID scramble
+ * function. Used in slb_allocate() and do_stab_bolted. The function
+ * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ * rt = register continaing the proto-VSID and into which the
+ * VSID will be stored
+ * rx = scratch register (clobbered)
+ *
+ * - rt and rx must be different registers
+ * - The answer will end up in the low 36 bits of rt. The higher
+ * bits may contain other garbage, so you may need to mask the
+ * result.
+ */
+#define ASM_VSID_SCRAMBLE(rt, rx) \
+ lis rx,VSID_MULTIPLIER@h; \
+ ori rx,rx,VSID_MULTIPLIER@l; \
+ mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
+ \
+ srdi rx,rt,VSID_BITS; \
+ clrldi rt,rt,(64-VSID_BITS); \
+ add rt,rt,rx; /* add high and low bits */ \
+ /* Now, r3 == VSID (mod 2^36-1), and lies between 0 and \
+ * 2^36-1+2^28-1. That in particular means that if r3 >= \
+ * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \
+ * the bit clear, r3 already has the answer we want, if it \
+ * doesn't, the answer is the low 36 bits of r3+1. So in all \
+ * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
+ addi rx,rt,1; \
+ srdi rx,rx,VSID_BITS; /* extract 2^36 bit */ \
+ add rt,rt,rx
+
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long mm_context_id_t;
+
+typedef struct {
+ mm_context_id_t id;
+#ifdef CONFIG_HUGETLB_PAGE
+ u16 low_htlb_areas, high_htlb_areas;
+#endif
+} mm_context_t;
+
+
+static inline unsigned long vsid_scramble(unsigned long protovsid)
+{
+#if 0
+ /* The code below is equivalent to this function for arguments
+ * < 2^VSID_BITS, which is all this should ever be called
+ * with. However gcc is not clever enough to compute the
+ * modulus (2^n-1) without a second multiply. */
+ return ((protovsid * VSID_MULTIPLIER) % VSID_MODULUS);
+#else /* 1 */
+ unsigned long x;
+
+ x = protovsid * VSID_MULTIPLIER;
+ x = (x >> VSID_BITS) + (x & VSID_MODULUS);
+ return (x + ((x+1) >> VSID_BITS)) & VSID_MODULUS;
+#endif /* 1 */
+}
+
+/* This is only valid for addresses >= KERNELBASE */
+static inline unsigned long get_kernel_vsid(unsigned long ea)
+{
+ return vsid_scramble(ea >> SID_SHIFT);
+}
+
+/* This is only valid for user addresses (which are below 2^41) */
+static inline unsigned long get_vsid(unsigned long context, unsigned long ea)
+{
+ return vsid_scramble((context << USER_ESID_BITS)
+ | (ea >> SID_SHIFT));
+}
+
+#define VSID_SCRAMBLE(pvsid) (((pvsid) * VSID_MULTIPLIER) % VSID_MODULUS)
+#define KERNEL_VSID(ea) VSID_SCRAMBLE(GET_ESID(ea))
+
+#endif /* __ASSEMBLY */
+
+#endif /* CONFIG_PPC64 */
+#endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/include/asm-powerpc/mmu_context.h b/include/asm-powerpc/mmu_context.h
new file mode 100644
index 00000000000..ea6798c7d5f
--- /dev/null
+++ b/include/asm-powerpc/mmu_context.h
@@ -0,0 +1,89 @@
+#ifndef __ASM_POWERPC_MMU_CONTEXT_H
+#define __ASM_POWERPC_MMU_CONTEXT_H
+
+#ifndef CONFIG_PPC64
+#include <asm-ppc/mmu_context.h>
+#else
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/mmu.h>
+#include <asm/cputable.h>
+
+/*
+ * Copyright (C) 2001 PPC 64 Team, IBM Corp
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Getting into a kernel thread, there is no valid user segment, mark
+ * paca->pgdir NULL so that SLB miss on user addresses will fault
+ */
+static inline void enter_lazy_tlb(struct mm_struct *mm,
+ struct task_struct *tsk)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ get_paca()->pgdir = NULL;
+#endif /* CONFIG_PPC_64K_PAGES */
+}
+
+#define NO_CONTEXT 0
+#define MAX_CONTEXT (0x100000-1)
+
+extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+extern void destroy_context(struct mm_struct *mm);
+
+extern void switch_stab(struct task_struct *tsk, struct mm_struct *mm);
+extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
+
+/*
+ * switch_mm is the entry point called from the architecture independent
+ * code in kernel/sched.c
+ */
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ if (!cpu_isset(smp_processor_id(), next->cpu_vm_mask))
+ cpu_set(smp_processor_id(), next->cpu_vm_mask);
+
+ /* No need to flush userspace segments if the mm doesnt change */
+#ifdef CONFIG_PPC_64K_PAGES
+ if (prev == next && get_paca()->pgdir == next->pgd)
+ return;
+#else
+ if (prev == next)
+ return;
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ asm volatile ("dssall");
+#endif /* CONFIG_ALTIVEC */
+
+ if (cpu_has_feature(CPU_FTR_SLB))
+ switch_slb(tsk, next);
+ else
+ switch_stab(tsk, next);
+}
+
+#define deactivate_mm(tsk,mm) do { } while (0)
+
+/*
+ * After we have set current->mm to a new value, this activates
+ * the context for the new mm so we see the new mappings.
+ */
+static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ switch_mm(prev, next, current);
+ local_irq_restore(flags);
+}
+
+#endif /* CONFIG_PPC64 */
+#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/include/asm-powerpc/mmzone.h b/include/asm-powerpc/mmzone.h
new file mode 100644
index 00000000000..54958d6cae0
--- /dev/null
+++ b/include/asm-powerpc/mmzone.h
@@ -0,0 +1,50 @@
+/*
+ * Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99
+ *
+ * PowerPC64 port:
+ * Copyright (C) 2002 Anton Blanchard, IBM Corp.
+ */
+#ifndef _ASM_MMZONE_H_
+#define _ASM_MMZONE_H_
+
+#include <linux/config.h>
+
+/*
+ * generic non-linear memory support:
+ *
+ * 1) we will not split memory into more chunks than will fit into the
+ * flags field of the struct page
+ */
+
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+
+extern struct pglist_data *node_data[];
+/*
+ * Return a pointer to the node data for node n.
+ */
+#define NODE_DATA(nid) (node_data[nid])
+
+/*
+ * Following are specific to this numa platform.
+ */
+
+extern int numa_cpu_lookup_table[];
+extern cpumask_t numa_cpumask_lookup_table[];
+#ifdef CONFIG_MEMORY_HOTPLUG
+extern unsigned long max_pfn;
+#endif
+
+/*
+ * Following are macros that each numa implmentation must define.
+ */
+
+#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
+#define node_end_pfn(nid) (NODE_DATA(nid)->node_end_pfn)
+
+#endif /* CONFIG_NEED_MULTIPLE_NODES */
+
+#ifdef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+extern int __init early_pfn_to_nid(unsigned long pfn);
+#endif
+
+#endif /* _ASM_MMZONE_H_ */
diff --git a/include/asm-powerpc/pci-bridge.h b/include/asm-powerpc/pci-bridge.h
new file mode 100644
index 00000000000..223ec7bd81d
--- /dev/null
+++ b/include/asm-powerpc/pci-bridge.h
@@ -0,0 +1,153 @@
+#ifndef _ASM_POWERPC_PCI_BRIDGE_H
+#define _ASM_POWERPC_PCI_BRIDGE_H
+
+#ifndef CONFIG_PPC64
+#include <asm-ppc/pci-bridge.h>
+#else
+
+#include <linux/pci.h>
+#include <linux/list.h>
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Structure of a PCI controller (host bridge)
+ */
+struct pci_controller {
+ struct pci_bus *bus;
+ char is_dynamic;
+ void *arch_data;
+ struct list_head list_node;
+
+ int first_busno;
+ int last_busno;
+
+ void __iomem *io_base_virt;
+ unsigned long io_base_phys;
+
+ /* Some machines have a non 1:1 mapping of
+ * the PCI memory space in the CPU bus space
+ */
+ unsigned long pci_mem_offset;
+ unsigned long pci_io_size;
+
+ struct pci_ops *ops;
+ volatile unsigned int __iomem *cfg_addr;
+ volatile void __iomem *cfg_data;
+
+ /* Currently, we limit ourselves to 1 IO range and 3 mem
+ * ranges since the common pci_bus structure can't handle more
+ */
+ struct resource io_resource;
+ struct resource mem_resources[3];
+ int global_number;
+ int local_number;
+ unsigned long buid;
+ unsigned long dma_window_base_cur;
+ unsigned long dma_window_size;
+};
+
+/*
+ * PCI stuff, for nodes representing PCI devices, pointed to
+ * by device_node->data.
+ */
+struct pci_controller;
+struct iommu_table;
+
+struct pci_dn {
+ int busno; /* for pci devices */
+ int bussubno; /* for pci devices */
+ int devfn; /* for pci devices */
+
+#ifdef CONFIG_PPC_PSERIES
+ int eeh_mode; /* See eeh.h for possible EEH_MODEs */
+ int eeh_config_addr;
+ int eeh_check_count; /* # times driver ignored error */
+ int eeh_freeze_count; /* # times this device froze up. */
+ int eeh_is_bridge; /* device is pci-to-pci bridge */
+#endif
+ int pci_ext_config_space; /* for pci devices */
+ struct pci_controller *phb; /* for pci devices */
+ struct iommu_table *iommu_table; /* for phb's or bridges */
+ struct pci_dev *pcidev; /* back-pointer to the pci device */
+ struct device_node *node; /* back-pointer to the device_node */
+#ifdef CONFIG_PPC_ISERIES
+ struct list_head Device_List;
+ int Irq; /* Assigned IRQ */
+ int Flags; /* Possible flags(disable/bist)*/
+ u8 LogicalSlot; /* Hv Slot Index for Tces */
+#endif
+ u32 config_space[16]; /* saved PCI config space */
+};
+
+/* Get the pointer to a device_node's pci_dn */
+#define PCI_DN(dn) ((struct pci_dn *) (dn)->data)
+
+struct device_node *fetch_dev_dn(struct pci_dev *dev);
+
+/* Get a device_node from a pci_dev. This code must be fast except
+ * in the case where the sysdata is incorrect and needs to be fixed
+ * up (this will only happen once).
+ * In this case the sysdata will have been inherited from a PCI host
+ * bridge or a PCI-PCI bridge further up the tree, so it will point
+ * to a valid struct pci_dn, just not the one we want.
+ */
+static inline struct device_node *pci_device_to_OF_node(struct pci_dev *dev)
+{
+ struct device_node *dn = dev->sysdata;
+ struct pci_dn *pdn = dn->data;
+
+ if (pdn && pdn->devfn == dev->devfn && pdn->busno == dev->bus->number)
+ return dn; /* fast path. sysdata is good */
+ return fetch_dev_dn(dev);
+}
+
+static inline int pci_device_from_OF_node(struct device_node *np,
+ u8 *bus, u8 *devfn)
+{
+ if (!PCI_DN(np))
+ return -ENODEV;
+ *bus = PCI_DN(np)->busno;
+ *devfn = PCI_DN(np)->devfn;
+ return 0;
+}
+
+static inline struct device_node *pci_bus_to_OF_node(struct pci_bus *bus)
+{
+ if (bus->self)
+ return pci_device_to_OF_node(bus->self);
+ else
+ return bus->sysdata; /* Must be root bus (PHB) */
+}
+
+extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ struct device_node *dev, int primary);
+
+extern int pcibios_remove_root_bus(struct pci_controller *phb);
+
+extern void phbs_remap_io(void);
+
+static inline struct pci_controller *pci_bus_to_host(struct pci_bus *bus)
+{
+ struct device_node *busdn = bus->sysdata;
+
+ BUG_ON(busdn == NULL);
+ return PCI_DN(busdn)->phb;
+}
+
+extern struct pci_controller *
+pcibios_alloc_controller(struct device_node *dev);
+extern void pcibios_free_controller(struct pci_controller *phb);
+
+/* Return values for ppc_md.pci_probe_mode function */
+#define PCI_PROBE_NONE -1 /* Don't look at this bus at all */
+#define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */
+#define PCI_PROBE_DEVTREE 1 /* Instantiate from device tree */
+
+#endif /* CONFIG_PPC64 */
+#endif
diff --git a/include/asm-powerpc/pci.h b/include/asm-powerpc/pci.h
new file mode 100644
index 00000000000..d5934a076bd
--- /dev/null
+++ b/include/asm-powerpc/pci.h
@@ -0,0 +1,247 @@
+#ifndef __ASM_POWERPC_PCI_H
+#define __ASM_POWERPC_PCI_H
+#ifdef __KERNEL__
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/machdep.h>
+#include <asm/scatterlist.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+
+#include <asm-generic/pci-dma-compat.h>
+
+#define PCIBIOS_MIN_IO 0x1000
+#define PCIBIOS_MIN_MEM 0x10000000
+
+struct pci_dev;
+
+/* Values for the `which' argument to sys_pciconfig_iobase syscall. */
+#define IOBASE_BRIDGE_NUMBER 0
+#define IOBASE_MEMORY 1
+#define IOBASE_IO 2
+#define IOBASE_ISA_IO 3
+#define IOBASE_ISA_MEM 4
+
+/*
+ * Set this to 1 if you want the kernel to re-assign all PCI
+ * bus numbers
+ */
+extern int pci_assign_all_buses;
+#define pcibios_assign_all_busses() (pci_assign_all_buses)
+
+#define pcibios_scan_all_fns(a, b) 0
+
+static inline void pcibios_set_master(struct pci_dev *dev)
+{
+ /* No special bus mastering setup handling */
+}
+
+static inline void pcibios_penalize_isa_irq(int irq, int active)
+{
+ /* We don't do dynamic PCI IRQ allocation */
+}
+
+#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
+static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
+{
+ if (ppc_md.pci_get_legacy_ide_irq)
+ return ppc_md.pci_get_legacy_ide_irq(dev, channel);
+ return channel ? 15 : 14;
+}
+
+#ifdef CONFIG_PPC64
+#define HAVE_ARCH_PCI_MWI 1
+static inline int pcibios_prep_mwi(struct pci_dev *dev)
+{
+ /*
+ * We would like to avoid touching the cacheline size or MWI bit
+ * but we cant do that with the current pcibios_prep_mwi
+ * interface. pSeries firmware sets the cacheline size (which is not
+ * the cpu cacheline size in all cases) and hardware treats MWI
+ * the same as memory write. So we dont touch the cacheline size
+ * here and allow the generic code to set the MWI bit.
+ */
+ return 0;
+}
+
+extern struct dma_mapping_ops pci_dma_ops;
+
+/* For DAC DMA, we currently don't support it by default, but
+ * we let 64-bit platforms override this.
+ */
+static inline int pci_dac_dma_supported(struct pci_dev *hwdev,u64 mask)
+{
+ if (pci_dma_ops.dac_dma_supported)
+ return pci_dma_ops.dac_dma_supported(&hwdev->dev, mask);
+ return 0;
+}
+
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+ enum pci_dma_burst_strategy *strat,
+ unsigned long *strategy_parameter)
+{
+ unsigned long cacheline_size;
+ u8 byte;
+
+ pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
+ if (byte == 0)
+ cacheline_size = 1024;
+ else
+ cacheline_size = (int) byte * 4;
+
+ *strat = PCI_DMA_BURST_MULTIPLE;
+ *strategy_parameter = cacheline_size;
+}
+#endif
+
+extern int pci_domain_nr(struct pci_bus *bus);
+
+/* Decide whether to display the domain number in /proc */
+extern int pci_proc_domain(struct pci_bus *bus);
+
+#else /* 32-bit */
+
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+ enum pci_dma_burst_strategy *strat,
+ unsigned long *strategy_parameter)
+{
+ *strat = PCI_DMA_BURST_INFINITY;
+ *strategy_parameter = ~0UL;
+}
+#endif
+
+/*
+ * At present there are very few 32-bit PPC machines that can have
+ * memory above the 4GB point, and we don't support that.
+ */
+#define pci_dac_dma_supported(pci_dev, mask) (0)
+
+/* Return the index of the PCI controller for device PDEV. */
+#define pci_domain_nr(bus) ((struct pci_controller *)(bus)->sysdata)->index
+
+/* Set the name of the bus as it appears in /proc/bus/pci */
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+struct vm_area_struct;
+/* Map a range of PCI memory or I/O space for a device into user space */
+int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
+ enum pci_mmap_state mmap_state, int write_combine);
+
+/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
+#define HAVE_PCI_MMAP 1
+
+#ifdef CONFIG_PPC64
+/* pci_unmap_{single,page} is not a nop, thus... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
+ dma_addr_t ADDR_NAME;
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
+ __u32 LEN_NAME;
+#define pci_unmap_addr(PTR, ADDR_NAME) \
+ ((PTR)->ADDR_NAME)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
+ (((PTR)->ADDR_NAME) = (VAL))
+#define pci_unmap_len(PTR, LEN_NAME) \
+ ((PTR)->LEN_NAME)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
+ (((PTR)->LEN_NAME) = (VAL))
+
+/* The PCI address space does not equal the physical memory address
+ * space (we have an IOMMU). The IDE and SCSI device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS (0)
+
+#else /* 32-bit */
+
+/* The PCI address space does equal the physical memory
+ * address space (no IOMMU). The IDE and SCSI device layers use
+ * this boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS (1)
+
+/* pci_unmap_{page,single} is a nop so... */
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
+#define pci_unmap_addr(PTR, ADDR_NAME) (0)
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
+#define pci_unmap_len(PTR, LEN_NAME) (0)
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
+
+#endif /* CONFIG_PPC64 */
+
+extern void pcibios_resource_to_bus(struct pci_dev *dev,
+ struct pci_bus_region *region,
+ struct resource *res);
+
+extern void pcibios_bus_to_resource(struct pci_dev *dev,
+ struct resource *res,
+ struct pci_bus_region *region);
+
+static inline struct resource *pcibios_select_root(struct pci_dev *pdev,
+ struct resource *res)
+{
+ struct resource *root = NULL;
+
+ if (res->flags & IORESOURCE_IO)
+ root = &ioport_resource;
+ if (res->flags & IORESOURCE_MEM)
+ root = &iomem_resource;
+
+ return root;
+}
+
+extern int unmap_bus_range(struct pci_bus *bus);
+
+extern int remap_bus_range(struct pci_bus *bus);
+
+extern void pcibios_fixup_device_resources(struct pci_dev *dev,
+ struct pci_bus *bus);
+
+extern struct pci_controller *init_phb_dynamic(struct device_node *dn);
+
+extern struct pci_dev *of_create_pci_dev(struct device_node *node,
+ struct pci_bus *bus, int devfn);
+
+extern void of_scan_pci_bridge(struct device_node *node,
+ struct pci_dev *dev);
+
+extern void of_scan_bus(struct device_node *node, struct pci_bus *bus);
+
+extern int pci_read_irq_line(struct pci_dev *dev);
+
+extern void pcibios_add_platform_entries(struct pci_dev *dev);
+
+struct file;
+extern pgprot_t pci_phys_mem_access_prot(struct file *file,
+ unsigned long pfn,
+ unsigned long size,
+ pgprot_t prot);
+
+#if defined(CONFIG_PPC_MULTIPLATFORM) || defined(CONFIG_PPC32)
+#define HAVE_ARCH_PCI_RESOURCE_TO_USER
+extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
+ const struct resource *rsrc,
+ u64 *start, u64 *end);
+#endif /* CONFIG_PPC_MULTIPLATFORM || CONFIG_PPC32 */
+
+#endif /* __KERNEL__ */
+#endif /* __ASM_POWERPC_PCI_H */
diff --git a/include/asm-powerpc/pgalloc.h b/include/asm-powerpc/pgalloc.h
new file mode 100644
index 00000000000..bfc2113b363
--- /dev/null
+++ b/include/asm-powerpc/pgalloc.h
@@ -0,0 +1,156 @@
+#ifndef _ASM_POWERPC_PGALLOC_H
+#define _ASM_POWERPC_PGALLOC_H
+
+#ifndef CONFIG_PPC64
+#include <asm-ppc/pgalloc.h>
+#else
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+
+extern kmem_cache_t *pgtable_cache[];
+
+#ifdef CONFIG_PPC_64K_PAGES
+#define PTE_CACHE_NUM 0
+#define PMD_CACHE_NUM 1
+#define PGD_CACHE_NUM 2
+#else
+#define PTE_CACHE_NUM 0
+#define PMD_CACHE_NUM 1
+#define PUD_CACHE_NUM 1
+#define PGD_CACHE_NUM 0
+#endif
+
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL);
+}
+
+static inline void pgd_free(pgd_t *pgd)
+{
+ kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd);
+}
+
+#ifndef CONFIG_PPC_64K_PAGES
+
+#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM],
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pud_free(pud_t *pud)
+{
+ kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, (unsigned long)pmd);
+}
+
+#define pmd_populate(mm, pmd, pte_page) \
+ pmd_populate_kernel(mm, pmd, page_address(pte_page))
+#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
+
+
+#else /* CONFIG_PPC_64K_PAGES */
+
+#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+{
+ pmd_set(pmd, (unsigned long)pte);
+}
+
+#define pmd_populate(mm, pmd, pte_page) \
+ pmd_populate_kernel(mm, pmd, page_address(pte_page))
+
+#endif /* CONFIG_PPC_64K_PAGES */
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM],
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline void pmd_free(pmd_t *pmd)
+{
+ kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd);
+}
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
+ unsigned long address)
+{
+ return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM],
+ GFP_KERNEL|__GFP_REPEAT);
+}
+
+static inline struct page *pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ return virt_to_page(pte_alloc_one_kernel(mm, address));
+}
+
+static inline void pte_free_kernel(pte_t *pte)
+{
+ kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte);
+}
+
+static inline void pte_free(struct page *ptepage)
+{
+ pte_free_kernel(page_address(ptepage));
+}
+
+#define PGF_CACHENUM_MASK 0xf
+
+typedef struct pgtable_free {
+ unsigned long val;
+} pgtable_free_t;
+
+static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum,
+ unsigned long mask)
+{
+ BUG_ON(cachenum > PGF_CACHENUM_MASK);
+
+ return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum};
+}
+
+static inline void pgtable_free(pgtable_free_t pgf)
+{
+ void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK);
+ int cachenum = pgf.val & PGF_CACHENUM_MASK;
+
+ kmem_cache_free(pgtable_cache[cachenum], p);
+}
+
+extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf);
+
+#define __pte_free_tlb(tlb, ptepage) \
+ pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \
+ PTE_CACHE_NUM, PTE_TABLE_SIZE-1))
+#define __pmd_free_tlb(tlb, pmd) \
+ pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \
+ PMD_CACHE_NUM, PMD_TABLE_SIZE-1))
+#ifndef CONFIG_PPC_64K_PAGES
+#define __pud_free_tlb(tlb, pmd) \
+ pgtable_free_tlb(tlb, pgtable_free_cache(pud, \
+ PUD_CACHE_NUM, PUD_TABLE_SIZE-1))
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#define check_pgt_cache() do { } while (0)
+
+#endif /* CONFIG_PPC64 */
+#endif /* _ASM_POWERPC_PGALLOC_H */
diff --git a/include/asm-powerpc/pgtable-4k.h b/include/asm-powerpc/pgtable-4k.h
new file mode 100644
index 00000000000..e9590c06ad9
--- /dev/null
+++ b/include/asm-powerpc/pgtable-4k.h
@@ -0,0 +1,91 @@
+/*
+ * Entries per page directory level. The PTE level must use a 64b record
+ * for each page table entry. The PMD and PGD level use a 32b record for
+ * each entry by assuming that each entry is page aligned.
+ */
+#define PTE_INDEX_SIZE 9
+#define PMD_INDEX_SIZE 7
+#define PUD_INDEX_SIZE 7
+#define PGD_INDEX_SIZE 9
+
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+
+/* With 4k base page size, hugepage PTEs go at the PMD level */
+#define MIN_HUGEPTE_SHIFT PMD_SHIFT
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/* PTE bits */
+#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
+#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
+#define _PAGE_F_SECOND _PAGE_SECONDARY
+#define _PAGE_F_GIX _PAGE_GROUP_IX
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
+ _PAGE_SECONDARY | _PAGE_GROUP_IX)
+
+/* PAGE_MASK gives the right answer below, but only by accident */
+/* It should be preserving the high 48 bits and then specifically */
+/* preserving _PAGE_SECONDARY | _PAGE_GROUP_IX */
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | \
+ _PAGE_HPTEFLAGS)
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS 0
+
+/* shift to put page number into pte */
+#define PTE_RPN_SHIFT (17)
+
+#define __real_pte(e,p) ((real_pte_t)(e))
+#define __rpte_to_pte(r) (r)
+#define __rpte_to_hidx(r,index) (pte_val((r)) >> 12)
+
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
+ do { \
+ index = 0; \
+ shift = mmu_psize_defs[psize].shift; \
+
+#define pte_iterate_hashed_end() } while(0)
+
+/*
+ * 4-level page tables related bits
+ */
+
+#define pgd_none(pgd) (!pgd_val(pgd))
+#define pgd_bad(pgd) (pgd_val(pgd) == 0)
+#define pgd_present(pgd) (pgd_val(pgd) != 0)
+#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
+#define pgd_page(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
+
+#define pud_offset(pgdp, addr) \
+ (((pud_t *) pgd_page(*(pgdp))) + \
+ (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+
+#define pud_ERROR(e) \
+ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e))
diff --git a/include/asm-powerpc/pgtable-64k.h b/include/asm-powerpc/pgtable-64k.h
new file mode 100644
index 00000000000..154f1840ece
--- /dev/null
+++ b/include/asm-powerpc/pgtable-64k.h
@@ -0,0 +1,90 @@
+#include <asm-generic/pgtable-nopud.h>
+
+
+#define PTE_INDEX_SIZE 12
+#define PMD_INDEX_SIZE 12
+#define PUD_INDEX_SIZE 0
+#define PGD_INDEX_SIZE 4
+
+#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+/* With 4k base page size, hugepage PTEs go at the PMD level */
+#define MIN_HUGEPTE_SHIFT PAGE_SHIFT
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a third-level page table entry can map */
+#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/* Additional PTE bits (don't change without checking asm in hash_low.S) */
+#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */
+#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
+#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
+#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
+#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_HPTE_SUB |\
+ _PAGE_COMBO)
+
+/* Shift to put page number into pte.
+ *
+ * That gives us a max RPN of 32 bits, which means a max of 48 bits
+ * of addressable physical space.
+ * We could get 3 more bits here by setting PTE_RPN_SHIFT to 29 but
+ * 32 makes PTEs more readable for debugging for now :)
+ */
+#define PTE_RPN_SHIFT (32)
+#define PTE_RPN_MAX (1UL << (64 - PTE_RPN_SHIFT))
+#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
+
+/* _PAGE_CHG_MASK masks of bits that are to be preserved accross
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+ _PAGE_ACCESSED)
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0x1ff
+/* Bits to mask out from a PGD/PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0x1ff
+
+#ifndef __ASSEMBLY__
+
+/* Manipulate "rpte" values */
+#define __real_pte(e,p) ((real_pte_t) { \
+ (e), pte_val(*((p) + PTRS_PER_PTE)) })
+#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
+ (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
+#define __rpte_to_pte(r) ((r).pte)
+#define __rpte_sub_valid(rpte, index) \
+ (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
+
+
+/* Trick: we set __end to va + 64k, which happens works for
+ * a 16M page as well as we want only one iteration
+ */
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
+ do { \
+ unsigned long __end = va + PAGE_SIZE; \
+ unsigned __split = (psize == MMU_PAGE_4K || \
+ psize == MMU_PAGE_64K_AP); \
+ shift = mmu_psize_defs[psize].shift; \
+ for (index = 0; va < __end; index++, va += (1 << shift)) { \
+ if (!__split || __rpte_sub_valid(rpte, index)) do { \
+
+#define pte_iterate_hashed_end() } while(0); } } while(0)
+
+
+#endif /* __ASSEMBLY__ */
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
new file mode 100644
index 00000000000..0303f57366c
--- /dev/null
+++ b/include/asm-powerpc/pgtable.h
@@ -0,0 +1,524 @@
+#ifndef _ASM_POWERPC_PGTABLE_H
+#define _ASM_POWERPC_PGTABLE_H
+
+#ifndef CONFIG_PPC64
+#include <asm-ppc/pgtable.h>
+#else
+
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the ppc64 hashed page table.
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <asm/processor.h> /* For TASK_SIZE */
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+struct mm_struct;
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/pgtable-64k.h>
+#else
+#include <asm/pgtable-4k.h>
+#endif
+
+#define FIRST_USER_ADDRESS 0
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
+
+#if TASK_SIZE_USER64 > PGTABLE_RANGE
+#error TASK_SIZE_USER64 exceeds pagetable range
+#endif
+
+#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT))
+#error TASK_SIZE_USER64 exceeds user VSID range
+#endif
+
+/*
+ * Define the address range of the vmalloc VM area.
+ */
+#define VMALLOC_START (0xD000000000000000ul)
+#define VMALLOC_SIZE (0x80000000000UL)
+#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
+
+/*
+ * Define the address range of the imalloc VM area.
+ */
+#define PHBS_IO_BASE VMALLOC_END
+#define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */
+#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE)
+
+/*
+ * Common bits in a linux-style PTE. These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible. Additional
+ * bits may be defined in pgtable-*.h
+ */
+#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
+#define _PAGE_USER 0x0002 /* matches one of the PP bits */
+#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
+#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
+#define _PAGE_GUARDED 0x0008
+#define _PAGE_COHERENT 0x0010 /* M: enforce memory coherence (SMP systems) */
+#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
+#define _PAGE_DIRTY 0x0080 /* C: page changed */
+#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
+#define _PAGE_RW 0x0200 /* software: user write access allowed */
+#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
+#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
+
+#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_COHERENT)
+
+#define _PAGE_WRENABLE (_PAGE_RW | _PAGE_DIRTY)
+
+/* __pgprot defined in asm-powerpc/page.h */
+#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED)
+
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_USER)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RW | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_WRENABLE)
+#define PAGE_KERNEL_CI __pgprot(_PAGE_PRESENT | _PAGE_ACCESSED | \
+ _PAGE_WRENABLE | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_EXEC __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_EXEC)
+
+#define PAGE_AGP __pgprot(_PAGE_BASE | _PAGE_WRENABLE | _PAGE_NO_CACHE)
+#define HAVE_PAGE_AGP
+
+/* PTEIDX nibble */
+#define _PTEIDX_SECONDARY 0x8
+#define _PTEIDX_GROUP_IX 0x7
+
+
+/*
+ * POWER4 and newer have per page execute protection, older chips can only
+ * do this on a segment (256MB) basis.
+ *
+ * Also, write permissions imply read permissions.
+ * This is the closest we can get..
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
+ */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
+#define __P101 PAGE_READONLY_X
+#define __P110 PAGE_COPY_X
+#define __P111 PAGE_COPY_X
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
+#define __S101 PAGE_READONLY_X
+#define __S110 PAGE_SHARED_X
+#define __S111 PAGE_SHARED_X
+
+#ifndef __ASSEMBLY__
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_HUGETLB_PAGE
+
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
+#endif
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * mk_pte takes a (struct page *) as input
+ */
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+ pte_t pte;
+
+
+ pte_val(pte) = (pfn << PTE_RPN_SHIFT) | pgprot_val(pgprot);
+ return pte;
+}
+
+#define pte_modify(_pte, newprot) \
+ (__pte((pte_val(_pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)))
+
+#define pte_none(pte) ((pte_val(pte) & ~_PAGE_HPTEFLAGS) == 0)
+#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
+
+/* pte_clear moved to later in this file */
+
+#define pte_pfn(x) ((unsigned long)((pte_val(x)>>PTE_RPN_SHIFT)))
+#define pte_page(x) pfn_to_page(pte_pfn(x))
+
+#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval))
+#define pmd_none(pmd) (!pmd_val(pmd))
+#define pmd_bad(pmd) (pmd_val(pmd) == 0)
+#define pmd_present(pmd) (pmd_val(pmd) != 0)
+#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
+#define pmd_page_kernel(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
+#define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd))
+
+#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
+#define pud_none(pud) (!pud_val(pud))
+#define pud_bad(pud) ((pud_val(pud)) == 0)
+#define pud_present(pud) (pud_val(pud) != 0)
+#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
+#define pud_page(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
+
+#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
+
+/*
+ * Find an entry in a page-table-directory. We combine the address region
+ * (the high order N bits) and the pgd portion of the address.
+ */
+/* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff)
+
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
+
+#define pmd_offset(pudp,addr) \
+ (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+
+#define pte_offset_kernel(dir,addr) \
+ (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+
+#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
+#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte) do { } while(0)
+#define pte_unmap_nested(pte) do { } while(0)
+
+/* to find an entry in a kernel page-table-directory */
+/* This now only contains the vmalloc pages */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/*
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
+static inline int pte_read(pte_t pte) { return pte_val(pte) & _PAGE_USER;}
+static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW;}
+static inline int pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC;}
+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY;}
+static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED;}
+static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE;}
+
+static inline void pte_uncache(pte_t pte) { pte_val(pte) |= _PAGE_NO_CACHE; }
+static inline void pte_cache(pte_t pte) { pte_val(pte) &= ~_PAGE_NO_CACHE; }
+
+static inline pte_t pte_rdprotect(pte_t pte) {
+ pte_val(pte) &= ~_PAGE_USER; return pte; }
+static inline pte_t pte_exprotect(pte_t pte) {
+ pte_val(pte) &= ~_PAGE_EXEC; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte) {
+ pte_val(pte) &= ~(_PAGE_RW); return pte; }
+static inline pte_t pte_mkclean(pte_t pte) {
+ pte_val(pte) &= ~(_PAGE_DIRTY); return pte; }
+static inline pte_t pte_mkold(pte_t pte) {
+ pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkread(pte_t pte) {
+ pte_val(pte) |= _PAGE_USER; return pte; }
+static inline pte_t pte_mkexec(pte_t pte) {
+ pte_val(pte) |= _PAGE_USER | _PAGE_EXEC; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte) {
+ pte_val(pte) |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte) {
+ pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte) {
+ pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) {
+ return pte; }
+
+/* Atomic PTE updates */
+static inline unsigned long pte_update(pte_t *p, unsigned long clr)
+{
+ unsigned long old, tmp;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3 # pte_update\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ andc %1,%0,%4 \n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old), "=&r" (tmp), "=m" (*p)
+ : "r" (p), "r" (clr), "m" (*p), "i" (_PAGE_BUSY)
+ : "cc" );
+ return old;
+}
+
+/* PTE updating functions, this function puts the PTE in the
+ * batch, doesn't actually triggers the hash flush immediately,
+ * you need to call flush_tlb_pending() to do that.
+ * Pass -1 for "normal" size (4K or 64K)
+ */
+extern void hpte_update(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long pte, int huge);
+
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old;
+
+ if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+ return 0;
+ old = pte_update(ptep, _PAGE_ACCESSED);
+ if (old & _PAGE_HASHPTE) {
+ hpte_update(mm, addr, ptep, old, 0);
+ flush_tlb_pending();
+ }
+ return (old & _PAGE_ACCESSED) != 0;
+}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+({ \
+ int __r; \
+ __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+ __r; \
+})
+
+/*
+ * On RW/DIRTY bit transitions we can avoid flushing the hpte. For the
+ * moment we always flush but we need to fix hpte_update and test if the
+ * optimisation is worth it.
+ */
+static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old;
+
+ if ((pte_val(*ptep) & _PAGE_DIRTY) == 0)
+ return 0;
+ old = pte_update(ptep, _PAGE_DIRTY);
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr, ptep, old, 0);
+ return (old & _PAGE_DIRTY) != 0;
+}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define ptep_test_and_clear_dirty(__vma, __addr, __ptep) \
+({ \
+ int __r; \
+ __r = __ptep_test_and_clear_dirty((__vma)->vm_mm, __addr, __ptep); \
+ __r; \
+})
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ unsigned long old;
+
+ if ((pte_val(*ptep) & _PAGE_RW) == 0)
+ return;
+ old = pte_update(ptep, _PAGE_RW);
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr, ptep, old, 0);
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty. The generic routines only flush if the
+ * entry was young or dirty which is not good enough.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(__vma, __address, __ptep) \
+({ \
+ int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
+ __ptep); \
+ __young; \
+})
+
+#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
+#define ptep_clear_flush_dirty(__vma, __address, __ptep) \
+({ \
+ int __dirty = __ptep_test_and_clear_dirty((__vma)->vm_mm, __address, \
+ __ptep); \
+ flush_tlb_page(__vma, __address); \
+ __dirty; \
+})
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old = pte_update(ptep, ~0UL);
+
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr, ptep, old, 0);
+ return __pte(old);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t * ptep)
+{
+ unsigned long old = pte_update(ptep, ~0UL);
+
+ if (old & _PAGE_HASHPTE)
+ hpte_update(mm, addr, ptep, old, 0);
+}
+
+/*
+ * set_pte stores a linux PTE into the linux page table.
+ */
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ if (pte_present(*ptep)) {
+ pte_clear(mm, addr, ptep);
+ flush_tlb_pending();
+ }
+ pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
+
+#ifdef CONFIG_PPC_64K_PAGES
+ if (mmu_virtual_psize != MMU_PAGE_64K)
+ pte = __pte(pte_val(pte) | _PAGE_COMBO);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ *ptep = pte;
+}
+
+/* Set the dirty and/or accessed bits atomically in a linux PTE, this
+ * function doesn't need to flush the hash entry
+ */
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry, int dirty)
+{
+ unsigned long bits = pte_val(entry) &
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+ unsigned long old, tmp;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%4\n\
+ andi. %1,%0,%6\n\
+ bne- 1b \n\
+ or %0,%3,%0\n\
+ stdcx. %0,0,%4\n\
+ bne- 1b"
+ :"=&r" (old), "=&r" (tmp), "=m" (*ptep)
+ :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+ :"cc");
+}
+#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
+ do { \
+ __ptep_set_access_flags(__ptep, __entry, __dirty); \
+ flush_tlb_page_nohash(__vma, __address); \
+ } while(0)
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+#define pgprot_noncached(prot) (__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+extern pgd_t swapper_pg_dir[];
+
+extern void paging_init(void);
+
+#ifdef CONFIG_HUGETLB_PAGE
+#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
+ free_pgd_range(tlb, addr, end, floor, ceiling)
+#endif
+
+/*
+ * This gets called at the end of handling a page fault, when
+ * the kernel has put a new PTE into the page table for the process.
+ * We use it to put a corresponding HPTE into the hash table
+ * ahead of time, instead of waiting for the inevitable extra
+ * hash-table miss exception.
+ */
+struct vm_area_struct;
+extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t);
+
+/* Encode and de-code a swap entry */
+#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
+#define __swp_offset(entry) ((entry).val >> 8)
+#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
+#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
+#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
+#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
+#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
+
+/*
+ * kern_addr_valid is intended to indicate whether an address is a valid
+ * kernel address. Most 32-bit archs define it as always true (like this)
+ * but most 64-bit archs actually perform a test. What should we do here?
+ * The only use is in fs/ncpfs/dir.c
+ */
+#define kern_addr_valid(addr) (1)
+
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+void pgtable_cache_init(void);
+
+/*
+ * find_linux_pte returns the address of a linux pte for a given
+ * effective address and directory. If not found, it returns zero.
+ */static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea)
+{
+ pgd_t *pg;
+ pud_t *pu;
+ pmd_t *pm;
+ pte_t *pt = NULL;
+
+ pg = pgdir + pgd_index(ea);
+ if (!pgd_none(*pg)) {
+ pu = pud_offset(pg, ea);
+ if (!pud_none(*pu)) {
+ pm = pmd_offset(pu, ea);
+ if (pmd_present(*pm))
+ pt = pte_offset_kernel(pm, ea);
+ }
+ }
+ return pt;
+}
+
+#include <asm-generic/pgtable.h>
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_PPC64 */
+#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/include/asm-powerpc/ppc-pci.h b/include/asm-powerpc/ppc-pci.h
index 2e36e5a7f4f..36cdc869e58 100644
--- a/include/asm-powerpc/ppc-pci.h
+++ b/include/asm-powerpc/ppc-pci.h
@@ -48,8 +48,6 @@ extern void pSeries_final_fixup(void);
extern void pSeries_irq_bus_setup(struct pci_bus *bus);
extern unsigned long pci_probe_only;
-extern unsigned long pci_assign_all_buses;
-extern int pci_read_irq_line(struct pci_dev *pci_dev);
/* ---- EEH internal-use-only related routines ---- */
#ifdef CONFIG_EEH
diff --git a/include/asm-powerpc/spinlock.h b/include/asm-powerpc/spinlock.h
new file mode 100644
index 00000000000..caa4b14e0e9
--- /dev/null
+++ b/include/asm-powerpc/spinlock.h
@@ -0,0 +1,269 @@
+#ifndef __ASM_SPINLOCK_H
+#define __ASM_SPINLOCK_H
+
+/*
+ * Simple spin lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ * Rework to support virtual processors
+ *
+ * Type of int is used as a full 64b word is not necessary.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * (the type definitions are in asm/spinlock_types.h)
+ */
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/hvcall.h>
+#include <asm/iseries/hv_call.h>
+#endif
+#include <asm/asm-compat.h>
+#include <asm/synch.h>
+
+#define __raw_spin_is_locked(x) ((x)->slock != 0)
+
+#ifdef CONFIG_PPC64
+/* use 0x800000yy when locked, where yy == CPU number */
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
+#else
+#define LOCK_TOKEN 1
+#endif
+
+/*
+ * This returns the old value in the lock, so we succeeded
+ * in getting the lock if the return value is 0.
+ */
+static __inline__ unsigned long __spin_trylock(raw_spinlock_t *lock)
+{
+ unsigned long tmp, token;
+
+ token = LOCK_TOKEN;
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 # __spin_trylock\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n\
+ stwcx. %1,0,%2\n\
+ bne- 1b\n\
+ isync\n\
+2:" : "=&r" (tmp)
+ : "r" (token), "r" (&lock->slock)
+ : "cr0", "memory");
+
+ return tmp;
+}
+
+static int __inline__ __raw_spin_trylock(raw_spinlock_t *lock)
+{
+ return __spin_trylock(lock) == 0;
+}
+
+/*
+ * On a system with shared processors (that is, where a physical
+ * processor is multiplexed between several virtual processors),
+ * there is no point spinning on a lock if the holder of the lock
+ * isn't currently scheduled on a physical processor. Instead
+ * we detect this situation and ask the hypervisor to give the
+ * rest of our timeslice to the lock holder.
+ *
+ * So that we can tell which virtual processor is holding a lock,
+ * we put 0x80000000 | smp_processor_id() in the lock when it is
+ * held. Conveniently, we have a word in the paca that holds this
+ * value.
+ */
+
+#if defined(CONFIG_PPC_SPLPAR) || defined(CONFIG_PPC_ISERIES)
+/* We only yield to the hypervisor if we are in shared processor mode */
+#define SHARED_PROCESSOR (get_paca()->lppaca.shared_proc)
+extern void __spin_yield(raw_spinlock_t *lock);
+extern void __rw_yield(raw_rwlock_t *lock);
+#else /* SPLPAR || ISERIES */
+#define __spin_yield(x) barrier()
+#define __rw_yield(x) barrier()
+#define SHARED_PROCESSOR 0
+#endif
+
+static void __inline__ __raw_spin_lock(raw_spinlock_t *lock)
+{
+ while (1) {
+ if (likely(__spin_trylock(lock) == 0))
+ break;
+ do {
+ HMT_low();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ } while (unlikely(lock->slock != 0));
+ HMT_medium();
+ }
+}
+
+static void __inline__ __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
+{
+ unsigned long flags_dis;
+
+ while (1) {
+ if (likely(__spin_trylock(lock) == 0))
+ break;
+ local_save_flags(flags_dis);
+ local_irq_restore(flags);
+ do {
+ HMT_low();
+ if (SHARED_PROCESSOR)
+ __spin_yield(lock);
+ } while (unlikely(lock->slock != 0));
+ HMT_medium();
+ local_irq_restore(flags_dis);
+ }
+}
+
+static __inline__ void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+ __asm__ __volatile__(SYNC_ON_SMP" # __raw_spin_unlock"
+ : : :"memory");
+ lock->slock = 0;
+}
+
+#ifdef CONFIG_PPC64
+extern void __raw_spin_unlock_wait(raw_spinlock_t *lock);
+#else
+#define __raw_spin_unlock_wait(lock) \
+ do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0)
+#endif
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+#define __raw_read_can_lock(rw) ((rw)->lock >= 0)
+#define __raw_write_can_lock(rw) (!(rw)->lock)
+
+#ifdef CONFIG_PPC64
+#define __DO_SIGN_EXTEND "extsw %0,%0\n"
+#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
+#else
+#define __DO_SIGN_EXTEND
+#define WRLOCK_TOKEN (-1)
+#endif
+
+/*
+ * This returns the old value in the lock + 1,
+ * so we got a read lock if the return value is > 0.
+ */
+static long __inline__ __read_trylock(raw_rwlock_t *rw)
+{
+ long tmp;
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1 # read_trylock\n"
+ __DO_SIGN_EXTEND
+" addic. %0,%0,1\n\
+ ble- 2f\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1\n\
+ bne- 1b\n\
+ isync\n\
+2:" : "=&r" (tmp)
+ : "r" (&rw->lock)
+ : "cr0", "xer", "memory");
+
+ return tmp;
+}
+
+/*
+ * This returns the old value in the lock,
+ * so we got the write lock if the return value is 0.
+ */
+static __inline__ long __write_trylock(raw_rwlock_t *rw)
+{
+ long tmp, token;
+
+ token = WRLOCK_TOKEN;
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2 # write_trylock\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %1,0,%2\n\
+ bne- 1b\n\
+ isync\n\
+2:" : "=&r" (tmp)
+ : "r" (token), "r" (&rw->lock)
+ : "cr0", "memory");
+
+ return tmp;
+}
+
+static void __inline__ __raw_read_lock(raw_rwlock_t *rw)
+{
+ while (1) {
+ if (likely(__read_trylock(rw) > 0))
+ break;
+ do {
+ HMT_low();
+ if (SHARED_PROCESSOR)
+ __rw_yield(rw);
+ } while (unlikely(rw->lock < 0));
+ HMT_medium();
+ }
+}
+
+static void __inline__ __raw_write_lock(raw_rwlock_t *rw)
+{
+ while (1) {
+ if (likely(__write_trylock(rw) == 0))
+ break;
+ do {
+ HMT_low();
+ if (SHARED_PROCESSOR)
+ __rw_yield(rw);
+ } while (unlikely(rw->lock != 0));
+ HMT_medium();
+ }
+}
+
+static int __inline__ __raw_read_trylock(raw_rwlock_t *rw)
+{
+ return __read_trylock(rw) > 0;
+}
+
+static int __inline__ __raw_write_trylock(raw_rwlock_t *rw)
+{
+ return __write_trylock(rw) == 0;
+}
+
+static void __inline__ __raw_read_unlock(raw_rwlock_t *rw)
+{
+ long tmp;
+
+ __asm__ __volatile__(
+ "eieio # read_unlock\n\
+1: lwarx %0,0,%1\n\
+ addic %0,%0,-1\n"
+ PPC405_ERR77(0,%1)
+" stwcx. %0,0,%1\n\
+ bne- 1b"
+ : "=&r"(tmp)
+ : "r"(&rw->lock)
+ : "cr0", "memory");
+}
+
+static __inline__ void __raw_write_unlock(raw_rwlock_t *rw)
+{
+ __asm__ __volatile__(SYNC_ON_SMP" # write_unlock"
+ : : :"memory");
+ rw->lock = 0;
+}
+
+#endif /* __ASM_SPINLOCK_H */