aboutsummaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bootmem.h100
-rw-r--r--include/linux/console.h5
-rw-r--r--include/linux/cpu.h8
-rw-r--r--include/linux/elf-em.h1
-rw-r--r--include/linux/elfnote.h90
-rw-r--r--include/linux/gfp.h36
-rw-r--r--include/linux/highmem.h5
-rw-r--r--include/linux/irq.h6
-rw-r--r--include/linux/kernel.h1
-rw-r--r--include/linux/mempolicy.h4
-rw-r--r--include/linux/mm.h128
-rw-r--r--include/linux/mmzone.h120
-rw-r--r--include/linux/page-flags.h35
-rw-r--r--include/linux/pagemap.h15
-rw-r--r--include/linux/percpu.h89
-rw-r--r--include/linux/resume-trace.h24
-rw-r--r--include/linux/rmap.h14
-rw-r--r--include/linux/selinux.h29
-rw-r--r--include/linux/slab.h29
-rw-r--r--include/linux/smp.h3
-rw-r--r--include/linux/suspend.h32
-rw-r--r--include/linux/swap.h12
-rw-r--r--include/linux/sysctl.h1
-rw-r--r--include/linux/vmalloc.h2
-rw-r--r--include/linux/vmstat.h18
-rw-r--r--include/linux/writeback.h1
26 files changed, 561 insertions, 247 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index e319c649e4f..31e9abb6d97 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -4,11 +4,8 @@
#ifndef _LINUX_BOOTMEM_H
#define _LINUX_BOOTMEM_H
-#include <asm/pgtable.h>
-#include <asm/dma.h>
-#include <linux/cache.h>
-#include <linux/init.h>
#include <linux/mmzone.h>
+#include <asm/dma.h>
/*
* simple boot-time physical memory area allocator.
@@ -41,45 +38,64 @@ typedef struct bootmem_data {
struct list_head list;
} bootmem_data_t;
-extern unsigned long __init bootmem_bootmap_pages (unsigned long);
-extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
-extern void __init free_bootmem (unsigned long addr, unsigned long size);
-extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal);
-extern void * __init __alloc_bootmem_low(unsigned long size,
- unsigned long align,
- unsigned long goal);
-extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat,
- unsigned long size,
- unsigned long align,
- unsigned long goal);
-extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata,
- unsigned long size, unsigned long align, unsigned long goal,
- unsigned long limit);
+extern unsigned long bootmem_bootmap_pages(unsigned long);
+extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
+extern void free_bootmem(unsigned long addr, unsigned long size);
+extern void *__alloc_bootmem(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_nopanic(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_low(unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern void *__alloc_bootmem_core(struct bootmem_data *bdata,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal,
+ unsigned long limit);
+
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
-extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void reserve_bootmem(unsigned long addr, unsigned long size);
#define alloc_bootmem(x) \
- __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low(x) \
- __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0)
+ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_pages(x) \
- __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages(x) \
- __alloc_bootmem_low((x), PAGE_SIZE, 0)
+ __alloc_bootmem_low(x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
-extern unsigned long __init free_all_bootmem (void);
-extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
-extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
-extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
-extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
-extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
+
+extern unsigned long free_all_bootmem(void);
+extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
+extern void *__alloc_bootmem_node(pg_data_t *pgdat,
+ unsigned long size,
+ unsigned long align,
+ unsigned long goal);
+extern unsigned long init_bootmem_node(pg_data_t *pgdat,
+ unsigned long freepfn,
+ unsigned long startpfn,
+ unsigned long endpfn);
+extern void reserve_bootmem_node(pg_data_t *pgdat,
+ unsigned long physaddr,
+ unsigned long size);
+extern void free_bootmem_node(pg_data_t *pgdat,
+ unsigned long addr,
+ unsigned long size);
+
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_pages_node(pgdat, x) \
- __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
+ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
#define alloc_bootmem_low_pages_node(pgdat, x) \
- __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0)
+ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
@@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size)
{
return NULL;
}
-#endif
+#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
extern unsigned long __meminitdata nr_kernel_pages;
extern unsigned long nr_all_pages;
-extern void *__init alloc_large_system_hash(const char *tablename,
- unsigned long bucketsize,
- unsigned long numentries,
- int scale,
- int flags,
- unsigned int *_hash_shift,
- unsigned int *_hash_mask,
- unsigned long limit);
+extern void *alloc_large_system_hash(const char *tablename,
+ unsigned long bucketsize,
+ unsigned long numentries,
+ int scale,
+ int flags,
+ unsigned int *_hash_shift,
+ unsigned int *_hash_mask,
+ unsigned long limit);
#define HASH_HIGHMEM 0x00000001 /* Consider highmem? */
#define HASH_EARLY 0x00000002 /* Allocating during early boot? */
diff --git a/include/linux/console.h b/include/linux/console.h
index 3bdf2155e56..76a1807726e 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -120,9 +120,14 @@ extern void console_stop(struct console *);
extern void console_start(struct console *);
extern int is_console_locked(void);
+#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND
/* Suspend and resume console messages over PM events */
extern void suspend_console(void);
extern void resume_console(void);
+#else
+static inline void suspend_console(void) {}
+static inline void resume_console(void) {}
+#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */
/* Some debug stub to catch some of the obvious races in the VT code */
#if 1
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 8fb344a9abd..3fef7d67aed 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu);
static inline int cpu_is_offline(int cpu) { return 0; }
#endif
+#ifdef CONFIG_SUSPEND_SMP
+extern int disable_nonboot_cpus(void);
+extern void enable_nonboot_cpus(void);
+#else
+static inline int disable_nonboot_cpus(void) { return 0; }
+static inline void enable_nonboot_cpus(void) {}
+#endif
+
#endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h
index 6a5796c81c9..666e0a5f00f 100644
--- a/include/linux/elf-em.h
+++ b/include/linux/elf-em.h
@@ -31,6 +31,7 @@
#define EM_M32R 88 /* Renesas M32R */
#define EM_H8_300 46 /* Renesas H8/300,300H,H8S */
#define EM_FRV 0x5441 /* Fujitsu FR-V */
+#define EM_AVR32 0x18ad /* Atmel AVR32 */
/*
* This is an interim value that we will use until the committee comes
diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h
new file mode 100644
index 00000000000..67396db141e
--- /dev/null
+++ b/include/linux/elfnote.h
@@ -0,0 +1,90 @@
+#ifndef _LINUX_ELFNOTE_H
+#define _LINUX_ELFNOTE_H
+/*
+ * Helper macros to generate ELF Note structures, which are put into a
+ * PT_NOTE segment of the final vmlinux image. These are useful for
+ * including name-value pairs of metadata into the kernel binary (or
+ * modules?) for use by external programs.
+ *
+ * Each note has three parts: a name, a type and a desc. The name is
+ * intended to distinguish the note's originator, so it would be a
+ * company, project, subsystem, etc; it must be in a suitable form for
+ * use in a section name. The type is an integer which is used to tag
+ * the data, and is considered to be within the "name" namespace (so
+ * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The
+ * "desc" field is the actual data. There are no constraints on the
+ * desc field's contents, though typically they're fairly small.
+ *
+ * All notes from a given NAME are put into a section named
+ * .note.NAME. When the kernel image is finally linked, all the notes
+ * are packed into a single .notes section, which is mapped into the
+ * PT_NOTE segment. Because notes for a given name are grouped into
+ * the same section, they'll all be adjacent in the output file.
+ *
+ * This file defines macros for both C and assembler use. Their
+ * syntax is slightly different, but they're semantically similar.
+ *
+ * See the ELF specification for more detail about ELF notes.
+ */
+
+#ifdef __ASSEMBLER__
+/*
+ * Generate a structure with the same shape as Elf{32,64}_Nhdr (which
+ * turn out to be the same size and shape), followed by the name and
+ * desc data with appropriate padding. 'name' is a bare token that is
+ * stringified (#name) for the note name; 'desctype' is the assembler
+ * pseudo op for the data (.asciz) and 'descdata' the data itself.
+ *
+ * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two")
+ * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef)
+ */
+#define ELFNOTE(name, type, desctype, descdata) \
+.pushsection .note.name ; \
+ .align 4 ; \
+ .long 2f - 1f /* namesz */ ; \
+ .long 4f - 3f /* descsz */ ; \
+ .long type ; \
+1:.asciz #name ; \
+2:.align 4 ; \
+3:desctype descdata ; \
+4:.align 4 ; \
+.popsection ;
+#else /* !__ASSEMBLER__ */
+#include <linux/elf.h>
+/*
+ * Use an anonymous structure which matches the shape of
+ * Elf{32,64}_Nhdr, but includes the name and desc data. The size and
+ * type of name and desc depend on the macro arguments. "name" must
+ * be a literal string, and "desc" must be passed by value. You may
+ * only define one note per line, since __LINE__ is used to generate
+ * unique symbols.
+ */
+#define _ELFNOTE_PASTE(a,b) a##b
+#define _ELFNOTE(size, name, unique, type, desc) \
+ static const struct { \
+ struct elf##size##_note _nhdr; \
+ unsigned char _name[sizeof(name)] \
+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+ typeof(desc) _desc \
+ __attribute__((aligned(sizeof(Elf##size##_Word)))); \
+ } _ELFNOTE_PASTE(_note_, unique) \
+ __attribute_used__ \
+ __attribute__((section(".note." name), \
+ aligned(sizeof(Elf##size##_Word)), \
+ unused)) = { \
+ { \
+ sizeof(name), \
+ sizeof(desc), \
+ type, \
+ }, \
+ name, \
+ desc \
+ }
+#define ELFNOTE(size, name, type, desc) \
+ _ELFNOTE(size, name, __LINE__, type, desc)
+
+#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc)
+#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc)
+#endif /* __ASSEMBLER__ */
+
+#endif /* _LINUX_ELFNOTE_H */
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index cc9e6084448..8b34aabfe4c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -9,17 +9,16 @@ struct vm_area_struct;
/*
* GFP bitmasks..
+ *
+ * Zone modifiers (see linux/mmzone.h - low three bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use them consistently. The definitions here may
+ * be used in bit comparisons.
*/
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
#define __GFP_DMA ((__force gfp_t)0x01u)
#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
-#ifdef CONFIG_DMA_IS_DMA32
-#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */
-#elif BITS_PER_LONG < 64
-#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */
-#else
-#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */
-#endif
+#define __GFP_DMA32 ((__force gfp_t)0x04u)
/*
* Action modifiers - doesn't change the zoning
@@ -46,6 +45,7 @@ struct vm_area_struct;
#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +54,7 @@ struct vm_area_struct;
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
@@ -67,6 +67,8 @@ struct vm_area_struct;
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
+#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
platforms, used as appropriate on others */
@@ -76,11 +78,19 @@ struct vm_area_struct;
#define GFP_DMA32 __GFP_DMA32
-static inline int gfp_zone(gfp_t gfp)
+static inline enum zone_type gfp_zone(gfp_t flags)
{
- int zone = GFP_ZONEMASK & (__force int) gfp;
- BUG_ON(zone >= GFP_ZONETYPES);
- return zone;
+ if (flags & __GFP_DMA)
+ return ZONE_DMA;
+#ifdef CONFIG_ZONE_DMA32
+ if (flags & __GFP_DMA32)
+ return ZONE_DMA32;
+#endif
+#ifdef CONFIG_HIGHMEM
+ if (flags & __GFP_HIGHMEM)
+ return ZONE_HIGHMEM;
+#endif
+ return ZONE_NORMAL;
}
/*
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 85ce7ef9a51..fd7d12daa94 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page)
/* declarations for linux/mm/highmem.c */
unsigned int nr_free_highpages(void);
+extern unsigned long totalhigh_pages;
#else /* CONFIG_HIGHMEM */
static inline unsigned int nr_free_highpages(void) { return 0; }
+#define totalhigh_pages 0UL /* same type as the unsigned long extern above */
+
+#ifndef ARCH_HAS_KMAP
static inline void *kmap(struct page *page)
{
might_sleep();
@@ -41,6 +45,7 @@ static inline void *kmap(struct page *page)
#define kunmap_atomic(addr, idx) do { } while (0)
#define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn))
#define kmap_atomic_to_page(ptr) virt_to_page(ptr)
+#endif
#endif /* CONFIG_HIGHMEM */
diff --git a/include/linux/irq.h b/include/linux/irq.h
index fbf6d901e9c..48d3cb3b6a4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *,
* Monolithic do_IRQ implementation.
* (is an explicit fastcall, because i386 4KSTACKS calls it from assembly)
*/
+#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs);
+#endif
/*
* Architectures call this to let the generic IRQ layer
@@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs)
{
struct irq_desc *desc = irq_desc + irq;
+#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
+ desc->handle_irq(irq, desc, regs);
+#else
if (likely(desc->handle_irq))
desc->handle_irq(irq, desc, regs);
else
__do_IRQ(irq, regs);
+#endif
}
/* Handling of unhandled and spurious interrupts: */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2b2ae4fdce8..e44a37e2c71 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -33,6 +33,7 @@ extern const char linux_banner[];
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL))
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
#define KERN_EMERG "<0>" /* system is unusable */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 72440f0a443..09f0f575ddf 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr);
extern unsigned slab_node(struct mempolicy *policy);
-extern int policy_zone;
+extern enum zone_type policy_zone;
-static inline void check_highest_zone(int k)
+static inline void check_highest_zone(enum zone_type k)
{
if (k > policy_zone)
policy_zone = k;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 224178a000d..856f0ee7e84 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/debug_locks.h>
+#include <linux/backing-dev.h>
struct mempolicy;
struct anon_vma;
@@ -218,7 +219,8 @@ struct inode;
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
* moment. Note that we have no way to track which tasks are using
- * a page.
+ * a page, though if it is a pagecache page, rmap structures can tell us
+ * who is mapping it.
*/
struct page {
unsigned long flags; /* Atomic flags, some possibly
@@ -278,6 +280,12 @@ struct page {
*/
#include <linux/page-flags.h>
+#ifdef CONFIG_DEBUG_VM
+#define VM_BUG_ON(cond) BUG_ON(cond)
+#else
+#define VM_BUG_ON(condition) do { } while(0)
+#endif
+
/*
* Methods to modify the page usage count.
*
@@ -292,12 +300,11 @@ struct page {
*/
/*
- * Drop a ref, return true if the logical refcount fell to zero (the page has
- * no users)
+ * Drop a ref, return true if the refcount fell to zero (the page has no users)
*/
static inline int put_page_testzero(struct page *page)
{
- BUG_ON(atomic_read(&page->_count) == 0);
+ VM_BUG_ON(atomic_read(&page->_count) == 0);
return atomic_dec_and_test(&page->_count);
}
@@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page)
*/
static inline int get_page_unless_zero(struct page *page)
{
+ VM_BUG_ON(PageCompound(page));
return atomic_inc_not_zero(&page->_count);
}
-extern void FASTCALL(__page_cache_release(struct page *));
-
static inline int page_count(struct page *page)
{
if (unlikely(PageCompound(page)))
@@ -323,6 +329,7 @@ static inline void get_page(struct page *page)
{
if (unlikely(PageCompound(page)))
page = (struct page *)page_private(page);
+ VM_BUG_ON(atomic_read(&page->_count) == 0);
atomic_inc(&page->_count);
}
@@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order);
* For the non-reserved pages, page_count(page) denotes a reference count.
* page_count() == 0 means the page is free. page->lru is then used for
* freelist management in the buddy allocator.
- * page_count() == 1 means the page is used for exactly one purpose
- * (e.g. a private data page of one process).
+ * page_count() > 0 means the page has been allocated.
+ *
+ * Pages are allocated by the slab allocator in order to provide memory
+ * to kmalloc and kmem_cache_alloc. In this case, the management of the
+ * page, and the fields in 'struct page' are the responsibility of mm/slab.c
+ * unless a particular usage is carefully commented. (the responsibility of
+ * freeing the kmalloc memory is the caller's, of course).
*
- * A page may be used for kmalloc() or anyone else who does a
- * __get_free_page(). In this case the page_count() is at least 1, and
- * all other fields are unused but should be 0 or NULL. The
- * management of this page is the responsibility of the one who uses
- * it.
+ * A page may be used by anyone else who does a __get_free_page().
+ * In this case, page_count still tracks the references, and should only
+ * be used through the normal accessor functions. The top bits of page->flags
+ * and page->virtual store page management information, but all other fields
+ * are unused and could be used privately, carefully. The management of this
+ * page is the responsibility of the one who allocated it, and those who have
+ * subsequently been given references to it.
*
- * The other pages (we may call them "process pages") are completely
+ * The other pages (we may call them "pagecache pages") are completely
* managed by the Linux memory manager: I/O, buffers, swapping etc.
* The following discussion applies only to them.
*
- * A page may belong to an inode's memory mapping. In this case,
- * page->mapping is the pointer to the inode, and page->index is the
- * file offset of the page, in units of PAGE_CACHE_SIZE.
+ * A pagecache page contains an opaque `private' member, which belongs to the
+ * page's address_space. Usually, this is the address of a circular list of
+ * the page's disk buffers. PG_private must be set to tell the VM to call
+ * into the filesystem to release these pages.
*
- * A page contains an opaque `private' member, which belongs to the
- * page's address_space. Usually, this is the address of a circular
- * list of the page's disk buffers.
+ * A page may belong to an inode's memory mapping. In this case, page->mapping
+ * is the pointer to the inode, and page->index is the file offset of the page,
+ * in units of PAGE_CACHE_SIZE.
*
- * For pages belonging to inodes, the page_count() is the number of
- * attaches, plus 1 if `private' contains something, plus one for
- * the page cache itself.
+ * If pagecache pages are not associated with an inode, they are said to be
+ * anonymous pages. These may become associated with the swapcache, and in that
+ * case PG_swapcache is set, and page->private is an offset into the swapcache.
*
- * Instead of keeping dirty/clean pages in per address-space lists, we instead
- * now tag pages as dirty/under writeback in the radix tree.
+ * In either case (swapcache or inode backed), the pagecache itself holds one
+ * reference to the page. Setting PG_private should also increment the
+ * refcount. Each user mapping also has a reference to the page.
*
- * There is also a per-mapping radix tree mapping index to the page
- * in memory if present. The tree is rooted at mapping->root.
+ * The pagecache pages are stored in a per-mapping radix tree, which is
+ * rooted at mapping->page_tree, and indexed by offset.
+ * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space
+ * lists, we instead now tag pages as dirty/writeback in the radix tree.
*
- * All process pages can do I/O:
+ * All pagecache pages may be subject to I/O:
* - inode pages may need to be read from disk,
* - inode pages which have been modified and are MAP_SHARED may need
- * to be written to disk,
- * - private pages which have been modified may need to be swapped out
- * to swap space and (later) to be read back into memory.
+ * to be written back to the inode on disk,
+ * - anonymous pages (including MAP_PRIVATE file mappings) which have been
+ * modified may need to be swapped out to swap space and (later) to be read
+ * back into memory.
*/
/*
@@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order);
#define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1)
#define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1)
-static inline unsigned long page_zonenum(struct page *page)
+static inline enum zone_type page_zonenum(struct page *page)
{
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
@@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page)
return zone_table[page_zone_id(page)];
}
+static inline unsigned long zone_to_nid(struct zone *zone)
+{
+ return zone->zone_pgdat->node_id;
+}
+
static inline unsigned long page_to_nid(struct page *page)
{
if (FLAGS_HAS_NODE)
return (page->flags >> NODES_PGSHIFT) & NODES_MASK;
else
- return page_zone(page)->zone_pgdat->node_id;
+ return zone_to_nid(page_zone(page));
}
static inline unsigned long page_to_section(struct page *page)
{
return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK;
}
-static inline void set_page_zone(struct page *page, unsigned long zone)
+static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT;
}
+
static inline void set_page_node(struct page *page, unsigned long node)
{
page->flags &= ~(NODES_MASK << NODES_PGSHIFT);
@@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section)
page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT;
}
-static inline void set_page_links(struct page *page, unsigned long zone,
+static inline void set_page_links(struct page *page, enum zone_type zone,
unsigned long node, unsigned long pfn)
{
set_page_zone(page, zone);
@@ -802,6 +827,39 @@ struct shrinker;
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
+/*
+ * Some shared mappings will want the pages marked read-only
+ * to track write events. If so, we'll downgrade vm_page_prot
+ * to the private version (using protection_map[] without the
+ * VM_SHARED bit).
+ */
+static inline int vma_wants_writenotify(struct vm_area_struct *vma)
+{
+ unsigned int vm_flags = vma->vm_flags;
+
+ /* If it was private or non-writable, the write bit is already clear */
+ if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
+ return 0;
+
+ /* The backer wishes to know when pages are first written to? */
+ if (vma->vm_ops && vma->vm_ops->page_mkwrite)
+ return 1;
+
+ /* The open routine did something to the protections already? */
+ if (pgprot_val(vma->vm_page_prot) !=
+ pgprot_val(protection_map[vm_flags &
+ (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]))
+ return 0;
+
+ /* Specialty mapping? */
+ if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE))
+ return 0;
+
+ /* Can the mapping track the dirty pages? */
+ return vma->vm_file && vma->vm_file->f_mapping &&
+ mapping_cap_account_dirty(vma->vm_file->f_mapping);
+}
+
extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f45163c528e..3693f1a5278 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -51,7 +51,8 @@ enum zone_stat_item {
NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
only modified from process context */
NR_FILE_PAGES,
- NR_SLAB, /* Pages used by slab allocator */
+ NR_SLAB_RECLAIMABLE,
+ NR_SLAB_UNRECLAIMABLE,
NR_PAGETABLE, /* used for pagetables */
NR_FILE_DIRTY,
NR_WRITEBACK,
@@ -88,53 +89,68 @@ struct per_cpu_pageset {
#define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)])
#endif
-#define ZONE_DMA 0
-#define ZONE_DMA32 1
-#define ZONE_NORMAL 2
-#define ZONE_HIGHMEM 3
-
-#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */
-#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */
-
+enum zone_type {
+ /*
+ * ZONE_DMA is used when there are devices that are not able
+ * to do DMA to all of addressable memory (ZONE_NORMAL). Then we
+ * carve out the portion of memory that is needed for these devices.
+ * The range is arch specific.
+ *
+ * Some examples
+ *
+ * Architecture Limit
+ * ---------------------------
+ * parisc, ia64, sparc <4G
+ * s390 <2G
+ * arm26 <48M
+ * arm Various
+ * alpha Unlimited or 0-16MB.
+ *
+ * i386, x86_64 and multiple other arches
+ * <16M.
+ */
+ ZONE_DMA,
+#ifdef CONFIG_ZONE_DMA32
+ /*
+ * x86_64 needs two ZONE_DMAs because it supports devices that are
+ * only able to do DMA to the lower 16M but also 32 bit devices that
+ * can only do DMA areas below 4G.
+ */
+ ZONE_DMA32,
+#endif
+ /*
+ * Normal addressable memory is in ZONE_NORMAL. DMA operations can be
+ * performed on pages in ZONE_NORMAL if the DMA devices support
+ * transfers to all addressable memory.
+ */
+ ZONE_NORMAL,
+#ifdef CONFIG_HIGHMEM
+ /*
+ * A memory area that is only addressable by the kernel through
+ * mapping portions into its own address space. This is for example
+ * used by i386 to allow the kernel to address the memory beyond
+ * 900MB. The kernel will set up special mappings (page
+ * table entries on i386) for each page that the kernel needs to
+ * access.
+ */
+ ZONE_HIGHMEM,
+#endif
+ MAX_NR_ZONES
+};
/*
* When a memory allocation must conform to specific limitations (such
* as being suitable for DMA) the caller will pass in hints to the
* allocator in the gfp_mask, in the zone modifier bits. These bits
* are used to select a priority ordered list of memory zones which
- * match the requested limits. GFP_ZONEMASK defines which bits within
- * the gfp_mask should be considered as zone modifiers. Each valid
- * combination of the zone modifier bits has a corresponding list
- * of zones (in node_zonelists). Thus for two zone modifiers there
- * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will
- * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible
- * combinations of zone modifiers in "zone modifier space".
- *
- * As an optimisation any zone modifier bits which are only valid when
- * no other zone modifier bits are set (loners) should be placed in
- * the highest order bits of this field. This allows us to reduce the
- * extent of the zonelists thus saving space. For example in the case
- * of three zone modifier bits, we could require up to eight zonelists.
- * If the left most zone modifier is a "loner" then the highest valid
- * zonelist would be four allowing us to allocate only five zonelists.
- * Use the first form for GFP_ZONETYPES when the left most bit is not
- * a "loner", otherwise use the second.
- *
- * NOTE! Make sure this matches the zones in <linux/gfp.h>
+ * match the requested limits. See gfp_zone() in include/linux/gfp.h
*/
-#define GFP_ZONEMASK 0x07
-/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */
-#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */
-/*
- * On machines where it is needed (eg PCs) we divide physical memory
- * into multiple physical zones. On a 32bit PC we have 4 zones:
- *
- * ZONE_DMA < 16 MB ISA DMA capable memory
- * ZONE_DMA32 0 MB Empty
- * ZONE_NORMAL 16-896 MB direct mapped by the kernel
- * ZONE_HIGHMEM > 896 MB only page cache and user processes
- */
+#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM)
+#define ZONES_SHIFT 1
+#else
+#define ZONES_SHIFT 2
+#endif
struct zone {
/* Fields commonly accessed by the page allocator */
@@ -154,7 +170,8 @@ struct zone {
/*
* zone reclaim becomes active if more unmapped pages exist.
*/
- unsigned long min_unmapped_ratio;
+ unsigned long min_unmapped_pages;
+ unsigned long min_slab_pages;
struct per_cpu_pageset *pageset[NR_CPUS];
#else
struct per_cpu_pageset pageset[NR_CPUS];
@@ -266,7 +283,6 @@ struct zone {
char *name;
} ____cacheline_internodealigned_in_smp;
-
/*
* The "priority" of VM scanning is how much of the queues we will scan in one
* go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the
@@ -304,7 +320,7 @@ struct zonelist {
struct bootmem_data;
typedef struct pglist_data {
struct zone node_zones[MAX_NR_ZONES];
- struct zonelist node_zonelists[GFP_ZONETYPES];
+ struct zonelist node_zonelists[MAX_NR_ZONES];
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP
struct page *node_mem_map;
@@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone)
return (!!zone->present_pages);
}
-static inline int is_highmem_idx(int idx)
+static inline int is_highmem_idx(enum zone_type idx)
{
+#ifdef CONFIG_HIGHMEM
return (idx == ZONE_HIGHMEM);
+#else
+ return 0;
+#endif
}
-static inline int is_normal_idx(int idx)
+static inline int is_normal_idx(enum zone_type idx)
{
return (idx == ZONE_NORMAL);
}
@@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx)
*/
static inline int is_highmem(struct zone *zone)
{
+#ifdef CONFIG_HIGHMEM
return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
+#else
+ return 0;
+#endif
}
static inline int is_normal(struct zone *zone)
@@ -401,7 +425,11 @@ static inline int is_normal(struct zone *zone)
static inline int is_dma32(struct zone *zone)
{
+#ifdef CONFIG_ZONE_DMA32
return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+#else
+ return 0;
+#endif
}
static inline int is_dma(struct zone *zone)
@@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file
void __user *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
struct file *, void __user *, size_t *, loff_t *);
+int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+ struct file *, void __user *, size_t *, loff_t *);
#include <linux/topology.h>
/* Returns the number of the current Node. */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5748642e9f3..9d7921dd50f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -13,24 +13,25 @@
* PG_reserved is set for special pages, which can never be swapped out. Some
* of them might not even exist (eg empty_bad_page)...
*
- * The PG_private bitflag is set if page->private contains a valid value.
+ * The PG_private bitflag is set on pagecache pages if they contain filesystem
+ * specific data (which is normally at page->private). It can be used by
+ * private allocations for their own usage.
*
- * During disk I/O, PG_locked is used. This bit is set before I/O and
- * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
- * waiting for the I/O on this page to complete.
+ * During initiation of disk I/O, PG_locked is set. This bit is set before I/O
+ * and cleared when writeback _starts_ or when read _completes_. PG_writeback
+ * is set before writeback starts and cleared when it finishes.
+ *
+ * PG_locked also pins a page in pagecache, and blocks truncation of the file
+ * while it is held.
+ *
+ * page_waitqueue(page) is a wait queue of all tasks waiting for the page
+ * to become unlocked.
*
* PG_uptodate tells whether the page's contents is valid. When a read
* completes, the page becomes uptodate, unless a disk I/O error happened.
*
- * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
- * which is set any time the system accesses that page through the (mapping,
- * index) hash table. This referenced bit, together with the referenced bit
- * in the page tables, is used to manipulate page->age and move the page across
- * the active, inactive_dirty and inactive_clean lists.
- *
- * Note that the referenced bit, the page->lru list_head and the active,
- * inactive_dirty and inactive_clean lists are protected by the
- * zone->lru_lock, and *NOT* by the usual PG_locked bit!
+ * PG_referenced, PG_reclaim are used for page reclaim for anonymous and
+ * file-backed pagecache (see mm/vmscan.c).
*
* PG_error is set to indicate that an I/O error occurred on this page.
*
@@ -42,6 +43,10 @@
* space, they need to be kmapped separately for doing IO on the pages. The
* struct page (these bits with information) are always mapped into kernel
* address space...
+ *
+ * PG_buddy is set to indicate that the page is free and in the buddy system
+ * (see mm/page_alloc.c).
+ *
*/
/*
@@ -74,7 +79,7 @@
#define PG_checked 8 /* kill me in 2.5.<early>. */
#define PG_arch_1 9
#define PG_reserved 10
-#define PG_private 11 /* Has something at ->private */
+#define PG_private 11 /* If pagecache, has fs-private data */
#define PG_writeback 12 /* Page is under writeback */
#define PG_nosave 13 /* Used for system suspend/resume */
@@ -83,7 +88,7 @@
#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
#define PG_reclaim 17 /* To be reclaimed asap */
-#define PG_nosave_free 18 /* Free, should not be written */
+#define PG_nosave_free 18 /* Used for system suspend/resume */
#define PG_buddy 19 /* Page is free, on buddy lists */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0a2f5d27f60..64f95092515 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
}
extern void FASTCALL(__lock_page(struct page *page));
+extern void FASTCALL(__lock_page_nosync(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
+/*
+ * lock_page may only be called if we have the page's inode pinned.
+ */
static inline void lock_page(struct page *page)
{
might_sleep();
if (TestSetPageLocked(page))
__lock_page(page);
}
+
+/*
+ * lock_page_nosync should only be used if we can't pin the page's inode.
+ * Doesn't play quite so well with block device plugging.
+ */
+static inline void lock_page_nosync(struct page *page)
+{
+ might_sleep();
+ if (TestSetPageLocked(page))
+ __lock_page_nosync(page);
+}
/*
* This is exported only for wait_on_page_locked/wait_on_page_writeback.
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index cb9039a21f2..3835a9642f1 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -1,9 +1,12 @@
#ifndef __LINUX_PERCPU_H
#define __LINUX_PERCPU_H
+
#include <linux/spinlock.h> /* For preempt_disable() */
#include <linux/slab.h> /* For kmalloc() */
#include <linux/smp.h>
#include <linux/string.h> /* For memset() */
+#include <linux/cpumask.h>
+
#include <asm/percpu.h>
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
@@ -11,8 +14,14 @@
#define PERCPU_ENOUGH_ROOM 32768
#endif
-/* Must be an lvalue. */
-#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); }))
+/*
+ * Must be an lvalue. Since @var must be a simple identifier,
+ * we force a syntax error here if it isn't.
+ */
+#define get_cpu_var(var) (*({ \
+ extern int simple_indentifier_##var(void); \
+ preempt_disable(); \
+ &__get_cpu_var(var); }))
#define put_cpu_var(var) preempt_enable()
#ifdef CONFIG_SMP
@@ -21,39 +30,77 @@ struct percpu_data {
void *ptrs[NR_CPUS];
};
+#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)
/*
- * Use this to get to a cpu's version of the per-cpu object allocated using
- * alloc_percpu. Non-atomic access to the current CPU's version should
+ * Use this to get to a cpu's version of the per-cpu object dynamically
+ * allocated. Non-atomic access to the current CPU's version should
* probably be combined with get_cpu()/put_cpu().
*/
-#define per_cpu_ptr(ptr, cpu) \
-({ \
- struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \
- (__typeof__(ptr))__p->ptrs[(cpu)]; \
+#define percpu_ptr(ptr, cpu) \
+({ \
+ struct percpu_data *__p = __percpu_disguise(ptr); \
+ (__typeof__(ptr))__p->ptrs[(cpu)]; \
})
-extern void *__alloc_percpu(size_t size);
-extern void free_percpu(const void *);
+extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
+extern void percpu_depopulate(void *__pdata, int cpu);
+extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask);
+extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
+extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
+extern void percpu_free(void *__pdata);
#else /* CONFIG_SMP */
-#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+
+static inline void percpu_depopulate(void *__pdata, int cpu)
+{
+}
+
+static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
+{
+}
-static inline void *__alloc_percpu(size_t size)
+static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
+ int cpu)
{
- void *ret = kmalloc(size, GFP_KERNEL);
- if (ret)
- memset(ret, 0, size);
- return ret;
+ return percpu_ptr(__pdata, cpu);
}
-static inline void free_percpu(const void *ptr)
-{
- kfree(ptr);
+
+static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
+ cpumask_t *mask)
+{
+ return 0;
+}
+
+static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
+{
+ return kzalloc(size, gfp);
+}
+
+static inline void percpu_free(void *__pdata)
+{
+ kfree(__pdata);
}
#endif /* CONFIG_SMP */
-/* Simple wrapper for the common case: zeros memory. */
-#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type))))
+#define percpu_populate_mask(__pdata, size, gfp, mask) \
+ __percpu_populate_mask((__pdata), (size), (gfp), &(mask))
+#define percpu_depopulate_mask(__pdata, mask) \
+ __percpu_depopulate_mask((__pdata), &(mask))
+#define percpu_alloc_mask(size, gfp, mask) \
+ __percpu_alloc_mask((size), (gfp), &(mask))
+
+#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)
+
+/* (legacy) interface for use without CPU hotplug handling */
+
+#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
+ cpu_possible_map)
+#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
+#define free_percpu(ptr) percpu_free((ptr))
+#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
#endif /* __LINUX_PERCPU_H */
diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h
index a376bd4ade3..81e9299ca14 100644
--- a/include/linux/resume-trace.h
+++ b/include/linux/resume-trace.h
@@ -3,21 +3,25 @@
#ifdef CONFIG_PM_TRACE
+extern int pm_trace_enabled;
+
struct device;
extern void set_trace_device(struct device *);
extern void generate_resume_trace(void *tracedata, unsigned int user);
#define TRACE_DEVICE(dev) set_trace_device(dev)
-#define TRACE_RESUME(user) do { \
- void *tracedata; \
- asm volatile("movl $1f,%0\n" \
- ".section .tracedata,\"a\"\n" \
- "1:\t.word %c1\n" \
- "\t.long %c2\n" \
- ".previous" \
- :"=r" (tracedata) \
- : "i" (__LINE__), "i" (__FILE__)); \
- generate_resume_trace(tracedata, user); \
+#define TRACE_RESUME(user) do { \
+ if (pm_trace_enabled) { \
+ void *tracedata; \
+ asm volatile("movl $1f,%0\n" \
+ ".section .tracedata,\"a\"\n" \
+ "1:\t.word %c1\n" \
+ "\t.long %c2\n" \
+ ".previous" \
+ :"=r" (tracedata) \
+ : "i" (__LINE__), "i" (__FILE__)); \
+ generate_resume_trace(tracedata, user); \
+ } \
} while (0)
#else
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bf97b090001..db2c1df4fef 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *,
*/
unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
+/*
+ * Cleans the PTEs of shared mappings.
+ * (and since clean PTEs should also be readonly, write protects them too)
+ *
+ * returns the number of cleaned PTEs.
+ */
+int page_mkclean(struct page *);
+
#else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0)
@@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
#define page_referenced(page,l) TestClearPageReferenced(page)
#define try_to_unmap(page, refs) SWAP_FAIL
+static inline int page_mkclean(struct page *page)
+{
+ return 0;
+}
+
+
#endif /* CONFIG_MMU */
/*
diff --git a/include/linux/selinux.h b/include/linux/selinux.h
index aad4e390d6a..d1b7ca6c1c5 100644
--- a/include/linux/selinux.h
+++ b/include/linux/selinux.h
@@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
/**
* selinux_audit_rule_match - determine if a context ID matches a rule.
- * @ctxid: the context ID to check
+ * @sid: the context ID to check
* @field: the field this rule refers to
* @op: the operater the rule uses
* @rule: pointer to the audit rule to check against
@@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule);
* Returns 1 if the context id matches the rule, 0 if it does not, and
* -errno on failure.
*/
-int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
struct selinux_audit_rule *rule,
struct audit_context *actx);
@@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
void selinux_audit_set_callback(int (*callback)(void));
/**
- * selinux_task_ctxid - determine a context ID for a process.
- * @tsk: the task object
- * @ctxid: ID value returned via this
- *
- * On return, ctxid will contain an ID for the context. This value
- * should only be used opaquely.
- */
-void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
-
-/**
- * selinux_ctxid_to_string - map a security context ID to a string
- * @ctxid: security context ID to be converted.
+ * selinux_sid_to_string - map a security context ID to a string
+ * @sid: security context ID to be converted.
* @ctx: address of context string to be returned
* @ctxlen: length of returned context string.
*
@@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid);
* string will be allocated internally, and the caller must call
* kfree() on it after use.
*/
-int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen);
+int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen);
/**
* selinux_get_inode_sid - get the inode's security context ID
@@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule)
return;
}
-static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op,
+static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op,
struct selinux_audit_rule *rule,
struct audit_context *actx)
{
@@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void))
return;
}
-static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid)
-{
- *ctxid = 0;
-}
-
-static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen)
+static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen)
{
*ctx = NULL;
*ctxlen = 0;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 45ad55b70d1..66d6eb78d1c 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t);
extern void kmem_cache_free(kmem_cache_t *, void *);
extern unsigned int kmem_cache_size(kmem_cache_t *);
extern const char *kmem_cache_name(kmem_cache_t *);
-extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags);
/* Size description struct for general caches. */
struct cache_sizes {
@@ -203,7 +202,30 @@ extern int slab_is_available(void);
#ifdef CONFIG_NUMA
extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node);
-extern void *kmalloc_node(size_t size, gfp_t flags, int node);
+extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
+
+static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+ if (__builtin_constant_p(size)) {
+ int i = 0;
+#define CACHE(x) \
+ if (size <= x) \
+ goto found; \
+ else \
+ i++;
+#include "kmalloc_sizes.h"
+#undef CACHE
+ {
+ extern void __you_cannot_kmalloc_that_much(void);
+ __you_cannot_kmalloc_that_much();
+ }
+found:
+ return kmem_cache_alloc_node((flags & GFP_DMA) ?
+ malloc_sizes[i].cs_dmacachep :
+ malloc_sizes[i].cs_cachep, flags, node);
+ }
+ return __kmalloc_node(size, flags, node);
+}
#else
static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node)
{
@@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr));
/* SLOB allocator routines */
void kmem_cache_init(void);
-struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags);
struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t,
unsigned long,
void (*)(void *, struct kmem_cache *, unsigned long),
@@ -263,8 +284,6 @@ extern kmem_cache_t *fs_cachep;
extern kmem_cache_t *sighand_cachep;
extern kmem_cache_t *bio_cachep;
-extern atomic_t slab_reclaim_pages;
-
#endif /* __KERNEL__ */
#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 837e8bce134..51649987f69 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus);
*/
int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
+int smp_call_function_single(int cpuid, void (*func) (void *info), void *info,
+ int retry, int wait);
+
/*
* Call a function on all processors
*/
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 96e31aa64cc..b1237f16ecd 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -10,29 +10,11 @@
#include <linux/pm.h>
/* page backup entry */
-typedef struct pbe {
+struct pbe {
unsigned long address; /* address of the copy */
unsigned long orig_address; /* original address of page */
struct pbe *next;
-} suspend_pagedir_t;
-
-#define for_each_pbe(pbe, pblist) \
- for (pbe = pblist ; pbe ; pbe = pbe->next)
-
-#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe))
-#define PB_PAGE_SKIP (PBES_PER_PAGE-1)
-
-#define for_each_pb_page(pbe, pblist) \
- for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next)
-
-
-#define SWAP_FILENAME_MAXLENGTH 32
-
-
-extern dev_t swsusp_resume_device;
-
-/* mm/vmscan.c */
-extern int shrink_mem(void);
+};
/* mm/page_alloc.c */
extern void drain_local_pages(void);
@@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {}
static inline int software_suspend(void)
{
printk("Warning: fake suspend called\n");
- return -EPERM;
+ return -ENOSYS;
}
#endif /* CONFIG_PM */
-#ifdef CONFIG_SUSPEND_SMP
-extern void disable_nonboot_cpus(void);
-extern void enable_nonboot_cpus(void);
-#else
-static inline void disable_nonboot_cpus(void) {}
-static inline void enable_nonboot_cpus(void) {}
-#endif
-
void save_processor_state(void);
void restore_processor_state(void);
struct saved_context;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 5e59184c909..e7c36ba2a2d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -10,6 +10,10 @@
#include <asm/atomic.h>
#include <asm/page.h>
+struct notifier_block;
+
+struct bio;
+
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
#define SWAP_FLAG_PRIO_SHIFT 0
@@ -156,13 +160,14 @@ struct swap_list_t {
/* linux/mm/oom_kill.c */
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern int register_oom_notifier(struct notifier_block *nb);
+extern int unregister_oom_notifier(struct notifier_block *nb);
/* linux/mm/memory.c */
extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *);
/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
-extern unsigned long totalhigh_pages;
extern unsigned long totalreserve_pages;
extern long nr_swap_pages;
extern unsigned int nr_free_pages(void);
@@ -190,6 +195,7 @@ extern long vm_total_pages;
#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
+extern int sysctl_min_slab_ratio;
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
#else
#define zone_reclaim_mode 0
@@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
/* linux/mm/page_io.c */
extern int swap_readpage(struct file *, struct page *);
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
-extern int rw_swap_page_sync(int, swp_entry_t, struct page *);
+extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page,
+ struct bio **bio_chain);
+extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err);
/* linux/mm/swap_state.c */
extern struct address_space swapper_space;
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 736ed917a4f..eca555781d0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -191,6 +191,7 @@ enum
VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */
VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
+ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */
};
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 71b6363caaa..dee88c6b6fa 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size);
extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot);
extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask,
pgprot_t prot);
-extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask,
- pgprot_t prot, int node);
extern void vfree(void *addr);
extern void *vmap(struct page **pages, unsigned int count,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 2d9b1b60798..176c7f79733 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -18,7 +18,19 @@
* generated will simply be the increment of a global address.
*/
-#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
+#ifdef CONFIG_ZONE_DMA32
+#define DMA32_ZONE(xx) xx##_DMA32,
+#else
+#define DMA32_ZONE(xx)
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_ZONE(xx) , xx##_HIGH
+#else
+#define HIGHMEM_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
@@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node,
struct zone *zones = NODE_DATA(node)->node_zones;
return
-#ifndef CONFIG_DMA_IS_NORMAL
-#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64
+#ifdef CONFIG_ZONE_DMA32
zone_page_state(&zones[ZONE_DMA32], item) +
#endif
zone_page_state(&zones[ZONE_NORMAL], item) +
-#endif
#ifdef CONFIG_HIGHMEM
zone_page_state(&zones[ZONE_HIGHMEM], item) +
#endif
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 0422036af4e..56a23a0e7f2 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
loff_t pos, loff_t count);
+void set_page_dirty_balance(struct page *page);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl