diff options
Diffstat (limited to 'include/linux')
33 files changed, 728 insertions, 329 deletions
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index e319c649e4f..31e9abb6d97 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -4,11 +4,8 @@ #ifndef _LINUX_BOOTMEM_H #define _LINUX_BOOTMEM_H -#include <asm/pgtable.h> -#include <asm/dma.h> -#include <linux/cache.h> -#include <linux/init.h> #include <linux/mmzone.h> +#include <asm/dma.h> /* * simple boot-time physical memory area allocator. @@ -41,45 +38,64 @@ typedef struct bootmem_data { struct list_head list; } bootmem_data_t; -extern unsigned long __init bootmem_bootmap_pages (unsigned long); -extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); -extern void __init free_bootmem (unsigned long addr, unsigned long size); -extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_nopanic (unsigned long size, unsigned long align, unsigned long goal); -extern void * __init __alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, - unsigned long size, - unsigned long align, - unsigned long goal); -extern void * __init __alloc_bootmem_core(struct bootmem_data *bdata, - unsigned long size, unsigned long align, unsigned long goal, - unsigned long limit); +extern unsigned long bootmem_bootmap_pages(unsigned long); +extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); +extern void free_bootmem(unsigned long addr, unsigned long size); +extern void *__alloc_bootmem(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern void *__alloc_bootmem_core(struct bootmem_data *bdata, + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern void __init reserve_bootmem (unsigned long addr, unsigned long size); +extern void reserve_bootmem(unsigned long addr, unsigned long size); #define alloc_bootmem(x) \ - __alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low(x) \ - __alloc_bootmem_low((x), SMP_CACHE_BYTES, 0) + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ - __alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low((x), PAGE_SIZE, 0) + __alloc_bootmem_low(x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ -extern unsigned long __init free_all_bootmem (void); -extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); -extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size); -extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size); -extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat); + +extern unsigned long free_all_bootmem(void); +extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); +extern void *__alloc_bootmem_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +extern unsigned long init_bootmem_node(pg_data_t *pgdat, + unsigned long freepfn, + unsigned long startpfn, + unsigned long endpfn); +extern void reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size); +extern void free_bootmem_node(pg_data_t *pgdat, + unsigned long addr, + unsigned long size); + #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE #define alloc_bootmem_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ - __alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ - __alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0) + __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP @@ -89,19 +105,19 @@ static inline void *alloc_remap(int nid, unsigned long size) { return NULL; } -#endif +#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */ extern unsigned long __meminitdata nr_kernel_pages; extern unsigned long nr_all_pages; -extern void *__init alloc_large_system_hash(const char *tablename, - unsigned long bucketsize, - unsigned long numentries, - int scale, - int flags, - unsigned int *_hash_shift, - unsigned int *_hash_mask, - unsigned long limit); +extern void *alloc_large_system_hash(const char *tablename, + unsigned long bucketsize, + unsigned long numentries, + int scale, + int flags, + unsigned int *_hash_shift, + unsigned int *_hash_mask, + unsigned long limit); #define HASH_HIGHMEM 0x00000001 /* Consider highmem? */ #define HASH_EARLY 0x00000002 /* Allocating during early boot? */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 9b4f1109493..060b96112ec 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -99,6 +99,11 @@ extern void __chk_io_ptr(void __iomem *); #define __must_check #endif +#ifndef CONFIG_ENABLE_MUST_CHECK +#undef __must_check +#define __must_check +#endif + /* * Allow us to avoid 'defined but not used' warnings on functions and data, * as well as force them to be emitted to the assembly file. diff --git a/include/linux/console.h b/include/linux/console.h index 3bdf2155e56..76a1807726e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -120,9 +120,14 @@ extern void console_stop(struct console *); extern void console_start(struct console *); extern int is_console_locked(void); +#ifndef CONFIG_DISABLE_CONSOLE_SUSPEND /* Suspend and resume console messages over PM events */ extern void suspend_console(void); extern void resume_console(void); +#else +static inline void suspend_console(void) {} +static inline void resume_console(void) {} +#endif /* CONFIG_DISABLE_CONSOLE_SUSPEND */ /* Some debug stub to catch some of the obvious races in the VT code */ #if 1 diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 8fb344a9abd..3fef7d67aed 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -89,4 +89,12 @@ int cpu_down(unsigned int cpu); static inline int cpu_is_offline(int cpu) { return 0; } #endif +#ifdef CONFIG_SUSPEND_SMP +extern int disable_nonboot_cpus(void); +extern void enable_nonboot_cpus(void); +#else +static inline int disable_nonboot_cpus(void) { return 0; } +static inline void enable_nonboot_cpus(void) {} +#endif + #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/device.h b/include/linux/device.h index 1e5f30da98b..662e6a10144 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -15,6 +15,7 @@ #include <linux/kobject.h> #include <linux/klist.h> #include <linux/list.h> +#include <linux/compiler.h> #include <linux/types.h> #include <linux/module.h> #include <linux/pm.h> @@ -51,14 +52,17 @@ struct bus_type { int (*probe)(struct device * dev); int (*remove)(struct device * dev); void (*shutdown)(struct device * dev); - int (*suspend)(struct device * dev, pm_message_t state); - int (*resume)(struct device * dev); + + int (*suspend)(struct device * dev, pm_message_t state); + int (*suspend_late)(struct device * dev, pm_message_t state); + int (*resume_early)(struct device * dev); + int (*resume)(struct device * dev); }; -extern int bus_register(struct bus_type * bus); +extern int __must_check bus_register(struct bus_type * bus); extern void bus_unregister(struct bus_type * bus); -extern void bus_rescan_devices(struct bus_type * bus); +extern int __must_check bus_rescan_devices(struct bus_type * bus); /* iterator helpers for buses */ @@ -67,9 +71,9 @@ int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data, struct device * bus_find_device(struct bus_type *bus, struct device *start, void *data, int (*match)(struct device *, void *)); -int bus_for_each_drv(struct bus_type * bus, struct device_driver * start, - void * data, int (*fn)(struct device_driver *, void *)); - +int __must_check bus_for_each_drv(struct bus_type *bus, + struct device_driver *start, void *data, + int (*fn)(struct device_driver *, void *)); /* driverfs interface for exporting bus attributes */ @@ -82,7 +86,8 @@ struct bus_attribute { #define BUS_ATTR(_name,_mode,_show,_store) \ struct bus_attribute bus_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int bus_create_file(struct bus_type *, struct bus_attribute *); +extern int __must_check bus_create_file(struct bus_type *, + struct bus_attribute *); extern void bus_remove_file(struct bus_type *, struct bus_attribute *); struct device_driver { @@ -101,16 +106,18 @@ struct device_driver { void (*shutdown) (struct device * dev); int (*suspend) (struct device * dev, pm_message_t state); int (*resume) (struct device * dev); + + unsigned int multithread_probe:1; }; -extern int driver_register(struct device_driver * drv); +extern int __must_check driver_register(struct device_driver * drv); extern void driver_unregister(struct device_driver * drv); extern struct device_driver * get_driver(struct device_driver * drv); extern void put_driver(struct device_driver * drv); extern struct device_driver *driver_find(const char *name, struct bus_type *bus); - +extern int driver_probe_done(void); /* driverfs interface for exporting driver attributes */ @@ -123,16 +130,17 @@ struct driver_attribute { #define DRIVER_ATTR(_name,_mode,_show,_store) \ struct driver_attribute driver_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int driver_create_file(struct device_driver *, struct driver_attribute *); +extern int __must_check driver_create_file(struct device_driver *, + struct driver_attribute *); extern void driver_remove_file(struct device_driver *, struct driver_attribute *); -extern int driver_for_each_device(struct device_driver * drv, struct device * start, - void * data, int (*fn)(struct device *, void *)); +extern int __must_check driver_for_each_device(struct device_driver * drv, + struct device *start, void *data, + int (*fn)(struct device *, void *)); struct device * driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *, void *)); - /* * device classes */ @@ -146,17 +154,26 @@ struct class { struct list_head interfaces; struct semaphore sem; /* locks both the children and interfaces lists */ + struct kobject *virtual_dir; + struct class_attribute * class_attrs; struct class_device_attribute * class_dev_attrs; + struct device_attribute * dev_attrs; int (*uevent)(struct class_device *dev, char **envp, int num_envp, char *buffer, int buffer_size); + int (*dev_uevent)(struct device *dev, char **envp, int num_envp, + char *buffer, int buffer_size); void (*release)(struct class_device *dev); void (*class_release)(struct class *class); + void (*dev_release)(struct device *dev); + + int (*suspend)(struct device *, pm_message_t state); + int (*resume)(struct device *); }; -extern int class_register(struct class *); +extern int __must_check class_register(struct class *); extern void class_unregister(struct class *); @@ -169,7 +186,8 @@ struct class_attribute { #define CLASS_ATTR(_name,_mode,_show,_store) \ struct class_attribute class_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int class_create_file(struct class *, const struct class_attribute *); +extern int __must_check class_create_file(struct class *, + const struct class_attribute *); extern void class_remove_file(struct class *, const struct class_attribute *); struct class_device_attribute { @@ -182,7 +200,7 @@ struct class_device_attribute { struct class_device_attribute class_device_attr_##_name = \ __ATTR(_name,_mode,_show,_store) -extern int class_device_create_file(struct class_device *, +extern int __must_check class_device_create_file(struct class_device *, const struct class_device_attribute *); /** @@ -242,10 +260,10 @@ class_set_devdata (struct class_device *dev, void *data) } -extern int class_device_register(struct class_device *); +extern int __must_check class_device_register(struct class_device *); extern void class_device_unregister(struct class_device *); extern void class_device_initialize(struct class_device *); -extern int class_device_add(struct class_device *); +extern int __must_check class_device_add(struct class_device *); extern void class_device_del(struct class_device *); extern int class_device_rename(struct class_device *, char *); @@ -255,7 +273,7 @@ extern void class_device_put(struct class_device *); extern void class_device_remove_file(struct class_device *, const struct class_device_attribute *); -extern int class_device_create_bin_file(struct class_device *, +extern int __must_check class_device_create_bin_file(struct class_device *, struct bin_attribute *); extern void class_device_remove_bin_file(struct class_device *, struct bin_attribute *); @@ -266,22 +284,23 @@ struct class_interface { int (*add) (struct class_device *, struct class_interface *); void (*remove) (struct class_device *, struct class_interface *); + int (*add_dev) (struct device *, struct class_interface *); + void (*remove_dev) (struct device *, struct class_interface *); }; -extern int class_interface_register(struct class_interface *); +extern int __must_check class_interface_register(struct class_interface *); extern void class_interface_unregister(struct class_interface *); -extern struct class *class_create(struct module *owner, char *name); +extern struct class *class_create(struct module *owner, const char *name); extern void class_destroy(struct class *cls); extern struct class_device *class_device_create(struct class *cls, struct class_device *parent, dev_t devt, struct device *device, - char *fmt, ...) + const char *fmt, ...) __attribute__((format(printf,5,6))); extern void class_device_destroy(struct class *cls, dev_t devt); - /* interface for exporting device attributes */ struct device_attribute { struct attribute attr; @@ -294,8 +313,13 @@ struct device_attribute { #define DEVICE_ATTR(_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_name = __ATTR(_name,_mode,_show,_store) -extern int device_create_file(struct device *device, struct device_attribute * entry); +extern int __must_check device_create_file(struct device *device, + struct device_attribute * entry); extern void device_remove_file(struct device * dev, struct device_attribute * attr); +extern int __must_check device_create_bin_file(struct device *dev, + struct bin_attribute *attr); +extern void device_remove_bin_file(struct device *dev, + struct bin_attribute *attr); struct device { struct klist klist_children; struct klist_node knode_parent; /* node in sibling list */ @@ -305,6 +329,7 @@ struct device { struct kobject kobj; char bus_id[BUS_ID_SIZE]; /* position on parent bus */ + unsigned is_registered:1; struct device_attribute uevent_attr; struct device_attribute *devt_attr; @@ -338,6 +363,7 @@ struct device { struct list_head node; struct class *class; /* optional*/ dev_t devt; /* dev_t, creates the sysfs "dev" */ + struct attribute_group **groups; /* optional groups */ void (*release)(struct device * dev); }; @@ -356,38 +382,41 @@ dev_set_drvdata (struct device *dev, void *data) static inline int device_is_registered(struct device *dev) { - return klist_node_attached(&dev->knode_bus); + return dev->is_registered; } /* * High level routines for use by the bus drivers */ -extern int device_register(struct device * dev); +extern int __must_check device_register(struct device * dev); extern void device_unregister(struct device * dev); extern void device_initialize(struct device * dev); -extern int device_add(struct device * dev); +extern int __must_check device_add(struct device * dev); extern void device_del(struct device * dev); -extern int device_for_each_child(struct device *, void *, +extern int __must_check device_for_each_child(struct device *, void *, int (*fn)(struct device *, void *)); +extern int device_rename(struct device *dev, char *new_name); /* * Manual binding of a device to driver. See drivers/base/bus.c * for information on use. */ -extern void device_bind_driver(struct device * dev); +extern int __must_check device_bind_driver(struct device *dev); extern void device_release_driver(struct device * dev); -extern int device_attach(struct device * dev); -extern void driver_attach(struct device_driver * drv); -extern void device_reprobe(struct device *dev); +extern int __must_check device_attach(struct device * dev); +extern int __must_check driver_attach(struct device_driver *drv); +extern int __must_check device_reprobe(struct device *dev); /* * Easy functions for dynamically creating devices on the fly */ extern struct device *device_create(struct class *cls, struct device *parent, - dev_t devt, char *fmt, ...) + dev_t devt, const char *fmt, ...) __attribute__((format(printf,4,5))); extern void device_destroy(struct class *cls, dev_t devt); +extern int virtual_device_parent(struct device *dev); + /* * Platform "fixup" functions - allow the platform to have their say * about devices and actions that the general device layer doesn't @@ -412,7 +441,7 @@ extern void device_shutdown(void); /* drivers/base/firmware.c */ -extern int firmware_register(struct subsystem *); +extern int __must_check firmware_register(struct subsystem *); extern void firmware_unregister(struct subsystem *); /* debugging and troubleshooting/diagnostic helpers. */ diff --git a/include/linux/elf-em.h b/include/linux/elf-em.h index 6a5796c81c9..666e0a5f00f 100644 --- a/include/linux/elf-em.h +++ b/include/linux/elf-em.h @@ -31,6 +31,7 @@ #define EM_M32R 88 /* Renesas M32R */ #define EM_H8_300 46 /* Renesas H8/300,300H,H8S */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ +#define EM_AVR32 0x18ad /* Atmel AVR32 */ /* * This is an interim value that we will use until the committee comes diff --git a/include/linux/elfnote.h b/include/linux/elfnote.h new file mode 100644 index 00000000000..67396db141e --- /dev/null +++ b/include/linux/elfnote.h @@ -0,0 +1,90 @@ +#ifndef _LINUX_ELFNOTE_H +#define _LINUX_ELFNOTE_H +/* + * Helper macros to generate ELF Note structures, which are put into a + * PT_NOTE segment of the final vmlinux image. These are useful for + * including name-value pairs of metadata into the kernel binary (or + * modules?) for use by external programs. + * + * Each note has three parts: a name, a type and a desc. The name is + * intended to distinguish the note's originator, so it would be a + * company, project, subsystem, etc; it must be in a suitable form for + * use in a section name. The type is an integer which is used to tag + * the data, and is considered to be within the "name" namespace (so + * "FooCo"'s type 42 is distinct from "BarProj"'s type 42). The + * "desc" field is the actual data. There are no constraints on the + * desc field's contents, though typically they're fairly small. + * + * All notes from a given NAME are put into a section named + * .note.NAME. When the kernel image is finally linked, all the notes + * are packed into a single .notes section, which is mapped into the + * PT_NOTE segment. Because notes for a given name are grouped into + * the same section, they'll all be adjacent the output file. + * + * This file defines macros for both C and assembler use. Their + * syntax is slightly different, but they're semantically similar. + * + * See the ELF specification for more detail about ELF notes. + */ + +#ifdef __ASSEMBLER__ +/* + * Generate a structure with the same shape as Elf{32,64}_Nhdr (which + * turn out to be the same size and shape), followed by the name and + * desc data with appropriate padding. The 'desctype' argument is the + * assembler pseudo op defining the type of the data e.g. .asciz while + * 'descdata' is the data itself e.g. "hello, world". + * + * e.g. ELFNOTE(XYZCo, 42, .asciz, "forty-two") + * ELFNOTE(XYZCo, 12, .long, 0xdeadbeef) + */ +#define ELFNOTE(name, type, desctype, descdata) \ +.pushsection .note.name ; \ + .align 4 ; \ + .long 2f - 1f /* namesz */ ; \ + .long 4f - 3f /* descsz */ ; \ + .long type ; \ +1:.asciz "name" ; \ +2:.align 4 ; \ +3:desctype descdata ; \ +4:.align 4 ; \ +.popsection ; +#else /* !__ASSEMBLER__ */ +#include <linux/elf.h> +/* + * Use an anonymous structure which matches the shape of + * Elf{32,64}_Nhdr, but includes the name and desc data. The size and + * type of name and desc depend on the macro arguments. "name" must + * be a literal string, and "desc" must be passed by value. You may + * only define one note per line, since __LINE__ is used to generate + * unique symbols. + */ +#define _ELFNOTE_PASTE(a,b) a##b +#define _ELFNOTE(size, name, unique, type, desc) \ + static const struct { \ + struct elf##size##_note _nhdr; \ + unsigned char _name[sizeof(name)] \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + typeof(desc) _desc \ + __attribute__((aligned(sizeof(Elf##size##_Word)))); \ + } _ELFNOTE_PASTE(_note_, unique) \ + __attribute_used__ \ + __attribute__((section(".note." name), \ + aligned(sizeof(Elf##size##_Word)), \ + unused)) = { \ + { \ + sizeof(name), \ + sizeof(desc), \ + type, \ + }, \ + name, \ + desc \ + } +#define ELFNOTE(size, name, type, desc) \ + _ELFNOTE(size, name, __LINE__, type, desc) + +#define ELFNOTE32(name, type, desc) ELFNOTE(32, name, type, desc) +#define ELFNOTE64(name, type, desc) ELFNOTE(64, name, type, desc) +#endif /* __ASSEMBLER__ */ + +#endif /* _LINUX_ELFNOTE_H */ diff --git a/include/linux/gfp.h b/include/linux/gfp.h index cc9e6084448..8b34aabfe4c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -9,17 +9,16 @@ struct vm_area_struct; /* * GFP bitmasks.. + * + * Zone modifiers (see linux/mmzone.h - low three bits) + * + * Do not put any conditional on these. If necessary modify the definitions + * without the underscores and use the consistently. The definitions here may + * be used in bit comparisons. */ -/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */ #define __GFP_DMA ((__force gfp_t)0x01u) #define __GFP_HIGHMEM ((__force gfp_t)0x02u) -#ifdef CONFIG_DMA_IS_DMA32 -#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */ -#elif BITS_PER_LONG < 64 -#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */ -#else -#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */ -#endif +#define __GFP_DMA32 ((__force gfp_t)0x04u) /* * Action modifiers - doesn't change the zoning @@ -46,6 +45,7 @@ struct vm_area_struct; #define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */ +#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) @@ -54,7 +54,7 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_HARDWALL) + __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE) /* This equals 0, but use constants in case they ever change */ #define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) @@ -67,6 +67,8 @@ struct vm_area_struct; #define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ __GFP_HIGHMEM) +#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY) + /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ @@ -76,11 +78,19 @@ struct vm_area_struct; #define GFP_DMA32 __GFP_DMA32 -static inline int gfp_zone(gfp_t gfp) +static inline enum zone_type gfp_zone(gfp_t flags) { - int zone = GFP_ZONEMASK & (__force int) gfp; - BUG_ON(zone >= GFP_ZONETYPES); - return zone; + if (flags & __GFP_DMA) + return ZONE_DMA; +#ifdef CONFIG_ZONE_DMA32 + if (flags & __GFP_DMA32) + return ZONE_DMA32; +#endif +#ifdef CONFIG_HIGHMEM + if (flags & __GFP_HIGHMEM) + return ZONE_HIGHMEM; +#endif + return ZONE_NORMAL; } /* diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 85ce7ef9a51..fd7d12daa94 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -24,11 +24,15 @@ static inline void flush_kernel_dcache_page(struct page *page) /* declarations for linux/mm/highmem.c */ unsigned int nr_free_highpages(void); +extern unsigned long totalhigh_pages; #else /* CONFIG_HIGHMEM */ static inline unsigned int nr_free_highpages(void) { return 0; } +#define totalhigh_pages 0 + +#ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) { might_sleep(); @@ -41,6 +45,7 @@ static inline void *kmap(struct page *page) #define kunmap_atomic(addr, idx) do { } while (0) #define kmap_atomic_pfn(pfn, idx) page_address(pfn_to_page(pfn)) #define kmap_atomic_to_page(ptr) virt_to_page(ptr) +#endif #endif /* CONFIG_HIGHMEM */ diff --git a/include/linux/irq.h b/include/linux/irq.h index fbf6d901e9c..48d3cb3b6a4 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -320,7 +320,9 @@ handle_irq_name(void fastcall (*handle)(unsigned int, struct irq_desc *, * Monolithic do_IRQ implementation. * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly) */ +#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ extern fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs); +#endif /* * Architectures call this to let the generic IRQ layer @@ -332,10 +334,14 @@ static inline void generic_handle_irq(unsigned int irq, struct pt_regs *regs) { struct irq_desc *desc = irq_desc + irq; +#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ + desc->handle_irq(irq, desc, regs); +#else if (likely(desc->handle_irq)) desc->handle_irq(irq, desc, regs); else __do_IRQ(irq, regs); +#endif } /* Handling of unhandled and spurious interrupts: */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1ff9609300b..4fa373bb18a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -33,6 +33,7 @@ extern const char linux_banner[]; #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) #define KERN_EMERG "<0>" /* system is unusable */ diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 2d229327959..bcd9cd173c2 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -20,6 +20,7 @@ #include <linux/types.h> #include <linux/list.h> #include <linux/sysfs.h> +#include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/rwsem.h> #include <linux/kref.h> @@ -71,12 +72,12 @@ static inline const char * kobject_name(const struct kobject * kobj) extern void kobject_init(struct kobject *); extern void kobject_cleanup(struct kobject *); -extern int kobject_add(struct kobject *); +extern int __must_check kobject_add(struct kobject *); extern void kobject_del(struct kobject *); -extern int kobject_rename(struct kobject *, const char *new_name); +extern int __must_check kobject_rename(struct kobject *, const char *new_name); -extern int kobject_register(struct kobject *); +extern int __must_check kobject_register(struct kobject *); extern void kobject_unregister(struct kobject *); extern struct kobject * kobject_get(struct kobject *); @@ -128,8 +129,8 @@ struct kset { extern void kset_init(struct kset * k); -extern int kset_add(struct kset * k); -extern int kset_register(struct kset * k); +extern int __must_check kset_add(struct kset * k); +extern int __must_check kset_register(struct kset * k); extern void kset_unregister(struct kset * k); static inline struct kset * to_kset(struct kobject * kobj) @@ -239,7 +240,7 @@ extern struct subsystem hypervisor_subsys; (obj)->subsys.kset.kobj.kset = &(_subsys).kset extern void subsystem_init(struct subsystem *); -extern int subsystem_register(struct subsystem *); +extern int __must_check subsystem_register(struct subsystem *); extern void subsystem_unregister(struct subsystem *); static inline struct subsystem * subsys_get(struct subsystem * s) @@ -258,7 +259,8 @@ struct subsys_attribute { ssize_t (*store)(struct subsystem *, const char *, size_t); }; -extern int subsys_create_file(struct subsystem * , struct subsys_attribute *); +extern int __must_check subsys_create_file(struct subsystem * , + struct subsys_attribute *); #if defined(CONFIG_HOTPLUG) void kobject_uevent(struct kobject *kobj, enum kobject_action action); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 72440f0a443..09f0f575ddf 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -162,9 +162,9 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr); extern unsigned slab_node(struct mempolicy *policy); -extern int policy_zone; +extern enum zone_type policy_zone; -static inline void check_highest_zone(int k) +static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone) policy_zone = k; diff --git a/include/linux/mm.h b/include/linux/mm.h index 224178a000d..856f0ee7e84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -15,6 +15,7 @@ #include <linux/fs.h> #include <linux/mutex.h> #include <linux/debug_locks.h> +#include <linux/backing-dev.h> struct mempolicy; struct anon_vma; @@ -218,7 +219,8 @@ struct inode; * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the * moment. Note that we have no way to track which tasks are using - * a page. + * a page, though if it is a pagecache page, rmap structures can tell us + * who is mapping it. */ struct page { unsigned long flags; /* Atomic flags, some possibly @@ -278,6 +280,12 @@ struct page { */ #include <linux/page-flags.h> +#ifdef CONFIG_DEBUG_VM +#define VM_BUG_ON(cond) BUG_ON(cond) +#else +#define VM_BUG_ON(condition) do { } while(0) +#endif + /* * Methods to modify the page usage count. * @@ -292,12 +300,11 @@ struct page { */ /* - * Drop a ref, return true if the logical refcount fell to zero (the page has - * no users) + * Drop a ref, return true if the refcount fell to zero (the page has no users) */ static inline int put_page_testzero(struct page *page) { - BUG_ON(atomic_read(&page->_count) == 0); + VM_BUG_ON(atomic_read(&page->_count) == 0); return atomic_dec_and_test(&page->_count); } @@ -307,11 +314,10 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { + VM_BUG_ON(PageCompound(page)); return atomic_inc_not_zero(&page->_count); } -extern void FASTCALL(__page_cache_release(struct page *)); - static inline int page_count(struct page *page) { if (unlikely(PageCompound(page))) @@ -323,6 +329,7 @@ static inline void get_page(struct page *page) { if (unlikely(PageCompound(page))) page = (struct page *)page_private(page); + VM_BUG_ON(atomic_read(&page->_count) == 0); atomic_inc(&page->_count); } @@ -349,43 +356,55 @@ void split_page(struct page *page, unsigned int order); * For the non-reserved pages, page_count(page) denotes a reference count. * page_count() == 0 means the page is free. page->lru is then used for * freelist management in the buddy allocator. - * page_count() == 1 means the page is used for exactly one purpose - * (e.g. a private data page of one process). + * page_count() > 0 means the page has been allocated. + * + * Pages are allocated by the slab allocator in order to provide memory + * to kmalloc and kmem_cache_alloc. In this case, the management of the + * page, and the fields in 'struct page' are the responsibility of mm/slab.c + * unless a particular usage is carefully commented. (the responsibility of + * freeing the kmalloc memory is the caller's, of course). * - * A page may be used for kmalloc() or anyone else who does a - * __get_free_page(). In this case the page_count() is at least 1, and - * all other fields are unused but should be 0 or NULL. The - * management of this page is the responsibility of the one who uses - * it. + * A page may be used by anyone else who does a __get_free_page(). + * In this case, page_count still tracks the references, and should only + * be used through the normal accessor functions. The top bits of page->flags + * and page->virtual store page management information, but all other fields + * are unused and could be used privately, carefully. The management of this + * page is the responsibility of the one who allocated it, and those who have + * subsequently been given references to it. * - * The other pages (we may call them "process pages") are completely + * The other pages (we may call them "pagecache pages") are completely * managed by the Linux memory manager: I/O, buffers, swapping etc. * The following discussion applies only to them. * - * A page may belong to an inode's memory mapping. In this case, - * page->mapping is the pointer to the inode, and page->index is the - * file offset of the page, in units of PAGE_CACHE_SIZE. + * A pagecache page contains an opaque `private' member, which belongs to the + * page's address_space. Usually, this is the address of a circular list of + * the page's disk buffers. PG_private must be set to tell the VM to call + * into the filesystem to release these pages. * - * A page contains an opaque `private' member, which belongs to the - * page's address_space. Usually, this is the address of a circular - * list of the page's disk buffers. + * A page may belong to an inode's memory mapping. In this case, page->mapping + * is the pointer to the inode, and page->index is the file offset of the page, + * in units of PAGE_CACHE_SIZE. * - * For pages belonging to inodes, the page_count() is the number of - * attaches, plus 1 if `private' contains something, plus one for - * the page cache itself. + * If pagecache pages are not associated with an inode, they are said to be + * anonymous pages. These may become associated with the swapcache, and in that + * case PG_swapcache is set, and page->private is an offset into the swapcache. * - * Instead of keeping dirty/clean pages in per address-space lists, we instead - * now tag pages as dirty/under writeback in the radix tree. + * In either case (swapcache or inode backed), the pagecache itself holds one + * reference to the page. Setting PG_private should also increment the + * refcount. The each user mapping also has a reference to the page. * - * There is also a per-mapping radix tree mapping index to the page - * in memory if present. The tree is rooted at mapping->root. + * The pagecache pages are stored in a per-mapping radix tree, which is + * rooted at mapping->page_tree, and indexed by offset. + * Where 2.4 and early 2.6 kernels kept dirty/clean pages in per-address_space + * lists, we instead now tag pages as dirty/writeback in the radix tree. * - * All process pages can do I/O: + * All pagecache pages may be subject to I/O: * - inode pages may need to be read from disk, * - inode pages which have been modified and are MAP_SHARED may need - * to be written to disk, - * - private pages which have been modified may need to be swapped out - * to swap space and (later) to be read back into memory. + * to be written back to the inode on disk, + * - anonymous pages (including MAP_PRIVATE file mappings) which have been + * modified may need to be swapped out to swap space and (later) to be read + * back into memory. */ /* @@ -463,7 +482,7 @@ void split_page(struct page *page, unsigned int order); #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) #define ZONETABLE_MASK ((1UL << ZONETABLE_SHIFT) - 1) -static inline unsigned long page_zonenum(struct page *page) +static inline enum zone_type page_zonenum(struct page *page) { return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK; } @@ -480,23 +499,29 @@ static inline struct zone *page_zone(struct page *page) return zone_table[page_zone_id(page)]; } +static inline unsigned long zone_to_nid(struct zone *zone) +{ + return zone->zone_pgdat->node_id; +} + static inline unsigned long page_to_nid(struct page *page) { if (FLAGS_HAS_NODE) return (page->flags >> NODES_PGSHIFT) & NODES_MASK; else - return page_zone(page)->zone_pgdat->node_id; + return zone_to_nid(page_zone(page)); } static inline unsigned long page_to_section(struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } -static inline void set_page_zone(struct page *page, unsigned long zone) +static inline void set_page_zone(struct page *page, enum zone_type zone) { page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT); page->flags |= (zone & ZONES_MASK) << ZONES_PGSHIFT; } + static inline void set_page_node(struct page *page, unsigned long node) { page->flags &= ~(NODES_MASK << NODES_PGSHIFT); @@ -508,7 +533,7 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline void set_page_links(struct page *page, unsigned long zone, +static inline void set_page_links(struct page *page, enum zone_type zone, unsigned long node, unsigned long pfn) { set_page_zone(page, zone); @@ -802,6 +827,39 @@ struct shrinker; extern struct shrinker *set_shrinker(int, shrinker_t); extern void remove_shrinker(struct shrinker *shrinker); +/* + * Some shared mappigns will want the pages marked read-only + * to track write events. If so, we'll downgrade vm_page_prot + * to the private version (using protection_map[] without the + * VM_SHARED bit). + */ +static inline int vma_wants_writenotify(struct vm_area_struct *vma) +{ + unsigned int vm_flags = vma->vm_flags; + + /* If it was private or non-writable, the write bit is already clear */ + if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED))) + return 0; + + /* The backer wishes to know when pages are first written to? */ + if (vma->vm_ops && vma->vm_ops->page_mkwrite) + return 1; + + /* The open routine did something to the protections already? */ + if (pgprot_val(vma->vm_page_prot) != + pgprot_val(protection_map[vm_flags & + (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)])) + return 0; + + /* Specialty mapping? */ + if (vm_flags & (VM_PFNMAP|VM_INSERTPAGE)) + return 0; + + /* Can the mapping track the dirty pages? */ + return vma->vm_file && vma->vm_file->f_mapping && + mapping_cap_account_dirty(vma->vm_file->f_mapping); +} + extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f45163c528e..3693f1a5278 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -51,7 +51,8 @@ enum zone_stat_item { NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. only modified from process context */ NR_FILE_PAGES, - NR_SLAB, /* Pages used by slab allocator */ + NR_SLAB_RECLAIMABLE, + NR_SLAB_UNRECLAIMABLE, NR_PAGETABLE, /* used for pagetables */ NR_FILE_DIRTY, NR_WRITEBACK, @@ -88,53 +89,68 @@ struct per_cpu_pageset { #define zone_pcp(__z, __cpu) (&(__z)->pageset[(__cpu)]) #endif -#define ZONE_DMA 0 -#define ZONE_DMA32 1 -#define ZONE_NORMAL 2 -#define ZONE_HIGHMEM 3 - -#define MAX_NR_ZONES 4 /* Sync this with ZONES_SHIFT */ -#define ZONES_SHIFT 2 /* ceil(log2(MAX_NR_ZONES)) */ - +enum zone_type { + /* + * ZONE_DMA is used when there are devices that are not able + * to do DMA to all of addressable memory (ZONE_NORMAL). Then we + * carve out the portion of memory that is needed for these devices. + * The range is arch specific. + * + * Some examples + * + * Architecture Limit + * --------------------------- + * parisc, ia64, sparc <4G + * s390 <2G + * arm26 <48M + * arm Various + * alpha Unlimited or 0-16MB. + * + * i386, x86_64 and multiple other arches + * <16M. + */ + ZONE_DMA, +#ifdef CONFIG_ZONE_DMA32 + /* + * x86_64 needs two ZONE_DMAs because it supports devices that are + * only able to do DMA to the lower 16M but also 32 bit devices that + * can only do DMA areas below 4G. + */ + ZONE_DMA32, +#endif + /* + * Normal addressable memory is in ZONE_NORMAL. DMA operations can be + * performed on pages in ZONE_NORMAL if the DMA devices support + * transfers to all addressable memory. + */ + ZONE_NORMAL, +#ifdef CONFIG_HIGHMEM + /* + * A memory area that is only addressable by the kernel through + * mapping portions into its own address space. This is for example + * used by i386 to allow the kernel to address the memory beyond + * 900MB. The kernel will set up special mappings (page + * table entries on i386) for each page that the kernel needs to + * access. + */ + ZONE_HIGHMEM, +#endif + MAX_NR_ZONES +}; /* * When a memory allocation must conform to specific limitations (such * as being suitable for DMA) the caller will pass in hints to the * allocator in the gfp_mask, in the zone modifier bits. These bits * are used to select a priority ordered list of memory zones which - * match the requested limits. GFP_ZONEMASK defines which bits within - * the gfp_mask should be considered as zone modifiers. Each valid - * combination of the zone modifier bits has a corresponding list - * of zones (in node_zonelists). Thus for two zone modifiers there - * will be a maximum of 4 (2 ** 2) zonelists, for 3 modifiers there will - * be 8 (2 ** 3) zonelists. GFP_ZONETYPES defines the number of possible - * combinations of zone modifiers in "zone modifier space". - * - * As an optimisation any zone modifier bits which are only valid when - * no other zone modifier bits are set (loners) should be placed in - * the highest order bits of this field. This allows us to reduce the - * extent of the zonelists thus saving space. For example in the case - * of three zone modifier bits, we could require up to eight zonelists. - * If the left most zone modifier is a "loner" then the highest valid - * zonelist would be four allowing us to allocate only five zonelists. - * Use the first form for GFP_ZONETYPES when the left most bit is not - * a "loner", otherwise use the second. - * - * NOTE! Make sure this matches the zones in <linux/gfp.h> + * match the requested limits. See gfp_zone() in include/linux/gfp.h */ -#define GFP_ZONEMASK 0x07 -/* #define GFP_ZONETYPES (GFP_ZONEMASK + 1) */ /* Non-loner */ -#define GFP_ZONETYPES ((GFP_ZONEMASK + 1) / 2 + 1) /* Loner */ -/* - * On machines where it is needed (eg PCs) we divide physical memory - * into multiple physical zones. On a 32bit PC we have 4 zones: - * - * ZONE_DMA < 16 MB ISA DMA capable memory - * ZONE_DMA32 0 MB Empty - * ZONE_NORMAL 16-896 MB direct mapped by the kernel - * ZONE_HIGHMEM > 896 MB only page cache and user processes - */ +#if !defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_HIGHMEM) +#define ZONES_SHIFT 1 +#else +#define ZONES_SHIFT 2 +#endif struct zone { /* Fields commonly accessed by the page allocator */ @@ -154,7 +170,8 @@ struct zone { /* * zone reclaim becomes active if more unmapped pages exist. */ - unsigned long min_unmapped_ratio; + unsigned long min_unmapped_pages; + unsigned long min_slab_pages; struct per_cpu_pageset *pageset[NR_CPUS]; #else struct per_cpu_pageset pageset[NR_CPUS]; @@ -266,7 +283,6 @@ struct zone { char *name; } ____cacheline_internodealigned_in_smp; - /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the @@ -304,7 +320,7 @@ struct zonelist { struct bootmem_data; typedef struct pglist_data { struct zone node_zones[MAX_NR_ZONES]; - struct zonelist node_zonelists[GFP_ZONETYPES]; + struct zonelist node_zonelists[MAX_NR_ZONES]; int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP struct page *node_mem_map; @@ -373,12 +389,16 @@ static inline int populated_zone(struct zone *zone) return (!!zone->present_pages); } -static inline int is_highmem_idx(int idx) +static inline int is_highmem_idx(enum zone_type idx) { +#ifdef CONFIG_HIGHMEM return (idx == ZONE_HIGHMEM); +#else + return 0; +#endif } -static inline int is_normal_idx(int idx) +static inline int is_normal_idx(enum zone_type idx) { return (idx == ZONE_NORMAL); } @@ -391,7 +411,11 @@ static inline int is_normal_idx(int idx) */ static inline int is_highmem(struct zone *zone) { +#ifdef CONFIG_HIGHMEM return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM; +#else + return 0; +#endif } static inline int is_normal(struct zone *zone) @@ -401,7 +425,11 @@ static inline int is_normal(struct zone *zone) static inline int is_dma32(struct zone *zone) { +#ifdef CONFIG_ZONE_DMA32 return zone == zone->zone_pgdat->node_zones + ZONE_DMA32; +#else + return 0; +#endif } static inline int is_dma(struct zone *zone) @@ -421,6 +449,8 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, struct file void __user *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, + struct file *, void __user *, size_t *, loff_t *); #include <linux/topology.h> /* Returns the number of the current Node. */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 5748642e9f3..9d7921dd50f 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -13,24 +13,25 @@ * PG_reserved is set for special pages, which can never be swapped out. Some * of them might not even exist (eg empty_bad_page)... * - * The PG_private bitflag is set if page->private contains a valid value. + * The PG_private bitflag is set on pagecache pages if they contain filesystem + * specific data (which is normally at page->private). It can be used by + * private allocations for its own usage. * - * During disk I/O, PG_locked is used. This bit is set before I/O and - * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks - * waiting for the I/O on this page to complete. + * During initiation of disk I/O, PG_locked is set. This bit is set before I/O + * and cleared when writeback _starts_ or when read _completes_. PG_writeback + * is set before writeback starts and cleared when it finishes. + * + * PG_locked also pins a page in pagecache, and blocks truncation of the file + * while it is held. + * + * page_waitqueue(page) is a wait queue of all tasks waiting for the page + * to become unlocked. * * PG_uptodate tells whether the page's contents is valid. When a read * completes, the page becomes uptodate, unless a disk I/O error happened. * - * For choosing which pages to swap out, inode pages carry a PG_referenced bit, - * which is set any time the system accesses that page through the (mapping, - * index) hash table. This referenced bit, together with the referenced bit - * in the page tables, is used to manipulate page->age and move the page across - * the active, inactive_dirty and inactive_clean lists. - * - * Note that the referenced bit, the page->lru list_head and the active, - * inactive_dirty and inactive_clean lists are protected by the - * zone->lru_lock, and *NOT* by the usual PG_locked bit! + * PG_referenced, PG_reclaim are used for page reclaim for anonymous and + * file-backed pagecache (see mm/vmscan.c). * * PG_error is set to indicate that an I/O error occurred on this page. * @@ -42,6 +43,10 @@ * space, they need to be kmapped separately for doing IO on the pages. The * struct page (these bits with information) are always mapped into kernel * address space... + * + * PG_buddy is set to indicate that the page is free and in the buddy system + * (see mm/page_alloc.c). + * */ /* @@ -74,7 +79,7 @@ #define PG_checked 8 /* kill me in 2.5.<early>. */ #define PG_arch_1 9 #define PG_reserved 10 -#define PG_private 11 /* Has something at ->private */ +#define PG_private 11 /* If pagecache, has fs-private data */ #define PG_writeback 12 /* Page is under writeback */ #define PG_nosave 13 /* Used for system suspend/resume */ @@ -83,7 +88,7 @@ #define PG_mappedtodisk 16 /* Has blocks allocated on-disk */ #define PG_reclaim 17 /* To be reclaimed asap */ -#define PG_nosave_free 18 /* Free, should not be written */ +#define PG_nosave_free 18 /* Used for system suspend/resume */ #define PG_buddy 19 /* Page is free, on buddy lists */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0a2f5d27f60..64f95092515 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -130,14 +130,29 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, } extern void FASTCALL(__lock_page(struct page *page)); +extern void FASTCALL(__lock_page_nosync(struct page *page)); extern void FASTCALL(unlock_page(struct page *page)); +/* + * lock_page may only be called if we have the page's inode pinned. + */ static inline void lock_page(struct page *page) { might_sleep(); if (TestSetPageLocked(page)) __lock_page(page); } + +/* + * lock_page_nosync should only be used if we can't pin the page's inode. + * Doesn't play quite so well with block device plugging. + */ +static inline void lock_page_nosync(struct page *page) +{ + might_sleep(); + if (TestSetPageLocked(page)) + __lock_page_nosync(page); +} /* * This is exported only for wait_on_page_locked/wait_on_page_writeback. diff --git a/include/linux/pci.h b/include/linux/pci.h index 8565b81d7fb..3ec72551ac3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -49,6 +49,7 @@ #include <linux/types.h> #include <linux/ioport.h> #include <linux/list.h> +#include <linux/compiler.h> #include <linux/errno.h> #include <linux/device.h> @@ -346,6 +347,8 @@ struct pci_driver { int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ + int (*suspend_late) (struct pci_dev *dev, pm_message_t state); + int (*resume_early) (struct pci_dev *dev); int (*resume) (struct pci_dev *dev); /* Device woken up */ int (*enable_wake) (struct pci_dev *dev, pci_power_t state, int enable); /* Enable wake event */ void (*shutdown) (struct pci_dev *dev); @@ -401,7 +404,7 @@ extern struct list_head pci_root_buses; /* list of all known PCI buses */ extern struct list_head pci_devices; /* list of all devices */ void pcibios_fixup_bus(struct pci_bus *); -int pcibios_enable_device(struct pci_dev *, int mask); +int __must_check pcibios_enable_device(struct pci_dev *, int mask); char *pcibios_setup (char *str); /* Used only when drivers/pci/setup.c is used */ @@ -488,19 +491,19 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val); } -int pci_enable_device(struct pci_dev *dev); -int pci_enable_device_bars(struct pci_dev *dev, int mask); +int __must_check pci_enable_device(struct pci_dev *dev); +int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask); void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); #define HAVE_PCI_SET_MWI -int pci_set_mwi(struct pci_dev *dev); +int __must_check pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); -int pci_assign_resource(struct pci_dev *dev, int i); -int pci_assign_resource_fixed(struct pci_dev *dev, int i); +int __must_check pci_assign_resource(struct pci_dev *dev, int i); +int __must_check pci_assign_resource_fixed(struct pci_dev *dev, int i); void pci_restore_bars(struct pci_dev *dev); /* ROM control related routines */ @@ -526,23 +529,24 @@ void pdev_sort_resources(struct pci_dev *, struct resource_list *); void pci_fixup_irqs(u8 (*)(struct pci_dev *, u8 *), int (*)(struct pci_dev *, u8, u8)); #define HAVE_PCI_REQ_REGIONS 2 -int pci_request_regions(struct pci_dev *, const char *); +int __must_check pci_request_regions(struct pci_dev *, const char *); void pci_release_regions(struct pci_dev *); -int pci_request_region(struct pci_dev *, int, const char *); +int __must_check pci_request_region(struct pci_dev *, int, const char *); void pci_release_region(struct pci_dev *, int); /* drivers/pci/bus.c */ -int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, - resource_size_t size, resource_size_t align, - resource_size_t min, unsigned int type_mask, - void (*alignf)(void *, struct resource *, - resource_size_t, resource_size_t), - void *alignf_data); +int __must_check pci_bus_alloc_resource(struct pci_bus *bus, + struct resource *res, resource_size_t size, + resource_size_t align, resource_size_t min, + unsigned int type_mask, + void (*alignf)(void *, struct resource *, + resource_size_t, resource_size_t), + void *alignf_data); void pci_enable_bridges(struct pci_bus *bus); /* Proper probing supporting hot-pluggable devices */ -int __pci_register_driver(struct pci_driver *, struct module *); -static inline int pci_register_driver(struct pci_driver *driver) +int __must_check __pci_register_driver(struct pci_driver *, struct module *); +static inline int __must_check pci_register_driver(struct pci_driver *driver) { return __pci_register_driver(driver, THIS_MODULE); } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index cb9039a21f2..3835a9642f1 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -1,9 +1,12 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H + #include <linux/spinlock.h> /* For preempt_disable() */ #include <linux/slab.h> /* For kmalloc() */ #include <linux/smp.h> #include <linux/string.h> /* For memset() */ +#include <linux/cpumask.h> + #include <asm/percpu.h> /* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ @@ -11,8 +14,14 @@ #define PERCPU_ENOUGH_ROOM 32768 #endif -/* Must be an lvalue. */ -#define get_cpu_var(var) (*({ preempt_disable(); &__get_cpu_var(var); })) +/* + * Must be an lvalue. Since @var must be a simple identifier, + * we force a syntax error here if it isn't. + */ +#define get_cpu_var(var) (*({ \ + extern int simple_indentifier_##var(void); \ + preempt_disable(); \ + &__get_cpu_var(var); })) #define put_cpu_var(var) preempt_enable() #ifdef CONFIG_SMP @@ -21,39 +30,77 @@ struct percpu_data { void *ptrs[NR_CPUS]; }; +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) /* - * Use this to get to a cpu's version of the per-cpu object allocated using - * alloc_percpu. Non-atomic access to the current CPU's version should + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should * probably be combined with get_cpu()/put_cpu(). */ -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = (struct percpu_data *)~(unsigned long)(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ +#define percpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -extern void *__alloc_percpu(size_t size); -extern void free_percpu(const void *); +extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu); +extern void percpu_depopulate(void *__pdata, int cpu); +extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask); +extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask); +extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); +extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) + +static inline void percpu_depopulate(void *__pdata, int cpu) +{ +} + +static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask) +{ +} -static inline void *__alloc_percpu(size_t size) +static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, + int cpu) { - void *ret = kmalloc(size, GFP_KERNEL); - if (ret) - memset(ret, 0, size); - return ret; + return percpu_ptr(__pdata, cpu); } -static inline void free_percpu(const void *ptr) -{ - kfree(ptr); + +static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp, + cpumask_t *mask) +{ + return 0; +} + +static inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +{ + return kzalloc(size, gfp); +} + +static inline void percpu_free(void *__pdata) +{ + kfree(__pdata); } #endif /* CONFIG_SMP */ -/* Simple wrapper for the common case: zeros memory. */ -#define alloc_percpu(type) ((type *)(__alloc_percpu(sizeof(type)))) +#define percpu_populate_mask(__pdata, size, gfp, mask) \ + __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) +#define percpu_depopulate_mask(__pdata, mask) \ + __percpu_depopulate_mask((__pdata), &(mask)) +#define percpu_alloc_mask(size, gfp, mask) \ + __percpu_alloc_mask((size), (gfp), &(mask)) + +#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) + +/* (legacy) interface for use without CPU hotplug handling */ + +#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ + cpu_possible_map) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define free_percpu(ptr) percpu_free((ptr)) +#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) #endif /* __LINUX_PERCPU_H */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 782090c6893..29cd6dee13d 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -49,6 +49,8 @@ struct platform_driver { int (*remove)(struct platform_device *); void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); + int (*suspend_late)(struct platform_device *, pm_message_t state); + int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); struct device_driver driver; }; diff --git a/include/linux/pm.h b/include/linux/pm.h index 658c1b93d5b..6b27e07aef1 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -142,29 +142,61 @@ typedef struct pm_message { } pm_message_t; /* - * There are 4 important states driver can be in: - * ON -- driver is working - * FREEZE -- stop operations and apply whatever policy is applicable to a - * suspended driver of that class, freeze queues for block like IDE - * does, drop packets for ethernet, etc... stop DMA engine too etc... - * so a consistent image can be saved; but do not power any hardware - * down. - * SUSPEND - like FREEZE, but hardware is doing as much powersaving as - * possible. Roughly pci D3. + * Several driver power state transitions are externally visible, affecting + * the state of pending I/O queues and (for drivers that touch hardware) + * interrupts, wakeups, DMA, and other hardware state. There may also be + * internal transitions to various low power modes, which are transparent + * to the rest of the driver stack (such as a driver that's ON gating off + * clocks which are not in active use). * - * Unfortunately, current drivers only recognize numeric values 0 (ON) and 3 - * (SUSPEND). We'll need to fix the drivers. So yes, putting 3 to all different - * defines is intentional, and will go away as soon as drivers are fixed. Also - * note that typedef is neccessary, we'll probably want to switch to - * typedef struct pm_message_t { int event; int flags; } pm_message_t - * or something similar soon. + * One transition is triggered by resume(), after a suspend() call; the + * message is implicit: + * + * ON Driver starts working again, responding to hardware events + * and software requests. The hardware may have gone through + * a power-off reset, or it may have maintained state from the + * previous suspend() which the driver will rely on while + * resuming. On most platforms, there are no restrictions on + * availability of resources like clocks during resume(). + * + * Other transitions are triggered by messages sent using suspend(). All + * these transitions quiesce the driver, so that I/O queues are inactive. + * That commonly entails turning off IRQs and DMA; there may be rules + * about how to quiesce that are specific to the bus or the device's type. + * (For example, network drivers mark the link state.) Other details may + * differ according to the message: + * + * SUSPEND Quiesce, enter a low power device state appropriate for + * the upcoming system state (such as PCI_D3hot), and enable + * wakeup events as appropriate. + * + * FREEZE Quiesce operations so that a consistent image can be saved; + * but do NOT otherwise enter a low power device state, and do + * NOT emit system wakeup events. + * + * PRETHAW Quiesce as if for FREEZE; additionally, prepare for restoring + * the system from a snapshot taken after an earlier FREEZE. + * Some drivers will need to reset their hardware state instead + * of preserving it, to ensure that it's never mistaken for the + * state which that earlier snapshot had set up. + * + * A minimally power-aware driver treats all messages as SUSPEND, fully + * reinitializes its device during resume() -- whether or not it was reset + * during the suspend/resume cycle -- and can't issue wakeup events. + * + * More power-aware drivers may also use low power states at runtime as + * well as during system sleep states like PM_SUSPEND_STANDBY. They may + * be able to use wakeup events to exit from runtime low-power states, + * or from system low-power states such as standby or suspend-to-RAM. */ #define PM_EVENT_ON 0 #define PM_EVENT_FREEZE 1 #define PM_EVENT_SUSPEND 2 +#define PM_EVENT_PRETHAW 3 #define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) +#define PMSG_PRETHAW ((struct pm_message){ .event = PM_EVENT_PRETHAW, }) #define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) #define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) @@ -190,6 +222,7 @@ extern void device_resume(void); extern suspend_disk_method_t pm_disk_mode; extern int device_suspend(pm_message_t state); +extern int device_prepare_suspend(pm_message_t state); #define device_set_wakeup_enable(dev,val) \ ((dev)->power.should_wakeup = !!(val)) diff --git a/include/linux/resume-trace.h b/include/linux/resume-trace.h index a376bd4ade3..81e9299ca14 100644 --- a/include/linux/resume-trace.h +++ b/include/linux/resume-trace.h @@ -3,21 +3,25 @@ #ifdef CONFIG_PM_TRACE +extern int pm_trace_enabled; + struct device; extern void set_trace_device(struct device *); extern void generate_resume_trace(void *tracedata, unsigned int user); #define TRACE_DEVICE(dev) set_trace_device(dev) -#define TRACE_RESUME(user) do { \ - void *tracedata; \ - asm volatile("movl $1f,%0\n" \ - ".section .tracedata,\"a\"\n" \ - "1:\t.word %c1\n" \ - "\t.long %c2\n" \ - ".previous" \ - :"=r" (tracedata) \ - : "i" (__LINE__), "i" (__FILE__)); \ - generate_resume_trace(tracedata, user); \ +#define TRACE_RESUME(user) do { \ + if (pm_trace_enabled) { \ + void *tracedata; \ + asm volatile("movl $1f,%0\n" \ + ".section .tracedata,\"a\"\n" \ + "1:\t.word %c1\n" \ + "\t.long %c2\n" \ + ".previous" \ + :"=r" (tracedata) \ + : "i" (__LINE__), "i" (__FILE__)); \ + generate_resume_trace(tracedata, user); \ + } \ } while (0) #else diff --git a/include/linux/rmap.h b/include/linux/rmap.h index bf97b090001..db2c1df4fef 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -103,6 +103,14 @@ pte_t *page_check_address(struct page *, struct mm_struct *, */ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); +/* + * Cleans the PTEs of shared mappings. + * (and since clean PTEs should also be readonly, write protects them too) + * + * returns the number of cleaned PTEs. + */ +int page_mkclean(struct page *); + #else /* !CONFIG_MMU */ #define anon_vma_init() do {} while (0) @@ -112,6 +120,12 @@ unsigned long page_address_in_vma(struct page *, struct vm_area_struct *); #define page_referenced(page,l) TestClearPageReferenced(page) #define try_to_unmap(page, refs) SWAP_FAIL +static inline int page_mkclean(struct page *page) +{ + return 0; +} + + #endif /* CONFIG_MMU */ /* diff --git a/include/linux/selinux.h b/include/linux/selinux.h index aad4e390d6a..d1b7ca6c1c5 100644 --- a/include/linux/selinux.h +++ b/include/linux/selinux.h @@ -46,7 +46,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); /** * selinux_audit_rule_match - determine if a context ID matches a rule. - * @ctxid: the context ID to check + * @sid: the context ID to check * @field: the field this rule refers to * @op: the operater the rule uses * @rule: pointer to the audit rule to check against @@ -55,7 +55,7 @@ void selinux_audit_rule_free(struct selinux_audit_rule *rule); * Returns 1 if the context id matches the rule, 0 if it does not, and * -errno on failure. */ -int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx); @@ -70,18 +70,8 @@ int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, void selinux_audit_set_callback(int (*callback)(void)); /** - * selinux_task_ctxid - determine a context ID for a process. - * @tsk: the task object - * @ctxid: ID value returned via this - * - * On return, ctxid will contain an ID for the context. This value - * should only be used opaquely. - */ -void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); - -/** - * selinux_ctxid_to_string - map a security context ID to a string - * @ctxid: security context ID to be converted. + * selinux_sid_to_string - map a security context ID to a string + * @sid: security context ID to be converted. * @ctx: address of context string to be returned * @ctxlen: length of returned context string. * @@ -89,7 +79,7 @@ void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid); * string will be allocated internally, and the caller must call * kfree() on it after use. */ -int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen); +int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen); /** * selinux_get_inode_sid - get the inode's security context ID @@ -154,7 +144,7 @@ static inline void selinux_audit_rule_free(struct selinux_audit_rule *rule) return; } -static inline int selinux_audit_rule_match(u32 ctxid, u32 field, u32 op, +static inline int selinux_audit_rule_match(u32 sid, u32 field, u32 op, struct selinux_audit_rule *rule, struct audit_context *actx) { @@ -166,12 +156,7 @@ static inline void selinux_audit_set_callback(int (*callback)(void)) return; } -static inline void selinux_task_ctxid(struct task_struct *tsk, u32 *ctxid) -{ - *ctxid = 0; -} - -static inline int selinux_ctxid_to_string(u32 ctxid, char **ctx, u32 *ctxlen) +static inline int selinux_sid_to_string(u32 sid, char **ctx, u32 *ctxlen) { *ctx = NULL; *ctxlen = 0; diff --git a/include/linux/slab.h b/include/linux/slab.h index 45ad55b70d1..66d6eb78d1c 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -67,7 +67,6 @@ extern void *kmem_cache_zalloc(struct kmem_cache *, gfp_t); extern void kmem_cache_free(kmem_cache_t *, void *); extern unsigned int kmem_cache_size(kmem_cache_t *); extern const char *kmem_cache_name(kmem_cache_t *); -extern kmem_cache_t *kmem_find_general_cachep(size_t size, gfp_t gfpflags); /* Size description struct for general caches. */ struct cache_sizes { @@ -203,7 +202,30 @@ extern int slab_is_available(void); #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node(kmem_cache_t *, gfp_t flags, int node); -extern void *kmalloc_node(size_t size, gfp_t flags, int node); +extern void *__kmalloc_node(size_t size, gfp_t flags, int node); + +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size)) { + int i = 0; +#define CACHE(x) \ + if (size <= x) \ + goto found; \ + else \ + i++; +#include "kmalloc_sizes.h" +#undef CACHE + { + extern void __you_cannot_kmalloc_that_much(void); + __you_cannot_kmalloc_that_much(); + } +found: + return kmem_cache_alloc_node((flags & GFP_DMA) ? + malloc_sizes[i].cs_dmacachep : + malloc_sizes[i].cs_cachep, flags, node); + } + return __kmalloc_node(size, flags, node); +} #else static inline void *kmem_cache_alloc_node(kmem_cache_t *cachep, gfp_t flags, int node) { @@ -223,7 +245,6 @@ extern int FASTCALL(kmem_ptr_validate(kmem_cache_t *cachep, void *ptr)); /* SLOB allocator routines */ void kmem_cache_init(void); -struct kmem_cache *kmem_find_general_cachep(size_t, gfp_t gfpflags); struct kmem_cache *kmem_cache_create(const char *c, size_t, size_t, unsigned long, void (*)(void *, struct kmem_cache *, unsigned long), @@ -263,8 +284,6 @@ extern kmem_cache_t *fs_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; -extern atomic_t slab_reclaim_pages; - #endif /* __KERNEL__ */ #endif /* _LINUX_SLAB_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 837e8bce134..51649987f69 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -53,6 +53,9 @@ extern void smp_cpus_done(unsigned int max_cpus); */ int smp_call_function(void(*func)(void *info), void *info, int retry, int wait); +int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, + int retry, int wait); + /* * Call a function on all processors */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 96e31aa64cc..b1237f16ecd 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -10,29 +10,11 @@ #include <linux/pm.h> /* page backup entry */ -typedef struct pbe { +struct pbe { unsigned long address; /* address of the copy */ unsigned long orig_address; /* original address of page */ struct pbe *next; -} suspend_pagedir_t; - -#define for_each_pbe(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = pbe->next) - -#define PBES_PER_PAGE (PAGE_SIZE/sizeof(struct pbe)) -#define PB_PAGE_SKIP (PBES_PER_PAGE-1) - -#define for_each_pb_page(pbe, pblist) \ - for (pbe = pblist ; pbe ; pbe = (pbe+PB_PAGE_SKIP)->next) - - -#define SWAP_FILENAME_MAXLENGTH 32 - - -extern dev_t swsusp_resume_device; - -/* mm/vmscan.c */ -extern int shrink_mem(void); +}; /* mm/page_alloc.c */ extern void drain_local_pages(void); @@ -53,18 +35,10 @@ static inline void pm_restore_console(void) {} static inline int software_suspend(void) { printk("Warning: fake suspend called\n"); - return -EPERM; + return -ENOSYS; } #endif /* CONFIG_PM */ -#ifdef CONFIG_SUSPEND_SMP -extern void disable_nonboot_cpus(void); -extern void enable_nonboot_cpus(void); -#else -static inline void disable_nonboot_cpus(void) {} -static inline void enable_nonboot_cpus(void) {} -#endif - void save_processor_state(void); void restore_processor_state(void); struct saved_context; diff --git a/include/linux/swap.h b/include/linux/swap.h index 5e59184c909..e7c36ba2a2d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -10,6 +10,10 @@ #include <asm/atomic.h> #include <asm/page.h> +struct notifier_block; + +struct bio; + #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 @@ -156,13 +160,14 @@ struct swap_list_t { /* linux/mm/oom_kill.c */ extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order); +extern int register_oom_notifier(struct notifier_block *nb); +extern int unregister_oom_notifier(struct notifier_block *nb); /* linux/mm/memory.c */ extern void swapin_readahead(swp_entry_t, unsigned long, struct vm_area_struct *); /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; -extern unsigned long totalhigh_pages; extern unsigned long totalreserve_pages; extern long nr_swap_pages; extern unsigned int nr_free_pages(void); @@ -190,6 +195,7 @@ extern long vm_total_pages; #ifdef CONFIG_NUMA extern int zone_reclaim_mode; extern int sysctl_min_unmapped_ratio; +extern int sysctl_min_slab_ratio; extern int zone_reclaim(struct zone *, gfp_t, unsigned int); #else #define zone_reclaim_mode 0 @@ -212,7 +218,9 @@ extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); /* linux/mm/page_io.c */ extern int swap_readpage(struct file *, struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern int rw_swap_page_sync(int, swp_entry_t, struct page *); +extern int rw_swap_page_sync(int rw, swp_entry_t entry, struct page *page, + struct bio **bio_chain); +extern int end_swap_bio_read(struct bio *bio, unsigned int bytes_done, int err); /* linux/mm/swap_state.c */ extern struct address_space swapper_space; diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 432778446ad..1b24bd45e08 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -193,6 +193,7 @@ enum VM_MIN_UNMAPPED=32, /* Set min percent of unmapped pages */ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */ + VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */ }; diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 1ea5d3cda6a..6d5c43d31de 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -10,6 +10,7 @@ #ifndef _SYSFS_H_ #define _SYSFS_H_ +#include <linux/compiler.h> #include <asm/atomic.h> struct kobject; @@ -86,40 +87,44 @@ struct sysfs_dirent { #ifdef CONFIG_SYSFS -extern int +extern int __must_check sysfs_create_dir(struct kobject *); extern void sysfs_remove_dir(struct kobject *); -extern int +extern int __must_check sysfs_rename_dir(struct kobject *, const char *new_name); -extern int +extern int __must_check sysfs_create_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_update_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode); extern void sysfs_remove_file(struct kobject *, const struct attribute *); -extern int +extern int __must_check sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name); extern void sysfs_remove_link(struct kobject *, const char * name); -int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr); -int sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr); +int __must_check sysfs_create_bin_file(struct kobject *kobj, + struct bin_attribute *attr); +void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr); -int sysfs_create_group(struct kobject *, const struct attribute_group *); +int __must_check sysfs_create_group(struct kobject *, + const struct attribute_group *); void sysfs_remove_group(struct kobject *, const struct attribute_group *); void sysfs_notify(struct kobject * k, char *dir, char *attr); +extern int __must_check sysfs_init(void); + #else /* CONFIG_SYSFS */ static inline int sysfs_create_dir(struct kobject * k) @@ -191,6 +196,11 @@ static inline void sysfs_notify(struct kobject * k, char *dir, char *attr) { } +static inline int __must_check sysfs_init(void) +{ + return 0; +} + #endif /* CONFIG_SYSFS */ #endif /* _SYSFS_H_ */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 71b6363caaa..dee88c6b6fa 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -44,8 +44,6 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot); -extern void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, - pgprot_t prot, int node); extern void vfree(void *addr); extern void *vmap(struct page **pages, unsigned int count, diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 2d9b1b60798..176c7f79733 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -18,7 +18,19 @@ * generated will simply be the increment of a global address. */ -#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH +#ifdef CONFIG_ZONE_DMA32 +#define DMA32_ZONE(xx) xx##_DMA32, +#else +#define DMA32_ZONE(xx) +#endif + +#ifdef CONFIG_HIGHMEM +#define HIGHMEM_ZONE(xx) , xx##_HIGH +#else +#define HIGHMEM_ZONE(xx) +#endif + +#define FOR_ALL_ZONES(xx) xx##_DMA, DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), @@ -124,12 +136,10 @@ static inline unsigned long node_page_state(int node, struct zone *zones = NODE_DATA(node)->node_zones; return -#ifndef CONFIG_DMA_IS_NORMAL -#if !defined(CONFIG_DMA_IS_DMA32) && BITS_PER_LONG >= 64 +#ifdef CONFIG_ZONE_DMA32 zone_page_state(&zones[ZONE_DMA32], item) + #endif zone_page_state(&zones[ZONE_NORMAL], item) + -#endif #ifdef CONFIG_HIGHMEM zone_page_state(&zones[ZONE_HIGHMEM], item) + #endif diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 0422036af4e..56a23a0e7f2 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -116,6 +116,7 @@ int sync_page_range(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); int sync_page_range_nolock(struct inode *inode, struct address_space *mapping, loff_t pos, loff_t count); +void set_page_dirty_balance(struct page *page); /* pdflush.c */ extern int nr_pdflush_threads; /* Global so it can be exported to sysctl |