From e00320875d0cc5f8099a7227b2f25fbb3231268d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 08:48:23 +0100 Subject: x86: fix stackprotector canary updates during context switches fix a bug noticed and fixed by pageexec@freemail.hu. if built with -fstack-protector-all then we'll have canary checks built into the __switch_to() function. That does not work well with the canary-switching code there: while we already use the %rsp of the new task, we still call __switch_to() whith the previous task's canary value in the PDA, hence the __switch_to() ssp prologue instructions will store the previous canary. Then we update the PDA and upon return from __switch_to() the canary check triggers and we panic. so update the canary after we have called __switch_to(), where we are at the same stackframe level as the last stackframe of the next (and now freshly current) task. Note: this means that we call __switch_to() [and its sub-functions] still with the old canary, but that is not a problem, both the previous and the next task has a high-quality canary. The only (mostly academic) disadvantage is that the canary of one task may leak onto the stack of another task, increasing the risk of information leaks, were an attacker able to read the stack of specific tasks (but not that of others). To solve this we'll have to reorganize the way we switch tasks, and move the PDA setting into the switch_to() assembly code. That will happen in another patch. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5395a6176f4..d6a51515878 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1096,10 +1096,9 @@ struct task_struct { pid_t pid; pid_t tgid; -#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; -#endif + /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with -- cgit v1.2.3 From 9b5609fd773e6ac0b1d6d6e1bf68f32cca64e06b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:41:09 +0100 Subject: stackprotector: include files create for core kernel files to include. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/stackprotector.h | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 include/linux/stackprotector.h (limited to 'include/linux') diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h new file mode 100644 index 00000000000..d3e8bbe602f --- /dev/null +++ b/include/linux/stackprotector.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_STACKPROTECTOR_H +#define _LINUX_STACKPROTECTOR_H 1 + +#ifdef CONFIG_CC_STACKPROTECTOR +# include +#endif + +#endif -- cgit v1.2.3 From 18aa8bb12dcb10adc3d7c9d69714d53667c0ab7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:42:02 +0100 Subject: stackprotector: add boot_init_stack_canary() add the boot_init_stack_canary() and make the secondary idle threads use it. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/stackprotector.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index d3e8bbe602f..422e71aafd0 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -3,6 +3,10 @@ #ifdef CONFIG_CC_STACKPROTECTOR # include +#else +static inline void boot_init_stack_canary(void) +{ +} #endif #endif -- cgit v1.2.3 From 420594296838fdc9a674470d710cda7d1487f9f4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 14 Feb 2008 09:44:08 +0100 Subject: x86: fix the stackprotector canary of the boot CPU Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/stackprotector.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h index 422e71aafd0..6f3e54c704c 100644 --- a/include/linux/stackprotector.h +++ b/include/linux/stackprotector.h @@ -1,6 +1,10 @@ #ifndef _LINUX_STACKPROTECTOR_H #define _LINUX_STACKPROTECTOR_H 1 +#include +#include +#include + #ifdef CONFIG_CC_STACKPROTECTOR # include #else -- cgit v1.2.3 From 7c9f8861e6c9c839f913e49b98c3854daca18f27 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 22 Apr 2008 16:38:23 -0500 Subject: stackprotector: use canary at end of stack to indicate overruns at oops time (Updated with a common max-stack-used checker that knows about the canary, as suggested by Joe Perches) Use a canary at the end of the stack to clearly indicate at oops time whether the stack has ever overflowed. This is a very simple implementation with a couple of drawbacks: 1) a thread may legitimately use exactly up to the last word on the stack -- but the chances of doing this and then oopsing later seem slim 2) it's possible that the stack usage isn't dense enough that the canary location could get skipped over -- but the worst that happens is that we don't flag the overrun -- though this happens fairly often in my testing :( With the code in place, an intentionally-bloated stack oops might do: BUG: unable to handle kernel paging request at ffff8103f84cc680 IP: [] update_curr+0x9a/0xa8 PGD 8063 PUD 0 Thread overran stack or stack corrupted Oops: 0000 [1] SMP CPU 0 ... ... unless the stack overrun is so bad that it corrupts some other thread. Signed-off-by: Eric Sandeen Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/magic.h | 1 + include/linux/sched.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/magic.h b/include/linux/magic.h index 1fa0c2ce4de..74e68e20116 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -42,4 +42,5 @@ #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA +#define STACK_END_MAGIC 0x57AC6E9D #endif /* __LINUX_MAGIC_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d6a51515878..c5181e77f30 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1969,6 +1969,19 @@ static inline unsigned long *end_of_stack(struct task_struct *p) extern void thread_info_cache_init(void); +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long stack_not_used(struct task_struct *p) +{ + unsigned long *n = end_of_stack(p); + + do { /* Skip over canary */ + n++; + } while (!*n); + + return (unsigned long)n - (unsigned long)end_of_stack(p); +} +#endif + /* set thread flags in other task's structures * - see asm/thread_info.h for TIF_xxxx flags available */ -- cgit v1.2.3 From bc22c17e12c130dc929218a95aa347e0f3fd05dc Mon Sep 17 00:00:00 2001 From: Alain Knaff Date: Sun, 4 Jan 2009 22:46:16 +0100 Subject: bzip2/lzma: library support for gzip, bzip2 and lzma decompression Impact: Replaces inflate.c with a wrapper around zlib_inflate; new library code This is the first part of the bzip2/lzma patch The bzip patch is based on an idea by Christian Ludwig, includes support for compressing the kernel with bzip2 or lzma rather than gzip. Both compressors give smaller sizes than gzip. Lzma's decompresses faster than bzip2. It also supports ramdisks and initramfs' compressed using these two compressors. The functionality has been successfully used for a couple of years by the udpcast project This version applies to "tip" kernel 2.6.28 This part contains: - changed inflate.c to accomodate rest of patch - implementation of bzip2 compression (not used at this stage yet) - implementation of lzma compression (not used at this stage yet) - Makefile routines to support bzip2 and lzma kernel compression Signed-off-by: Alain Knaff Signed-off-by: H. Peter Anvin --- include/linux/decompress/bunzip2.h | 10 +++++ include/linux/decompress/generic.h | 30 +++++++++++++ include/linux/decompress/inflate.h | 13 ++++++ include/linux/decompress/mm.h | 87 ++++++++++++++++++++++++++++++++++++++ include/linux/decompress/unlzma.h | 12 ++++++ 5 files changed, 152 insertions(+) create mode 100644 include/linux/decompress/bunzip2.h create mode 100644 include/linux/decompress/generic.h create mode 100644 include/linux/decompress/inflate.h create mode 100644 include/linux/decompress/mm.h create mode 100644 include/linux/decompress/unlzma.h (limited to 'include/linux') diff --git a/include/linux/decompress/bunzip2.h b/include/linux/decompress/bunzip2.h new file mode 100644 index 00000000000..115272137a9 --- /dev/null +++ b/include/linux/decompress/bunzip2.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_BUNZIP2_H +#define DECOMPRESS_BUNZIP2_H + +int bunzip2(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h new file mode 100644 index 00000000000..f847f514f78 --- /dev/null +++ b/include/linux/decompress/generic.h @@ -0,0 +1,30 @@ +#ifndef DECOMPRESS_GENERIC_H +#define DECOMPRESS_GENERIC_H + +/* Minimal chunksize to be read. + *Bzip2 prefers at least 4096 + *Lzma prefers 0x10000 */ +#define COMPR_IOBUF_SIZE 4096 + +typedef int (*decompress_fn) (unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*writebb)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x)); + +/* inbuf - input buffer + *len - len of pre-read data in inbuf + *fill - function to fill inbuf if empty + *writebb - function to write out outbug + *posp - if non-null, input position (number of bytes read) will be + * returned here + * + *If len != 0, the inbuf is initialized (with as much data), and fill + *should not be called + *If len = 0, the inbuf is allocated, but empty. Its size is IOBUF_SIZE + *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE + */ + + +#endif diff --git a/include/linux/decompress/inflate.h b/include/linux/decompress/inflate.h new file mode 100644 index 00000000000..f9b06ccc3e5 --- /dev/null +++ b/include/linux/decompress/inflate.h @@ -0,0 +1,13 @@ +#ifndef INFLATE_H +#define INFLATE_H + +/* Other housekeeping constants */ +#define INBUFSIZ 4096 + +int gunzip(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error_fn)(char *x)); +#endif diff --git a/include/linux/decompress/mm.h b/include/linux/decompress/mm.h new file mode 100644 index 00000000000..12ff8c3f1d0 --- /dev/null +++ b/include/linux/decompress/mm.h @@ -0,0 +1,87 @@ +/* + * linux/compr_mm.h + * + * Memory management for pre-boot and ramdisk uncompressors + * + * Authors: Alain Knaff + * + */ + +#ifndef DECOMPR_MM_H +#define DECOMPR_MM_H + +#ifdef STATIC + +/* Code active when included from pre-boot environment: */ + +/* A trivial malloc implementation, adapted from + * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 + */ +static unsigned long malloc_ptr; +static int malloc_count; + +static void *malloc(int size) +{ + void *p; + + if (size < 0) + error("Malloc error"); + if (!malloc_ptr) + malloc_ptr = free_mem_ptr; + + malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ + + p = (void *)malloc_ptr; + malloc_ptr += size; + + if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) + error("Out of memory"); + + malloc_count++; + return p; +} + +static void free(void *where) +{ + malloc_count--; + if (!malloc_count) + malloc_ptr = free_mem_ptr; +} + +#define large_malloc(a) malloc(a) +#define large_free(a) free(a) + +#define set_error_fn(x) + +#define INIT + +#else /* STATIC */ + +/* Code active when compiled standalone for use when loading ramdisk: */ + +#include +#include +#include +#include + +/* Use defines rather than static inline in order to avoid spurious + * warnings when not needed (indeed large_malloc / large_free are not + * needed by inflate */ + +#define malloc(a) kmalloc(a, GFP_KERNEL) +#define free(a) kfree(a) + +#define large_malloc(a) vmalloc(a) +#define large_free(a) vfree(a) + +static void(*error)(char *m); +#define set_error_fn(x) error = x; + +#define INIT __init +#define STATIC + +#include + +#endif /* STATIC */ + +#endif /* DECOMPR_MM_H */ diff --git a/include/linux/decompress/unlzma.h b/include/linux/decompress/unlzma.h new file mode 100644 index 00000000000..7796538f1bf --- /dev/null +++ b/include/linux/decompress/unlzma.h @@ -0,0 +1,12 @@ +#ifndef DECOMPRESS_UNLZMA_H +#define DECOMPRESS_UNLZMA_H + +int unlzma(unsigned char *, int, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *posp, + void(*error)(char *x) + ); + +#endif -- cgit v1.2.3 From 889c92d21db40be0b7d22a59395060237895bb85 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 8 Jan 2009 15:14:17 -0800 Subject: bzip2/lzma: centralize format detection Centralize the compression format detection to a common routine in the lib directory, and use it for both initramfs and initrd. Signed-off-by: H. Peter Anvin --- include/linux/decompress/generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index f847f514f78..6dfb856327b 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -26,5 +26,8 @@ typedef int (*decompress_fn) (unsigned char *inbuf, int len, *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE */ +/* Utility routine to detect the decompression method */ +decompress_fn decompress_method(const unsigned char *inbuf, int len, + const char **name); #endif -- cgit v1.2.3 From 7f7ace0cda64c99599c23785f8979a072e118058 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: cpumask: update irq_desc to use cpumask_var_t Impact: reduce memory usage, use new cpumask API. Replace the affinity and pending_masks with cpumask_var_t's. This adds to the significant size reduction done with the SPARSE_IRQS changes. The added functions (init_alloc_desc_masks & init_copy_desc_masks) are in the include file so they can be inlined (and optimized out for the !CONFIG_CPUMASKS_OFFSTACK case.) [Naming chosen to be consistent with the other init*irq functions, as well as the backwards arg declaration of "from, to" instead of the more common "to, from" standard.] Includes a slight change to the declaration of struct irq_desc to embed the pending_mask within ifdef(CONFIG_SMP) to be consistent with other references, and some small changes to Xen. Tested: sparse/non-sparse/cpumask_offstack/non-cpumask_offstack/nonuma/nosmp on x86_64 Signed-off-by: Mike Travis Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: KOSAKI Motohiro Cc: Venkatesh Pallipadi Cc: virtualization@lists.osdl.org Cc: xen-devel@lists.xensource.com Cc: Yinghai Lu --- include/linux/irq.h | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index f899b502f18..fa27210f1df 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -182,11 +182,11 @@ struct irq_desc { unsigned int irqs_unhandled; spinlock_t lock; #ifdef CONFIG_SMP - cpumask_t affinity; + cpumask_var_t affinity; unsigned int cpu; -#endif #ifdef CONFIG_GENERIC_PENDING_IRQ - cpumask_t pending_mask; + cpumask_var_t pending_mask; +#endif #endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; @@ -422,4 +422,79 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); #endif /* !CONFIG_S390 */ +#ifdef CONFIG_SMP +/** + * init_alloc_desc_masks - allocate cpumasks for irq_desc + * @desc: pointer to irq_desc struct + * @boot: true if need bootmem + * + * Allocates affinity and pending_mask cpumask if required. + * Returns true if successful (or not required). + * Side effect: affinity has all bits set, pending_mask has all bits clear. + */ +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + if (boot) { + alloc_bootmem_cpumask_var(&desc->affinity); + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + alloc_bootmem_cpumask_var(&desc->pending_mask); + cpumask_clear(desc->pending_mask); +#endif + return true; + } + + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) + return false; + cpumask_setall(desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { + free_cpumask_var(desc->affinity); + return false; + } + cpumask_clear(desc->pending_mask); +#endif + return true; +} + +/** + * init_copy_desc_masks - copy cpumasks for irq_desc + * @old_desc: pointer to old irq_desc struct + * @new_desc: pointer to new irq_desc struct + * + * Insures affinity and pending_masks are copied to new irq_desc. + * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the + * irq_desc struct so the copy is redundant. + */ + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +#ifdef CONFIG_CPUMASKS_OFFSTACK + cpumask_copy(new_desc->affinity, old_desc->affinity); + +#ifdef CONFIG_GENERIC_PENDING_IRQ + cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); +#endif +#endif +} + +#else /* !CONFIG_SMP */ + +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, + bool boot) +{ + return true; +} + +static inline void init_copy_desc_masks(struct irq_desc *old_desc, + struct irq_desc *new_desc) +{ +} + +#endif /* CONFIG_SMP */ + #endif /* _LINUX_IRQ_H */ -- cgit v1.2.3 From fbd59a8d1f7cf325fdb6828659f1fb76631e87b3 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Sat, 10 Jan 2009 21:58:08 -0800 Subject: cpumask: Use topology_core_cpumask()/topology_thread_cpumask() Impact: reduce stack usage, use new cpumask API. This actually uses topology_core_cpumask() and topology_thread_cpumask(), removing the only users of topology_core_siblings() and topology_thread_siblings() Signed-off-by: Rusty Russell Signed-off-by: Mike Travis Cc: linux-net-drivers@solarflare.com --- include/linux/topology.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index e632d29f054..a16b9e06f2e 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -193,5 +193,11 @@ int arch_update_cpu_topology(void); #ifndef topology_core_siblings #define topology_core_siblings(cpu) cpumask_of_cpu(cpu) #endif +#ifndef topology_thread_cpumask +#define topology_thread_cpumask(cpu) cpumask_of(cpu) +#endif +#ifndef topology_core_cpumask +#define topology_core_cpumask(cpu) cpumask_of(cpu) +#endif #endif /* _LINUX_TOPOLOGY_H */ -- cgit v1.2.3 From 802bf931f2688ad125b73db597ce63cc842fb27a Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 21:58:09 -0800 Subject: cpumask: fix bug in use cpumask_var_t in irq_desc Impact: fix bug where new irq_desc uses old cpumask pointers which are freed. As Yinghai pointed out, init_copy_one_irq_desc() copies the old desc to the new desc overwriting the cpumask pointers. Since the old_desc and the cpumask pointers are freed, then memory corruption will occur if these old pointers are used. Move the allocation of these pointers to after the copy. Signed-off-by: Mike Travis Cc: Yinghai Lu --- include/linux/irq.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index fa27210f1df..27a67536511 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -426,15 +426,18 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); /** * init_alloc_desc_masks - allocate cpumasks for irq_desc * @desc: pointer to irq_desc struct + * @cpu: cpu which will be handling the cpumasks * @boot: true if need bootmem * * Allocates affinity and pending_mask cpumask if required. * Returns true if successful (or not required). * Side effect: affinity has all bits set, pending_mask has all bits clear. */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { + int node; + if (boot) { alloc_bootmem_cpumask_var(&desc->affinity); cpumask_setall(desc->affinity); @@ -446,6 +449,8 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, return true; } + node = cpu_to_node(cpu); + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; cpumask_setall(desc->affinity); @@ -484,7 +489,7 @@ static inline void init_copy_desc_masks(struct irq_desc *old_desc, #else /* !CONFIG_SMP */ -static inline bool init_alloc_desc_masks(struct irq_desc *desc, int node, +static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, bool boot) { return true; -- cgit v1.2.3 From 9332fccdedf8e09448f3b69b624211ae879f6c45 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sat, 10 Jan 2009 22:24:07 -0800 Subject: irq: initialize nr_irqs based on nr_cpu_ids Impact: Reduce memory usage. This is the second half of the changes to make the irq_desc_ptrs be variable sized based on nr_cpu_ids. This is done by adding a new "max_nr_irqs" macro to irq_vectors.h (and a dummy in irqnr.h) to return a max NR_IRQS value based on NR_CPUS or nr_cpu_ids. This necessitated moving the define of MAX_IO_APICS to a separate file (asm/apicnum.h) so it could be included without the baggage of the other asm/apicdef.h declarations. Signed-off-by: Mike Travis --- include/linux/irqnr.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index 86af92e9e84..de66e4e1040 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -20,11 +20,18 @@ # define for_each_irq_desc_reverse(irq, desc) \ for (irq = nr_irqs - 1; irq >= 0; irq--) + #else /* CONFIG_GENERIC_HARDIRQS */ +#include /* need possible max_nr_irqs() */ + extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); +# ifndef max_nr_irqs +# define max_nr_irqs(nr_cpus) NR_IRQS +# endif + # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ -- cgit v1.2.3 From 92296c6d6e908c35fca287a21af27be814af9c75 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Sun, 11 Jan 2009 09:22:58 -0800 Subject: cpumask, irq: non-x86 build failures Ingo Molnar wrote: > All non-x86 architectures fail to build: > > In file included from /home/mingo/tip/include/linux/random.h:11, > from /home/mingo/tip/include/linux/stackprotector.h:6, > from /home/mingo/tip/init/main.c:17: > /home/mingo/tip/include/linux/irqnr.h:26:63: error: asm/irq_vectors.h: No such file or directory Do not include asm/irq_vectors.h in generic code - it's not available on all architectures. Signed-off-by: Ingo Molnar --- include/linux/irqnr.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index de66e4e1040..887477bc2ab 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -23,15 +23,9 @@ #else /* CONFIG_GENERIC_HARDIRQS */ -#include /* need possible max_nr_irqs() */ - extern int nr_irqs; extern struct irq_desc *irq_to_desc(unsigned int irq); -# ifndef max_nr_irqs -# define max_nr_irqs(nr_cpus) NR_IRQS -# endif - # define for_each_irq_desc(irq, desc) \ for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ irq++, desc = irq_to_desc(irq)) \ -- cgit v1.2.3 From 4a046d1754ee6ebb6f399696805ed61ea0444d4c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 12 Jan 2009 17:39:24 -0800 Subject: x86: arch_probe_nr_irqs Impact: save RAM with large NR_CPUS, get smaller nr_irqs Signed-off-by: Yinghai Lu Signed-off-by: Mike Travis --- include/linux/interrupt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 9127f6b51a3..472f11765f6 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -467,6 +467,7 @@ int show_interrupts(struct seq_file *p, void *v); struct irq_desc; extern int early_irq_init(void); +extern int arch_probe_nr_irqs(void); extern int arch_early_irq_init(void); extern int arch_init_chip_data(struct irq_desc *desc, int cpu); -- cgit v1.2.3 From 0bd74fa8e29dcad98f7e8ffe01ec05fb3326abaf Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Mon, 19 Jan 2009 12:21:27 +0900 Subject: percpu: refactor percpu.h Impact: cleanup Refactor the DEFINE_PER_CPU_* macros and add .data.percpu.first section. Signed-off-by: Brian Gerst Signed-off-by: Tejun Heo --- include/linux/percpu.h | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f2a3751873..0e24202b5a4 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -9,34 +9,39 @@ #include #ifdef CONFIG_SMP -#define DEFINE_PER_CPU(type, name) \ - __attribute__((__section__(".data.percpu"))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define PER_CPU_BASE_SECTION ".data.percpu" #ifdef MODULE -#define SHARED_ALIGNED_SECTION ".data.percpu" +#define PER_CPU_SHARED_ALIGNED_SECTION "" #else -#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" +#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" #endif +#define PER_CPU_FIRST_SECTION ".first" -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ - ____cacheline_aligned_in_smp +#else + +#define PER_CPU_BASE_SECTION ".data" +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_FIRST_SECTION "" + +#endif -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - __attribute__((__section__(".data.percpu.page_aligned"))) \ +#define DEFINE_PER_CPU_SECTION(type, name, section) \ + __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name -#else + #define DEFINE_PER_CPU(type, name) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + DEFINE_PER_CPU_SECTION(type, name, "") -#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) +#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ + ____cacheline_aligned_in_smp -#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU(type, name) -#endif +#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") + +#define DEFINE_PER_CPU_FIRST(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -- cgit v1.2.3 From 7e7f4eae28711fbb7f4d5e4b0aa3195776194bc1 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 2 Feb 2009 21:40:10 +0530 Subject: headers_check fix: linux/coda_psdev.h fix the following 'make headers_check' warning: usr/include/linux/coda_psdev.h:90: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/coda_psdev.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 07ae8f84605..6f06352cf55 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -24,7 +24,7 @@ static inline struct venus_comm *coda_vcp(struct super_block *sb) return (struct venus_comm *)((sb)->s_fs_info); } - +#ifdef __KERNEL__ /* upcalls */ int venus_rootfid(struct super_block *sb, struct CodaFid *fidp); int venus_getattr(struct super_block *sb, struct CodaFid *fid, @@ -64,6 +64,12 @@ int coda_downcall(int opcode, union outputArgs *out, struct super_block *sb); int venus_fsync(struct super_block *sb, struct CodaFid *fid); int venus_statfs(struct dentry *dentry, struct kstatfs *sfs); +/* + * Statistics + */ + +extern struct venus_comm coda_comms[]; +#endif /* __KERNEL__ */ /* messages between coda filesystem in kernel and Venus */ struct upc_req { @@ -82,11 +88,4 @@ struct upc_req { #define REQ_WRITE 0x4 #define REQ_ABORT 0x8 - -/* - * Statistics - */ - -extern struct venus_comm coda_comms[]; - #endif -- cgit v1.2.3 From 25d00fddf8d23234da2d45c051a14450939496d6 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 2 Feb 2009 21:40:58 +0530 Subject: headers_check fix: linux/in6.h fix the following 'make headers_check' warnings: usr/include/linux/in6.h:47: extern's make no sense in userspace usr/include/linux/in6.h:49: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/in6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index bc492048c34..718bf21c575 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -44,11 +44,11 @@ struct in6_addr * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined * in network byte order, not in host byte order as are the IPv4 equivalents */ +#ifdef __KERNEL__ extern const struct in6_addr in6addr_any; #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } extern const struct in6_addr in6addr_loopback; #define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } -#ifdef __KERNEL__ extern const struct in6_addr in6addr_linklocal_allnodes; #define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ { { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } -- cgit v1.2.3 From 9fe03bc3139503fbad66016bf714f4575babf651 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 2 Feb 2009 21:41:41 +0530 Subject: headers_check fix: linux/nubus.h fix the following 'make headers_check' warnings: usr/include/linux/nubus.h:297: extern's make no sense in userspace usr/include/linux/nubus.h:299: extern's make no sense in userspace usr/include/linux/nubus.h:303: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/nubus.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 7382af37473..9be57d99269 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -296,6 +296,7 @@ struct nubus_dev { struct nubus_board* board; }; +#ifdef __KERNEL__ /* This is all NuBus devices (used to find devices later on) */ extern struct nubus_dev* nubus_devices; /* This is all NuBus cards */ @@ -351,6 +352,7 @@ void nubus_get_rsrc_mem(void* dest, void nubus_get_rsrc_str(void* dest, const struct nubus_dirent *dirent, int maxlen); +#endif /* __KERNEL__ */ /* We'd like to get rid of this eventually. Only daynaport.c uses it now. */ static inline void *nubus_slot_addr(int slot) -- cgit v1.2.3 From 7d7dc0d6b0565484e0623cb08b5dcdd56424697b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 2 Feb 2009 21:44:09 +0530 Subject: headers_check fix: linux/socket.h fix the following 'make headers_check' warning: usr/include/linux/socket.h:29: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/socket.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/socket.h b/include/linux/socket.h index 20fc4bbfca4..afc01909a42 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -24,10 +24,12 @@ struct __kernel_sockaddr_storage { #include /* pid_t */ #include /* __user */ -#ifdef CONFIG_PROC_FS +#ifdef __KERNEL__ +# ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); -#endif +# endif +#endif /* __KERNEL__ */ typedef unsigned short sa_family_t; -- cgit v1.2.3 From 11d9f653aff1d445b4300ae1d2e2d675a0e9172f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 2 Feb 2009 21:45:41 +0530 Subject: headers_check fix: linux/reinserfs_fs.h fix the following 'make headers_check' warnings: usr/include/linux/reiserfs_fs.h:687: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:995: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:997: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1467: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1760: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1764: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1766: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1769: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1771: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1805: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1948: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1949: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1950: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1951: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1962: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1963: extern's make no sense in userspace usr/include/linux/reiserfs_fs.h:1964: extern's make no sense in userspace Signed-off-by: Jaswinder Singh Rajput --- include/linux/reiserfs_fs.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index bc5114d35e9..a4db55fd1f6 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -698,7 +698,9 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key) /* object identifier for root dir */ #define REISERFS_ROOT_OBJECTID 2 #define REISERFS_ROOT_PARENT_OBJECTID 1 +#ifdef __KERNEL__ extern struct reiserfs_key root_key; +#endif /* __KERNEL__ */ /* * Picture represents a leaf of the S+tree @@ -1006,10 +1008,12 @@ struct reiserfs_de_head { #define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) +#ifdef __KERNEL__ extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); +#endif /* __KERNEL__ */ /* array of the entry headers */ /* get item body */ @@ -1478,7 +1482,9 @@ struct item_operations { void (*print_vi) (struct virtual_item * vi); }; +#ifdef __KERNEL__ extern struct item_operations *item_ops[TYPE_ANY + 1]; +#endif /* __KERNEL__ */ #define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) #define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) @@ -1679,6 +1685,7 @@ struct reiserfs_transaction_handle { struct list_head t_list; }; +#ifdef __KERNEL__ /* used to keep track of ordered and tail writes, attached to the buffer * head through b_journal_head. */ @@ -2203,4 +2210,5 @@ int reiserfs_unpack(struct inode *inode, struct file *filp); /* xattr stuff */ #define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem) +#endif /* __KERNEL__ */ #endif /* _LINUX_REISER_FS_H */ -- cgit v1.2.3 From f2cddb29ebfc02dfd2c4b439aa0433393ad15575 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 3 Feb 2009 19:21:38 +0530 Subject: headers_check fix cleanup: linux/coda_psdev.h These are only for kernel internals as pointed by Arnd Bergmann: struct kstatfs struct venus_comm coda_vcp() Signed-off-by: Jaswinder Singh Rajput --- include/linux/coda_psdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 6f06352cf55..5b5d4731f95 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -6,6 +6,7 @@ #define CODA_PSDEV_MAJOR 67 #define MAX_CODADEVS 5 /* how many do we allow */ +#ifdef __KERNEL__ struct kstatfs; /* communication pending/processing queues */ @@ -24,7 +25,6 @@ static inline struct venus_comm *coda_vcp(struct super_block *sb) return (struct venus_comm *)((sb)->s_fs_info); } -#ifdef __KERNEL__ /* upcalls */ int venus_rootfid(struct super_block *sb, struct CodaFid *fidp); int venus_getattr(struct super_block *sb, struct CodaFid *fid, -- cgit v1.2.3 From 5007b1fc4ef2c1b496536b2f026353c1d44d92ef Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 3 Feb 2009 19:28:24 +0530 Subject: headers_check fix cleanup: linux/nubus.h These are only for kernel internals as pointed by Arnd Bergmann: struct nubus_board struct nubus_dev Signed-off-by: Jaswinder Singh Rajput --- include/linux/nubus.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nubus.h b/include/linux/nubus.h index 9be57d99269..e137b3c486a 100644 --- a/include/linux/nubus.h +++ b/include/linux/nubus.h @@ -237,6 +237,7 @@ struct nubus_dirent int mask; }; +#ifdef __KERNEL__ struct nubus_board { struct nubus_board* next; struct nubus_dev* first_dev; @@ -296,7 +297,6 @@ struct nubus_dev { struct nubus_board* board; }; -#ifdef __KERNEL__ /* This is all NuBus devices (used to find devices later on) */ extern struct nubus_dev* nubus_devices; /* This is all NuBus cards */ -- cgit v1.2.3 From 750e1c18251345e662bb7e7062b5fd5c1ade36de Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Tue, 3 Feb 2009 19:40:03 +0530 Subject: headers_check fix cleanup: linux/reiserfs_fs.h Only REISERFS_IOC_* definitions are required for user space rest should be in #ifdef __KERNEL__ as pointed by Arnd Bergmann. Signed-off-by: Jaswinder Singh Rajput --- include/linux/reiserfs_fs.h | 62 ++++++++++++++++++++------------------------- 1 file changed, 28 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a4db55fd1f6..e356c99f065 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -28,8 +28,6 @@ #include #endif -struct fid; - /* * include/linux/reiser_fs.h * @@ -37,6 +35,33 @@ struct fid; * */ +/* ioctl's command */ +#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) +/* define following flags to be the same as in ext2, so that chattr(1), + lsattr(1) will work with us. */ +#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS +#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS +#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION +#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION + +#ifdef __KERNEL__ +/* the 32 bit compat definitions with int argument */ +#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) +#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION +#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION + +/* Locking primitives */ +/* Right now we are still falling back to (un)lock_kernel, but eventually that + would evolve into real per-fs locks */ +#define reiserfs_write_lock( sb ) lock_kernel() +#define reiserfs_write_unlock( sb ) unlock_kernel() + +/* xattr stuff */ +#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem) +struct fid; + /* in reading the #defines, it may help to understand that they employ the following abbreviations: @@ -698,9 +723,8 @@ static inline void cpu_key_k_offset_dec(struct cpu_key *key) /* object identifier for root dir */ #define REISERFS_ROOT_OBJECTID 2 #define REISERFS_ROOT_PARENT_OBJECTID 1 -#ifdef __KERNEL__ + extern struct reiserfs_key root_key; -#endif /* __KERNEL__ */ /* * Picture represents a leaf of the S+tree @@ -1008,12 +1032,10 @@ struct reiserfs_de_head { #define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) -#ifdef __KERNEL__ extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); -#endif /* __KERNEL__ */ /* array of the entry headers */ /* get item body */ @@ -1482,9 +1504,7 @@ struct item_operations { void (*print_vi) (struct virtual_item * vi); }; -#ifdef __KERNEL__ extern struct item_operations *item_ops[TYPE_ANY + 1]; -#endif /* __KERNEL__ */ #define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) #define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) @@ -1546,7 +1566,6 @@ struct reiserfs_iget_args { /* FUNCTION DECLARATIONS */ /***************************************************************************/ -/*#ifdef __KERNEL__*/ #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) #define journal_trans_half(blocksize) \ @@ -1685,7 +1704,6 @@ struct reiserfs_transaction_handle { struct list_head t_list; }; -#ifdef __KERNEL__ /* used to keep track of ordered and tail writes, attached to the buffer * head through b_journal_head. */ @@ -2185,30 +2203,6 @@ long reiserfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); int reiserfs_unpack(struct inode *inode, struct file *filp); -/* ioctl's command */ -#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) -/* define following flags to be the same as in ext2, so that chattr(1), - lsattr(1) will work with us. */ -#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS -#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS -#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION -#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION - -/* the 32 bit compat definitions with int argument */ -#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) -#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION -#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION - -/* Locking primitives */ -/* Right now we are still falling back to (un)lock_kernel, but eventually that - would evolve into real per-fs locks */ -#define reiserfs_write_lock( sb ) lock_kernel() -#define reiserfs_write_unlock( sb ) unlock_kernel() - -/* xattr stuff */ -#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem) #endif /* __KERNEL__ */ #endif /* _LINUX_REISER_FS_H */ -- cgit v1.2.3 From 65a4e574d2382d83f71b30ea92f86d2e40a6ef8d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 03:36:17 +0100 Subject: smp, generic: introduce arch_disable_smp_support() instead of disable_ioapic_setup() Impact: cleanup disable_ioapic_setup() in init/main.c is ugly as the function is x86-specific. The #ifdef inline prototype there is ugly too. Replace it with a generic arch_disable_smp_support() function - which has a weak alias for non-x86 architectures and for non-ioapic x86 builds. Signed-off-by: Ingo Molnar --- include/linux/smp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 715196b09d6..d41a3a865fe 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,6 +66,12 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + /* * Call a function on all other processors */ -- cgit v1.2.3 From a146649bc19d5eba4f5bfac6720c5f252d517a71 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 31 Jan 2009 14:09:06 +0100 Subject: smp, generic: introduce arch_disable_smp_support(), build fix This function should be provided on UP too. Signed-off-by: Ingo Molnar --- include/linux/smp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index d41a3a865fe..bbacb7baa44 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -66,12 +66,6 @@ extern int __cpu_up(unsigned int cpunum); */ extern void smp_cpus_done(unsigned int max_cpus); -/* - * Callback to arch code if there's nosmp or maxcpus=0 on the - * boot command line: - */ -extern void arch_disable_smp_support(void); - /* * Call a function on all other processors */ @@ -182,6 +176,12 @@ static inline void init_call_single_data(void) #define put_cpu() preempt_enable() #define put_cpu_no_resched() preempt_enable_no_resched() +/* + * Callback to arch code if there's nosmp or maxcpus=0 on the + * boot command line: + */ +extern void arch_disable_smp_support(void); + void smp_setup_processor_id(void); #endif /* __LINUX_SMP_H */ -- cgit v1.2.3 From 527bdfee18ac6a4c026060c2c2b1144df9a5bf1f Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Fri, 6 Feb 2009 20:47:58 +0530 Subject: make linux/types.h as assembly safe Signed-off-by: Jaswinder Singh Rajput --- include/linux/types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 712ca53bc34..c30973ace89 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -1,6 +1,7 @@ #ifndef _LINUX_TYPES_H #define _LINUX_TYPES_H +#ifndef __ASSEMBLY__ #ifdef __KERNEL__ #define DECLARE_BITMAP(name,bits) \ @@ -212,5 +213,5 @@ struct ustat { }; #endif /* __KERNEL__ */ - +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ -- cgit v1.2.3 From 0fb807c3e573ff9de2965ca38c907605d4735d16 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 8 Feb 2009 11:00:25 +0530 Subject: unconditionally include asm/types.h from linux/types.h Reported-by: Sam Ravnborg Signed-off-by: Jaswinder Singh Rajput --- include/linux/types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index c30973ace89..fca82ed55f4 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -1,6 +1,8 @@ #ifndef _LINUX_TYPES_H #define _LINUX_TYPES_H +#include + #ifndef __ASSEMBLY__ #ifdef __KERNEL__ @@ -10,7 +12,6 @@ #endif #include -#include #ifndef __KERNEL_STRICT_NAMES -- cgit v1.2.3 From d3770449d3cb058b94ca1d050d5ced4a66c75ce4 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 8 Feb 2009 09:58:38 -0500 Subject: percpu: make PER_CPU_BASE_SECTION overridable by arches Impact: bug fix IA-64 needs to put percpu data in the seperate section even on UP. Fixes regression caused by "percpu: refactor percpu.h" Signed-off-by: Brian Gerst Acked-by: Tony Luck Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0e24202b5a4..3577ffd90d4 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -8,8 +8,15 @@ #include +#ifndef PER_CPU_BASE_SECTION #ifdef CONFIG_SMP #define PER_CPU_BASE_SECTION ".data.percpu" +#else +#define PER_CPU_BASE_SECTION ".data" +#endif +#endif + +#ifdef CONFIG_SMP #ifdef MODULE #define PER_CPU_SHARED_ALIGNED_SECTION "" @@ -20,7 +27,6 @@ #else -#define PER_CPU_BASE_SECTION ".data" #define PER_CPU_SHARED_ALIGNED_SECTION "" #define PER_CPU_FIRST_SECTION "" -- cgit v1.2.3 From 7d97277b754d3ee098a5ec69b6aaafb00c94e2f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 7 Feb 2009 15:39:41 -0800 Subject: acpi/x86: introduce __apci_map_table, v4 to prevent wrongly overwriting fixmap that still want to use. ACPI used to rely on low mappings being all linearly mapped and grew a habit: it never really unmapped certain kinds of tables after use. This can cause problems - for example the hypothetical case when some spurious access still references it. v2: remove prev_map and prev_size in __apci_map_table v3: let acpi_os_unmap_memory() call early_iounmap too, so remove extral calling to early_acpi_os_unmap_memory v4: fix typo in one acpi_get_table_with_size calling Signed-off-by: Yinghai Lu Acked-by: Len Brown Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6fce2fc2d12..d59f0fa4d77 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); +void __init __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); -- cgit v1.2.3 From 6cd61c0baa8bce32271226198b46c67a7a05d108 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: elf: add ELF_CORE_COPY_KERNEL_REGS() ELF core dump is used for both user land core dump and kernel crash dump. Depending on architecture, register might need to be accessed differently for userland and kernel. Allow architectures to define ELF_CORE_COPY_KERNEL_REGS() and use different operation for kernel register dump. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/elfcore.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 5ca54d77079..7605c5e9589 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re #endif } +static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs) +{ +#ifdef ELF_CORE_COPY_KERNEL_REGS + ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs); +#else + elf_core_copy_regs(elfregs, regs); +#endif +} + static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) { #ifdef ELF_CORE_COPY_TASK_REGS -- cgit v1.2.3 From 970ec1a8213cd1a1ea29972ebbe4575a8b30bca1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 15 Feb 2009 14:06:13 -0800 Subject: [IA64] fix __apci_unmap_table Impact: fix build error to fix: tip/arch/ia64/kernel/acpi.c:203: error: conflicting types for '__acpi_unmap_table' tip/include/linux/acpi.h:82: error: previous declaration of '__acpi_unmap_table' was here tip/arch/ia64/kernel/acpi.c:203: error: conflicting types for '__acpi_unmap_table' tip/include/linux/acpi.h:82: error: previous declaration of '__acpi_unmap_table' was here Signed-off-by: Yinghai Lu Cc: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d59f0fa4d77..78199151c00 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -79,7 +79,7 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); char * __acpi_map_table (unsigned long phys_addr, unsigned long size); -void __init __acpi_unmap_table(char *map, unsigned long size); +void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); int acpi_boot_table_init (void); -- cgit v1.2.3 From b36128c830a8f5bd7d4981f5b0b69950f5928ee6 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: alloc_percpu: change percpu_ptr to per_cpu_ptr Impact: cleanup There are two allocated per-cpu accessor macros with almost identical spelling. The original and far more popular is per_cpu_ptr (44 files), so change over the other 4 files. tj: kill percpu_ptr() and update UP too Signed-off-by: Rusty Russell Cc: mingo@redhat.com Cc: lenb@kernel.org Cc: cpufreq@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/percpu.h | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3577ffd90d4..c80cfe1260e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -81,23 +81,13 @@ struct percpu_data { }; #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -/* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). - */ -#define percpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); extern void percpu_free(void *__pdata); #else /* CONFIG_SMP */ -#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) { @@ -122,6 +112,15 @@ static inline void percpu_free(void *__pdata) cpu_possible_map) #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) #define free_percpu(ptr) percpu_free((ptr)) -#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) +/* + * Use this to get to a cpu's version of the per-cpu object dynamically + * allocated. Non-atomic access to the current CPU's version should + * probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ +}) #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3 From 313e458f81ec3852106c5a83830fe0d4f405a71a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: alloc_percpu: add align argument to __alloc_percpu. This prepares for a real __alloc_percpu, by adding an alignment argument. Only one place uses __alloc_percpu directly, and that's for a string. tj: af_inet also uses __alloc_percpu(), update it. Signed-off-by: Rusty Russell Cc: Christoph Lameter Cc: Jens Axboe --- include/linux/percpu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index c80cfe1260e..1fdaee93c04 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -108,9 +108,10 @@ static inline void percpu_free(void *__pdata) /* (legacy) interface for use without CPU hotplug handling */ -#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ +#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ cpu_possible_map) -#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) #define free_percpu(ptr) percpu_free((ptr)) /* * Use this to get to a cpu's version of the per-cpu object dynamically -- cgit v1.2.3 From f2a8205c4ef1af917d175c36a4097ae5587791c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: percpu: kill percpu_alloc() and friends Impact: kill unused functions percpu_alloc() and its friends never saw much action. It was supposed to replace the cpu-mask unaware __alloc_percpu() but it never happened and in fact __percpu_alloc_mask() itself never really grew proper up/down handling interface either (no exported interface for populate/depopulate). percpu allocation is about to go through major reimplementation and there's no reason to carry this unused interface around. Replace it with __alloc_percpu() and free_percpu(). Signed-off-by: Tejun Heo --- include/linux/percpu.h | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1fdaee93c04..d99e24ae181 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,46 +82,43 @@ struct percpu_data { #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); -extern void percpu_free(void *__pdata); +/* + * Use this to get to a cpu's version of the per-cpu object + * dynamically allocated. Non-atomic access to the current CPU's + * version should probably be combined with get_cpu()/put_cpu(). + */ +#define per_cpu_ptr(ptr, cpu) \ +({ \ + struct percpu_data *__p = __percpu_disguise(ptr); \ + (__typeof__(ptr))__p->ptrs[(cpu)]; \ +}) + +extern void *__alloc_percpu(size_t size, size_t align); +extern void free_percpu(void *__pdata); #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) -static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) +static inline void *__alloc_percpu(size_t size, size_t align) { + /* + * Can't easily make larger alignment work with kmalloc. WARN + * on it. Larger alignment should only be used for module + * percpu sections on SMP for which this path isn't used. + */ + WARN_ON_ONCE(align > __alignof__(unsigned long long)); return kzalloc(size, gfp); } -static inline void percpu_free(void *__pdata) +static inline void free_percpu(void *p) { - kfree(__pdata); + kfree(p); } #endif /* CONFIG_SMP */ -#define percpu_alloc_mask(size, gfp, mask) \ - __percpu_alloc_mask((size), (gfp), &(mask)) - -#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) - -/* (legacy) interface for use without CPU hotplug handling */ - -#define __alloc_percpu(size, align) percpu_alloc_mask((size), GFP_KERNEL, \ - cpu_possible_map) #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ __alignof__(type)) -#define free_percpu(ptr) percpu_free((ptr)) -/* - * Use this to get to a cpu's version of the per-cpu object dynamically - * allocated. Non-atomic access to the current CPU's version should - * probably be combined with get_cpu()/put_cpu(). - */ -#define per_cpu_ptr(ptr, cpu) \ -({ \ - struct percpu_data *__p = __percpu_disguise(ptr); \ - (__typeof__(ptr))__p->ptrs[(cpu)]; \ -}) #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3 From f0aa6617903648077dffe5cfcf7c4458f4610fa7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: implement vm_area_register_early() Impact: allow multiple early vm areas There are places where kernel VM area needs to be allocated before vmalloc is initialized. This is done by allocating static vm_struct, initializing several fields and linking it to vmlist and later vmalloc initialization picking up these from vmlist. This is currently done manually and if there's more than one such areas, there's no defined way to arbitrate who gets which address. This patch implements vm_area_register_early(), which takes vm_area struct with flags and size initialized, assigns address to it and puts it on the vmlist. This way, multiple early vm areas can determine which addresses they should use. The only current user - alpha mm init - is converted to use it. Signed-off-by: Tejun Heo --- include/linux/vmalloc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 506e7620a98..bbc05139229 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -106,5 +106,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; +extern __init void vm_area_register_early(struct vm_struct *vm); #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3 From 8fc48985006da4ceba24508db64ec77fc0dfe3bb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: vmalloc: add un/map_kernel_range_noflush() Impact: two more public map/unmap functions Implement map_kernel_range_noflush() and unmap_kernel_range_noflush(). These functions respectively map and unmap address range in kernel VM area but doesn't do any vcache or tlb flushing. These will be used by new percpu allocator. Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index bbc05139229..599ba798431 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -91,6 +91,9 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); +extern int map_kernel_range_noflush(unsigned long start, unsigned long size, + pgprot_t prot, struct page **pages); +extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); /* Allocate/destroy a 'vmalloc' VM area. */ -- cgit v1.2.3 From fbf59bc9d74d1fb30b8e0630743aff2806eafcea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: percpu: implement new dynamic percpu allocator Impact: new scalable dynamic percpu allocator which allows dynamic percpu areas to be accessed the same way as static ones Implement scalable dynamic percpu allocator which can be used for both static and dynamic percpu areas. This will allow static and dynamic areas to share faster direct access methods. This feature is optional and enabled only when CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is defined by arch. Please read comment on top of mm/percpu.c for details. Signed-off-by: Tejun Heo Cc: Andrew Morton --- include/linux/percpu.h | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d99e24ae181..18080995ff3 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -76,23 +76,37 @@ #ifdef CONFIG_SMP -struct percpu_data { - void *ptrs[1]; -}; +#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA -#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +extern void *pcpu_base_addr; +typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); + +extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, + struct page **pages, size_t cpu_size); /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's * version should probably be combined with get_cpu()/put_cpu(). */ +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) + +#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + +struct percpu_data { + void *ptrs[1]; +}; + +#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) + #define per_cpu_ptr(ptr, cpu) \ ({ \ struct percpu_data *__p = __percpu_disguise(ptr); \ (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) +#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ + extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); -- cgit v1.2.3 From b814d41f0987c7648d7ed07471258101c95c026b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:32:10 +0100 Subject: x86, mm: fault.c, simplify kmmio_fault() Impact: cleanup Remove an #ifdef from kmmio_fault() - we can do this by providing default implementations for is_kmmio_active() and kmmio_handler(). The compiler optimizes it all away in the !CONFIG_MMIOTRACE case. Also, while at it, clean up mmiotrace.h a bit: - standard header guards - standard vertical spaces for structure definitions No code changed (both with mmiotrace on and off in the config): text data bss dec hex filename 2947 12 12 2971 b9b fault.o.before 2947 12 12 2971 b9b fault.o.after Cc: Pekka Paalanen Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/mmiotrace.h | 78 ++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmiotrace.h b/include/linux/mmiotrace.h index 139d7c88d9c..3d1b7bde128 100644 --- a/include/linux/mmiotrace.h +++ b/include/linux/mmiotrace.h @@ -1,5 +1,5 @@ -#ifndef MMIOTRACE_H -#define MMIOTRACE_H +#ifndef _LINUX_MMIOTRACE_H +#define _LINUX_MMIOTRACE_H #include #include @@ -13,28 +13,34 @@ typedef void (*kmmio_post_handler_t)(struct kmmio_probe *, unsigned long condition, struct pt_regs *); struct kmmio_probe { - struct list_head list; /* kmmio internal list */ - unsigned long addr; /* start location of the probe point */ - unsigned long len; /* length of the probe region */ - kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ - kmmio_post_handler_t post_handler; /* Called after addr is executed */ - void *private; + /* kmmio internal list: */ + struct list_head list; + /* start location of the probe point: */ + unsigned long addr; + /* length of the probe region: */ + unsigned long len; + /* Called before addr is executed: */ + kmmio_pre_handler_t pre_handler; + /* Called after addr is executed: */ + kmmio_post_handler_t post_handler; + void *private; }; +extern unsigned int kmmio_count; + +extern int register_kmmio_probe(struct kmmio_probe *p); +extern void unregister_kmmio_probe(struct kmmio_probe *p); + +#ifdef CONFIG_MMIOTRACE /* kmmio is active by some kmmio_probes? */ static inline int is_kmmio_active(void) { - extern unsigned int kmmio_count; return kmmio_count; } -extern int register_kmmio_probe(struct kmmio_probe *p); -extern void unregister_kmmio_probe(struct kmmio_probe *p); - /* Called from page fault handler. */ extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); -#ifdef CONFIG_MMIOTRACE /* Called from ioremap.c */ extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr); @@ -43,7 +49,17 @@ extern void mmiotrace_iounmap(volatile void __iomem *addr); /* For anyone to insert markers. Remember trailing newline. */ extern int mmiotrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); -#else +#else /* !CONFIG_MMIOTRACE: */ +static inline int is_kmmio_active(void) +{ + return 0; +} + +static inline int kmmio_handler(struct pt_regs *regs, unsigned long addr) +{ + return 0; +} + static inline void mmiotrace_ioremap(resource_size_t offset, unsigned long size, void __iomem *addr) { @@ -63,28 +79,28 @@ static inline int mmiotrace_printk(const char *fmt, ...) #endif /* CONFIG_MMIOTRACE */ enum mm_io_opcode { - MMIO_READ = 0x1, /* struct mmiotrace_rw */ - MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ - MMIO_PROBE = 0x3, /* struct mmiotrace_map */ - MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ - MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ + MMIO_READ = 0x1, /* struct mmiotrace_rw */ + MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ + MMIO_PROBE = 0x3, /* struct mmiotrace_map */ + MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ + MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ }; struct mmiotrace_rw { - resource_size_t phys; /* PCI address of register */ - unsigned long value; - unsigned long pc; /* optional program counter */ - int map_id; - unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ - unsigned char width; /* size of register access in bytes */ + resource_size_t phys; /* PCI address of register */ + unsigned long value; + unsigned long pc; /* optional program counter */ + int map_id; + unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ + unsigned char width; /* size of register access in bytes */ }; struct mmiotrace_map { - resource_size_t phys; /* base address in PCI space */ - unsigned long virt; /* base virtual address */ - unsigned long len; /* mapping size */ - int map_id; - unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ + resource_size_t phys; /* base address in PCI space */ + unsigned long virt; /* base virtual address */ + unsigned long len; /* mapping size */ + int map_id; + unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ }; /* in kernel/trace/trace_mmiotrace.c */ @@ -94,4 +110,4 @@ extern void mmio_trace_rw(struct mmiotrace_rw *rw); extern void mmio_trace_mapping(struct mmiotrace_map *map); extern int mmio_trace_printk(const char *fmt, va_list args); -#endif /* MMIOTRACE_H */ +#endif /* _LINUX_MMIOTRACE_H */ -- cgit v1.2.3 From b18018126f422f5b706fd750373425e10e84b486 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 20 Feb 2009 22:42:57 +0100 Subject: x86, mm, kprobes: fault.c, simplify notify_page_fault() Impact: cleanup Remove an #ifdef from notify_page_fault(). The function still compiles to nothing in the !CONFIG_KPROBES case. Introduce kprobes_built_in() and kprobe_fault_handler() helpers to allow this - they returns 0 if !CONFIG_KPROBES. No code changed: text data bss dec hex filename 4618 32 24 4674 1242 fault.o.before 4618 32 24 4674 1242 fault.o.after Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 32851eef48f..2ec6cc14a11 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -182,6 +182,14 @@ struct kprobe_blackpoint { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +/* + * For #ifdef avoidance: + */ +static inline int kprobes_built_in(void) +{ + return 1; +} + #ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); @@ -271,8 +279,16 @@ void unregister_kretprobes(struct kretprobe **rps, int num); void kprobe_flush_task(struct task_struct *tk); void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); -#else /* CONFIG_KPROBES */ +#else /* !CONFIG_KPROBES: */ +static inline int kprobes_built_in(void) +{ + return 0; +} +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + return 0; +} static inline struct kprobe *get_kprobe(void *addr) { return NULL; @@ -329,5 +345,5 @@ static inline void unregister_kretprobes(struct kretprobe **rps, int num) static inline void kprobe_flush_task(struct task_struct *tk) { } -#endif /* CONFIG_KPROBES */ -#endif /* _LINUX_KPROBES_H */ +#endif /* CONFIG_KPROBES */ +#endif /* _LINUX_KPROBES_H */ -- cgit v1.2.3 From c132937556f56ee4b831ef4b23f1846e05fde102 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:20 +0900 Subject: bootmem: clean up arch-specific bootmem wrapping Impact: cleaner and consistent bootmem wrapping By setting CONFIG_HAVE_ARCH_BOOTMEM_NODE, archs can define arch-specific wrappers for bootmem allocation. However, this is done a bit strangely in that only the high level convenience macros can be changed while lower level, but still exported, interface functions can't be wrapped. This not only is messy but also leads to strange situation where alloc_bootmem() does what the arch wants it to do but the equivalent __alloc_bootmem() call doesn't although they should be able to be used interchangeably. This patch updates bootmem such that archs can override / wrap the backend function - alloc_bootmem_core() instead of the highlevel interface functions to allow simpler and consistent wrapping. Also, HAVE_ARCH_BOOTMEM_NODE is renamed to HAVE_ARCH_BOOTMEM. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/bootmem.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 95837bfb525..3a87f93081e 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -69,10 +69,9 @@ extern int reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE -extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -#endif - +extern int reserve_bootmem(unsigned long addr, + unsigned long size, + int flags); extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); @@ -94,7 +93,7 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); -#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE + #define alloc_bootmem(x) \ __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ @@ -113,7 +112,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, int flags); -- cgit v1.2.3 From 2d0aae41695257603fc281b519677131ab5a752b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: bootmem: reorder interface functions and add a missing one Impact: cleanup and addition of missing interface wrapper The interface functions in bootmem.h was ordered in not so orderly manner. Reorder them such that * functions allocating the same area group together - ie. alloc_bootmem group and alloc_bootmem_low group. * functions w/o node parameter come before the ones w/ node parameter. * nopanic variants are immediately below their panicky counterparts. While at it, add alloc_bootmem_pages_node_nopanic() which was missing. Signed-off-by: Tejun Heo Cc: Johannes Weiner --- include/linux/bootmem.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 3a87f93081e..455d83219fa 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -65,22 +65,20 @@ extern void free_bootmem(unsigned long addr, unsigned long size); #define BOOTMEM_DEFAULT 0 #define BOOTMEM_EXCLUSIVE (1<<0) -extern int reserve_bootmem_node(pg_data_t *pgdat, - unsigned long physaddr, - unsigned long size, - int flags); extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); -extern void *__alloc_bootmem_nopanic(unsigned long size, +extern int reserve_bootmem_node(pg_data_t *pgdat, + unsigned long physaddr, + unsigned long size, + int flags); + +extern void *__alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem(unsigned long size, +extern void *__alloc_bootmem_nopanic(unsigned long size, unsigned long align, unsigned long goal); -extern void *__alloc_bootmem_low(unsigned long size, - unsigned long align, - unsigned long goal); extern void *__alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -89,6 +87,9 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +extern void *__alloc_bootmem_low(unsigned long size, + unsigned long align, + unsigned long goal); extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, @@ -98,18 +99,21 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_nopanic(x) \ __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low(x) \ - __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) #define alloc_bootmem_pages(x) \ __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_nopanic(x) \ __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) -#define alloc_bootmem_low_pages(x) \ - __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + +#define alloc_bootmem_low(x) \ + __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) +#define alloc_bootmem_low_pages(x) \ + __alloc_bootmem_low(x, PAGE_SIZE, 0) #define alloc_bootmem_low_pages_node(pgdat, x) \ __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) -- cgit v1.2.3 From c0c0a29379b5848aec2e8f1c58d853d3cb7118b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: vmalloc: add @align to vm_area_register_early() Impact: allow larger alignment for early vmalloc area allocation Some early vmalloc users might want larger alignment, for example, for custom large page mapping. Add @align to vm_area_register_early(). While at it, drop docbook comment on non-existent @size. Signed-off-by: Tejun Heo Cc: Nick Piggin Cc: Ivan Kokshaysky --- include/linux/vmalloc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 599ba798431..2f6994fdf0e 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -109,6 +109,6 @@ extern long vwrite(char *buf, char *addr, unsigned long count); */ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; -extern __init void vm_area_register_early(struct vm_struct *vm); +extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3 From 8d408b4be37bc49c9086531f2ebe411cf5731746 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Feb 2009 11:57:21 +0900 Subject: percpu: give more latitude to arch specific first chunk initialization Impact: more latitude for first percpu chunk allocation The first percpu chunk serves the kernel static percpu area and may or may not contain extra room for further dynamic allocation. Initialization of the first chunk needs to be done before normal memory allocation service is up, so it has its own init path - pcpu_setup_static(). It seems archs need more latitude while initializing the first chunk for example to take advantage of large page mapping. This patch makes the following changes to allow this. * Define PERCPU_DYNAMIC_RESERVE to give arch hint about how much space to reserve in the first chunk for further dynamic allocation. * Rename pcpu_setup_static() to pcpu_setup_first_chunk(). * Make pcpu_setup_first_chunk() much more flexible by fetching page pointer by callback and adding optional @unit_size, @free_size and @base_addr arguments which allow archs to selectively part of chunk initialization to their likings. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 18080995ff3..910beb0abea 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,12 +78,47 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +/* minimum unit size, also is the maximum supported allocation size */ +#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) + +/* + * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy + * back on the first chunk if arch is manually allocating and mapping + * it for faster access (as a part of large page mapping for example). + * Note that dynamic percpu allocator covers both static and dynamic + * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * + * On typical configuration with modules, the following values leave + * about 8k of free space on the first chunk after boot on both x86_32 + * and 64 when module support is enabled. When module support is + * disabled, it's much tighter. + */ +#ifndef PERCPU_DYNAMIC_RESERVE +# if BITS_PER_LONG > 32 +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# endif +# else +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# endif +# endif +#endif /* PERCPU_DYNAMIC_RESERVE */ + extern void *pcpu_base_addr; +typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); -extern size_t __init pcpu_setup_static(pcpu_populate_pte_fn_t populate_pte_fn, - struct page **pages, size_t cpu_size); +extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, + size_t static_size, size_t unit_size, + size_t free_size, void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's -- cgit v1.2.3 From 3255aa2eb636a508fc82a73fabbb8aaf2ff23c0f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 08:21:52 +0100 Subject: x86, mm: pass in 'total' to __copy_from_user_*nocache() Impact: cleanup, enable future change Add a 'total bytes copied' parameter to __copy_from_user_*nocache(), and update all the callsites. The parameter is not used yet - architecture code can use it to more intelligently decide whether the copy should be cached or non-temporal. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145..6f3c603b0d6 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) + const void __user *from, unsigned long n, unsigned long total) { return __copy_from_user(to, from, n); } -- cgit v1.2.3 From 17581ad812a9abb0182260374ef2e52d4a808a64 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 24 Feb 2009 17:35:14 -0800 Subject: gpu/drm, x86, PAT: PAT support for io_mapping_* Make io_mapping_create_wc and io_mapping_free go through PAT to make sure that there are no memory type aliases. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Dave Airlie Cc: Jesse Barnes Cc: Eric Anholt Cc: Keith Packard Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index cbc2f0cd631..f1ed66c4378 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,8 +49,9 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; + pgprot_t prot; - if (!is_io_mapping_possible(base, size)) + if (!reserve_io_memtype_wc(base, size, &prot)) return NULL; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); @@ -59,13 +60,14 @@ io_mapping_create_wc(resource_size_t base, unsigned long size) iomap->base = base; iomap->size = size; - iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + iomap->prot = prot; return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { + free_io_memtype(mapping->base, mapping->size); kfree(mapping); } -- cgit v1.2.3 From d2b0261506602bd969164879206027b30358ffdf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 14:36:45 +0100 Subject: alloc_percpu: fix UP build Impact: build fix the !SMP branch had a 'gfp' leftover: include/linux/percpu.h: In function '__alloc_percpu': include/linux/percpu.h:160: error: 'gfp' undeclared (first use in this function) include/linux/percpu.h:160: error: (Each undeclared identifier is reported only once include/linux/percpu.h:160: error: for each function it appears in.) Use GFP_KERNEL like the SMP version does. Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 910beb0abea..d8e5a9abbce 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -157,7 +157,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * percpu sections on SMP for which this path isn't used. */ WARN_ON_ONCE(align > __alignof__(unsigned long long)); - return kzalloc(size, gfp); + return kzalloc(size, GFP_KERNEL); } static inline void free_percpu(void *p) -- cgit v1.2.3 From e317603694bfd17b28a40de9d65e1a4ec12f816e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 26 Feb 2009 10:54:17 +0900 Subject: percpu: fix too low alignment restriction on UP UP __alloc_percpu() triggered WARN_ON_ONCE() if the requested alignment is larger than that of unsigned long long, which is too small for all the cacheline aligned allocations. Bump it up to SMP_CACHE_BYTES which kmalloc allocations generally guarantee. Signed-off-by: Tejun Heo Reported-by: Ingo Molnar --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d8e5a9abbce..545b068bcb7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -156,7 +156,7 @@ static inline void *__alloc_percpu(size_t size, size_t align) * on it. Larger alignment should only be used for module * percpu sections on SMP for which this path isn't used. */ - WARN_ON_ONCE(align > __alignof__(unsigned long long)); + WARN_ON_ONCE(align > SMP_CACHE_BYTES); return kzalloc(size, GFP_KERNEL); } -- cgit v1.2.3 From f5c1aa1537be39d8b9bb5279b5881d81898fd3cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 1 Mar 2009 12:32:08 +0100 Subject: Revert "gpu/drm, x86, PAT: PAT support for io_mapping_*" This reverts commit 17581ad812a9abb0182260374ef2e52d4a808a64. Sitsofe Wheeler reported that /dev/dri/card0 is MIA on his EeePC 900 and bisected it to this commit. Graphics card is an i915 in an EeePC 900: 00:02.0 VGA compatible controller [0300]: Intel Corporation Mobile 915GM/GMS/910GML Express Graphics Controller [8086:2592] (rev 04) ( Most likely the ioremap() of the driver failed and hence the card did not initialize. ) Reported-by: Sitsofe Wheeler Bisected-by: Sitsofe Wheeler Cc: Venkatesh Pallipadi Cc: Suresh Siddha Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index f1ed66c4378..cbc2f0cd631 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,9 +49,8 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; - pgprot_t prot; - if (!reserve_io_memtype_wc(base, size, &prot)) + if (!is_io_mapping_possible(base, size)) return NULL; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); @@ -60,14 +59,13 @@ io_mapping_create_wc(resource_size_t base, unsigned long size) iomap->base = base; iomap->size = size; - iomap->prot = prot; + iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { - free_io_memtype(mapping->base, mapping->size); kfree(mapping); } -- cgit v1.2.3 From f180053694b43d5714bf56cb95499a3c32ff155c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 2 Mar 2009 11:00:57 +0100 Subject: x86, mm: dont use non-temporal stores in pagecache accesses Impact: standardize IO on cached ops On modern CPUs it is almost always a bad idea to use non-temporal stores, as the regression in this commit has shown it: 30d697f: x86: fix performance regression in write() syscall The kernel simply has no good information about whether using non-temporal stores is a good idea or not - and trying to add heuristics only increases complexity and inserts fragility. The regression on cached write()s took very long to be found - over two years. So dont take any chances and let the hardware decide how it makes use of its caches. The only exception is drivers/gpu/drm/i915/i915_gem.c: there were we are absolutely sure that another entity (the GPU) will pick up the dirty data immediately and that the CPU will not touch that data before the GPU will. Also, keep the _nocache() primitives to make it easier for people to experiment with these details. There may be more clear-cut cases where non-cached copies can be used, outside of filemap.c. Cc: Salman Qazi Cc: Nick Piggin Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6f3c603b0d6..6b58367d145 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -41,13 +41,13 @@ static inline void pagefault_enable(void) #ifndef ARCH_HAS_NOCACHE_UACCESS static inline unsigned long __copy_from_user_inatomic_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user_inatomic(to, from, n); } static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n, unsigned long total) + const void __user *from, unsigned long n) { return __copy_from_user(to, from, n); } -- cgit v1.2.3 From 6a242909b01120f6f3d571c0b75e20ec61f0d8d3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:58 +0900 Subject: percpu: clean up percpu constants Impact: cleaup Make the following cleanups. * There isn't much arch-specific about PERCPU_MODULE_RESERVE. Always define it whether arch overrides PERCPU_ENOUGH_ROOM or not. * blackfin overrides PERCPU_ENOUGH_ROOM to align static area size. Do it by default. * percpu allocation sizes doesn't have much to do with the page size. Don't use PAGE_SHIFT in their definition. Signed-off-by: Tejun Heo Cc: Bryan Wu --- include/linux/percpu.h | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 545b068bcb7..2d34b038fe7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -5,6 +5,7 @@ #include /* For kmalloc() */ #include #include +#include #include @@ -52,17 +53,18 @@ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ -#ifndef PERCPU_ENOUGH_ROOM +/* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE 8192 +#define PERCPU_MODULE_RESERVE (8 << 10) #else -#define PERCPU_MODULE_RESERVE 0 +#define PERCPU_MODULE_RESERVE 0 #endif +#ifndef PERCPU_ENOUGH_ROOM #define PERCPU_ENOUGH_ROOM \ - (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) -#endif /* PERCPU_ENOUGH_ROOM */ + (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ + PERCPU_MODULE_RESERVE) +#endif /* * Must be an lvalue. Since @var must be a simple identifier, @@ -79,7 +81,7 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) +#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy @@ -96,15 +98,15 @@ #ifndef PERCPU_DYNAMIC_RESERVE # if BITS_PER_LONG > 32 # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (24 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # endif # else # ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (16 << 10) # else -# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# define PERCPU_DYNAMIC_RESERVE (8 << 10) # endif # endif #endif /* PERCPU_DYNAMIC_RESERVE */ -- cgit v1.2.3 From 2441d15c97d498b18f03ae9fba262ffeae42a08b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu: cosmetic renames in pcpu_setup_first_chunk() Impact: cosmetic, preparation for future changes Make the following renames in pcpur_setup_first_chunk() in preparation for future changes. * s/free_size/dyn_size/ * s/static_vm/first_vm/ * s/static_chunk/schunk/ Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 2d34b038fe7..a0b4ea2a335 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -118,7 +118,7 @@ typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t unit_size, - size_t free_size, void *base_addr, + size_t dyn_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* -- cgit v1.2.3 From cafe8816b217b98dc3f268d3b77445da498beb4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu: use negative for auto for pcpu_setup_first_chunk() arguments Impact: argument semantic cleanup In pcpu_setup_first_chunk(), zero @unit_size and @dyn_size meant auto-sizing. It's okay for @unit_size as 0 doesn't make sense but 0 dynamic reserve size is valid. Alos, if arch @dyn_size is calculated from other parameters, it might end up passing in 0 @dyn_size and malfunction when the size is automatically adjusted. This patch makes both @unit_size and @dyn_size ssize_t and use -1 for auto sizing. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a0b4ea2a335..a96fc53bbd6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,8 +117,9 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t unit_size, - size_t dyn_size, void *base_addr, + size_t static_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn); /* -- cgit v1.2.3 From edcb463997ed7b2ffa3bac76e3e75957318f2e01 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: percpu, module: implement reserved allocation and use it for module percpu variables Impact: add reserved allocation functionality and use it for module percpu variables This patch implements reserved allocation from the first chunk. When setting up the first chunk, arch can ask to set aside certain number of bytes right after the core static area which is available only through a separate reserved allocator. This will be used primarily for module static percpu variables on architectures with limited relocation range to ensure that the module perpcu symbols are inside the relocatable range. If reserved area is requested, the first chunk becomes reserved and isn't available for regular allocation. If the first chunk also includes piggy-back dynamic allocation area, a separate chunk mapping the same region is created to serve dynamic allocation. The first one is called static first chunk and the second dynamic first chunk. Although they share the page map, their different area map initializations guarantee they serve disjoint areas according to their purposes. If arch doesn't setup reserved area, reserved allocation is handled like any other allocation. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a96fc53bbd6..8ff15153ae2 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -117,10 +117,10 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, - ssize_t unit_size, ssize_t dyn_size, - void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + size_t static_size, size_t reserved_size, + ssize_t unit_size, ssize_t dyn_size, + void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); /* * Use this to get to a cpu's version of the per-cpu object @@ -129,6 +129,8 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +extern void *__alloc_reserved_percpu(size_t size, size_t align); + #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ struct percpu_data { -- cgit v1.2.3 From 6b19b0c2400437a3c10059ede0e59b517092e1bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Mar 2009 14:33:59 +0900 Subject: x86, percpu: setup reserved percpu area for x86_64 Impact: fix relocation overflow during module load x86_64 uses 32bit relocations for symbol access and static percpu symbols whether in core or modules must be inside 2GB of the percpu segement base which the dynamic percpu allocator doesn't guarantee. This patch makes x86_64 reserve PERCPU_MODULE_RESERVE bytes in the first chunk so that module percpu areas are always allocated from the first chunk which is always inside the relocatable range. This problem exists for any percpu allocator but is easily triggered when using the embedding allocator because the second chunk is located beyond 2GB on it. This patch also changes the meaning of PERCPU_DYNAMIC_RESERVE such that it only indicates the size of the area to reserve for dynamic allocation as static and dynamic areas can be separate. New PERCPU_DYNAMIC_RESERVED is increased by 4k for both 32 and 64bits as the reserved area separation eats away some allocatable space and having slightly more headroom (currently between 4 and 8k after minimal boot sans module area) makes sense for common case performance. x86_32 can address anywhere from anywhere and doesn't need reserving. Mike Galbraith first reported the problem first and bisected it to the embedding percpu allocator commit. Signed-off-by: Tejun Heo Reported-by: Mike Galbraith Reported-by: Jaswinder Singh Rajput --- include/linux/percpu.h | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8ff15153ae2..54a968b4b92 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -85,31 +85,20 @@ /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy - * back on the first chunk if arch is manually allocating and mapping - * it for faster access (as a part of large page mapping for example). - * Note that dynamic percpu allocator covers both static and dynamic - * areas, so these values are bigger than PERCPU_MODULE_RESERVE. + * back on the first chunk for dynamic percpu allocation if arch is + * manually allocating and mapping it for faster access (as a part of + * large page mapping for example). * - * On typical configuration with modules, the following values leave - * about 8k of free space on the first chunk after boot on both x86_32 - * and 64 when module support is enabled. When module support is - * disabled, it's much tighter. + * The following values give between one and two pages of free space + * after typical minimal boot (2-way SMP, single disk and NIC) with + * both defconfig and a distro config on x86_64 and 32. More + * intelligent way to determine this would be nice. */ -#ifndef PERCPU_DYNAMIC_RESERVE -# if BITS_PER_LONG > 32 -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (24 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# endif -# else -# ifdef CONFIG_MODULES -# define PERCPU_DYNAMIC_RESERVE (16 << 10) -# else -# define PERCPU_DYNAMIC_RESERVE (8 << 10) -# endif -# endif -#endif /* PERCPU_DYNAMIC_RESERVE */ +#if BITS_PER_LONG > 32 +#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#else +#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#endif extern void *pcpu_base_addr; -- cgit v1.2.3