diff options
Diffstat (limited to 'include/linux')
32 files changed, 501 insertions, 65 deletions
diff --git a/include/linux/audit.h b/include/linux/audit.h index 6272a395d43..8f0672d13eb 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -391,6 +391,7 @@ extern int audit_classify_arch(int arch); #ifdef CONFIG_AUDITSYSCALL /* These are defined in auditsc.c */ /* Public API */ +extern void audit_finish_fork(struct task_struct *child); extern int audit_alloc(struct task_struct *task); extern void audit_free(struct task_struct *task); extern void audit_syscall_entry(int arch, @@ -434,7 +435,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); -extern void auditsc_get_stamp(struct audit_context *ctx, +extern int auditsc_get_stamp(struct audit_context *ctx, struct timespec *t, unsigned int *serial); extern int audit_set_loginuid(struct task_struct *task, uid_t loginuid); #define audit_get_loginuid(t) ((t)->loginuid) @@ -504,6 +505,7 @@ static inline int audit_mq_getsetattr(mqd_t mqdes, struct mq_attr *mqstat) extern int audit_n_rules; extern int audit_signals; #else +#define audit_finish_fork(t) #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) #define audit_syscall_entry(ta,a,b,c,d,e) do { ; } while (0) @@ -516,7 +518,7 @@ extern int audit_signals; #define audit_inode(n,d) do { ; } while (0) #define audit_inode_child(d,i,p) do { ; } while (0) #define audit_core_dumps(i) do { ; } while (0) -#define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) +#define auditsc_get_stamp(c,t,s) (0) #define audit_get_loginuid(t) (-1) #define audit_get_sessionid(t) (-1) #define audit_log_task_context(b) do { ; } while (0) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a135256b272..031a315c050 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -662,6 +662,7 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; * default timeout for SG_IO if none specified */ #define BLK_DEFAULT_SG_TIMEOUT (60 * HZ) +#define BLK_MIN_SG_TIMEOUT (7 * HZ) #ifdef CONFIG_BOUNCE extern int init_emergency_isa_pool(void); @@ -786,6 +787,8 @@ static inline void blk_run_address_space(struct address_space *mapping) blk_run_backing_dev(mapping->backing_dev_info, NULL); } +extern void blkdev_dequeue_request(struct request *req); + /* * blk_end_request() and friends. * __blk_end_request() and end_request() must be called with @@ -820,11 +823,6 @@ extern void blk_update_request(struct request *rq, int error, extern unsigned int blk_rq_bytes(struct request *rq); extern unsigned int blk_rq_cur_bytes(struct request *rq); -static inline void blkdev_dequeue_request(struct request *req) -{ - elv_dequeue_request(req->q, req); -} - /* * Access functions for manipulating queue properties */ @@ -921,6 +919,8 @@ extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); #define MAX_SEGMENT_SIZE 65536 +#define BLK_SEG_BOUNDARY_MASK 0xFFFFFFFFUL + #define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist) static inline int queue_hardsect_size(struct request_queue *q) diff --git a/include/linux/can/core.h b/include/linux/can/core.h index e9ca210ffa5..f50785ad478 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -19,7 +19,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> -#define CAN_VERSION "20071116" +#define CAN_VERSION "20081130" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "8" diff --git a/include/linux/compat.h b/include/linux/compat.h index f061a1ea1b7..e88f3ecf38b 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -252,12 +252,10 @@ extern int compat_ptrace_request(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); -#ifdef __ARCH_WANT_COMPAT_SYS_PTRACE extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t addr, compat_ulong_t data); asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, compat_long_t addr, compat_long_t data); -#endif /* __ARCH_WANT_COMPAT_SYS_PTRACE */ /* * epoll (fs/eventpoll.c) compat bits follow ... diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 4aaa4afb1cb..096476f1fb3 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -17,7 +17,7 @@ extern int debug_locks_off(void); ({ \ int __ret = 0; \ \ - if (unlikely(c)) { \ + if (!oops_in_progress && unlikely(c)) { \ if (debug_locks_off() && !debug_locks_silent) \ WARN_ON(1); \ __ret = 1; \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 0dcdd9458f4..4a853ef6fd3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -63,23 +63,24 @@ extern int dir_notify_enable; #define MAY_ACCESS 16 #define MAY_OPEN 32 -#define FMODE_READ ((__force fmode_t)1) -#define FMODE_WRITE ((__force fmode_t)2) - -/* Internal kernel extensions */ -#define FMODE_LSEEK ((__force fmode_t)4) -#define FMODE_PREAD ((__force fmode_t)8) -#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ - -/* File is being opened for execution. Primary users of this flag are - distributed filesystems that can use it to achieve correct ETXTBUSY - behavior for cross-node execution/opening_for_writing of files */ -#define FMODE_EXEC ((__force fmode_t)16) - -#define FMODE_NDELAY ((__force fmode_t)32) -#define FMODE_EXCL ((__force fmode_t)64) +/* file is open for reading */ +#define FMODE_READ ((__force fmode_t)1) +/* file is open for writing */ +#define FMODE_WRITE ((__force fmode_t)2) +/* file is seekable */ +#define FMODE_LSEEK ((__force fmode_t)4) +/* file can be accessed using pread/pwrite */ +#define FMODE_PREAD ((__force fmode_t)8) +#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* File is opened for execution with sys_execve / sys_uselib */ +#define FMODE_EXEC ((__force fmode_t)16) +/* File is opened with O_NDELAY (only set for block devices) */ +#define FMODE_NDELAY ((__force fmode_t)32) +/* File is opened with O_EXCL (only set for block devices) */ +#define FMODE_EXCL ((__force fmode_t)64) +/* File is opened using open(.., 3, ..) and is writeable only for ioctls + (specialy hack for floppy.c) */ #define FMODE_WRITE_IOCTL ((__force fmode_t)128) -#define FMODE_NDELAY_NOW ((__force fmode_t)256) #define RW_MASK 1 #define RWA_MASK 2 diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2..9c5bc6be2b0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -6,6 +6,7 @@ #include <linux/ktime.h> #include <linux/init.h> #include <linux/types.h> +#include <linux/module.h> #include <linux/kallsyms.h> #ifdef CONFIG_FUNCTION_TRACER @@ -231,7 +232,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; int result; unsigned long long duration; /* usecs */ ktime_t calltime; diff --git a/include/linux/futex.h b/include/linux/futex.h index 586ab56a3ec..3bf5bb5a34f 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -25,7 +25,8 @@ union ktime; #define FUTEX_WAKE_BITSET 10 #define FUTEX_PRIVATE_FLAG 128 -#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG +#define FUTEX_CLOCK_REALTIME 256 +#define FUTEX_CMD_MASK ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME) #define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG) #define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG) @@ -164,6 +165,8 @@ union futex_key { } both; }; +#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } + #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); extern void exit_pi_state_list(struct task_struct *curr); diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 181006cc94a..9b70b923169 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -118,13 +118,17 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif -#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ) +#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); +extern void rcu_nmi_enter(void); +extern void rcu_nmi_exit(void); #else # define rcu_irq_enter() do { } while (0) # define rcu_irq_exit() do { } while (0) -#endif /* CONFIG_PREEMPT_RCU */ +# define rcu_nmi_enter() do { } while (0) +# define rcu_nmi_exit() do { } while (0) +#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */ /* * It is safe to do non-atomic ops on ->hardirq_context, @@ -134,7 +138,6 @@ extern void rcu_irq_exit(void); */ #define __irq_enter() \ do { \ - rcu_irq_enter(); \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ @@ -153,7 +156,6 @@ extern void irq_enter(void); trace_hardirq_exit(); \ account_system_vtime(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ - rcu_irq_exit(); \ } while (0) /* @@ -161,7 +163,7 @@ extern void irq_enter(void); */ extern void irq_exit(void); -#define nmi_enter() do { lockdep_off(); __irq_enter(); } while (0) -#define nmi_exit() do { __irq_exit(); lockdep_on(); } while (0) +#define nmi_enter() do { lockdep_off(); rcu_nmi_enter(); __irq_enter(); } while (0) +#define nmi_exit() do { __irq_exit(); rcu_nmi_exit(); lockdep_on(); } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 7dcbc82f3b7..13875ce9112 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -63,12 +63,14 @@ static inline void *kmap_atomic(struct page *page, enum km_type idx) #endif /* CONFIG_HIGHMEM */ /* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ +#ifndef clear_user_highpage static inline void clear_user_highpage(struct page *page, unsigned long vaddr) { void *addr = kmap_atomic(page, KM_USER0); clear_user_page(addr, vaddr, page); kunmap_atomic(addr, KM_USER0); } +#endif #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE /** diff --git a/include/linux/ide.h b/include/linux/ide.h index 54525be4b5f..010fb26a157 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1296,6 +1296,13 @@ extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *o #define ide_pci_register_driver(d) pci_register_driver(d) #endif +static inline int ide_pci_is_in_compatibility_mode(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE && (dev->class & 5) != 5) + return 1; + return 0; +} + void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, hw_regs_t *, hw_regs_t **); void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *); @@ -1375,6 +1382,7 @@ enum { IDE_HFLAG_IO_32BIT = (1 << 24), /* unmask IRQs */ IDE_HFLAG_UNMASK_IRQS = (1 << 25), + IDE_HFLAG_BROKEN_ALTSTATUS = (1 << 26), /* serialize ports if DMA is possible (for sl82c105) */ IDE_HFLAG_SERIALIZE_DMA = (1 << 27), /* force host out of "simplex" mode */ diff --git a/include/linux/idr.h b/include/linux/idr.h index fa035f96f2a..dd846df8cd3 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -52,13 +52,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ struct idr_layer *ary[1<<IDR_BITS]; int count; /* When zero, we can release it */ + int layer; /* distance from leaf */ struct rcu_head rcu_head; }; struct idr { struct idr_layer *top; struct idr_layer *id_free; - int layers; + int layers; /* only valid without concurrent changes */ int id_free_cnt; spinlock_t lock; }; diff --git a/include/linux/irq.h b/include/linux/irq.h index d058c57be02..3dddfa703eb 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -63,7 +63,8 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ #define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ -#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ +#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ +#define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) @@ -130,7 +131,7 @@ struct irq_chip { /** * struct irq_desc - interrupt descriptor - * + * @irq: interrupt number for this descriptor * @handle_irq: highlevel irq-events handler [if NULL, __do_IRQ()] * @chip: low level interrupt hardware access * @msi_desc: MSI descriptor @@ -149,7 +150,6 @@ struct irq_chip { * @cpu: cpu index useful for balancing * @pending_mask: pending rebalanced interrupts * @dir: /proc/irq/ procfs entry - * @affinity_entry: /proc/irq/smp_affinity procfs entry on SMP * @name: flow handler name for /proc/interrupts output */ struct irq_desc { @@ -210,7 +210,6 @@ extern int setup_irq(unsigned int irq, struct irqaction *new); #ifdef CONFIG_GENERIC_PENDING_IRQ -void set_pending_irq(unsigned int irq, cpumask_t mask); void move_native_irq(int irq); void move_masked_irq(int irq); @@ -228,10 +227,6 @@ static inline void move_masked_irq(int irq) { } -static inline void set_pending_irq(unsigned int irq, cpumask_t mask) -{ -} - #endif /* CONFIG_GENERIC_PENDING_IRQ */ #else /* CONFIG_SMP */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index dc7e0d0a647..269df5a17b3 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -141,6 +141,15 @@ extern int _cond_resched(void); (__x < 0) ? -__x : __x; \ }) +#ifdef CONFIG_PROVE_LOCKING +void might_fault(void); +#else +static inline void might_fault(void) +{ + might_sleep(); +} +#endif + extern struct atomic_notifier_head panic_notifier_list; extern long (*panic_blink)(long time); NORET_TYPE void panic(const char * fmt, ...) @@ -188,6 +197,8 @@ extern unsigned long long memparse(const char *ptr, char **retptr); extern int core_kernel_text(unsigned long addr); extern int __kernel_text_address(unsigned long addr); extern int kernel_text_address(unsigned long addr); +extern int func_ptr_is_kernel_text(void *ptr); + struct pid; extern struct pid *session_of_pgrp(struct pid *pgrp); diff --git a/include/linux/libata.h b/include/linux/libata.h index 59b0f1c807b..ed3f26eb5df 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -375,6 +375,7 @@ enum { ATA_HORKAGE_BRIDGE_OK = (1 << 10), /* no bridge limits */ ATA_HORKAGE_ATAPI_MOD16_DMA = (1 << 11), /* use ATAPI DMA for commands not multiple of 16 bytes */ + ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firwmare update warning */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 29aec6e1002..37a0361f468 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -73,6 +73,8 @@ struct lock_class_key { struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; }; +#define LOCKSTAT_POINTS 4 + /* * The lock-class itself: */ @@ -119,7 +121,8 @@ struct lock_class { int name_version; #ifdef CONFIG_LOCK_STAT - unsigned long contention_point[4]; + unsigned long contention_point[LOCKSTAT_POINTS]; + unsigned long contending_point[LOCKSTAT_POINTS]; #endif }; @@ -144,6 +147,7 @@ enum bounce_type { struct lock_class_stats { unsigned long contention_point[4]; + unsigned long contending_point[4]; struct lock_time read_waittime; struct lock_time write_waittime; struct lock_time read_holdtime; @@ -165,6 +169,7 @@ struct lockdep_map { const char *name; #ifdef CONFIG_LOCK_STAT int cpu; + unsigned long ip; #endif }; @@ -309,8 +314,15 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); -extern void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, - unsigned long ip); +extern void lock_set_class(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, unsigned int subclass, + unsigned long ip); + +static inline void lock_set_subclass(struct lockdep_map *lock, + unsigned int subclass, unsigned long ip) +{ + lock_set_class(lock, lock->name, lock->key, subclass, ip); +} # define INIT_LOCKDEP .lockdep_recursion = 0, @@ -328,6 +340,7 @@ static inline void lockdep_on(void) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, n, i) do { } while (0) +# define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) @@ -356,7 +369,7 @@ struct lock_class_key { }; #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); -extern void lock_acquired(struct lockdep_map *lock); +extern void lock_acquired(struct lockdep_map *lock, unsigned long ip); #define LOCK_CONTENDED(_lock, try, lock) \ do { \ @@ -364,13 +377,13 @@ do { \ lock_contended(&(_lock)->dep_map, _RET_IP_); \ lock(_lock); \ } \ - lock_acquired(&(_lock)->dep_map); \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) -#define lock_acquired(lockdep_map) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) @@ -481,4 +494,22 @@ static inline void print_irqtrace_events(struct task_struct *curr) # define lock_map_release(l) do { } while (0) #endif +#ifdef CONFIG_PROVE_LOCKING +# define might_lock(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +# define might_lock_read(lock) \ +do { \ + typecheck(struct lockdep_map *, &(lock)->dep_map); \ + lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \ + lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ +} while (0) +#else +# define might_lock(lock) do { } while (0) +# define might_lock_read(lock) do { } while (0) +#endif + #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/memory.h b/include/linux/memory.h index 2f5f8a5ef2a..36c82c9e6ea 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -91,7 +91,7 @@ extern int memory_notify(unsigned long val, void *v); #ifdef CONFIG_MEMORY_HOTPLUG #define hotplug_memory_notifier(fn, pri) { \ - static struct notifier_block fn##_mem_nb = \ + static __meminitdata struct notifier_block fn##_mem_nb =\ { .notifier_call = fn, .priority = pri }; \ register_memory_notifier(&fn##_mem_nb); \ } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index bd9977b8949..371086fd946 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -179,6 +179,7 @@ struct mlx4_caps { int num_ports; int vl_cap[MLX4_MAX_PORTS + 1]; int ib_mtu_cap[MLX4_MAX_PORTS + 1]; + __be32 ib_port_def_cap[MLX4_MAX_PORTS + 1]; u64 def_mac[MLX4_MAX_PORTS + 1]; int eth_mtu_cap[MLX4_MAX_PORTS + 1]; int gid_table_len[MLX4_MAX_PORTS + 1]; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index bc6da10ceee..7a0e5c4f807 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock); /* * NOTE: mutex_trylock() follows the spin_trylock() convention, * not the down_trylock() convention! + * + * Returns 1 if the mutex has been acquired successfully, and 0 on contention. */ extern int mutex_trylock(struct mutex *lock); extern void mutex_unlock(struct mutex *lock); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9d77b1d7dca..e26f5495289 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -319,6 +319,7 @@ enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_DISABLE, /* Disable pending */ + NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ }; extern void __napi_schedule(struct napi_struct *n); @@ -1497,6 +1498,12 @@ static inline void netif_rx_complete(struct net_device *dev, { unsigned long flags; + /* + * don't let napi dequeue from the cpu poll list + * just in case its running on a different cpu + */ + if (unlikely(test_bit(NAPI_STATE_NPSVC, &napi->state))) + return; local_irq_save(flags); __netif_rx_complete(dev, napi); local_irq_restore(flags); diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index c19595c8930..29fe9ea1d34 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -141,6 +141,7 @@ enum ctattr_protonat { #define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) enum ctattr_natseq { + CTA_NAT_SEQ_UNSPEC, CTA_NAT_SEQ_CORRECTION_POS, CTA_NAT_SEQ_OFFSET_BEFORE, CTA_NAT_SEQ_OFFSET_AFTER, diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be41b609c88..e52ce475d19 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -251,7 +251,7 @@ struct xt_target_param { */ struct xt_tgchk_param { const char *table; - void *entryinfo; + const void *entryinfo; const struct xt_target *target; void *targinfo; unsigned int hook_mask; diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index f546ad6fc02..1e6d34bfa09 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -17,7 +17,7 @@ struct page_cgroup { struct list_head lru; /* per cgroup LRU list */ }; -void __init pgdat_page_cgroup_init(struct pglist_data *pgdat); +void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat); void __init page_cgroup_init(void); struct page_cgroup *lookup_page_cgroup(struct page *page); @@ -91,7 +91,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) #else /* CONFIG_CGROUP_MEM_RES_CTLR */ struct page_cgroup; -static inline void pgdat_page_cgroup_init(struct pglist_data *pgdat) +static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) { } diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 5f89b62e698..301dda829e3 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -41,7 +41,7 @@ #include <linux/seqlock.h> #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ #define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 86f1f5e43e3..bfd289aff57 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,11 +52,15 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -#ifdef CONFIG_CLASSIC_RCU +#if defined(CONFIG_CLASSIC_RCU) #include <linux/rcuclassic.h> -#else /* #ifdef CONFIG_CLASSIC_RCU */ +#elif defined(CONFIG_TREE_RCU) +#include <linux/rcutree.h> +#elif defined(CONFIG_PREEMPT_RCU) #include <linux/rcupreempt.h> -#endif /* #else #ifdef CONFIG_CLASSIC_RCU */ +#else +#error "Unknown RCU implementation specified to kernel configuration" +#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h new file mode 100644 index 00000000000..d4368b7975c --- /dev/null +++ b/include/linux/rcutree.h @@ -0,0 +1,329 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Dipankar Sarma <dipankar@in.ibm.com> + * Paul E. McKenney <paulmck@linux.vnet.ibm.com> Hierarchical algorithm + * + * Based on the original work by Paul McKenney <paulmck@us.ibm.com> + * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU + */ + +#ifndef __LINUX_RCUTREE_H +#define __LINUX_RCUTREE_H + +#include <linux/cache.h> +#include <linux/spinlock.h> +#include <linux/threads.h> +#include <linux/percpu.h> +#include <linux/cpumask.h> +#include <linux/seqlock.h> + +/* + * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this has not been tested, so there is probably some + * bug somewhere. + */ +#define MAX_RCU_LVLS 3 +#define RCU_FANOUT (CONFIG_RCU_FANOUT) +#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) +#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT +# define NUM_RCU_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (NR_CPUS) +# define NUM_RCU_LVL_2 0 +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_SQ +# define NUM_RCU_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) +# define NUM_RCU_LVL_2 (NR_CPUS) +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_CUBE +# define NUM_RCU_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) +# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) +# define NUM_RCU_LVL_3 NR_CPUS +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT */ + +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) + +/* + * Dynticks per-CPU state. + */ +struct rcu_dynticks { + int dynticks_nesting; /* Track nesting level, sort of. */ + int dynticks; /* Even value for dynticks-idle, else odd. */ + int dynticks_nmi; /* Even value for either dynticks-idle or */ + /* not in nmi handler, else odd. So this */ + /* remains even for nmi from irq handler. */ +}; + +/* + * Definition for node within the RCU grace-period-detection hierarchy. + */ +struct rcu_node { + spinlock_t lock; + unsigned long qsmask; /* CPUs or groups that need to switch in */ + /* order for current grace period to proceed.*/ + unsigned long qsmaskinit; + /* Per-GP initialization for qsmask. */ + unsigned long grpmask; /* Mask to apply to parent qsmask. */ + int grplo; /* lowest-numbered CPU or group here. */ + int grphi; /* highest-numbered CPU or group here. */ + u8 grpnum; /* CPU/group number for next level up. */ + u8 level; /* root is at level 0. */ + struct rcu_node *parent; +} ____cacheline_internodealigned_in_smp; + +/* Index values for nxttail array in struct rcu_data. */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_NEXT_SIZE 4 + +/* Per-CPU data for read-copy update. */ +struct rcu_data { + /* 1) quiescent-state and grace-period handling : */ + long completed; /* Track rsp->completed gp number */ + /* in order to detect GP end. */ + long gpnum; /* Highest gp number that this CPU */ + /* is aware of having started. */ + long passed_quiesc_completed; + /* Value of completed at time of qs. */ + bool passed_quiesc; /* User-mode/idle loop etc. */ + bool qs_pending; /* Core waits for quiesc state. */ + bool beenonline; /* CPU online at least once. */ + struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ + unsigned long grpmask; /* Mask to apply to leaf qsmask. */ + + /* 2) batch handling */ + /* + * If nxtlist is not NULL, it is partitioned as follows. + * Any of the partitions might be empty, in which case the + * pointer to that partition will be equal to the pointer for + * the following partition. When the list is empty, all of + * the nxttail elements point to nxtlist, which is NULL. + * + * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): + * Entries that might have arrived after current GP ended + * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): + * Entries known to have arrived before current GP ended + * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): + * Entries that batch # <= ->completed - 1: waiting for current GP + * [nxtlist, *nxttail[RCU_DONE_TAIL]): + * Entries that batch # <= ->completed + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). + */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail[RCU_NEXT_SIZE]; + long qlen; /* # of queued callbacks */ + long blimit; /* Upper limit on a processed batch */ + +#ifdef CONFIG_NO_HZ + /* 3) dynticks interface. */ + struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ + int dynticks_snap; /* Per-GP tracking for dynticks. */ + int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ +#endif /* #ifdef CONFIG_NO_HZ */ + + /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ +#ifdef CONFIG_NO_HZ + unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ +#endif /* #ifdef CONFIG_NO_HZ */ + unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long resched_ipi; /* Sent a resched IPI. */ + + /* 5) state to allow this CPU to force_quiescent_state on others */ + long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rcu_pending_force_qs; /* when to force quiescent states. */ + + int cpu; +}; + +/* Values for signaled field in struct rcu_state. */ +#define RCU_GP_INIT 0 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#ifdef CONFIG_NO_HZ +#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +#else /* #ifdef CONFIG_NO_HZ */ +#define RCU_SIGNAL_INIT RCU_FORCE_QS +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * RCU global state, including node hierarchy. This hierarchy is + * represented in "heap" form in a dense array. The root (first level) + * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second + * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), + * and the third level in ->node[m+1] and following (->node[m+1] referenced + * by ->level[2]). The number of levels is determined by the number of + * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" + * consisting of a single rcu_node. + */ +struct rcu_state { + struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ + struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ + u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ + + /* The following fields are guarded by the root rcu_node's lock. */ + + u8 signaled ____cacheline_internodealigned_in_smp; + /* Force QS state. */ + long gpnum; /* Current gp number. */ + long completed; /* # of last completed gp. */ + spinlock_t onofflock; /* exclude on/offline and */ + /* starting new GP. */ + spinlock_t fqslock; /* Only one task forcing */ + /* quiescent states. */ + unsigned long jiffies_force_qs; /* Time at which to invoke */ + /* force_quiescent_state(). */ + unsigned long n_force_qs; /* Number of calls to */ + /* force_quiescent_state(). */ + unsigned long n_force_qs_lh; /* ~Number of calls leaving */ + /* due to lock unavailable. */ + unsigned long n_force_qs_ngp; /* Number of calls leaving */ + /* due to no GP active. */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started, */ + /* but in jiffies. */ + unsigned long jiffies_stall; /* Time at which to check */ + /* for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#ifdef CONFIG_NO_HZ + long dynticks_completed; /* Value of completed @ snap. */ +#endif /* #ifdef CONFIG_NO_HZ */ +}; + +extern struct rcu_state rcu_state; +DECLARE_PER_CPU(struct rcu_data, rcu_data); + +extern struct rcu_state rcu_bh_state; +DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); + +/* + * Increment the quiescent state counter. + * The counter is a bit degenerated: We do not need to know + * how many quiescent states passed, just if there was at least + * one since the start of the grace period. Thus just a flag. + */ +static inline void rcu_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} +static inline void rcu_bh_qsctr_inc(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} + +extern int rcu_pending(int cpu); +extern int rcu_needs_cpu(int cpu); + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); + __acquire(RCU); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock(void) +{ + rcu_read_release(); + __release(RCU); + preempt_enable(); +} +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); + __acquire(RCU_BH); + rcu_read_acquire(); +} +static inline void __rcu_read_unlock_bh(void) +{ + rcu_read_release(); + __release(RCU_BH); + local_bh_enable(); +} + +#define __synchronize_sched() synchronize_rcu() + +#define call_rcu_sched(head, func) call_rcu(head, func) + +static inline void rcu_init_sched(void) +{ +} + +extern void __rcu_init(void); +extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_restart_cpu(int cpu); + +extern long rcu_batches_completed(void); +extern long rcu_batches_completed_bh(void); + +#ifdef CONFIG_NO_HZ +void rcu_enter_nohz(void); +void rcu_exit_nohz(void); +#else /* CONFIG_NO_HZ */ +static inline void rcu_enter_nohz(void) +{ +} +static inline void rcu_exit_nohz(void) +{ +} +#endif /* CONFIG_NO_HZ */ + +#endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 644ffbda17c..55e30d11447 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -630,6 +630,10 @@ struct user_struct { atomic_t inotify_watches; /* How many inotify watches does this user have? */ atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ #endif +#ifdef CONFIG_EPOLL + atomic_t epoll_devs; /* The number of epoll descriptors currently open */ + atomic_t epoll_watches; /* The number of file descriptors currently watched */ +#endif #ifdef CONFIG_POSIX_MQUEUE /* protected by mq_lock */ unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ diff --git a/include/linux/security.h b/include/linux/security.h index c13f1cec9ab..e3d4ecda267 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1818,17 +1818,21 @@ static inline int security_settime(struct timespec *ts, struct timezone *tz) static inline int security_vm_enough_memory(long pages) { + WARN_ON(current->mm == NULL); return cap_vm_enough_memory(current->mm, pages); } -static inline int security_vm_enough_memory_kern(long pages) +static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) { - return cap_vm_enough_memory(current->mm, pages); + WARN_ON(mm == NULL); + return cap_vm_enough_memory(mm, pages); } -static inline int security_vm_enough_memory_mm(struct mm_struct *mm, long pages) +static inline int security_vm_enough_memory_kern(long pages) { - return cap_vm_enough_memory(mm, pages); + /* If current->mm is a kernel thread then we will pass NULL, + for this specific case that is fine */ + return cap_vm_enough_memory(current->mm, pages); } static inline int security_bprm_alloc(struct linux_binprm *bprm) diff --git a/include/linux/smp.h b/include/linux/smp.h index 3f9a60043a9..6e7ba16ff45 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -146,6 +146,8 @@ static inline void smp_send_reschedule(int cpu) { } }) #define smp_call_function_mask(mask, func, info, wait) \ (up_smp_call_function(func, info)) +#define smp_call_function_many(mask, func, info, wait) \ + (up_smp_call_function(func, info)) static inline void init_call_single_data(void) { } diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index b18ec5533e8..325af1de035 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -7,9 +7,31 @@ struct device; struct dma_attrs; struct scatterlist; +/* + * Maximum allowable number of contiguous slabs to map, + * must be a power of 2. What is the appropriate value ? + * The complexity of {map,unmap}_single is linearly dependent on this value. + */ +#define IO_TLB_SEGSIZE 128 + + +/* + * log of the size of each IO TLB slab. The number of slabs is command line + * controllable. + */ +#define IO_TLB_SHIFT 11 + extern void swiotlb_init(void); +extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); +extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); + +extern dma_addr_t swiotlb_phys_to_bus(phys_addr_t address); +extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); + +extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size); + extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index fec6decfb98..6b58367d145 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, \ set_fs(KERNEL_DS); \ pagefault_disable(); \ - ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ + ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ pagefault_enable(); \ set_fs(old_fs); \ ret; \ diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 73a2f4eb1f7..9b42baed390 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -158,8 +158,12 @@ struct usb_ctrlrequest { * (rarely) accepted by SET_DESCRIPTOR. * * Note that all multi-byte values here are encoded in little endian - * byte order "on the wire". But when exposed through Linux-USB APIs, - * they've been converted to cpu byte order. + * byte order "on the wire". Within the kernel and when exposed + * through the Linux-USB APIs, they are not converted to cpu byte + * order; it is the responsibility of the client code to do this. + * The single exception is when device and configuration descriptors (but + * not other descriptors) are read from usbfs (i.e. /proc/bus/usb/BBB/DDD); + * in this case the fields are converted to host endianness by the kernel. */ /* |