From 62752ee198dca9209b7dee504763e51b11e9e0ca Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 17 Oct 2006 10:31:38 +0200 Subject: [PATCH] Take i_mutex in splice_from_pipe() The splice_actor may be calling ->prepare_write() and ->commit_write(). We want i_mutex on the inode being written to before calling those so that we don't race i_size changes. The double locking behavior is done elsewhere in splice.c, and if we eventually want _nolock variants of generic_file_splice_write(), fs modules might have to replicate the nasty locking code. We introduce inode_double_lock() and inode_double_unlock() to consolidate the locking rules into one set of functions. Signed-off-by: Mark Fasheh Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 661c7c57214..853a02f2393 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; +extern void inode_double_lock(struct inode *inode1, struct inode *inode2); +extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); + /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic -- cgit v1.2.3 From 6da61809822c22634a3de2dcb3c60283b836a88a Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 17 Oct 2006 18:43:07 +0200 Subject: [PATCH] Introduce generic_file_splice_write_nolock() This allows file systems to manage their own i_mutex locking while still re-using the generic_file_splice_write() logic. OCFS2 in particular wants this so that it can order cluster locks within i_mutex. Signed-off-by: Mark Fasheh Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 853a02f2393..d695ba2346a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1758,6 +1758,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); +extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, + struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -- cgit v1.2.3 From 01de85e057328ecbef36e108673b1e81059d54c1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Oct 2006 19:50:36 +0200 Subject: [PATCH] Add lockless helpers for remove_suid() Right now users have to grab i_mutex before calling remove_suid(), in the unlikely event that a call to ->setattr() may be needed. Split up the function in two parts: - One to check if we need to remove suid - One to actually remove it The first we can call lockless. Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d695ba2346a..2fe6e3f900b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1712,6 +1712,8 @@ extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern int __remove_suid(struct dentry *, int); +extern int should_remove_suid(struct dentry *); extern int remove_suid(struct dentry *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *); -- cgit v1.2.3 From 690a973f48b6ba2954465992c08e65059c8374fe Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sat, 21 Oct 2006 18:37:01 +0200 Subject: [PATCH] x86-64: Speed up dwarf2 unwinder This changes the dwarf2 unwinder to do a binary search for CIEs instead of a linear work. The linker is unfortunately not able to build a proper lookup table at link time, instead it creates one at runtime as soon as the bootmem allocator is usable (so you'll continue using the linear lookup for the first [hopefully] few calls). The code should be ready to utilize a build-time created table once a fixed linker becomes available. Signed-off-by: Jan Beulich Signed-off-by: Andi Kleen --- include/linux/unwind.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/unwind.h b/include/linux/unwind.h index 73e1751d03d..749928c161f 100644 --- a/include/linux/unwind.h +++ b/include/linux/unwind.h @@ -26,6 +26,7 @@ struct module; * Initialize unwind support. */ extern void unwind_init(void); +extern void unwind_setup(void); #ifdef CONFIG_MODULES @@ -73,6 +74,7 @@ extern int unwind_to_user(struct unwind_frame_info *); struct unwind_frame_info {}; static inline void unwind_init(void) {} +static inline void unwind_setup(void) {} #ifdef CONFIG_MODULES -- cgit v1.2.3 From a1bae67243512ca30ceda48e3e24e25b543f8ab7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 21 Oct 2006 18:37:02 +0200 Subject: [PATCH] i386: Disable nmi watchdog on all ThinkPads Even newer Thinkpads have bugs in SMM code that causes hangs with NMI watchdog. Signed-off-by: Andi Kleen --- include/linux/dmi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 38dc403be70..904bf3d2d90 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -69,6 +69,7 @@ extern struct dmi_device * dmi_find_device(int type, const char *name, struct dmi_device *from); extern void dmi_scan_machine(void); extern int dmi_get_year(int field); +extern int dmi_name_in_vendors(char *str); #else @@ -77,6 +78,7 @@ static inline char * dmi_get_system_info(int field) { return NULL; } static inline struct dmi_device * dmi_find_device(int type, const char *name, struct dmi_device *from) { return NULL; } static inline int dmi_get_year(int year) { return 0; } +static inline int dmi_name_in_vendors(char *s) { return 0; } #endif -- cgit v1.2.3 From 3343571d9f88a0de542d33aea9ab881f00ff866d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 19 Oct 2006 14:44:53 +0900 Subject: [PATCH] libata: typo fix Typo fix in commment. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index d0a7ad5ed51..b03d5a340dc 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -143,7 +143,7 @@ enum { ATA_DFLAG_CFG_MASK = (1 << 8) - 1, ATA_DFLAG_PIO = (1 << 8), /* device limited to PIO mode */ - ATA_DFLAG_NCQ_OFF = (1 << 9), /* devied limited to non-NCQ mode */ + ATA_DFLAG_NCQ_OFF = (1 << 9), /* device limited to non-NCQ mode */ ATA_DFLAG_SUSPENDED = (1 << 10), /* device suspended */ ATA_DFLAG_INIT_MASK = (1 << 16) - 1, -- cgit v1.2.3 From da3ed32fe568148ede256975d40825ffcdac767b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 21 Oct 2006 10:24:08 -0700 Subject: [PATCH] md: add another COMPAT_IOCTL for md .. so that you can use bitmaps with 32bit userspace on a 64 bit kernel. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat_ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index cfdb4f6a89d..c26c3adcfac 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -131,6 +131,7 @@ COMPATIBLE_IOCTL(RUN_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY_RO) COMPATIBLE_IOCTL(RESTART_ARRAY_RW) +COMPATIBLE_IOCTL(GET_BITMAP_FILE) ULONG_IOCTL(SET_BITMAP_FILE) /* DM */ COMPATIBLE_IOCTL(DM_VERSION_32) -- cgit v1.2.3 From 1c05b4bc22cd640d3a534bd2851a8413d5df3709 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 21 Oct 2006 10:24:08 -0700 Subject: [PATCH] md: endian annotation for v1 superblock access Includes a couple of bugfixes found by sparse. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md_p.h | 56 +++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index b6ebc69bae5..3f2cd98c508 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -206,52 +206,52 @@ static inline __u64 md_event(mdp_super_t *sb) { */ struct mdp_superblock_1 { /* constant array information - 128 bytes */ - __u32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ - __u32 major_version; /* 1 */ - __u32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ - __u32 pad0; /* always set to 0 when writing */ + __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ + __le32 major_version; /* 1 */ + __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ + __le32 pad0; /* always set to 0 when writing */ __u8 set_uuid[16]; /* user-space generated. */ char set_name[32]; /* set and interpreted by user-space */ - __u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ - __u32 layout; /* only for raid5 and raid10 currently */ - __u64 size; /* used size of component devices, in 512byte sectors */ + __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ + __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ + __le32 layout; /* only for raid5 and raid10 currently */ + __le64 size; /* used size of component devices, in 512byte sectors */ - __u32 chunksize; /* in 512byte sectors */ - __u32 raid_disks; - __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts + __le32 chunksize; /* in 512byte sectors */ + __le32 raid_disks; + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts * NOTE: signed, so bitmap can be before superblock * only meaningful of feature_map[0] is set. */ /* These are only valid with feature bit '4' */ - __u32 new_level; /* new level we are reshaping to */ - __u64 reshape_position; /* next address in array-space for reshape */ - __u32 delta_disks; /* change in number of raid_disks */ - __u32 new_layout; /* new layout */ - __u32 new_chunk; /* new chunk size (bytes) */ + __le32 new_level; /* new level we are reshaping to */ + __le64 reshape_position; /* next address in array-space for reshape */ + __le32 delta_disks; /* change in number of raid_disks */ + __le32 new_layout; /* new layout */ + __le32 new_chunk; /* new chunk size (bytes) */ __u8 pad1[128-124]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ - __u64 data_offset; /* sector start of data, often 0 */ - __u64 data_size; /* sectors in this device that can be used for data */ - __u64 super_offset; /* sector start of this superblock */ - __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ - __u32 dev_number; /* permanent identifier of this device - not role in raid */ - __u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ + __le64 data_offset; /* sector start of data, often 0 */ + __le64 data_size; /* sectors in this device that can be used for data */ + __le64 super_offset; /* sector start of this superblock */ + __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + __le32 dev_number; /* permanent identifier of this device - not role in raid */ + __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ __u8 devflags; /* per-device flags. Only one defined...*/ #define WriteMostly1 1 /* mask for writemostly flag in above */ __u8 pad2[64-57]; /* set to 0 when writing */ /* array state information - 64 bytes */ - __u64 utime; /* 40 bits second, 24 btes microseconds */ - __u64 events; /* incremented when superblock updated */ - __u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ - __u32 sb_csum; /* checksum upto devs[max_dev] */ - __u32 max_dev; /* size of devs[] array to consider */ + __le64 utime; /* 40 bits second, 24 btes microseconds */ + __le64 events; /* incremented when superblock updated */ + __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ + __le32 sb_csum; /* checksum upto devs[max_dev] */ + __le32 max_dev; /* size of devs[] array to consider */ __u8 pad3[64-32]; /* set to 0 when writing */ /* device state information. Indexed by dev_number. @@ -260,7 +260,7 @@ struct mdp_superblock_1 { * into the 'roles' value. If a device is spare or faulty, then it doesn't * have a meaningful role. */ - __u16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ + __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ }; /* feature_map bits */ -- cgit v1.2.3 From 4f2e639af4bd5e152fc79256e333643d3dd6c10f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 21 Oct 2006 10:24:09 -0700 Subject: [PATCH] md: endian annotations for the bitmap superblock And a couple of bug fixes found by sparse. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/bitmap.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index 84d88775185..ebd42a3710b 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -146,16 +146,16 @@ enum bitmap_state { /* the superblock at the front of the bitmap file -- little endian */ typedef struct bitmap_super_s { - __u32 magic; /* 0 BITMAP_MAGIC */ - __u32 version; /* 4 the bitmap major for now, could change... */ - __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ - __u64 events; /* 24 event counter for the bitmap (1)*/ - __u64 events_cleared;/*32 event counter when last bit cleared (2) */ - __u64 sync_size; /* 40 the size of the md device's sync range(3) */ - __u32 state; /* 48 bitmap state information */ - __u32 chunksize; /* 52 the bitmap chunk size in bytes */ - __u32 daemon_sleep; /* 56 seconds between disk flushes */ - __u32 write_behind; /* 60 number of outstanding write-behind writes */ + __le32 magic; /* 0 BITMAP_MAGIC */ + __le32 version; /* 4 the bitmap major for now, could change... */ + __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ + __le64 events; /* 24 event counter for the bitmap (1)*/ + __le64 events_cleared;/*32 event counter when last bit cleared (2) */ + __le64 sync_size; /* 40 the size of the md device's sync range(3) */ + __le32 state; /* 48 bitmap state information */ + __le32 chunksize; /* 52 the bitmap chunk size in bytes */ + __le32 daemon_sleep; /* 56 seconds between disk flushes */ + __le32 write_behind; /* 60 number of outstanding write-behind writes */ __u8 pad[256 - 64]; /* set to zero */ } bitmap_super_t; -- cgit v1.2.3 From d42552c3ace1fa1f16ae02ce642f4c733cec40ca Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 21 Oct 2006 10:24:12 -0700 Subject: [PATCH] pci: declare pci_get_device_reverse() We seem to have lost the declaration of pci_get_device_reverse(), if we ever had one. Add a CONFIG_PCI=0 stub too. Acked-by: Alan Cox Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4689e2a699c..09be0f81b27 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -455,7 +455,11 @@ int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability (struct pci_dev *dev, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); -struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from); +struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, + struct pci_dev *from); +struct pci_dev *pci_get_device_reverse(unsigned int vendor, unsigned int device, + struct pci_dev *from); + struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, struct pci_dev *from); @@ -660,7 +664,12 @@ static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn) { return NULL; } -static inline struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from) +static inline struct pci_dev *pci_get_device(unsigned int vendor, + unsigned int device, struct pci_dev *from) +{ return NULL; } + +static inline struct pci_dev *pci_get_device_reverse(unsigned int vendor, + unsigned int device, struct pci_dev *from) { return NULL; } static inline struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device, -- cgit v1.2.3 From 7516795739bd53175629b90fab0ad488d7a6a9f7 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Sat, 21 Oct 2006 10:24:14 -0700 Subject: [PATCH] Reintroduce NODES_SPAN_OTHER_NODES for powerpc Reintroduce NODES_SPAN_OTHER_NODES for powerpc Revert "[PATCH] Remove SPAN_OTHER_NODES config definition" This reverts commit f62859bb6871c5e4a8e591c60befc8caaf54db8c. Revert "[PATCH] mm: remove arch independent NODES_SPAN_OTHER_NODES" This reverts commit a94b3ab7eab4edcc9b2cb474b188f774c331adf7. Also update the comments to indicate that this is still required and where its used. Signed-off-by: Andy Whitcroft Cc: Paul Mackerras Cc: Mike Kravetz Cc: Benjamin Herrenschmidt Acked-by: Mel Gorman Acked-by: Will Schmidt Cc: Christoph Lameter Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 59855b8718a..ed0762b283a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -674,6 +674,12 @@ void sparse_init(void); #define sparse_index_init(_sec, _nid) do {} while (0) #endif /* CONFIG_SPARSEMEM */ +#ifdef CONFIG_NODES_SPAN_OTHER_NODES +#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) +#else +#define early_pfn_in_nid(pfn, nid) (1) +#endif + #ifndef early_pfn_valid #define early_pfn_valid(pfn) (1) #endif -- cgit v1.2.3 From faf6bbcf94caee10ba34adb86db4ecca96bfd3bf Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Sat, 21 Oct 2006 10:24:17 -0700 Subject: [PATCH] cpuset: mempolicy migration typo fix Mistyped an ifdef CONFIG_CPUSETS - fixed. I doubt that anyone ever noticed. The impact of this typo was that if someone: 1) was using MPOL_BIND to force off node allocations 2) while using cpusets to constrain memory placement 3) when that cpuset was migrating that jobs memory 4) while the tasks in that job were actively forking then there was a rare chance that future allocations using that MPOL_BIND policy would be node local, not off node. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 09f0f575ddf..daabb3aa1ec 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -150,7 +150,7 @@ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern void mpol_fix_fork_child_flag(struct task_struct *p); #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) -#ifdef CONFIG_CPUSET +#ifdef CONFIG_CPUSETS #define current_cpuset_is_being_rebound() \ (cpuset_being_rebound == current->cpuset) #else -- cgit v1.2.3 From 04fed361dadb7921507a470947ac23d2f26352cf Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 22 Oct 2006 15:57:18 +0100 Subject: [PATCH] Remove __must_check for device_for_each_child() Eliminate more __must_check madness. The return code from device_for_each_child() depends on the values which the helper function returns. If the helper function always returns zero, it's utterly pointless to check the return code from device_for_each_child(). The only code which knows if the return value should be checked is the caller itself, so forcing the return code to always be checked is silly. Hence, remove the __must_check annotation. Signed-off-by: Russell King Signed-off-by: Linus Torvalds --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 662e6a10144..9d4f6a96393 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -393,7 +393,7 @@ extern void device_unregister(struct device * dev); extern void device_initialize(struct device * dev); extern int __must_check device_add(struct device * dev); extern void device_del(struct device * dev); -extern int __must_check device_for_each_child(struct device *, void *, +extern int device_for_each_child(struct device *, void *, int (*fn)(struct device *, void *)); extern int device_rename(struct device *dev, char *new_name); -- cgit v1.2.3 From 735a7ffb739b6efeaeb1e720306ba308eaaeb20e Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 27 Oct 2006 11:42:37 -0700 Subject: [PATCH] drivers: wait for threaded probes between initcall levels The multithreaded-probing code has a problem: after one initcall level (eg, core_initcall) has been processed, we will then start processing the next level (postcore_initcall) while the kernel threads which are handling core_initcall are still executing. This breaks the guarantees which the layered initcalls previously gave us. IOW, we want to be multithreaded _within_ an initcall level, but not between different levels. Fix that up by causing the probing code to wait for all outstanding probes at one level to complete before we start processing the next level. Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init.h | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index e92b1455d7a..ff40ea118e3 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -84,19 +84,29 @@ extern void setup_arch(char **); * by link order. * For backwards compatibility, initcall() puts the call in * the device init subsection. + * + * The `id' arg to __define_initcall() is needed so that multiple initcalls + * can point at the same handler without causing duplicate-symbol build errors. */ -#define __define_initcall(level,fn) \ - static initcall_t __initcall_##fn __attribute_used__ \ +#define __define_initcall(level,fn,id) \ + static initcall_t __initcall_##fn##id __attribute_used__ \ __attribute__((__section__(".initcall" level ".init"))) = fn -#define core_initcall(fn) __define_initcall("1",fn) -#define postcore_initcall(fn) __define_initcall("2",fn) -#define arch_initcall(fn) __define_initcall("3",fn) -#define subsys_initcall(fn) __define_initcall("4",fn) -#define fs_initcall(fn) __define_initcall("5",fn) -#define device_initcall(fn) __define_initcall("6",fn) -#define late_initcall(fn) __define_initcall("7",fn) +#define core_initcall(fn) __define_initcall("1",fn,1) +#define core_initcall_sync(fn) __define_initcall("1s",fn,1s) +#define postcore_initcall(fn) __define_initcall("2",fn,2) +#define postcore_initcall_sync(fn) __define_initcall("2s",fn,2s) +#define arch_initcall(fn) __define_initcall("3",fn,3) +#define arch_initcall_sync(fn) __define_initcall("3s",fn,3s) +#define subsys_initcall(fn) __define_initcall("4",fn,4) +#define subsys_initcall_sync(fn) __define_initcall("4s",fn,4s) +#define fs_initcall(fn) __define_initcall("5",fn,5) +#define fs_initcall_sync(fn) __define_initcall("5s",fn,5s) +#define device_initcall(fn) __define_initcall("6",fn,6) +#define device_initcall_sync(fn) __define_initcall("6s",fn,6s) +#define late_initcall(fn) __define_initcall("7",fn,7) +#define late_initcall_sync(fn) __define_initcall("7s",fn,7s) #define __initcall(fn) device_initcall(fn) -- cgit v1.2.3 From 2ae88149a27cadf2840e0ab8155bef13be285c03 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 28 Oct 2006 10:38:23 -0700 Subject: [PATCH] mm: clean up pagecache allocation - Consolidate page_cache_alloc - Fix splice: only the pagecache pages and filesystem data need to use mapping_gfp_mask. - Fix grab_cache_page_nowait: same as splice, also honour NUMA placement. Signed-off-by: Nick Piggin Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 64f95092515..c3e255bf859 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -52,19 +52,23 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) void release_pages(struct page **pages, int nr, int cold); #ifdef CONFIG_NUMA -extern struct page *page_cache_alloc(struct address_space *x); -extern struct page *page_cache_alloc_cold(struct address_space *x); +extern struct page *__page_cache_alloc(gfp_t gfp); #else +static inline struct page *__page_cache_alloc(gfp_t gfp) +{ + return alloc_pages(gfp, 0); +} +#endif + static inline struct page *page_cache_alloc(struct address_space *x) { - return alloc_pages(mapping_gfp_mask(x), 0); + return __page_cache_alloc(mapping_gfp_mask(x)); } static inline struct page *page_cache_alloc_cold(struct address_space *x) { - return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0); + return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); } -#endif typedef int filler_t(void *, struct page *); -- cgit v1.2.3 From 3bb1a852ab6c9cdf211a2f4a2f502340c8c38eca Mon Sep 17 00:00:00 2001 From: Martin Bligh Date: Sat, 28 Oct 2006 10:38:24 -0700 Subject: [PATCH] vmscan: Fix temp_priority race The temp_priority field in zone is racy, as we can walk through a reclaim path, and just before we copy it into prev_priority, it can be overwritten (say with DEF_PRIORITY) by another reclaimer. The same bug is contained in both try_to_free_pages and balance_pgdat, but it is fixed slightly differently. In balance_pgdat, we keep a separate priority record per zone in a local array. In try_to_free_pages there is no need to do this, as the priority level is the same for all zones that we reclaim from. Impact of this bug is that temp_priority is copied into prev_priority, and setting this artificially high causes reclaimers to set distress artificially low. They then fail to reclaim mapped pages, when they are, in fact, under severe memory pressure (their priority may be as low as 0). This causes the OOM killer to fire incorrectly. From: Andrew Morton __zone_reclaim() isn't modifying zone->prev_priority. But zone->prev_priority is used in the decision whether or not to bring mapped pages onto the inactive list. Hence there's a risk here that __zone_reclaim() will fail because zone->prev_priority ir large (ie: low urgency) and lots of mapped pages end up stuck on the active list. Fix that up by decreasing (ie making more urgent) zone->prev_priority as __zone_reclaim() scans the zone's pages. This bug perhaps explains why ZONE_RECLAIM_PRIORITY was created. It should be possible to remove that now, and to just start out at DEF_PRIORITY? Cc: Nick Piggin Cc: Christoph Lameter Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ed0762b283a..e06683e2bea 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -218,13 +218,9 @@ struct zone { * under - it drives the swappiness decision: whether to unmap mapped * pages. * - * temp_priority is used to remember the scanning priority at which - * this zone was successfully refilled to free_pages == pages_high. - * - * Access to both these fields is quite racy even on uniprocessor. But + * Access to both this field is quite racy even on uniprocessor. But * it is expected to average out OK. */ - int temp_priority; int prev_priority; -- cgit v1.2.3 From 52fd24ca1db3a741f144bbc229beefe044202cac Mon Sep 17 00:00:00 2001 From: Giridhar Pemmasani Date: Sat, 28 Oct 2006 10:38:34 -0700 Subject: [PATCH] __vmalloc with GFP_ATOMIC causes 'sleeping from invalid context' If __vmalloc is called to allocate memory with GFP_ATOMIC in atomic context, the chain of calls results in __get_vm_area_node allocating memory for vm_struct with GFP_KERNEL, causing the 'sleeping from invalid context' warning. This patch fixes it by passing the gfp flags along so __get_vm_area_node allocates memory for vm_struct with the same flags. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ce5f1482e6b..dc9a29d84ab 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -60,7 +60,8 @@ extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); extern struct vm_struct *get_vm_area_node(unsigned long size, - unsigned long flags, int node); + unsigned long flags, int node, + gfp_t gfp_mask); extern struct vm_struct *remove_vm_area(void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); -- cgit v1.2.3 From 5fa3839a64203b2ab727dcb37da9b2d7079fca28 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Sat, 28 Oct 2006 10:38:46 -0700 Subject: [PATCH] Constify compat_get_bitmap argument This means we can call it when the bitmap we want to fetch is declared const. Signed-off-by: Stephen Rothwell Cc: Christoph Lameter Cc: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index f4ebf96f530..f1553196826 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -196,7 +196,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, #define BITS_TO_COMPAT_LONGS(bits) \ (((bits)+BITS_PER_COMPAT_LONG-1)/BITS_PER_COMPAT_LONG) -long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask, +long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size); long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, unsigned long bitmap_size); -- cgit v1.2.3 From 093a8e8aecd77b2799934996a55a6838e1e2b8f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 28 Oct 2006 10:38:51 -0700 Subject: [PATCH] taskstats_tgid_free: fix usage taskstats_tgid_free() is called on copy_process's error path. This is wrong. IF (clone_flags & CLONE_THREAD) We should not clear ->signal->taskstats, current uses it, it probably has a valid accumulated info. ELSE taskstats_tgid_init() set ->signal->taskstats = NULL, there is nothing to free. Move the callsite to __exit_signal(). We don't need any locking, entire thread group is exiting, nobody should have a reference to soon to be released ->signal. Signed-off-by: Oleg Nesterov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats_kern.h | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 16894b7edcc..a437ca0d226 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -49,17 +49,8 @@ static inline void taskstats_tgid_alloc(struct signal_struct *sig) static inline void taskstats_tgid_free(struct signal_struct *sig) { - struct taskstats *stats = NULL; - unsigned long flags; - - spin_lock_irqsave(&sig->stats_lock, flags); - if (sig->stats) { - stats = sig->stats; - sig->stats = NULL; - } - spin_unlock_irqrestore(&sig->stats_lock, flags); - if (stats) - kmem_cache_free(taskstats_cache, stats); + if (sig->stats) + kmem_cache_free(taskstats_cache, sig->stats); } extern void taskstats_exit_alloc(struct taskstats **, unsigned int *); -- cgit v1.2.3 From 17b02695b254aa2ef0e53df9c8e6548f86e66a9d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 28 Oct 2006 10:38:52 -0700 Subject: [PATCH] taskstats_tgid_alloc: optimization Every subthread (except first) does unneeded kmem_cache_alloc/kmem_cache_free. Signed-off-by: Oleg Nesterov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats_kern.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index a437ca0d226..664224008fb 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -32,6 +32,9 @@ static inline void taskstats_tgid_alloc(struct signal_struct *sig) struct taskstats *stats; unsigned long flags; + if (sig->stats != NULL) + return; + stats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); if (!stats) return; -- cgit v1.2.3 From b8534d7bd89df0cd41cd47bcd6733a05ea9a691a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 28 Oct 2006 10:38:53 -0700 Subject: [PATCH] taskstats: kill ->taskstats_lock in favor of ->siglock signal_struct is (mostly) protected by ->sighand->siglock, I think we don't need ->taskstats_lock to protect ->stats. This also allows us to simplify the locking in fill_tgid(). Signed-off-by: Oleg Nesterov Cc: Shailabh Nagar Cc: Balbir Singh Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - include/linux/taskstats_kern.h | 15 ++++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6735c1cf334..eafe4a7b823 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -466,7 +466,6 @@ struct signal_struct { struct pacct_struct pacct; /* per-process accounting information */ #endif #ifdef CONFIG_TASKSTATS - spinlock_t stats_lock; struct taskstats *stats; #endif }; diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 664224008fb..6562a2050a2 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -23,28 +23,26 @@ static inline void taskstats_exit_free(struct taskstats *tidstats) static inline void taskstats_tgid_init(struct signal_struct *sig) { - spin_lock_init(&sig->stats_lock); sig->stats = NULL; } -static inline void taskstats_tgid_alloc(struct signal_struct *sig) +static inline void taskstats_tgid_alloc(struct task_struct *tsk) { + struct signal_struct *sig = tsk->signal; struct taskstats *stats; - unsigned long flags; if (sig->stats != NULL) return; + /* No problem if kmem_cache_zalloc() fails */ stats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); - if (!stats) - return; - spin_lock_irqsave(&sig->stats_lock, flags); + spin_lock_irq(&tsk->sighand->siglock); if (!sig->stats) { sig->stats = stats; stats = NULL; } - spin_unlock_irqrestore(&sig->stats_lock, flags); + spin_unlock_irq(&tsk->sighand->siglock); if (stats) kmem_cache_free(taskstats_cache, stats); @@ -59,7 +57,6 @@ static inline void taskstats_tgid_free(struct signal_struct *sig) extern void taskstats_exit_alloc(struct taskstats **, unsigned int *); extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int); extern void taskstats_init_early(void); -extern void taskstats_tgid_alloc(struct signal_struct *); #else static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) {} @@ -71,7 +68,7 @@ static inline void taskstats_exit_send(struct task_struct *tsk, {} static inline void taskstats_tgid_init(struct signal_struct *sig) {} -static inline void taskstats_tgid_alloc(struct signal_struct *sig) +static inline void taskstats_tgid_alloc(struct task_struct *tsk) {} static inline void taskstats_tgid_free(struct signal_struct *sig) {} -- cgit v1.2.3 From 7259f0d05d595b73ef312a082e628627c6414969 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 29 Oct 2006 22:46:36 -0800 Subject: [PATCH] lockdep: annotate DECLARE_WAIT_QUEUE_HEAD kernel: INFO: trying to register non-static key. kernel: the code is fine but needs lockdep annotation. kernel: turning off the locking correctness validator. kernel: [] show_trace_log_lvl+0x58/0x16a kernel: [] show_trace+0xd/0x10 kernel: [] dump_stack+0x19/0x1b kernel: [] __lock_acquire+0xf0/0x90d kernel: [] lock_acquire+0x4b/0x6b kernel: [] _spin_lock_irqsave+0x22/0x32 kernel: [] prepare_to_wait+0x17/0x4b kernel: [] lpfc_do_work+0xdd/0xcc2 [lpfc] kernel: [] kthread+0xc3/0xf2 kernel: [] kernel_thread_helper+0x5/0xb Another case of non-static lockdep keys; duplicate the paradigm set by DECLARE_COMPLETION_ONSTACK and introduce DECLARE_WAIT_QUEUE_HEAD_ONSTACK. Signed-off-by: Peter Zijlstra Cc: Greg KH Cc: Markus Lidel Acked-by: Ingo Molnar Cc: Arjan van de Ven Cc: James Bottomley Cc: Marcel Holtmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/wait.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index b3b9048421d..e820d00e138 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -79,6 +79,15 @@ struct task_struct; extern void init_waitqueue_head(wait_queue_head_t *q); +#ifdef CONFIG_LOCKDEP +# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ + ({ init_waitqueue_head(&name); name; }) +# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \ + wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) +#else +# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) +#endif + static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) { q->flags = 0; -- cgit v1.2.3 From 351edd240d0ba8620789ca9e24f5a38b62157f23 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 29 Oct 2006 22:46:40 -0800 Subject: [PATCH] MTD: fix last kernel-doc warning Fix the last current kernel-doc warning: Warning(/var/linsrc/linux-2619-rc3g5//include/linux/mtd/nand.h:416): No description found for parameter 'write_page' Signed-off-by: Randy Dunlap Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 70420bbae82..8b3ef418721 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -355,7 +355,7 @@ struct nand_buffers { * @priv: [OPTIONAL] pointer to private chip date * @errstat: [OPTIONAL] hardware specific function to perform additional error status checks * (determine if errors are correctable) - * @write_page [REPLACEABLE] High-level page write function + * @write_page: [REPLACEABLE] High-level page write function */ struct nand_chip { -- cgit v1.2.3 From 6e42acc4115bc376b8523acbcba2b2b7cc27d016 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 27 Oct 2006 19:08:42 -0700 Subject: [PATCH] libata: unexport ata_dev_revalidate() ata_dev_revalidate() isn't used outside of libata core. Unexport it. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index b03d5a340dc..abd2debebca 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -702,7 +702,6 @@ extern int ata_std_prereset(struct ata_port *ap); extern int ata_std_softreset(struct ata_port *ap, unsigned int *classes); extern int sata_std_hardreset(struct ata_port *ap, unsigned int *class); extern void ata_std_postreset(struct ata_port *ap, unsigned int *classes); -extern int ata_dev_revalidate(struct ata_device *dev, int post_reset); extern void ata_port_disable(struct ata_port *); extern void ata_std_ports(struct ata_ioports *ioaddr); #ifdef CONFIG_PCI -- cgit v1.2.3 From 4fa2eeeac5e13a8579ee45bc172eed690d28fbb7 Mon Sep 17 00:00:00 2001 From: Peer Chen Date: Thu, 2 Nov 2006 18:55:48 -0500 Subject: pci_ids.h: Add NVIDIA PCI ID Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f3a168f3c9d..fa4e1d79978 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1213,6 +1213,7 @@ #define PCI_DEVICE_ID_NVIDIA_NVENET_21 0x0451 #define PCI_DEVICE_ID_NVIDIA_NVENET_22 0x0452 #define PCI_DEVICE_ID_NVIDIA_NVENET_23 0x0453 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE 0x0560 #define PCI_VENDOR_ID_IMS 0x10e0 #define PCI_DEVICE_ID_IMS_TT128 0x9128 -- cgit v1.2.3 From 86f4f0f9ba6e35fbbc409dfc3d8615c1a9822482 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 2 Nov 2006 22:07:05 -0800 Subject: [PATCH] fix UFS superblock alignment issues ufs2 fails to mount on x86_64, claiming bad magic. This is because ufs_super_block_third's fs_un1 member is padded out by 4 bytes for 8-byte alignment, pushing down the rest of the struct. Forcing this to be packed solves it. I took a quick look over other on-disk structures and didn't immediately find other problems. I was able to mount & ls a populated ufs2 filesystem w/ this change. Signed-off-by: Eric Sandeen Cc: Evgeniy Dushistov Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ufs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index 61eef508b04..28967eda9d7 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -908,7 +908,7 @@ struct ufs_super_block_third { __fs64 fs_csaddr; /* blk addr of cyl grp summary area */ __fs64 fs_pendingblocks;/* blocks in process of being freed */ __fs32 fs_pendinginodes;/*inodes in process of being freed */ - } fs_u2; + } __attribute__ ((packed)) fs_u2; } fs_un1; union { struct { -- cgit v1.2.3 From f46c483357c2d87606bbefb511321e3efd4baae0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 2 Nov 2006 22:07:16 -0800 Subject: [PATCH] Add printk_timed_ratelimit() printk_ratelimit() has global state which makes it not useful for callers which wish to perform ratelimiting at a particular frequency. Add a printk_timed_ratelimit() which utilises caller-provided state storage to permit more flexibility. This function can in fact be used for things other than printk ratelimiting and is perhaps poorly named. Cc: Ulrich Drepper Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 80f39cab470..24b611147ad 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -171,6 +171,8 @@ __attribute_const__ roundup_pow_of_two(unsigned long x) extern int printk_ratelimit(void); extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); +extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, + unsigned int interval_msec); static inline void console_silent(void) { -- cgit v1.2.3 From b918f6e62cd46774f9fc0a3fbba6bd10ad85ee14 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 2 Nov 2006 22:07:19 -0800 Subject: [PATCH] swsusp: debugging Add a swsusp debugging mode. This does everything that's needed for a suspend except for actually suspending. So we can look in the log messages and work out a) what code is being slow and b) which drivers are misbehaving. (1) # echo testproc > /sys/power/disk # echo disk > /sys/power/state This should turn off the non-boot CPU, freeze all processes, wait for 5 seconds and then thaw the processes and the CPU. (2) # echo test > /sys/power/disk # echo disk > /sys/power/state This should turn off the non-boot CPU, freeze all processes, shrink memory, suspend all devices, wait for 5 seconds, resume the devices etc. Cc: Pavel Machek Cc: Stefan Seyfried Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 6b27e07aef1..070394e846d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -116,7 +116,9 @@ typedef int __bitwise suspend_disk_method_t; #define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2) #define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3) #define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4) -#define PM_DISK_MAX ((__force suspend_disk_method_t) 5) +#define PM_DISK_TEST ((__force suspend_disk_method_t) 5) +#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 6) +#define PM_DISK_MAX ((__force suspend_disk_method_t) 7) struct pm_ops { suspend_disk_method_t pm_disk_mode; -- cgit v1.2.3 From 3fd593979802f81ff6452596ac61e3840f917589 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 2 Nov 2006 22:07:24 -0800 Subject: [PATCH] Create compat_sys_migrate_pages This is needed on bigendian 64bit architectures. Signed-off-by: Stephen Rothwell Acked-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index f1553196826..80b17f440ec 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -230,5 +230,9 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern int compat_printk(const char *fmt, ...); extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); +asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, + compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, + const compat_ulong_t __user *new_nodes); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ -- cgit v1.2.3 From 4833ed094097323f5f219820f6ebdc8dd66f501f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 3 Nov 2006 00:27:06 -0800 Subject: [IPX]: Trivial parts of endianness annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/ipx.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipx.h b/include/linux/ipx.h index 4f29c60964c..eb19b4ea84f 100644 --- a/include/linux/ipx.h +++ b/include/linux/ipx.h @@ -7,8 +7,8 @@ struct sockaddr_ipx { sa_family_t sipx_family; - __u16 sipx_port; - __u32 sipx_network; + __be16 sipx_port; + __be32 sipx_network; unsigned char sipx_node[IPX_NODE_LEN]; __u8 sipx_type; unsigned char sipx_zero; /* 16 byte fill */ @@ -23,13 +23,13 @@ struct sockaddr_ipx { #define IPX_CRTITF 1 struct ipx_route_definition { - __u32 ipx_network; - __u32 ipx_router_network; + __be32 ipx_network; + __be32 ipx_router_network; unsigned char ipx_router_node[IPX_NODE_LEN]; }; struct ipx_interface_definition { - __u32 ipx_network; + __be32 ipx_network; unsigned char ipx_device[16]; unsigned char ipx_dlink_type; #define IPX_FRAME_NONE 0 @@ -55,8 +55,8 @@ struct ipx_config_data { */ struct ipx_route_def { - __u32 ipx_network; - __u32 ipx_router_network; + __be32 ipx_network; + __be32 ipx_router_network; #define IPX_ROUTE_NO_ROUTER 0 unsigned char ipx_router_node[IPX_NODE_LEN]; unsigned char ipx_device[16]; -- cgit v1.2.3 From d99f160ac53e51090f015a8f0617cea25f81a191 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 5 Nov 2006 23:52:12 -0800 Subject: [PATCH] sysctl: allow a zero ctl_name in the middle of a sysctl table Since it is becoming clear that there are just enough users of the binary sysctl interface that completely removing the binary interface from the kernel will not be an option for foreseeable future, we need to find a way to address the sysctl maintenance issues. The basic problem is that sysctl requires one central authority to allocate sysctl numbers, or else conflicts and ABI breakage occur. The proc interface to sysctl does not have that problem, as names are not densely allocated. By not terminating a sysctl table until I have neither a ctl_name nor a procname, it becomes simple to add sysctl entries that don't show up in the binary sysctl interface. Which allows people to avoid allocating a binary sysctl value when not needed. I have audited the kernel code and in my reading I have not found a single sysctl table that wasn't terminated by a completely zero filled entry. So this change in behavior should not affect anything. I think this mechanism eases the pain enough that combined with a little disciple we can solve the reoccurring sysctl ABI breakage. Signed-off-by: Eric W. Biederman Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1b24bd45e08..c184732a70f 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -961,8 +961,8 @@ extern ctl_handler sysctl_ms_jiffies; /* * Register a set of sysctl names by calling register_sysctl_table * with an initialised array of ctl_table's. An entry with zero - * ctl_name terminates the table. table->de will be set up by the - * registration and need not be initialised in advance. + * ctl_name and NULL procname terminates the table. table->de will be + * set up by the registration and need not be initialised in advance. * * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. @@ -973,7 +973,10 @@ extern ctl_handler sysctl_ms_jiffies; * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. A * null procname disables /proc mirroring at this node. - * + * + * sysctl entries with a zero ctl_name will not be available through + * the binary sysctl interface. + * * sysctl(2) can automatically manage read and write requests through * the sysctl table. The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be -- cgit v1.2.3 From 7cc13edc139108bb527b692f0548dce6bc648572 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 5 Nov 2006 23:52:13 -0800 Subject: [PATCH] sysctl: implement CTL_UNNUMBERED This patch takes the CTL_UNNUMBERD concept from NFS and makes it available to all new sysctl users. At the same time the sysctl binary interface maintenance documentation is updated to mention and to describe what is needed to successfully maintain the sysctl binary interface. Signed-off-by: Eric W. Biederman Acked-by: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysctl.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index c184732a70f..d98562f1df7 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -6,10 +6,17 @@ **************************************************************** **************************************************************** ** + ** WARNING: ** The values in this file are exported to user space via - ** the sysctl() binary interface. However this interface - ** is unstable and deprecated and will be removed in the future. - ** For a stable interface use /proc/sys. + ** the sysctl() binary interface. Do *NOT* change the + ** numbering of any existing values here, and do not change + ** any numbers within any one set of values. If you have to + ** have to redefine an existing interface, use a new number for it. + ** The kernel will then return -ENOTDIR to any application using + ** the old binary interface. + ** + ** For new interfaces unless you really need a binary number + ** please use CTL_UNNUMBERED. ** **************************************************************** **************************************************************** @@ -48,6 +55,7 @@ struct __sysctl_args { #ifdef __KERNEL__ #define CTL_ANY -1 /* Matches any name */ #define CTL_NONE 0 +#define CTL_UNNUMBERED CTL_NONE /* sysctl without a binary number */ #endif enum -- cgit v1.2.3 From 81ac95c5569d7a60ab5db6c1ccec56c12b3ebcb5 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Nov 2006 17:44:40 -0800 Subject: [PATCH] nfsd4: fix open-create permissions In the case where an open creates the file, we shouldn't be rechecking permissions to open the file; the open succeeds regardless of what the new file's mode bits say. This patch fixes the problem, but only by introducing yet another parameter to nfsd_create_v3. This is ugly. This will be fixed by later patches. Signed-off-by: J. Bruce Fields Acked-by: Neil Brown Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nfsd/nfsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index eb231143d57..edb54c3171b 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -89,7 +89,7 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); __be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int createmode, - u32 *verifier, int *truncp); + u32 *verifier, int *truncp, int *created); __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, loff_t, unsigned long); #endif /* CONFIG_NFSD_V3 */ -- cgit v1.2.3 From 46d52b09fa6a2d1e313cb75ca352d6f466e67bd1 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 8 Nov 2006 17:44:55 -0800 Subject: [PATCH] IPMI: retry messages on certain error returns Some more errors from the IPMI send message command are retryable, but are not being retried by the IPMI code. Make sure they get retried. Signed-off-by: Corey Minyard Cc: Frederic Lelievre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi_msgdefs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index 22f5e2afda4..4d04d8b58a0 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -75,6 +75,8 @@ #define IPMI_INVALID_COMMAND_ERR 0xc1 #define IPMI_ERR_MSG_TRUNCATED 0xc6 #define IPMI_LOST_ARBITRATION_ERR 0x81 +#define IPMI_BUS_ERR 0x82 +#define IPMI_NAK_ON_WRITE_ERR 0x83 #define IPMI_ERR_UNSPECIFIED 0xff #define IPMI_CHANNEL_PROTOCOL_IPMB 1 -- cgit v1.2.3 From ec68307cc5a8dc499e48693843bb42f6b6028458 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 8 Nov 2006 17:44:57 -0800 Subject: [PATCH] htirq: refactor so we only have one function that writes to the chip This refactoring actually optimizes the code a little by caching the value that we think the device is programmed with instead of reading it back from the hardware. Which simplifies the code a little and should speed things up a bit. This patch introduces the concept of a ht_irq_msg and modifies the architecture read/write routines to update this code. There is a minor consistency fix here as well as x86_64 forgot to initialize the htirq as masked. Signed-off-by: Eric W. Biederman Cc: Andi Kleen Acked-by: Bryan O'Sullivan Cc: Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/htirq.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 1f15ce279a2..108f0d91e11 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -1,11 +1,14 @@ #ifndef LINUX_HTIRQ_H #define LINUX_HTIRQ_H +struct ht_irq_msg { + u32 address_lo; /* low 32 bits of the ht irq message */ + u32 address_hi; /* high 32 bits of the it irq message */ +}; + /* Helper functions.. */ -void write_ht_irq_low(unsigned int irq, u32 data); -void write_ht_irq_high(unsigned int irq, u32 data); -u32 read_ht_irq_low(unsigned int irq); -u32 read_ht_irq_high(unsigned int irq); +void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); +void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void mask_ht_irq(unsigned int irq); void unmask_ht_irq(unsigned int irq); -- cgit v1.2.3 From 43539c38cd8edb915d1f0e1f55dcb70638b4cc8e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 8 Nov 2006 17:44:57 -0800 Subject: [PATCH] htirq: allow buggy drivers of buggy hardware to write the registers This patch adds a variant of ht_create_irq __ht_create_irq that takes an aditional parameter update that is a function that is called whenever we want to write to a drivers htirq configuration registers. This is needed to support the ipath_iba6110 because it's registers in the proper location are not actually conected to the hardware that controlls interrupt delivery. [bos@serpentine.com: fixes] Signed-off-by: Eric W. Biederman Cc: Andi Kleen Cc: Cc: Roland Dreier Signed-off-by: Bryan O'Sullivan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/htirq.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 108f0d91e11..c96ea46737d 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -15,4 +15,9 @@ void unmask_ht_irq(unsigned int irq); /* The arch hook for getting things started */ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); +/* For drivers of buggy hardware */ +typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg); +int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); + #endif /* LINUX_HTIRQ_H */ -- cgit v1.2.3 From 2b4ac44e7c7e16cf9411b81693ff3e604f332bf1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 10 Nov 2006 12:27:48 -0800 Subject: [PATCH] vmalloc: optimization, cleanup, bugfixes - reorder 'struct vm_struct' to speedup lookups on CPUS with small cache lines. The fields 'next,addr,size' should be now in the same cache line, to speedup lookups. - One minor cleanup in __get_vm_area_node() - Bugfixes in vmalloc_user() and vmalloc_32_user() NULL returns from __vmalloc() and __find_vm_area() were not tested. [akpm@osdl.org: remove redundant BUG_ONs] Signed-off-by: Eric Dumazet Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index dc9a29d84ab..924e502905d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -23,13 +23,14 @@ struct vm_area_struct; #endif struct vm_struct { + /* keep next,addr,size together to speedup lookups */ + struct vm_struct *next; void *addr; unsigned long size; unsigned long flags; struct page **pages; unsigned int nr_pages; unsigned long phys_addr; - struct vm_struct *next; }; /* -- cgit v1.2.3 From d8b295f29091310d746509bb6d5828aaf4907a18 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 10 Nov 2006 12:27:53 -0800 Subject: [PATCH] Fix missing parens in set_personality() If you call set_personality() with an expression such as: set_personality(foo ? PERS_FOO1 : PERS_FOO2); then this evaluates to: ((current->personality == foo ? PERS_FOO1 : PERS_FOO2) ? ... which is obviously not the intended result. Add the missing parents to ensure this gets evaluated as expected: ((current->personality == (foo ? PERS_FOO1 : PERS_FOO2)) ? ... Signed-off-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/personality.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/personality.h b/include/linux/personality.h index bf4cf2080e5..012cd558189 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -114,7 +114,7 @@ struct exec_domain { * Change personality of the currently running process. */ #define set_personality(pers) \ - ((current->personality == pers) ? 0 : __set_personality(pers)) + ((current->personality == (pers)) ? 0 : __set_personality(pers)) #endif /* __KERNEL__ */ -- cgit v1.2.3 From 68589bc353037f233fe510ad9ff432338c95db66 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 14 Nov 2006 02:03:32 -0800 Subject: [PATCH] hugetlb: prepare_hugepage_range check offset too (David:) If hugetlbfs_file_mmap() returns a failure to do_mmap_pgoff() - for example, because the given file offset is not hugepage aligned - then do_mmap_pgoff will go to the unmap_and_free_vma backout path. But at this stage the vma hasn't been marked as hugepage, and the backout path will call unmap_region() on it. That will eventually call down to the non-hugepage version of unmap_page_range(). On ppc64, at least, that will cause serious problems if there are any existing hugepage pagetable entries in the vicinity - for example if there are any other hugepage mappings under the same PUD. unmap_page_range() will trigger a bad_pud() on the hugepage pud entries. I suspect this will also cause bad problems on ia64, though I don't have a machine to test it on. (Hugh:) prepare_hugepage_range() should check file offset alignment when it checks virtual address and length, to stop MAP_FIXED with a bad huge offset from unmapping before it fails further down. PowerPC should apply the same prepare_hugepage_range alignment checks as ia64 and all the others do. Then none of the alignment checks in hugetlbfs_file_mmap are required (nor is the check for too small a mapping); but even so, move up setting of VM_HUGETLB and add a comment to warn of what David Gibson discovered - if hugetlbfs_file_mmap fails before setting it, do_mmap_pgoff's unmap_region when unwinding from error will go the non-huge way, which may cause bad behaviour on architectures (powerpc and ia64) which segregate their huge mappings into a separate region of the address space. Signed-off-by: Hugh Dickins Cc: "Luck, Tony" Cc: "David S. Miller" Acked-by: Adam Litke Acked-by: David Gibson Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5081d27bfa2..ace64e57e17 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -60,8 +60,11 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. */ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len, + pgoff_t pgoff) { + if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) + return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; if (addr & ~HPAGE_MASK) @@ -69,7 +72,8 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) return 0; } #else -int prepare_hugepage_range(unsigned long addr, unsigned long len); +int prepare_hugepage_range(unsigned long addr, unsigned long len, + pgoff_t pgoff); #endif #ifndef ARCH_HAS_SETCLEAR_HUGE_PTE @@ -107,7 +111,7 @@ static inline unsigned long hugetlb_total_pages(void) #define hugetlb_report_meminfo(buf) 0 #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL -#define prepare_hugepage_range(addr, len) (-EINVAL) +#define prepare_hugepage_range(addr,len,pgoff) (-EINVAL) #define pmd_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) -- cgit v1.2.3 From b96e7ecbd052a0916b6078e7600604d7e276a336 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai Date: Tue, 14 Nov 2006 19:48:48 -0800 Subject: [NETFILTER]: ip6_tables: fixed conflicted optname for getsockopt 66 and 67 for getsockopt on IPv6 socket is doubly used for IPv6 Advanced API and ip6tables. This moves numbers for ip6tables to 68 and 69. This also kills XT_SO_* because {ip,ip6,arp}_tables doesn't have so much common numbers now. The old userland tools keep to behave as ever, because old kernel always calls functions of IPv6 Advanced API for their numbers. Signed-off-by: Yasuyuki Kozakai Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/in6.h | 12 +++++++++++- include/linux/netfilter/x_tables.h | 16 ---------------- include/linux/netfilter_arp/arp_tables.h | 25 +++++++++++++------------ include/linux/netfilter_ipv4/ip_tables.h | 27 +++++++++++++++------------ include/linux/netfilter_ipv6/ip6_tables.h | 27 +++++++++++++++------------ 5 files changed, 54 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index 9be6a4756f0..f28621f638e 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -225,7 +225,7 @@ struct in6_flowlabel_req #endif /* - * Netfilter + * Netfilter (1) * * Following socket options are used in ip6_tables; * see include/linux/netfilter_ipv6/ip6_tables.h. @@ -240,4 +240,14 @@ struct in6_flowlabel_req #define IPV6_RECVTCLASS 66 #define IPV6_TCLASS 67 +/* + * Netfilter (2) + * + * Following socket options are used in ip6_tables; + * see include/linux/netfilter_ipv6/ip6_tables.h. + * + * IP6T_SO_GET_REVISION_MATCH 68 + * IP6T_SO_GET_REVISION_TARGET 69 + */ + #endif diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 04319a76103..022edfa97ed 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -96,22 +96,6 @@ struct _xt_align /* Error verdict. */ #define XT_ERROR_TARGET "ERROR" -/* - * New IP firewall options for [gs]etsockopt at the RAW IP level. - * Unlike BSD Linux inherits IP options so you don't have to use a raw - * socket for this. Instead we check rights in the calls. */ -#define XT_BASE_CTL 64 /* base for firewall socket options */ - -#define XT_SO_SET_REPLACE (XT_BASE_CTL) -#define XT_SO_SET_ADD_COUNTERS (XT_BASE_CTL + 1) -#define XT_SO_SET_MAX XT_SO_SET_ADD_COUNTERS - -#define XT_SO_GET_INFO (XT_BASE_CTL) -#define XT_SO_GET_ENTRIES (XT_BASE_CTL + 1) -#define XT_SO_GET_REVISION_MATCH (XT_BASE_CTL + 2) -#define XT_SO_GET_REVISION_TARGET (XT_BASE_CTL + 3) -#define XT_SO_GET_MAX XT_SO_GET_REVISION_TARGET - #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 44e39b61d9e..0be235418a2 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -112,19 +112,20 @@ struct arpt_entry * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use a raw * socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in.h before adding new number here. */ -#define ARPT_CTL_OFFSET 32 -#define ARPT_BASE_CTL (XT_BASE_CTL+ARPT_CTL_OFFSET) - -#define ARPT_SO_SET_REPLACE (XT_SO_SET_REPLACE+ARPT_CTL_OFFSET) -#define ARPT_SO_SET_ADD_COUNTERS (XT_SO_SET_ADD_COUNTERS+ARPT_CTL_OFFSET) -#define ARPT_SO_SET_MAX (XT_SO_SET_MAX+ARPT_CTL_OFFSET) - -#define ARPT_SO_GET_INFO (XT_SO_GET_INFO+ARPT_CTL_OFFSET) -#define ARPT_SO_GET_ENTRIES (XT_SO_GET_ENTRIES+ARPT_CTL_OFFSET) -/* #define ARPT_SO_GET_REVISION_MATCH XT_SO_GET_REVISION_MATCH */ -#define ARPT_SO_GET_REVISION_TARGET (XT_SO_GET_REVISION_TARGET+ARPT_CTL_OFFSET) -#define ARPT_SO_GET_MAX (XT_SO_GET_REVISION_TARGET+ARPT_CTL_OFFSET) +#define ARPT_BASE_CTL 96 + +#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL) +#define ARPT_SO_SET_ADD_COUNTERS (ARPT_BASE_CTL + 1) +#define ARPT_SO_SET_MAX ARPT_SO_SET_ADD_COUNTERS + +#define ARPT_SO_GET_INFO (ARPT_BASE_CTL) +#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1) +/* #define ARPT_SO_GET_REVISION_MATCH (APRT_BASE_CTL + 2) */ +#define ARPT_SO_GET_REVISION_TARGET (ARPT_BASE_CTL + 3) +#define ARPT_SO_GET_MAX (ARPT_SO_GET_REVISION_TARGET) /* CONTINUE verdict for targets */ #define ARPT_CONTINUE XT_CONTINUE diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index a536bbdef14..4f06dad0bde 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -101,18 +101,21 @@ struct ipt_entry /* * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use a raw - * socket for this. Instead we check rights in the calls. */ -#define IPT_BASE_CTL XT_BASE_CTL - -#define IPT_SO_SET_REPLACE XT_SO_SET_REPLACE -#define IPT_SO_SET_ADD_COUNTERS XT_SO_SET_ADD_COUNTERS -#define IPT_SO_SET_MAX XT_SO_SET_MAX - -#define IPT_SO_GET_INFO XT_SO_GET_INFO -#define IPT_SO_GET_ENTRIES XT_SO_GET_ENTRIES -#define IPT_SO_GET_REVISION_MATCH XT_SO_GET_REVISION_MATCH -#define IPT_SO_GET_REVISION_TARGET XT_SO_GET_REVISION_TARGET -#define IPT_SO_GET_MAX XT_SO_GET_REVISION_TARGET + * socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in.h before adding new number here. + */ +#define IPT_BASE_CTL 64 + +#define IPT_SO_SET_REPLACE (IPT_BASE_CTL) +#define IPT_SO_SET_ADD_COUNTERS (IPT_BASE_CTL + 1) +#define IPT_SO_SET_MAX IPT_SO_SET_ADD_COUNTERS + +#define IPT_SO_GET_INFO (IPT_BASE_CTL) +#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) +#define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2) +#define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) +#define IPT_SO_GET_MAX IPT_SO_GET_REVISION_TARGET #define IPT_CONTINUE XT_CONTINUE #define IPT_RETURN XT_RETURN diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index d7a8e9c0dad..4aed340401d 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -107,18 +107,21 @@ struct ip6t_entry /* * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use - * a raw socket for this. Instead we check rights in the calls. */ -#define IP6T_BASE_CTL XT_BASE_CTL - -#define IP6T_SO_SET_REPLACE XT_SO_SET_REPLACE -#define IP6T_SO_SET_ADD_COUNTERS XT_SO_SET_ADD_COUNTERS -#define IP6T_SO_SET_MAX XT_SO_SET_MAX - -#define IP6T_SO_GET_INFO XT_SO_GET_INFO -#define IP6T_SO_GET_ENTRIES XT_SO_GET_ENTRIES -#define IP6T_SO_GET_REVISION_MATCH XT_SO_GET_REVISION_MATCH -#define IP6T_SO_GET_REVISION_TARGET XT_SO_GET_REVISION_TARGET -#define IP6T_SO_GET_MAX XT_SO_GET_REVISION_TARGET + * a raw socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in6.h before adding new number here. + */ +#define IP6T_BASE_CTL 64 + +#define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) +#define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) +#define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS + +#define IP6T_SO_GET_INFO (IP6T_BASE_CTL) +#define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) +#define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) +#define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) +#define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET /* CONTINUE verdict for targets */ #define IP6T_CONTINUE XT_CONTINUE -- cgit v1.2.3 From c7835a77c86422d276b0d1a4c70924d933014c13 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Wed, 15 Nov 2006 21:14:42 -0800 Subject: [TG3]: Disable TSO on 5906 if CLKREQ is enabled. Due to hardware errata, TSO must be disabled if the PCI Express clock request is enabled on 5906. The chip may hang when transmitting TSO frames if CLKREQ is enabled. Update version to 3.69. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- include/linux/pci_regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index c312a12ad2d..c321316f1bc 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -371,6 +371,7 @@ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ #define PCI_EXP_LNKSTA 18 /* Link Status */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ #define PCI_EXP_SLTCTL 24 /* Slot Control */ -- cgit v1.2.3 From da63fc7ce63b43426dc3c69c05e28de2872c159a Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Thu, 16 Nov 2006 01:19:28 -0800 Subject: [PATCH] fat: add fat_getattr() This adds fat_getattr() for setting stat->blksize. (FAT uses the size of cluster for proper I/O) Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msdos_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ce6c85815cb..24a9ef1506b 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -402,6 +402,8 @@ extern const struct file_operations fat_file_operations; extern struct inode_operations fat_file_inode_operations; extern int fat_notify_change(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); /* fat/inode.c */ extern void fat_attach(struct inode *inode, loff_t i_pos); -- cgit v1.2.3 From 610a5b742e9df4e59047f22d13d8bd83cafce388 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa Date: Fri, 17 Nov 2006 11:51:41 +1100 Subject: [CRYPTO] api: Remove one too many semicolon This patch has removed one too many semicolon in crypto.h. Signed-off-by: Yoichi Yuasa Signed-off-by: Herbert Xu --- include/linux/crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8f2ffa4caab..6485e9716b3 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -245,7 +245,7 @@ int crypto_alg_available(const char *name, u32 flags) __deprecated_for_modules; int crypto_has_alg(const char *name, u32 type, u32 mask); #else -static int crypto_alg_available(const char *name, u32 flags); +static int crypto_alg_available(const char *name, u32 flags) __deprecated_for_modules; static inline int crypto_alg_available(const char *name, u32 flags) { -- cgit v1.2.3 From b3438f8266cb1f5010085ac47d7ad6a36a212164 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Nov 2006 11:47:18 -0800 Subject: Add "pure_initcall" for static variable initialization This is a quick hack to overcome the fact that SRCU currently does not allow static initializers, and we need to sometimes initialize those things before any other initializers (even "core" ones) can do so. Currently we don't allow this at all for modules, and the only user that needs is right now is cpufreq. As reported by Thomas Gleixner: "Commit b4dfdbb3c707474a2254c5b4d7e62be31a4b7da9 ("[PATCH] cpufreq: make the transition_notifier chain use SRCU breaks cpu frequency notification users, which register the callback > on core_init level." Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Arjan van de Ven Cc: Andrew Morton , Signed-off-by: Linus Torvalds --- include/linux/init.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index ff40ea118e3..5eb5d24b768 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -93,6 +93,14 @@ extern void setup_arch(char **); static initcall_t __initcall_##fn##id __attribute_used__ \ __attribute__((__section__(".initcall" level ".init"))) = fn +/* + * A "pure" initcall has no dependencies on anything else, and purely + * initializes variables that couldn't be statically initialized. + * + * This only exists for built-in code, not for modules. + */ +#define pure_initcall(fn) __define_initcall("0",fn,1) + #define core_initcall(fn) __define_initcall("1",fn,1) #define core_initcall_sync(fn) __define_initcall("1s",fn,1s) #define postcore_initcall(fn) __define_initcall("2",fn,2) -- cgit v1.2.3 From fb47ddb2db9c18664bd7b06c201a2398885b64fc Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Sun, 19 Nov 2006 10:38:39 -0800 Subject: [IGMP]: Fix IGMPV3_EXP() normalization bit shift value. The IGMPV3_EXP() macro doesn't correctly shift the normalization bit, so time-out values are longer than they should be. Thanks to Dirk Ooms for finding the problem in IGMPv3 - MLDv2 had a similar problem that was already fixed a year ago. :-( Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 03f43e2893a..21dd5690527 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -191,7 +191,7 @@ struct ip_mc_list #define IGMPV3_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) #define IGMPV3_EXP(thresh, nbmant, nbexp, value) \ ((value) < (thresh) ? (value) : \ - ((IGMPV3_MASK(value, nbmant) | (1<<(nbmant+nbexp))) << \ + ((IGMPV3_MASK(value, nbmant) | (1<<(nbmant))) << \ (IGMPV3_MASK((value) >> (nbmant), nbexp) + (nbexp)))) #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) -- cgit v1.2.3 From 701e054e0c2db82359f0454c7ed4fd24346d52eb Mon Sep 17 00:00:00 2001 From: Vasily Tarasov Date: Sat, 25 Nov 2006 11:09:22 -0800 Subject: [PATCH] mounstats NULL pointer dereference OpenVZ developers team has encountered the following problem in 2.6.19-rc6 kernel. After some seconds of running script while [[ 1 ]] do find /proc -name mountstats | xargs cat done this Oops appears: BUG: unable to handle kernel NULL pointer dereference at virtual address 00000010 printing eip: c01a6b70 *pde = 00000000 Oops: 0000 [#1] SMP Modules linked in: xt_length ipt_ttl xt_tcpmss ipt_TCPMSS iptable_mangle iptable_filter xt_multiport xt_limit ipt_tos ipt_REJECT ip_tables x_tables parport_pc lp parport sunrpc af_packet thermal processor fan button battery asus_acpi ac ohci_hcd ehci_hcd usbcore i2c_nforce2 i2c_core tg3 floppy pata_amd ide_cd cdrom sata_nv libata CPU: 1 EIP: 0060:[] Not tainted VLI EFLAGS: 00010246 (2.6.19-rc6 #2) EIP is at mountstats_open+0x70/0xf0 eax: 00000000 ebx: e6247030 ecx: e62470f8 edx: 00000000 esi: 00000000 edi: c01a6b00 ebp: c33b83c0 esp: f4105eb4 ds: 007b es: 007b ss: 0068 Process cat (pid: 6044, ti=f4105000 task=f4104a70 task.ti=f4105000) Stack: c33b83c0 c04ee940 f46a4a80 c33b83c0 e4df31b4 c01a6b00 f4105000 c0169231 e4df31b4 c33b83c0 c33b83c0 f4105f20 00000003 f4105000 c0169445 f2503cf0 f7f8c4c0 00008000 c33b83c0 00000000 00008000 c0169350 f4105f20 00008000 Call Trace: [] mountstats_open+0x0/0xf0 [] __dentry_open+0x181/0x250 [] nameidata_to_filp+0x35/0x50 [] do_filp_open+0x50/0x60 [] seq_read+0xc6/0x300 [] get_unused_fd+0x31/0xc0 [] do_sys_open+0x63/0x110 [] sys_open+0x27/0x30 [] sysenter_past_esp+0x56/0x79 ======================= Code: 45 74 8b 54 24 20 89 44 24 08 8b 42 f0 31 d2 e8 47 cb f8 ff 85 c0 89 c3 74 51 8d 80 a0 04 00 00 e8 46 06 2c 00 8b 83 48 04 00 00 <8b> 78 10 85 ff 74 03 f0 ff 07 b0 01 86 83 a0 04 00 00 f0 ff 4b EIP: [] mountstats_open+0x70/0xf0 SS:ESP 0068:f4105eb4 The problem is that task->nsproxy can be equal NULL for some time during task exit. This patch fixes the BUG. Signed-off-by: Vasily Tarasov Cc: Herbert Poetzl Cc: "Serge E. Hallyn" Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nsproxy.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index f6baecdeecd..971d1c6dfc4 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -45,8 +45,10 @@ static inline void exit_task_namespaces(struct task_struct *p) { struct nsproxy *ns = p->nsproxy; if (ns) { - put_nsproxy(ns); + task_lock(p); p->nsproxy = NULL; + task_unlock(p); + put_nsproxy(ns); } } #endif -- cgit v1.2.3 From ee3ce191e8eaa4cc15c51a28b34143b36404c4f5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 25 Nov 2006 11:09:36 -0800 Subject: [PATCH] Enforce "unsigned long flags;" when spinlocking Make it break or warn if you pass to spin_lock_irqsave() and friends something different from "unsigned long flags;". Suprisingly large amount of these was caught by recent commit c53421b18f205c5f97c604ae55c6a921f034b0f6 and others. Idea is largely from FRV typechecking. Suggestions from Andrew Morton. All stupid typos in first version fixed. Passes allmodconfig on i386, x86_64, alpha, arm as well as my usual config. Note #1: checking with sparse is still needed, because a driver can save and pass around flags or something. So far patch is very intrusive. Note #2: techically, we should break only if sizeof(flags) < sizeof(unsigned long), however, the more pain for getting suspicious code into kernel, the better. Signed-off-by: Alexey Dobriyan Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/irqflags.h | 37 ++++++++++++++++++++++++++++----- include/linux/spinlock.h | 53 ++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 76 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 412e025bc5c..4fe740bf4ea 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -11,6 +11,12 @@ #ifndef _LINUX_TRACE_IRQFLAGS_H #define _LINUX_TRACE_IRQFLAGS_H +#define BUILD_CHECK_IRQ_FLAGS(flags) \ + do { \ + BUILD_BUG_ON(sizeof(flags) != sizeof(unsigned long)); \ + typecheck(unsigned long, flags); \ + } while (0) + #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); @@ -50,10 +56,15 @@ #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) #define local_irq_save(flags) \ - do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + raw_local_irq_save(flags); \ + trace_hardirqs_off(); \ + } while (0) #define local_irq_restore(flags) \ do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ @@ -69,8 +80,16 @@ */ # define raw_local_irq_disable() local_irq_disable() # define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) local_irq_save(flags) -# define raw_local_irq_restore(flags) local_irq_restore(flags) +# define raw_local_irq_save(flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + local_irq_save(flags); \ + } while (0) +# define raw_local_irq_restore(flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + local_irq_restore(flags); \ + } while (0) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -80,7 +99,11 @@ raw_safe_halt(); \ } while (0) -#define local_save_flags(flags) raw_local_save_flags(flags) +#define local_save_flags(flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + raw_local_save_flags(flags); \ + } while (0) #define irqs_disabled() \ ({ \ @@ -90,7 +113,11 @@ raw_irqs_disabled_flags(flags); \ }) -#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) +#define irqs_disabled_flags(flags) \ +({ \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + raw_irqs_disabled_flags(flags); \ +}) #endif /* CONFIG_X86 */ #endif diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index b800d2d68b3..54ad37089c4 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -52,6 +52,7 @@ #include #include #include +#include #include @@ -183,13 +184,37 @@ do { \ #define read_lock(lock) _read_lock(lock) #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock) -#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock) -#define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock) +#define spin_lock_irqsave(lock, flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + flags = _spin_lock_irqsave(lock); \ + } while (0) +#define read_lock_irqsave(lock, flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + flags = _read_lock_irqsave(lock); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + flags = _write_lock_irqsave(lock); \ + } while (0) #else -#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags) -#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags) -#define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags) +#define spin_lock_irqsave(lock, flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + _spin_lock_irqsave(lock, flags); \ + } while (0) +#define read_lock_irqsave(lock, flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + _read_lock_irqsave(lock, flags); \ + } while (0) +#define write_lock_irqsave(lock, flags) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + _write_lock_irqsave(lock, flags); \ + } while (0) #endif #define spin_lock_irq(lock) _spin_lock_irq(lock) @@ -225,15 +250,24 @@ do { \ #endif #define spin_unlock_irqrestore(lock, flags) \ - _spin_unlock_irqrestore(lock, flags) + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + _spin_unlock_irqrestore(lock, flags); \ + } while (0) #define spin_unlock_bh(lock) _spin_unlock_bh(lock) #define read_unlock_irqrestore(lock, flags) \ - _read_unlock_irqrestore(lock, flags) + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + _read_unlock_irqrestore(lock, flags); \ + } while (0) #define read_unlock_bh(lock) _read_unlock_bh(lock) #define write_unlock_irqrestore(lock, flags) \ - _write_unlock_irqrestore(lock, flags) + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + _write_unlock_irqrestore(lock, flags); \ + } while (0) #define write_unlock_bh(lock) _write_unlock_bh(lock) #define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) @@ -247,6 +281,7 @@ do { \ #define spin_trylock_irqsave(lock, flags) \ ({ \ + BUILD_CHECK_IRQ_FLAGS(flags); \ local_irq_save(flags); \ spin_trylock(lock) ? \ 1 : ({ local_irq_restore(flags); 0; }); \ -- cgit v1.2.3 From cfd3ef2346f924d6c0e82236c20fdb3a8840136a Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 25 Nov 2006 11:09:37 -0800 Subject: [PATCH] lockdep: spin_lock_irqsave_nested() Introduce spin_lock_irqsave_nested(); implementation from: http://lkml.org/lkml/2006/6/1/122 Patch from: http://lkml.org/lkml/2006/9/13/258 [akpm@osdl.org: two compile fixes] Signed-off-by: Arjan van de Ven Signed-off-by: Jiri Kosina Signed-off-by: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/spinlock.h | 18 ++++++++++++++++++ include/linux/spinlock_api_smp.h | 2 ++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 54ad37089c4..57f670d78f7 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -199,6 +199,21 @@ do { \ BUILD_CHECK_IRQ_FLAGS(flags); \ flags = _write_lock_irqsave(lock); \ } while (0) + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + flags = _spin_lock_irqsave_nested(lock, subclass); \ + } while (0) +#else +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + do { \ + BUILD_CHECK_IRQ_FLAGS(flags); \ + flags = _spin_lock_irqsave(lock); \ + } while (0) +#endif + #else #define spin_lock_irqsave(lock, flags) \ do { \ @@ -215,6 +230,9 @@ do { \ BUILD_CHECK_IRQ_FLAGS(flags); \ _write_lock_irqsave(lock, flags); \ } while (0) +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + spin_lock_irqsave(lock, flags) + #endif #define spin_lock_irq(lock) _spin_lock_irq(lock) diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 8828b8155e9..8a2307ce729 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -32,6 +32,8 @@ void __lockfunc _read_lock_irq(rwlock_t *lock) __acquires(lock); void __lockfunc _write_lock_irq(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) __acquires(lock); +unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) + __acquires(lock); unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) -- cgit v1.2.3 From b8e6ec865fd1d8838b6ce9516977b65e9f08f876 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Nov 2006 16:27:17 -0800 Subject: Revert "[PATCH] Enforce "unsigned long flags;" when spinlocking" This reverts commit ee3ce191e8eaa4cc15c51a28b34143b36404c4f5, since it broke on at least ARM, MIPS and PA-RISC due to complicated header file dependencies. Conflicts in include/linux/spinlock.h (due to the "nested" variety fixes) fixed up by hand. Cc: Alexey Dobriyan Cc: Ralf Baechle Cc: Kyle McMartin Cc: Russell King Signed-off-by: Linus Torvalds --- include/linux/irqflags.h | 37 ++++---------------------- include/linux/spinlock.h | 69 +++++++++++------------------------------------- 2 files changed, 20 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 4fe740bf4ea..412e025bc5c 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -11,12 +11,6 @@ #ifndef _LINUX_TRACE_IRQFLAGS_H #define _LINUX_TRACE_IRQFLAGS_H -#define BUILD_CHECK_IRQ_FLAGS(flags) \ - do { \ - BUILD_BUG_ON(sizeof(flags) != sizeof(unsigned long)); \ - typecheck(unsigned long, flags); \ - } while (0) - #ifdef CONFIG_TRACE_IRQFLAGS extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); @@ -56,15 +50,10 @@ #define local_irq_disable() \ do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) #define local_irq_save(flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - raw_local_irq_save(flags); \ - trace_hardirqs_off(); \ - } while (0) + do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0) #define local_irq_restore(flags) \ do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ if (raw_irqs_disabled_flags(flags)) { \ raw_local_irq_restore(flags); \ trace_hardirqs_off(); \ @@ -80,16 +69,8 @@ */ # define raw_local_irq_disable() local_irq_disable() # define raw_local_irq_enable() local_irq_enable() -# define raw_local_irq_save(flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - local_irq_save(flags); \ - } while (0) -# define raw_local_irq_restore(flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - local_irq_restore(flags); \ - } while (0) +# define raw_local_irq_save(flags) local_irq_save(flags) +# define raw_local_irq_restore(flags) local_irq_restore(flags) #endif /* CONFIG_TRACE_IRQFLAGS_SUPPORT */ #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT @@ -99,11 +80,7 @@ raw_safe_halt(); \ } while (0) -#define local_save_flags(flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - raw_local_save_flags(flags); \ - } while (0) +#define local_save_flags(flags) raw_local_save_flags(flags) #define irqs_disabled() \ ({ \ @@ -113,11 +90,7 @@ raw_irqs_disabled_flags(flags); \ }) -#define irqs_disabled_flags(flags) \ -({ \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - raw_irqs_disabled_flags(flags); \ -}) +#define irqs_disabled_flags(flags) raw_irqs_disabled_flags(flags) #endif /* CONFIG_X86 */ #endif diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 57f670d78f7..8451052ca66 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -52,7 +52,6 @@ #include #include #include -#include #include @@ -184,52 +183,24 @@ do { \ #define read_lock(lock) _read_lock(lock) #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -#define spin_lock_irqsave(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - flags = _spin_lock_irqsave(lock); \ - } while (0) -#define read_lock_irqsave(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - flags = _read_lock_irqsave(lock); \ - } while (0) -#define write_lock_irqsave(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - flags = _write_lock_irqsave(lock); \ - } while (0) + +#define spin_lock_irqsave(lock, flags) flags = _spin_lock_irqsave(lock) +#define read_lock_irqsave(lock, flags) flags = _read_lock_irqsave(lock) +#define write_lock_irqsave(lock, flags) flags = _write_lock_irqsave(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC -#define spin_lock_irqsave_nested(lock, flags, subclass) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - flags = _spin_lock_irqsave_nested(lock, subclass); \ - } while (0) +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + flags = _spin_lock_irqsave_nested(lock, subclass) #else -#define spin_lock_irqsave_nested(lock, flags, subclass) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - flags = _spin_lock_irqsave(lock); \ - } while (0) +#define spin_lock_irqsave_nested(lock, flags, subclass) \ + flags = _spin_lock_irqsave(lock) #endif #else -#define spin_lock_irqsave(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - _spin_lock_irqsave(lock, flags); \ - } while (0) -#define read_lock_irqsave(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - _read_lock_irqsave(lock, flags); \ - } while (0) -#define write_lock_irqsave(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - _write_lock_irqsave(lock, flags); \ - } while (0) + +#define spin_lock_irqsave(lock, flags) _spin_lock_irqsave(lock, flags) +#define read_lock_irqsave(lock, flags) _read_lock_irqsave(lock, flags) +#define write_lock_irqsave(lock, flags) _write_lock_irqsave(lock, flags) #define spin_lock_irqsave_nested(lock, flags, subclass) \ spin_lock_irqsave(lock, flags) @@ -268,24 +239,15 @@ do { \ #endif #define spin_unlock_irqrestore(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - _spin_unlock_irqrestore(lock, flags); \ - } while (0) + _spin_unlock_irqrestore(lock, flags) #define spin_unlock_bh(lock) _spin_unlock_bh(lock) #define read_unlock_irqrestore(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - _read_unlock_irqrestore(lock, flags); \ - } while (0) + _read_unlock_irqrestore(lock, flags) #define read_unlock_bh(lock) _read_unlock_bh(lock) #define write_unlock_irqrestore(lock, flags) \ - do { \ - BUILD_CHECK_IRQ_FLAGS(flags); \ - _write_unlock_irqrestore(lock, flags); \ - } while (0) + _write_unlock_irqrestore(lock, flags) #define write_unlock_bh(lock) _write_unlock_bh(lock) #define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) @@ -299,7 +261,6 @@ do { \ #define spin_trylock_irqsave(lock, flags) \ ({ \ - BUILD_CHECK_IRQ_FLAGS(flags); \ local_irq_save(flags); \ spin_trylock(lock) ? \ 1 : ({ local_irq_restore(flags); 0; }); \ -- cgit v1.2.3 From 2ea5814472c3c910aed5c5b60f1f3b1000e353f1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 26 Nov 2006 19:05:22 -0800 Subject: Fix 'ALIGN()' macro, take 2 You wouldn't think that doing an ALIGN() macro that aligns something up to a power-of-two boundary would be likely to have bugs, would you? But hey, in the wonderful world of mixing integer types, you have to be careful. This just makes sure that the alignment is interpreted in the same type as the thing to be aligned. Thanks to Roland Dreier, who noticed that the amso1100 driver got broken by the previous fix (that just extended the mask to "unsigned long", but was still broken in "unsigned long long" - it just happened to be the same on 64-bit architectures). See commit 4c8bd7eeee4c8f157fb61fb64b57500990b42e0e for the history of bugs here... Acked-by: Roland Dreier Cc: Andrew Morton Cc: David Miller Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 24b611147ad..b9b5e4ba166 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -30,8 +30,10 @@ extern const char linux_banner[]; #define STACK_MAGIC 0xdeadbeef +#define ALIGN(x,a) __ALIGN_MASK(x,(typeof(x))(a)-1) +#define __ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) + #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) -- cgit v1.2.3 From e81c73596704793e73e6dbb478f41686f15a4b34 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 28 Nov 2006 20:53:39 -0800 Subject: [NET]: Fix MAX_HEADER setting. MAX_HEADER is either set to LL_MAX_HEADER or LL_MAX_HEADER + 48, and this is controlled by a set of CONFIG_* ifdef tests. It is trying to use LL_MAX_HEADER + 48 when any of the tunnels are enabled which set hard_header_len like this: dev->hard_header_len = LL_MAX_HEADER + sizeof(struct xxx); The correct set of tunnel drivers which do this are: ipip ip_gre ip6_tunnel sit so make the ifdef test match. Noticed by Patrick McHardy and with help from Herbert Xu. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9264139bd8d..83b8c4f1d69 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -93,8 +93,10 @@ struct netpoll_info; #endif #endif -#if !defined(CONFIG_NET_IPIP) && \ - !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) +#if !defined(CONFIG_NET_IPIP) && !defined(CONFIG_NET_IPIP_MODULE) && \ + !defined(CONFIG_NET_IPGRE) && !defined(CONFIG_NET_IPGRE_MODULE) && \ + !defined(CONFIG_IPV6_SIT) && !defined(CONFIG_IPV6_SIT_MODULE) && \ + !defined(CONFIG_IPV6_TUNNEL) && !defined(CONFIG_IPV6_TUNNEL_MODULE) #define MAX_HEADER LL_MAX_HEADER #else #define MAX_HEADER (LL_MAX_HEADER + 48) -- cgit v1.2.3