183 files changed, 3302 insertions, 2357 deletions
@@ -92,6 +92,7 @@ Rudolf Marek <R.Marek@sh.cvut.cz> Rui Saraiva <rmps@joel.ist.utl.pt> Sachin P Sant <ssant@in.ibm.com> Sam Ravnborg <sam@mars.ravnborg.org> +Sascha Hauer <s.hauer@pengutronix.de> S.Çağlar Onur <caglar@pardus.org.tr> Simon Kelley <simon@thekelleys.org.uk> Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr> @@ -100,6 +101,7 @@ Tejun Heo <htejun@gmail.com> Thomas Graf <tgraf@suug.ch> Tony Luck <tony.luck@intel.com> Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com> -Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com> Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de> +Uwe Kleine-König <ukl@pengutronix.de> +Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com> Valdis Kletnieks <Valdis.Kletnieks@vt.edu> diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt index e3443ddcfb8..917918f84fc 100644 --- a/Documentation/cpu-freq/user-guide.txt +++ b/Documentation/cpu-freq/user-guide.txt @@ -195,19 +195,3 @@ scaling_setspeed. By "echoing" a new frequency into this you can change the speed of the CPU, but only within the limits of scaling_min_freq and scaling_max_freq. - - -3.2 Deprecated Interfaces ------------------------- - -Depending on your kernel configuration, you might find the following -cpufreq-related files: -/proc/cpufreq -/proc/sys/cpu/*/speed -/proc/sys/cpu/*/speed-min -/proc/sys/cpu/*/speed-max - -These are files for deprecated interfaces to cpufreq, which offer far -less functionality. Because of this, these interfaces aren't described -here. - diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 68ef48839c0..9f8740ca3f3 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -9,6 +9,7 @@ that support it. For example, a given bus might look like this: | |-- class | |-- config | |-- device + | |-- enable | |-- irq | |-- local_cpus | |-- resource @@ -32,6 +33,7 @@ files, each with their own function. class PCI class (ascii, ro) config PCI config space (binary, rw) device PCI device (ascii, ro) + enable Whether the device is enabled (ascii, rw) irq IRQ number (ascii, ro) local_cpus nearby CPU mask (cpumask, ro) resource PCI resource host addresses (ascii, ro) @@ -57,10 +59,19 @@ used to do actual device programming from userspace. Note that some platforms don't support mmapping of certain resources, so be sure to check the return value from any attempted mmap. +The 'enable' file provides a counter that indicates how many times the device +has been enabled. If the 'enable' file currently returns '4', and a '1' is +echoed into it, it will then return '5'. Echoing a '0' into it will decrease +the count. Even when it returns to 0, though, some of the initialisation +may not be reversed. + The 'rom' file is special in that it provides read-only access to the device's ROM file, if available. It's disabled by default, however, so applications should write the string "1" to the file to enable it before attempting a read -call, and disable it following the access by writing "0" to the file. +call, and disable it following the access by writing "0" to the file. Note +that the device must be enabled for a rom read to return data successfully. +In the event a driver is not bound to the device, it can be enabled using the +'enable' file, documented above.
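As a minimal illustration of the two files documented in the hunk above (a sketch, not part of the patch), userspace might pair 'enable' and 'rom' like this; the device path is hypothetical, so substitute a real entry from /sys/bus/pci/devices:

  /* Sketch: enable a PCI device via sysfs, read its ROM, then undo both
   * steps.  Error handling is minimal and the device path is made up. */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  static int sysfs_write(const char *path, const char *val)
  {
          int fd = open(path, O_WRONLY);
          ssize_t n;

          if (fd < 0)
                  return -1;
          n = write(fd, val, 1);          /* "1" enables, "0" disables */
          close(fd);
          return n == 1 ? 0 : -1;
  }

  int main(void)
  {
          const char *dev = "/sys/bus/pci/devices/0000:00:02.0"; /* hypothetical */
          char enable[128], rom[128], buf[4096];
          ssize_t n;
          int fd;

          snprintf(enable, sizeof(enable), "%s/enable", dev);
          snprintf(rom, sizeof(rom), "%s/rom", dev);

          if (sysfs_write(enable, "1"))   /* bump the enable count */
                  return 1;
          sysfs_write(rom, "1");          /* the rom file must also be enabled */

          fd = open(rom, O_RDONLY);
          if (fd >= 0) {
                  while ((n = read(fd, buf, sizeof(buf))) > 0)
                          ;               /* inspect or store the ROM image here */
                  close(fd);
          }

          sysfs_write(rom, "0");          /* disable the rom file again */
          sysfs_write(enable, "0");       /* drop the enable count back down */
          return 0;
  }

Balancing each "1" written to 'enable' with a matching "0" matters because, per the documentation above, the file is a nesting count shared with any bound driver, not a simple boolean.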
Accessing legacy resources through sysfs ---------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 829a697f123..4486ac1ccac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1021,6 +1021,14 @@ M: mb@bu3sch.de W: http://bu3sch.de/btgpio.php S: Maintained +BTRFS FILE SYSTEM +P: Chris Mason +M: chris.mason@oracle.com +L: linux-btrfs@vger.kernel.org +W: http://btrfs.wiki.kernel.org/ +T: git kernel.org:/pub/scm/linux/kernel/git/mason/btrfs-unstable.git +S: Maintained + BTTV VIDEO4LINUX DRIVER P: Mauro Carvalho Chehab M: mchehab@infradead.org @@ -2212,7 +2220,7 @@ P: Sean Hefty M: sean.hefty@intel.com P: Hal Rosenstock M: hal.rosenstock@gmail.com -L: general@lists.openfabrics.org +L: general@lists.openfabrics.org (moderated for non-subscribers) W: http://www.openib.org/ T: git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git S: Supported @@ -4847,6 +4855,7 @@ P: Ingo Molnar M: mingo@redhat.com P: H. Peter Anvin M: hpa@zytor.com +M: x86@kernel.org L: linux-kernel@vger.kernel.org T: git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git S: Maintained @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 29 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Erotic Pickled Herring # *DOCUMENTATION* diff --git a/arch/alpha/include/asm/bug.h b/arch/alpha/include/asm/bug.h index 7b85b7c9370..1720c8ad86f 100644 --- a/arch/alpha/include/asm/bug.h +++ b/arch/alpha/include/asm/bug.h @@ -8,12 +8,12 @@ /* ??? Would be nice to use .gprel32 here, but we can't be sure that the function loaded the GP, so this could fail in modules. */ -#define BUG() { \ +#define BUG() do { \ __asm__ __volatile__( \ "call_pal %0 # bugchk\n\t" \ ".long %1\n\t.8byte %2" \ : : "i"(PAL_bugchk), "i"(__LINE__), "i"(__FILE__)); \ - for ( ; ; ); } + for ( ; ; ); } while (0) #define HAVE_ARCH_BUG #endif diff --git a/arch/ia64/sn/kernel/io_acpi_init.c b/arch/ia64/sn/kernel/io_acpi_init.c index c5a214026a7..d0223abbbbd 100644 --- a/arch/ia64/sn/kernel/io_acpi_init.c +++ b/arch/ia64/sn/kernel/io_acpi_init.c @@ -443,7 +443,7 @@ sn_acpi_slot_fixup(struct pci_dev *dev) size = pci_resource_len(dev, PCI_ROM_RESOURCE); addr = ioremap(pcidev_info->pdi_pio_mapped_addr[PCI_ROM_RESOURCE], size); - image_size = pci_get_rom_size(addr, size); + image_size = pci_get_rom_size(dev, addr, size); dev->resource[PCI_ROM_RESOURCE].start = (unsigned long) addr; dev->resource[PCI_ROM_RESOURCE].end = (unsigned long) addr + image_size - 1; diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 4e1801bad83..e2eb2da60f9 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -269,7 +269,7 @@ sn_io_slot_fixup(struct pci_dev *dev) rom = ioremap(pci_resource_start(dev, PCI_ROM_RESOURCE), size + 1); - image_size = pci_get_rom_size(rom, size + 1); + image_size = pci_get_rom_size(dev, rom, size + 1); dev->resource[PCI_ROM_RESOURCE].end = dev->resource[PCI_ROM_RESOURCE].start + image_size - 1; diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c index caf4c33f4e8..7c35787d29b 100644 --- a/arch/sh/boards/board-ap325rxa.c +++ b/arch/sh/boards/board-ap325rxa.c @@ -22,6 +22,7 @@ #include <linux/gpio.h> #include <linux/spi/spi.h> #include <linux/spi/spi_gpio.h> +#include <media/ov772x.h> #include <media/soc_camera_platform.h> #include <media/sh_mobile_ceu.h> #include <video/sh_mobile_lcdc.h> @@ -216,7 +217,14 @@ static struct platform_device lcdc_device = { }, }; +static void camera_power(int val) +{ + gpio_set_value(GPIO_PTZ5, val); /* RST_CAM/RSTB */ + 
mdelay(10); +} + #ifdef CONFIG_I2C +/* support for the old ncm03j camera */ static unsigned char camera_ncm03j_magic[] = { 0x87, 0x00, 0x88, 0x08, 0x89, 0x01, 0x8A, 0xE8, @@ -237,6 +245,23 @@ static unsigned char camera_ncm03j_magic[] = 0x63, 0xD4, 0x64, 0xEA, 0xD6, 0x0F, }; +static int camera_probe(void) +{ + struct i2c_adapter *a = i2c_get_adapter(0); + struct i2c_msg msg; + int ret; + + camera_power(1); + msg.addr = 0x6e; + msg.buf = camera_ncm03j_magic; + msg.len = 2; + msg.flags = 0; + ret = i2c_transfer(a, &msg, 1); + camera_power(0); + + return ret; +} + static int camera_set_capture(struct soc_camera_platform_info *info, int enable) { @@ -245,9 +270,11 @@ static int camera_set_capture(struct soc_camera_platform_info *info, int ret = 0; int i; + camera_power(0); if (!enable) return 0; /* no disable for now */ + camera_power(1); for (i = 0; i < ARRAY_SIZE(camera_ncm03j_magic); i += 2) { u_int8_t buf[8]; @@ -286,8 +313,35 @@ static struct platform_device camera_device = { .platform_data = &camera_info, }, }; + +static int __init camera_setup(void) +{ + if (camera_probe() > 0) + platform_device_register(&camera_device); + + return 0; +} +late_initcall(camera_setup); + #endif /* CONFIG_I2C */ +static int ov7725_power(struct device *dev, int mode) +{ + camera_power(0); + if (mode) + camera_power(1); + + return 0; +} + +static struct ov772x_camera_info ov7725_info = { + .buswidth = SOCAM_DATAWIDTH_8, + .flags = OV772X_FLAG_VFLIP | OV772X_FLAG_HFLIP, + .link = { + .power = ov7725_power, + }, +}; + static struct sh_mobile_ceu_info sh_mobile_ceu_info = { .flags = SOCAM_PCLK_SAMPLE_RISING | SOCAM_HSYNC_ACTIVE_HIGH | SOCAM_VSYNC_ACTIVE_HIGH | SOCAM_MASTER | SOCAM_DATAWIDTH_8, @@ -338,9 +392,6 @@ static struct platform_device *ap325rxa_devices[] __initdata = { &ap325rxa_nor_flash_device, &lcdc_device, &ceu_device, -#ifdef CONFIG_I2C - &camera_device, -#endif &nand_flash_device, &sdcard_cn3_device, }; @@ -349,6 +400,10 @@ static struct i2c_board_info __initdata ap325rxa_i2c_devices[] = { { I2C_BOARD_INFO("pcf8563", 0x51), }, + { + I2C_BOARD_INFO("ov772x", 0x21), + .platform_data = &ov7725_info, + }, }; static struct spi_board_info ap325rxa_spi_devices[] = { @@ -426,7 +481,7 @@ static int __init ap325rxa_devices_setup(void) gpio_request(GPIO_PTZ6, NULL); gpio_direction_output(GPIO_PTZ6, 0); /* STBY_CAM */ gpio_request(GPIO_PTZ5, NULL); - gpio_direction_output(GPIO_PTZ5, 1); /* RST_CAM */ + gpio_direction_output(GPIO_PTZ5, 0); /* RST_CAM */ gpio_request(GPIO_PTZ4, NULL); gpio_direction_output(GPIO_PTZ4, 0); /* SADDR */ diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 5c423fa8e6b..352f87d50fd 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28 -# Fri Jan 9 16:54:19 2009 +# Linux kernel version: 2.6.29-rc2 +# Tue Jan 27 11:45:08 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y @@ -45,12 +45,12 @@ CONFIG_BSD_PROCESS_ACCT=y # CONFIG_AUDIT is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set CONFIG_GROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set CONFIG_USER_SCHED=y # CONFIG_CGROUP_SCHED is not set +# CONFIG_CGROUPS is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -378,6 +378,7 @@ CONFIG_WIRELESS=y # CONFIG_WIRELESS_EXT is not set # CONFIG_LIB80211 is not set # CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set # 
CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -400,6 +401,7 @@ CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set CONFIG_MTD_CONCAT=y CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y # CONFIG_MTD_AR7_PARTS is not set @@ -447,9 +449,7 @@ CONFIG_MTD_CFI_UTIL=y # # CONFIG_MTD_COMPLEX_MAPPINGS is not set CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_PHYSMAP_START=0xffffffff -CONFIG_MTD_PHYSMAP_LEN=0 -CONFIG_MTD_PHYSMAP_BANKWIDTH=0 +# CONFIG_MTD_PHYSMAP_COMPAT is not set # CONFIG_MTD_PLATRAM is not set # @@ -480,6 +480,12 @@ CONFIG_MTD_NAND_SH_FLCTL=y # CONFIG_MTD_ONENAND is not set # +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set +# CONFIG_MTD_QINFO_PROBE is not set + +# # UBI - Unsorted block images # CONFIG_MTD_UBI=y @@ -607,6 +613,10 @@ CONFIG_SMSC911X=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set # CONFIG_IWLWIFI_LEDS is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # CONFIG_WAN is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set @@ -790,6 +800,7 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_PMIC_DA903X is not set # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set # CONFIG_REGULATOR is not set # @@ -837,7 +848,7 @@ CONFIG_SOC_CAMERA=y # CONFIG_SOC_CAMERA_MT9V022 is not set # CONFIG_SOC_CAMERA_TW9910 is not set CONFIG_SOC_CAMERA_PLATFORM=y -# CONFIG_SOC_CAMERA_OV772X is not set +CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set # CONFIG_DAB is not set @@ -1012,6 +1023,7 @@ CONFIG_FS_POSIX_ACL=y CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y @@ -1060,6 +1072,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_JFFS2_FS is not set # CONFIG_UBIFS_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set diff --git a/arch/sh/configs/migor_defconfig b/arch/sh/configs/migor_defconfig index 7758263514b..678576796bd 100644 --- a/arch/sh/configs/migor_defconfig +++ b/arch/sh/configs/migor_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.28 -# Fri Jan 9 17:09:35 2009 +# Linux kernel version: 2.6.29-rc1 +# Thu Jan 22 09:16:16 2009 # CONFIG_SUPERH=y CONFIG_SUPERH32=y @@ -45,8 +45,12 @@ CONFIG_SYSVIPC_SYSCTL=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_CGROUPS is not set # CONFIG_GROUP_SCHED is not set + +# +# Control Group support +# +# CONFIG_CGROUPS is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -389,6 +393,7 @@ CONFIG_WIRELESS_EXT=y CONFIG_WIRELESS_EXT_SYSFS=y # CONFIG_LIB80211 is not set # CONFIG_MAC80211 is not set +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -411,6 +416,7 @@ CONFIG_MTD=y # CONFIG_MTD_DEBUG is not set CONFIG_MTD_CONCAT=y CONFIG_MTD_PARTITIONS=y +# CONFIG_MTD_TESTS is not set # CONFIG_MTD_REDBOOT_PARTS is not set CONFIG_MTD_CMDLINE_PARTS=y # CONFIG_MTD_AR7_PARTS is not set @@ -458,9 +464,7 @@ CONFIG_MTD_CFI_UTIL=y # # CONFIG_MTD_COMPLEX_MAPPINGS is not set CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_PHYSMAP_START=0xffffffff -CONFIG_MTD_PHYSMAP_LEN=0 -CONFIG_MTD_PHYSMAP_BANKWIDTH=0 +# CONFIG_MTD_PHYSMAP_COMPAT is not set # CONFIG_MTD_PLATRAM is not set # @@ -488,6 +492,12 @@ CONFIG_MTD_NAND_PLATFORM=y # 
CONFIG_MTD_ONENAND is not set # +# LPDDR flash memory drivers +# +# CONFIG_MTD_LPDDR is not set +# CONFIG_MTD_QINFO_PROBE is not set + +# # UBI - Unsorted block images # # CONFIG_MTD_UBI is not set @@ -587,6 +597,10 @@ CONFIG_SMC91X=y # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set # CONFIG_IWLWIFI_LEDS is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# # CONFIG_WAN is not set # CONFIG_PPP is not set # CONFIG_SLIP is not set @@ -761,6 +775,7 @@ CONFIG_SSB_POSSIBLE=y # CONFIG_PMIC_DA903X is not set # CONFIG_MFD_WM8400 is not set # CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_PCF50633 is not set # CONFIG_REGULATOR is not set # @@ -806,9 +821,9 @@ CONFIG_SOC_CAMERA=y # CONFIG_SOC_CAMERA_MT9M111 is not set # CONFIG_SOC_CAMERA_MT9T031 is not set # CONFIG_SOC_CAMERA_MT9V022 is not set -# CONFIG_SOC_CAMERA_TW9910 is not set -CONFIG_SOC_CAMERA_PLATFORM=y -# CONFIG_SOC_CAMERA_OV772X is not set +CONFIG_SOC_CAMERA_TW9910=y +# CONFIG_SOC_CAMERA_PLATFORM is not set +CONFIG_SOC_CAMERA_OV772X=y CONFIG_VIDEO_SH_MOBILE_CEU=y # CONFIG_RADIO_ADAPTERS is not set # CONFIG_DAB is not set @@ -866,11 +881,13 @@ CONFIG_USB_GADGET_SELECTED=y # CONFIG_USB_GADGET_PXA25X is not set # CONFIG_USB_GADGET_PXA27X is not set # CONFIG_USB_GADGET_S3C2410 is not set +# CONFIG_USB_GADGET_IMX is not set CONFIG_USB_GADGET_M66592=y CONFIG_USB_M66592=y CONFIG_SUPERH_BUILT_IN_M66592=y # CONFIG_USB_GADGET_AMD5536UDC is not set # CONFIG_USB_GADGET_FSL_QE is not set +# CONFIG_USB_GADGET_CI13XXX is not set # CONFIG_USB_GADGET_NET2280 is not set # CONFIG_USB_GADGET_GOKU is not set # CONFIG_USB_GADGET_DUMMY_HCD is not set @@ -883,6 +900,11 @@ CONFIG_USB_G_SERIAL=y # CONFIG_USB_MIDI_GADGET is not set # CONFIG_USB_G_PRINTER is not set # CONFIG_USB_CDC_COMPOSITE is not set + +# +# OTG and related infrastructure +# +# CONFIG_USB_GPIO_VBUS is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -961,6 +983,7 @@ CONFIG_UIO_PDRV_GENIRQ=y CONFIG_FILE_LOCKING=y # CONFIG_XFS_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY is not set # CONFIG_QUOTA is not set @@ -1004,6 +1027,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_EFS_FS is not set # CONFIG_JFFS2_FS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set diff --git a/arch/sh/include/asm/mutex-llsc.h b/arch/sh/include/asm/mutex-llsc.h index ee839ee58ac..090358a7e1b 100644 --- a/arch/sh/include/asm/mutex-llsc.h +++ b/arch/sh/include/asm/mutex-llsc.h @@ -21,38 +21,36 @@ static inline void __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - int __ex_flag, __res; + int __done, __res; __asm__ __volatile__ ( "movli.l @%2, %0 \n" "add #-1, %0 \n" "movco.l %0, @%2 \n" "movt %1 \n" - : "=&z" (__res), "=&r" (__ex_flag) + : "=&z" (__res), "=&r" (__done) : "r" (&(count)->counter) : "t"); - __res |= !__ex_flag; - if (unlikely(__res != 0)) + if (unlikely(!__done || __res != 0)) fail_fn(count); } static inline int __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) { - int __ex_flag, __res; + int __done, __res; __asm__ __volatile__ ( "movli.l @%2, %0 \n" "add #-1, %0 \n" "movco.l %0, @%2 \n" "movt %1 \n" - : "=&z" (__res), "=&r" (__ex_flag) + : "=&z" (__res), "=&r" (__done) : "r" (&(count)->counter) : "t"); - __res |= !__ex_flag; - if (unlikely(__res != 0)) + if (unlikely(!__done || __res != 0)) __res = fail_fn(count); 
return __res; @@ -61,19 +59,18 @@ __mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *)) static inline void __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *)) { - int __ex_flag, __res; + int __done, __res; __asm__ __volatile__ ( "movli.l @%2, %0 \n\t" "add #1, %0 \n\t" "movco.l %0, @%2 \n\t" "movt %1 \n\t" - : "=&z" (__res), "=&r" (__ex_flag) + : "=&z" (__res), "=&r" (__done) : "r" (&(count)->counter) : "t"); - __res |= !__ex_flag; - if (unlikely(__res <= 0)) + if (unlikely(!__done || __res <= 0)) fail_fn(count); } diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h index 05a868a71ef..5bc34681d99 100644 --- a/arch/sh/include/asm/syscall_32.h +++ b/arch/sh/include/asm/syscall_32.h @@ -21,23 +21,10 @@ static inline void syscall_rollback(struct task_struct *task, */ } -static inline bool syscall_has_error(struct pt_regs *regs) -{ - return (regs->sr & 0x1) ? true : false; -} -static inline void syscall_set_error(struct pt_regs *regs) -{ - regs->sr |= 0x1; -} -static inline void syscall_clear_error(struct pt_regs *regs) -{ - regs->sr &= ~0x1; -} - static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return syscall_has_error(regs) ? regs->regs[0] : 0; + return IS_ERR_VALUE(regs->regs[0]) ? regs->regs[0] : 0; } static inline long syscall_get_return_value(struct task_struct *task, @@ -50,13 +37,10 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - if (error) { - syscall_set_error(regs); + if (error) regs->regs[0] = -error; - } else { - syscall_clear_error(regs); + else regs->regs[0] = val; - } } static inline void syscall_get_arguments(struct task_struct *task, diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h index e1143b9784d..c3561ca72be 100644 --- a/arch/sh/include/asm/syscall_64.h +++ b/arch/sh/include/asm/syscall_64.h @@ -21,23 +21,10 @@ static inline void syscall_rollback(struct task_struct *task, */ } -static inline bool syscall_has_error(struct pt_regs *regs) -{ - return (regs->sr & 0x1) ? true : false; -} -static inline void syscall_set_error(struct pt_regs *regs) -{ - regs->sr |= 0x1; -} -static inline void syscall_clear_error(struct pt_regs *regs) -{ - regs->sr &= ~0x1; -} - static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return syscall_has_error(regs) ? regs->regs[9] : 0; + return IS_ERR_VALUE(regs->regs[9]) ? 
regs->regs[9] : 0; } static inline long syscall_get_return_value(struct task_struct *task, @@ -50,13 +37,10 @@ static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { - if (error) { - syscall_set_error(regs); + if (error) regs->regs[9] = -error; - } else { - syscall_clear_error(regs); + else regs->regs[9] = val; - } } static inline void syscall_get_arguments(struct task_struct *task, diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index 2780917c008..e3ea5411da6 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -423,7 +423,7 @@ static int ieee_fpe_handler(struct pt_regs *regs) int m; unsigned int hx; - m = (finsn >> 9) & 0x7; + m = (finsn >> 8) & 0x7; hx = tsk->thread.fpu.hard.fp_regs[m]; if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR) diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 53424750857..370d2cfa34e 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -262,11 +262,11 @@ void __init setup_bootmem_allocator(unsigned long free_pfn) BOOTMEM_DEFAULT); /* - * reserve physical page 0 - it's a special BIOS page on many boxes, - * enabling clean reboots, SMP operation, laptop functions. + * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET. */ - reserve_bootmem(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET, - BOOTMEM_DEFAULT); + if (CONFIG_ZERO_PAGE_OFFSET != 0) + reserve_bootmem(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET, + BOOTMEM_DEFAULT); sparse_memory_present_with_active_regions(0); diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 77c21bde376..17784e19ae3 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -510,7 +510,6 @@ handle_syscall_restart(unsigned long save_r0, struct pt_regs *regs, case -ERESTARTNOHAND: no_system_call_restart: regs->regs[0] = -EINTR; - regs->sr |= 1; break; case -ERESTARTSYS: @@ -589,8 +588,7 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - if (regs->sr & 1) - handle_syscall_restart(save_r0, regs, &ka.sa); + handle_syscall_restart(save_r0, regs, &ka.sa); /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &ka, &info, oldset, diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index b22fdfaaa19..0663a0ee602 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -60,7 +60,6 @@ handle_syscall_restart(struct pt_regs *regs, struct sigaction *sa) case -ERESTARTNOHAND: no_system_call_restart: regs->regs[REG_RET] = -EINTR; - regs->sr |= 1; break; case -ERESTARTSYS: @@ -109,8 +108,7 @@ static int do_signal(struct pt_regs *regs, sigset_t *oldset) signr = get_signal_to_deliver(&info, &ka, regs, 0); if (signr > 0) { - if (regs->sr & 1) - handle_syscall_restart(regs, &ka.sa); + handle_syscall_restart(regs, &ka.sa); /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { diff --git a/arch/sh/lib/checksum.S b/arch/sh/lib/checksum.S index cbdd0d40e54..356c8ec9289 100644 --- a/arch/sh/lib/checksum.S +++ b/arch/sh/lib/checksum.S @@ -36,8 +36,7 @@ */ /* - * unsigned int csum_partial(const unsigned char *buf, int len, - * unsigned int sum); + * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum); */ .text @@ -49,11 +48,31 @@ ENTRY(csum_partial) * Fortunately, it is easy to convert 2-byte alignment to 4-byte * alignment for the unrolled loop. */ - mov r5, r1 mov r4, r0 - tst #2, r0 ! 
Check alignment. - bt 2f ! Jump if alignment is ok. + tst #3, r0 ! Check alignment. + bt/s 2f ! Jump if alignment is ok. + mov r4, r7 ! Keep a copy to check for alignment ! + tst #1, r0 ! Check alignment. + bt 21f ! Jump if alignment is boundary of 2bytes. + + ! buf is odd + tst r5, r5 + add #-1, r5 + bt 9f + mov.b @r4+, r0 + extu.b r0, r0 + addc r0, r6 ! t=0 from previous tst + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 + mov r4, r0 + tst #2, r0 + bt 2f +21: + ! buf is 2 byte aligned (len could be 0) add #-2, r5 ! Alignment uses up two bytes. cmp/pz r5 ! bt/s 1f ! Jump if we had at least two bytes. @@ -61,16 +80,17 @@ ENTRY(csum_partial) bra 6f add #2, r5 ! r5 was < 2. Deal with it. 1: - mov r5, r1 ! Save new len for later use. mov.w @r4+, r0 extu.w r0, r0 addc r0, r6 bf 2f add #1, r6 2: + ! buf is 4 byte aligned (len could be 0) + mov r5, r1 mov #-5, r0 - shld r0, r5 - tst r5, r5 + shld r0, r1 + tst r1, r1 bt/s 4f ! if it's =0, go to 4f clrt .align 2 @@ -92,30 +112,31 @@ ENTRY(csum_partial) addc r0, r6 addc r2, r6 movt r0 - dt r5 + dt r1 bf/s 3b cmp/eq #1, r0 - ! here, we know r5==0 - addc r5, r6 ! add carry to r6 + ! here, we know r1==0 + addc r1, r6 ! add carry to r6 4: - mov r1, r0 + mov r5, r0 and #0x1c, r0 tst r0, r0 - bt/s 6f - mov r0, r5 - shlr2 r5 + bt 6f + ! 4 bytes or more remaining + mov r0, r1 + shlr2 r1 mov #0, r2 5: addc r2, r6 mov.l @r4+, r2 movt r0 - dt r5 + dt r1 bf/s 5b cmp/eq #1, r0 addc r2, r6 - addc r5, r6 ! r5==0 here, so it means add carry-bit + addc r1, r6 ! r1==0 here, so it means add carry-bit 6: - mov r1, r5 + ! 3 bytes or less remaining mov #3, r0 and r0, r5 tst r5, r5 @@ -139,8 +160,18 @@ ENTRY(csum_partial) 8: addc r0, r6 mov #0, r0 - addc r0, r6 + addc r0, r6 9: + ! Check if the buffer was misaligned, if so realign sum + mov r7, r0 + tst #1, r0 + bt 10f + mov r6, r0 + shll8 r6 + shlr16 r0 + shlr8 r0 + or r0, r6 +10: rts mov r6, r0 diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 256b00b6189..5a0d76dc56a 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -418,9 +418,9 @@ ENTRY(ia32_syscall) orl $TS_COMPAT,TI_status(%r10) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys -ia32_do_syscall: cmpl $(IA32_NR_syscalls-1),%eax - ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + ja ia32_badsys +ia32_do_call: IA32_ARG_FIXUP call *ia32_sys_call_table(,%rax,8) # xxx: rip relative ia32_sysret: @@ -435,7 +435,9 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - jmp ia32_do_syscall + cmpl $(IA32_NR_syscalls-1),%eax + ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ + jmp ia32_do_call END(ia32_syscall) ia32_badsys: diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6f95f..a60c1f3bcb8 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -156,11 +156,11 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); + if (strncmp(str, "s4_nonvs", 8) == 0) + acpi_s4_no_nvs(); #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 4b6df2469fe..115449f869e 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1436,7 +1436,7 @@ 
static int __init detect_init_APIC(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || - (boot_cpu_data.x86 == 15)) + (boot_cpu_data.x86 >= 15)) break; goto no_apic; case X86_VENDOR_INTEL: diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index efae3b22a0f..65792c2cc46 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -245,17 +245,6 @@ config X86_E_POWERSAVER comment "shared options" -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - config X86_SPEEDSTEP_LIB tristate default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d77be7..da299eb85fc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -36,8 +36,11 @@ static struct _cache_table cache_table[] __cpuinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ + { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ @@ -85,6 +88,18 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ + { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ + { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0xd7, LVL_3, 2048 }, /* 8-way set assoc, 64 byte line size */ + { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ + { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ + { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ + { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ + { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ + { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ { 0x00, 0, 0} }; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a98779..a1346217e43 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -346,6 +346,7 @@ ENTRY(save_args) popq_cfi %rax /* move return address... */ mov %gs:pda_irqstackptr,%rsp EMPTY_FRAME 0 + pushq_cfi %rbp /* backlink for unwinder */ pushq_cfi %rax /* ...
to the new stack */ /* * We entered an interrupt context - irqs are off: diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a1302536..9b0c480c383 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2528,14 +2528,15 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); + + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); + } } #else static inline void irq_complete_move(struct irq_desc **descp) {} diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 1507ad4e674..10a09c2f182 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -78,15 +78,6 @@ void __init init_ISA_irqs(void) } } -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... IRQ0_VECTOR - 1] = -1, [IRQ0_VECTOR] = 0, @@ -178,9 +169,6 @@ void __init native_init_IRQ(void) alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif - if (!acpi_ioapic) - setup_irq(2, &irq2); - /* setup after call gates are initialised (usually add in * the architecture specific gates) */ diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index df167f26562..a265a7c6319 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -38,6 +38,15 @@ void __init pre_intr_init_hook(void) init_ISA_irqs(); } +/* + * IRQ2 is cascade interrupt to second interrupt controller + */ +static struct irqaction irq2 = { + .handler = no_action, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + /** * intr_init_hook - post gate setup interrupt initialisation * @@ -53,6 +62,9 @@ void __init intr_init_hook(void) if (x86_quirks->arch_intr_init()) return; } + if (!acpi_ioapic) + setup_irq(2, &irq2); + } /** diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index a580b9562e7..d914a7996a6 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -33,13 +33,23 @@ void __init intr_init_hook(void) setup_irq(2, &irq2); } -void __init pre_setup_arch_hook(void) +static void voyager_disable_tsc(void) { /* Voyagers run their CPUs from independent clocks, so disable * the TSC code because we can't sync them */ setup_clear_cpu_cap(X86_FEATURE_TSC); } +void __init pre_setup_arch_hook(void) +{ + voyager_disable_tsc(); +} + +void __init pre_time_init_hook(void) +{ + voyager_disable_tsc(); +} + void __init trap_init_hook(void) { } diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 9840b7ec749..7ffcdeec463 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -81,7 +81,7 @@ static void enable_local_vic_irq(unsigned int irq); static void disable_local_vic_irq(unsigned int irq); static void before_handle_vic_irq(unsigned int irq); static void after_handle_vic_irq(unsigned int irq); -static void set_vic_irq_affinity(unsigned int irq, cpumask_t mask); +static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask); static void ack_vic_irq(unsigned int irq); static void vic_enable_cpi(void); static void do_boot_cpu(__u8 cpuid); @@ -211,8 
+211,6 @@ static __u32 cpu_booted_map; static cpumask_t smp_commenced_mask = CPU_MASK_NONE; /* This is for the new dynamic CPU boot code */ -cpumask_t cpu_callin_map = CPU_MASK_NONE; -cpumask_t cpu_callout_map = CPU_MASK_NONE; /* The per processor IRQ masks (these are usually kept in sync) */ static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; @@ -378,7 +376,7 @@ void __init find_smp_config(void) cpus_addr(phys_cpu_present_map)[0] |= voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 3) << 24; - cpu_possible_map = phys_cpu_present_map; + init_cpu_possible(&phys_cpu_present_map); printk("VOYAGER SMP: phys_cpu_present_map = 0x%lx\n", cpus_addr(phys_cpu_present_map)[0]); /* Here we set up the VIC to enable SMP */ @@ -1599,16 +1597,16 @@ static void after_handle_vic_irq(unsigned int irq) * change the mask and then do an interrupt enable CPI to re-enable on * the selected processors */ -void set_vic_irq_affinity(unsigned int irq, cpumask_t mask) +void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask) { /* Only extended processors handle interrupts */ unsigned long real_mask; unsigned long irq_mask = 1 << irq; int cpu; - real_mask = cpus_addr(mask)[0] & voyager_extended_vic_processors; + real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors; - if (cpus_addr(mask)[0] == 0) + if (cpus_addr(*mask)[0] == 0) /* can't have no CPUs to accept the interrupt -- extremely * bad things will happen */ return; @@ -1750,10 +1748,11 @@ static void __cpuinit voyager_smp_prepare_boot_cpu(void) init_gdt(smp_processor_id()); switch_to_new_gdt(); - cpu_set(smp_processor_id(), cpu_online_map); - cpu_set(smp_processor_id(), cpu_callout_map); - cpu_set(smp_processor_id(), cpu_possible_map); - cpu_set(smp_processor_id(), cpu_present_map); + cpu_online_map = cpumask_of_cpu(smp_processor_id()); + cpu_callout_map = cpumask_of_cpu(smp_processor_id()); + cpu_callin_map = CPU_MASK_NONE; + cpu_present_map = cpumask_of_cpu(smp_processor_id()); + } static int __cpuinit voyager_cpu_up(unsigned int cpu) @@ -1783,9 +1782,9 @@ void __init smp_setup_processor_id(void) x86_write_percpu(cpu_number, hard_smp_processor_id()); } -static void voyager_send_call_func(cpumask_t callmask) +static void voyager_send_call_func(const struct cpumask *callmask) { - __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id()); + __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id()); send_CPI(mask, VIC_CALL_FUNCTION_CPI); } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 90dfae511a4..c76ef1d701c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -603,8 +603,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) si_code = SEGV_MAPERR; - if (notify_page_fault(regs)) - return; if (unlikely(kmmio_fault(regs, address))) return; @@ -634,6 +632,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (spurious_fault(address, error_code)) return; + /* kprobes don't want to hook the spurious faults. */ + if (notify_page_fault(regs)) + return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. @@ -641,6 +642,9 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) goto bad_area_nosemaphore; } + /* kprobes don't want to hook the spurious faults. 
*/ + if (notify_page_fault(regs)) + return; /* * It's safe to allow irq's after cr2 has been saved and the diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h index 85893824161..fa3e10725d9 100644 --- a/arch/x86/xen/multicalls.h +++ b/arch/x86/xen/multicalls.h @@ -19,8 +19,10 @@ DECLARE_PER_CPU(unsigned long, xen_mc_irq_flags); paired with xen_mc_issue() */ static inline void xen_mc_batch(void) { + unsigned long flags; /* need to disable interrupts until this entry is complete */ - local_irq_save(__get_cpu_var(xen_mc_irq_flags)); + local_irq_save(flags); + __get_cpu_var(xen_mc_irq_flags) = flags; } static inline struct multicall_space xen_mc_entry(size_t args) diff --git a/block/blktrace.c b/block/blktrace.c index c7698d1617a..ca6d32061e4 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -165,7 +165,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; struct blk_io_trace *t; - unsigned long flags; + unsigned long flags = 0; unsigned long *sequence; pid_t pid; int cpu, pc = 0; @@ -187,19 +187,15 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, cpu = raw_smp_processor_id(); if (blk_tr) { - struct trace_entry *ent; tracing_record_cmdline(current); - event = ring_buffer_lock_reserve(blk_tr->buffer, - sizeof(*t) + pdu_len, &flags); + pc = preempt_count(); + event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, + sizeof(*t) + pdu_len, + 0, pc); if (!event) return; - - ent = ring_buffer_event_data(event); - t = (struct blk_io_trace *)ent; - pc = preempt_count(); - tracing_generic_entry_update(ent, 0, pc); - ent->type = TRACE_BLK; + t = ring_buffer_event_data(event); goto record_it; } @@ -241,12 +237,7 @@ record_it: memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); if (blk_tr) { - ring_buffer_unlock_commit(blk_tr->buffer, event, flags); - if (pid != 0 && - !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC) && - (trace_flags & TRACE_ITER_STACKTRACE) != 0) - __trace_stack(blk_tr, NULL, flags, 5, pc); - trace_wake_up(); + trace_buffer_unlock_commit(blk_tr, event, 0, pc); return; } } @@ -1095,8 +1086,6 @@ static void blk_tracer_print_header(struct seq_file *m) static void blk_tracer_start(struct trace_array *tr) { - tracing_reset_online_cpus(tr); - mutex_lock(&blk_probe_mutex); if (atomic_add_return(1, &blk_probes_ref) == 1) if (blk_register_tracepoints()) @@ -1243,8 +1232,6 @@ static struct trace_event trace_blk_event = { .type = TRACE_BLK, .trace = blk_trace_event_print, .latency_trace = blk_trace_event_print, - .raw = trace_nop_print, - .hex = trace_nop_print, .binary = blk_trace_event_print_binary, }; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index d7f9839ba26..a7799a99f2d 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -9,6 +9,7 @@ menuconfig ACPI depends on PCI depends on PM select PNP + select CPU_IDLE default y ---help--- Advanced Configuration and Power Interface (ACPI) support for @@ -287,7 +288,7 @@ config ACPI_CONTAINER support physical cpu/memory hot-plug. If one selects "m", this driver can be loaded with - "modprobe acpi_container". + "modprobe container". 
config ACPI_HOTPLUG_MEMORY tristate "Memory Hotplug" diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 9684cc82793..22ce4898572 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -538,10 +538,9 @@ acpi_tb_parse_root_table(acpi_physical_address rsdp_address, u8 flags) if (ACPI_FAILURE(status)) { ACPI_WARNING((AE_INFO, "Truncating %u table entries!", - (unsigned) - (acpi_gbl_root_table_list.size - - acpi_gbl_root_table_list. - count))); + (unsigned) (table_count - + (acpi_gbl_root_table_list. + count - 2)))); break; } } diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c index da9450bc60f..9c9897dbe90 100644 --- a/drivers/acpi/acpica/uteval.c +++ b/drivers/acpi/acpica/uteval.c @@ -116,9 +116,9 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state) return_ACPI_STATUS(AE_NO_MEMORY); } - /* Default return value is SUPPORTED */ + /* Default return value is 0, NOT-SUPPORTED */ - return_desc->integer.value = ACPI_UINT32_MAX; + return_desc->integer.value = 0; walk_state->return_desc = return_desc; /* Compare input string to static table of supported interfaces */ @@ -127,10 +127,8 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state) if (!ACPI_STRCMP (string_desc->string.pointer, acpi_interfaces_supported[i])) { - - /* The interface is supported */ - - return_ACPI_STATUS(AE_OK); + return_desc->integer.value = ACPI_UINT32_MAX; + goto done; } } @@ -141,15 +139,14 @@ acpi_status acpi_ut_osi_implementation(struct acpi_walk_state *walk_state) */ status = acpi_os_validate_interface(string_desc->string.pointer); if (ACPI_SUCCESS(status)) { - - /* The interface is supported */ - - return_ACPI_STATUS(AE_OK); + return_desc->integer.value = ACPI_UINT32_MAX; } - /* The interface is not supported */ +done: + ACPI_DEBUG_PRINT_RAW((ACPI_DB_INFO, "ACPI: BIOS _OSI(%s) %ssupported\n", + string_desc->string.pointer, + return_desc->integer.value == 0 ? "not-" : "")); - return_desc->integer.value = 0; return_ACPI_STATUS(AE_OK); } diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index 17020c12623..fe0cdf83641 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -163,7 +163,7 @@ static void container_notify_cb(acpi_handle handle, u32 type, void *context) case ACPI_NOTIFY_BUS_CHECK: /* Fall through */ case ACPI_NOTIFY_DEVICE_CHECK: - printk("Container driver received %s event\n", + printk(KERN_WARNING "Container driver received %s event\n", (type == ACPI_NOTIFY_BUS_CHECK) ? 
"ACPI_NOTIFY_BUS_CHECK" : "ACPI_NOTIFY_DEVICE_CHECK"); status = acpi_bus_get_device(handle, &device); @@ -174,7 +174,8 @@ static void container_notify_cb(acpi_handle handle, u32 type, void *context) kobject_uevent(&device->dev.kobj, KOBJ_ONLINE); else - printk("Failed to add container\n"); + printk(KERN_WARNING + "Failed to add container\n"); } } else { if (ACPI_SUCCESS(status)) { diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index 5b30b8d91d7..35094f230b1 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -855,10 +855,14 @@ fdd_out: static ssize_t show_docked(struct device *dev, struct device_attribute *attr, char *buf) { + struct acpi_device *tmp; + struct dock_station *dock_station = *((struct dock_station **) dev->platform_data); - return snprintf(buf, PAGE_SIZE, "%d\n", dock_present(dock_station)); + if (ACPI_SUCCESS(acpi_bus_get_device(dock_station->handle, &tmp))) + return snprintf(buf, PAGE_SIZE, "1\n"); + return snprintf(buf, PAGE_SIZE, "0\n"); } static DEVICE_ATTR(docked, S_IRUGO, show_docked, NULL); @@ -984,7 +988,7 @@ static int dock_add(acpi_handle handle) ret = device_create_file(&dock_device->dev, &dev_attr_docked); if (ret) { - printk("Error %d adding sysfs file\n", ret); + printk(KERN_ERR "Error %d adding sysfs file\n", ret); platform_device_unregister(dock_device); kfree(dock_station); dock_station = NULL; @@ -992,7 +996,7 @@ static int dock_add(acpi_handle handle) } ret = device_create_file(&dock_device->dev, &dev_attr_undock); if (ret) { - printk("Error %d adding sysfs file\n", ret); + printk(KERN_ERR "Error %d adding sysfs file\n", ret); device_remove_file(&dock_device->dev, &dev_attr_docked); platform_device_unregister(dock_device); kfree(dock_station); @@ -1001,7 +1005,7 @@ static int dock_add(acpi_handle handle) } ret = device_create_file(&dock_device->dev, &dev_attr_uid); if (ret) { - printk("Error %d adding sysfs file\n", ret); + printk(KERN_ERR "Error %d adding sysfs file\n", ret); device_remove_file(&dock_device->dev, &dev_attr_docked); device_remove_file(&dock_device->dev, &dev_attr_undock); platform_device_unregister(dock_device); @@ -1011,7 +1015,7 @@ static int dock_add(acpi_handle handle) } ret = device_create_file(&dock_device->dev, &dev_attr_flags); if (ret) { - printk("Error %d adding sysfs file\n", ret); + printk(KERN_ERR "Error %d adding sysfs file\n", ret); device_remove_file(&dock_device->dev, &dev_attr_docked); device_remove_file(&dock_device->dev, &dev_attr_undock); device_remove_file(&dock_device->dev, &dev_attr_uid); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index a2b82c90a68..5c2f5d343be 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -982,7 +982,7 @@ int __init acpi_ec_ecdt_probe(void) saved_ec = kmalloc(sizeof(struct acpi_ec), GFP_KERNEL); if (!saved_ec) return -ENOMEM; - memcpy(&saved_ec, boot_ec, sizeof(saved_ec)); + memcpy(saved_ec, boot_ec, sizeof(*saved_ec)); /* fall through */ } /* This workaround is needed only on some broken machines, diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index adec3d15810..5479b9f4251 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -255,12 +255,12 @@ static int acpi_platform_notify(struct device *dev) } type = acpi_get_bus_type(dev->bus); if (!type) { - DBG("No ACPI bus support for %s\n", dev->bus_id); + DBG("No ACPI bus support for %s\n", dev_name(dev)); ret = -EINVAL; goto end; } if ((ret = type->find_device(dev, &handle)) != 0) - DBG("Can't get handler for %s\n", dev->bus_id); + DBG("Can't get handler for %s\n", dev_name(dev)); end: if (!ret) 
acpi_bind_one(dev, handle); @@ -271,10 +271,10 @@ static int acpi_platform_notify(struct device *dev) acpi_get_name(dev->archdata.acpi_handle, ACPI_FULL_PATHNAME, &buffer); - DBG("Device %s -> %s\n", dev->bus_id, (char *)buffer.pointer); + DBG("Device %s -> %s\n", dev_name(dev), (char *)buffer.pointer); kfree(buffer.pointer); } else - DBG("Device %s -> No ACPI support\n", dev->bus_id); + DBG("Device %s -> No ACPI support\n", dev_name(dev)); #endif return ret; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 6729a4992f2..b3193ec0a2e 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -228,10 +228,10 @@ void acpi_os_vprintf(const char *fmt, va_list args) if (acpi_in_debugger) { kdb_printf("%s", buffer); } else { - printk("%s", buffer); + printk(KERN_CONT "%s", buffer); } #else - printk("%s", buffer); + printk(KERN_CONT "%s", buffer); #endif } @@ -1317,6 +1317,54 @@ acpi_os_validate_interface (char *interface) return AE_SUPPORT; } +#ifdef CONFIG_X86 + +struct aml_port_desc { + uint start; + uint end; + char* name; + char warned; +}; + +static struct aml_port_desc aml_invalid_port_list[] = { + {0x20, 0x21, "PIC0", 0}, + {0xA0, 0xA1, "PIC1", 0}, + {0x4D0, 0x4D1, "ELCR", 0} +}; + +/* + * valid_aml_io_address() + * + * if valid, return true + * else invalid, warn once, return false + */ +static bool valid_aml_io_address(uint address, uint length) +{ + int i; + int entries = sizeof(aml_invalid_port_list) / sizeof(struct aml_port_desc); + + for (i = 0; i < entries; ++i) { + if ((address >= aml_invalid_port_list[i].start && + address <= aml_invalid_port_list[i].end) || + (address + length >= aml_invalid_port_list[i].start && + address + length <= aml_invalid_port_list[i].end)) + { + if (!aml_invalid_port_list[i].warned) + { + printk(KERN_ERR "ACPI: Denied BIOS AML access" + " to invalid port 0x%x+0x%x (%s)\n", + address, length, + aml_invalid_port_list[i].name); + aml_invalid_port_list[i].warned = 1; + } + return false; /* invalid */ + } + } + return true; /* valid */ +} +#else +static inline bool valid_aml_io_address(uint address, uint length) { return true; } +#endif /****************************************************************************** * * FUNCTION: acpi_os_validate_address @@ -1346,6 +1394,8 @@ acpi_os_validate_address ( switch (space_id) { case ACPI_ADR_SPACE_SYSTEM_IO: + if (!valid_aml_io_address(address, length)) + return AE_AML_ILLEGAL_ADDRESS; case ACPI_ADR_SPACE_SYSTEM_MEMORY: /* Only interference checks against SystemIO and SytemMemory are needed */ diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 1c6e73c7865..6c772ca76bd 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -593,7 +593,7 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link) return -ENODEV; } else { acpi_irq_penalty[link->irq.active] += PIRQ_PENALTY_PCI_USING; - printk(PREFIX "%s [%s] enabled at IRQ %d\n", + printk(KERN_WARNING PREFIX "%s [%s] enabled at IRQ %d\n", acpi_device_name(link->device), acpi_device_bid(link->device), link->irq.active); } diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 66a9d814556..7bc22a471fe 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -66,43 +66,17 @@ ACPI_MODULE_NAME("processor_idle"); #define ACPI_PROCESSOR_FILE_POWER "power" #define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000) #define PM_TIMER_TICK_NS (1000000000ULL/PM_TIMER_FREQUENCY) -#ifndef CONFIG_CPU_IDLE -#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */ -#define 
C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */ -static void (*pm_idle_save) (void) __read_mostly; -#else #define C2_OVERHEAD 1 /* 1us */ #define C3_OVERHEAD 1 /* 1us */ -#endif #define PM_TIMER_TICKS_TO_US(p) (((p) * 1000)/(PM_TIMER_FREQUENCY/1000)) static unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER; -#ifdef CONFIG_CPU_IDLE module_param(max_cstate, uint, 0000); -#else -module_param(max_cstate, uint, 0644); -#endif static unsigned int nocst __read_mostly; module_param(nocst, uint, 0000); -#ifndef CONFIG_CPU_IDLE -/* - * bm_history -- bit-mask with a bit per jiffy of bus-master activity - * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms - * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms - * 100 HZ: 0x0000000F: 4 jiffies = 40ms - * reduce history for more aggressive entry into C3 - */ -static unsigned int bm_history __read_mostly = - (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1)); -module_param(bm_history, uint, 0644); - -static int acpi_processor_set_power_policy(struct acpi_processor *pr); - -#else /* CONFIG_CPU_IDLE */ static unsigned int latency_factor __read_mostly = 2; module_param(latency_factor, uint, 0644); -#endif /* * IBM ThinkPad R40e crashes mysteriously when going into C2 or C3. @@ -224,71 +198,6 @@ static void acpi_safe_halt(void) current_thread_info()->status |= TS_POLLING; } -#ifndef CONFIG_CPU_IDLE - -static void -acpi_processor_power_activate(struct acpi_processor *pr, - struct acpi_processor_cx *new) -{ - struct acpi_processor_cx *old; - - if (!pr || !new) - return; - - old = pr->power.state; - - if (old) - old->promotion.count = 0; - new->demotion.count = 0; - - /* Cleanup from old state. */ - if (old) { - switch (old->type) { - case ACPI_STATE_C3: - /* Disable bus master reload */ - if (new->type != ACPI_STATE_C3 && pr->flags.bm_check) - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - break; - } - } - - /* Prepare to use new state. */ - switch (new->type) { - case ACPI_STATE_C3: - /* Enable bus master reload */ - if (old->type != ACPI_STATE_C3 && pr->flags.bm_check) - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); - break; - } - - pr->power.state = new; - - return; -} - -static atomic_t c3_cpu_count; - -/* Common C-state entry for C2, C3, .. */ -static void acpi_cstate_enter(struct acpi_processor_cx *cstate) -{ - /* Don't trace irqs off for idle */ - stop_critical_timings(); - if (cstate->entry_method == ACPI_CSTATE_FFH) { - /* Call into architectural FFH based C-state */ - acpi_processor_ffh_cstate_enter(cstate); - } else { - int unused; - /* IO port based C-state */ - inb(cstate->address); - /* Dummy wait op - must do something useless after P_LVL2 read - because chipsets cannot guarantee that STPCLK# signal - gets asserted in time to freeze execution properly. */ - unused = inl(acpi_gbl_FADT.xpm_timer_block.address); - } - start_critical_timings(); -} -#endif /* !CONFIG_CPU_IDLE */ - #ifdef ARCH_APICTIMER_STOPS_ON_C3 /* @@ -390,421 +299,6 @@ static int tsc_halts_in_c(int state) } #endif -#ifndef CONFIG_CPU_IDLE -static void acpi_processor_idle(void) -{ - struct acpi_processor *pr = NULL; - struct acpi_processor_cx *cx = NULL; - struct acpi_processor_cx *next_state = NULL; - int sleep_ticks = 0; - u32 t1, t2 = 0; - - /* - * Interrupts must be disabled during bus mastering calculations and - * for C2/C3 transitions. 
- */ - local_irq_disable(); - - pr = __get_cpu_var(processors); - if (!pr) { - local_irq_enable(); - return; - } - - /* - * Check whether we truly need to go idle, or should - * reschedule: - */ - if (unlikely(need_resched())) { - local_irq_enable(); - return; - } - - cx = pr->power.state; - if (!cx || acpi_idle_suspend) { - if (pm_idle_save) { - pm_idle_save(); /* enables IRQs */ - } else { - acpi_safe_halt(); - local_irq_enable(); - } - - return; - } - - /* - * Check BM Activity - * ----------------- - * Check for bus mastering activity (if required), record, and check - * for demotion. - */ - if (pr->flags.bm_check) { - u32 bm_status = 0; - unsigned long diff = jiffies - pr->power.bm_check_timestamp; - - if (diff > 31) - diff = 31; - - pr->power.bm_activity <<= diff; - - acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); - if (bm_status) { - pr->power.bm_activity |= 0x1; - acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); - } - /* - * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect - * the true state of bus mastering activity; forcing us to - * manually check the BMIDEA bit of each IDE channel. - */ - else if (errata.piix4.bmisx) { - if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01) - || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01)) - pr->power.bm_activity |= 0x1; - } - - pr->power.bm_check_timestamp = jiffies; - - /* - * If bus mastering is or was active this jiffy, demote - * to avoid a faulty transition. Note that the processor - * won't enter a low-power state during this call (to this - * function) but should upon the next. - * - * TBD: A better policy might be to fallback to the demotion - * state (use it for this quantum only) istead of - * demoting -- and rely on duration as our sole demotion - * qualification. This may, however, introduce DMA - * issues (e.g. floppy DMA transfer overrun/underrun). - */ - if ((pr->power.bm_activity & 0x1) && - cx->demotion.threshold.bm) { - local_irq_enable(); - next_state = cx->demotion.state; - goto end; - } - } - -#ifdef CONFIG_HOTPLUG_CPU - /* - * Check for P_LVL2_UP flag before entering C2 and above on - * an SMP system. We do it here instead of doing it at _CST/P_LVL - * detection phase, to work cleanly with logical CPU hotplug. - */ - if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) && - !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) - cx = &pr->power.states[ACPI_STATE_C1]; -#endif - - /* - * Sleep: - * ------ - * Invoke the current Cx state to put the processor to sleep. - */ - if (cx->type == ACPI_STATE_C2 || cx->type == ACPI_STATE_C3) { - current_thread_info()->status &= ~TS_POLLING; - /* - * TS_POLLING-cleared state must be visible before we - * test NEED_RESCHED: - */ - smp_mb(); - if (need_resched()) { - current_thread_info()->status |= TS_POLLING; - local_irq_enable(); - return; - } - } - - switch (cx->type) { - - case ACPI_STATE_C1: - /* - * Invoke C1. - * Use the appropriate idle routine, the one that would - * be used without acpi C-states. - */ - if (pm_idle_save) { - pm_idle_save(); /* enables IRQs */ - } else { - acpi_safe_halt(); - local_irq_enable(); - } - - /* - * TBD: Can't get time duration while in C1, as resumes - * go to an ISR rather than here. Need to instrument - * base interrupt handler. - * - * Note: the TSC better not stop in C1, sched_clock() will - * skew otherwise. 
- */ - sleep_ticks = 0xFFFFFFFF; - - break; - - case ACPI_STATE_C2: - /* Get start time (ticks) */ - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); - /* Tell the scheduler that we are going deep-idle: */ - sched_clock_idle_sleep_event(); - /* Invoke C2 */ - acpi_state_timer_broadcast(pr, cx, 1); - acpi_cstate_enter(cx); - /* Get end time (ticks) */ - t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); - -#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) - /* TSC halts in C2, so notify users */ - if (tsc_halts_in_c(ACPI_STATE_C2)) - mark_tsc_unstable("possible TSC halt in C2"); -#endif - /* Compute time (ticks) that we were actually asleep */ - sleep_ticks = ticks_elapsed(t1, t2); - - /* Tell the scheduler how much we idled: */ - sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS); - - /* Re-enable interrupts */ - local_irq_enable(); - /* Do not account our idle-switching overhead: */ - sleep_ticks -= cx->latency_ticks + C2_OVERHEAD; - - current_thread_info()->status |= TS_POLLING; - acpi_state_timer_broadcast(pr, cx, 0); - break; - - case ACPI_STATE_C3: - acpi_unlazy_tlb(smp_processor_id()); - /* - * Must be done before busmaster disable as we might - * need to access HPET ! - */ - acpi_state_timer_broadcast(pr, cx, 1); - /* - * disable bus master - * bm_check implies we need ARB_DIS - * !bm_check implies we need cache flush - * bm_control implies whether we can do ARB_DIS - * - * That leaves a case where bm_check is set and bm_control is - * not set. In that case we cannot do much, we enter C3 - * without doing anything. - */ - if (pr->flags.bm_check && pr->flags.bm_control) { - if (atomic_inc_return(&c3_cpu_count) == - num_online_cpus()) { - /* - * All CPUs are trying to go to C3 - * Disable bus master arbitration - */ - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1); - } - } else if (!pr->flags.bm_check) { - /* SMP with no shared cache... Invalidate cache */ - ACPI_FLUSH_CPU_CACHE(); - } - - /* Get start time (ticks) */ - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); - /* Invoke C3 */ - /* Tell the scheduler that we are going deep-idle: */ - sched_clock_idle_sleep_event(); - acpi_cstate_enter(cx); - /* Get end time (ticks) */ - t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); - if (pr->flags.bm_check && pr->flags.bm_control) { - /* Enable bus master arbitration */ - atomic_dec(&c3_cpu_count); - acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0); - } - -#if defined (CONFIG_GENERIC_TIME) && defined (CONFIG_X86) - /* TSC halts in C3, so notify users */ - if (tsc_halts_in_c(ACPI_STATE_C3)) - mark_tsc_unstable("TSC halts in C3"); -#endif - /* Compute time (ticks) that we were actually asleep */ - sleep_ticks = ticks_elapsed(t1, t2); - /* Tell the scheduler how much we idled: */ - sched_clock_idle_wakeup_event(sleep_ticks*PM_TIMER_TICK_NS); - - /* Re-enable interrupts */ - local_irq_enable(); - /* Do not account our idle-switching overhead: */ - sleep_ticks -= cx->latency_ticks + C3_OVERHEAD; - - current_thread_info()->status |= TS_POLLING; - acpi_state_timer_broadcast(pr, cx, 0); - break; - - default: - local_irq_enable(); - return; - } - cx->usage++; - if ((cx->type != ACPI_STATE_C1) && (sleep_ticks > 0)) - cx->time += sleep_ticks; - - next_state = pr->power.state; - -#ifdef CONFIG_HOTPLUG_CPU - /* Don't do promotion/demotion */ - if ((cx->type == ACPI_STATE_C1) && (num_online_cpus() > 1) && - !pr->flags.has_cst && !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED)) { - next_state = cx; - goto end; - } -#endif - - /* - * Promotion? 
- * ---------- - * Track the number of longs (time asleep is greater than threshold) - * and promote when the count threshold is reached. Note that bus - * mastering activity may prevent promotions. - * Do not promote above max_cstate. - */ - if (cx->promotion.state && - ((cx->promotion.state - pr->power.states) <= max_cstate)) { - if (sleep_ticks > cx->promotion.threshold.ticks && - cx->promotion.state->latency <= - pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { - cx->promotion.count++; - cx->demotion.count = 0; - if (cx->promotion.count >= - cx->promotion.threshold.count) { - if (pr->flags.bm_check) { - if (! - (pr->power.bm_activity & cx-> - promotion.threshold.bm)) { - next_state = - cx->promotion.state; - goto end; - } - } else { - next_state = cx->promotion.state; - goto end; - } - } - } - } - - /* - * Demotion? - * --------- - * Track the number of shorts (time asleep is less than time threshold) - * and demote when the usage threshold is reached. - */ - if (cx->demotion.state) { - if (sleep_ticks < cx->demotion.threshold.ticks) { - cx->demotion.count++; - cx->promotion.count = 0; - if (cx->demotion.count >= cx->demotion.threshold.count) { - next_state = cx->demotion.state; - goto end; - } - } - } - - end: - /* - * Demote if current state exceeds max_cstate - * or if the latency of the current state is unacceptable - */ - if ((pr->power.state - pr->power.states) > max_cstate || - pr->power.state->latency > - pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY)) { - if (cx->demotion.state) - next_state = cx->demotion.state; - } - - /* - * New Cx State? - * ------------- - * If we're going to start using a new Cx state we must clean up - * from the previous and prepare to use the new. - */ - if (next_state != pr->power.state) - acpi_processor_power_activate(pr, next_state); -} - -static int acpi_processor_set_power_policy(struct acpi_processor *pr) -{ - unsigned int i; - unsigned int state_is_set = 0; - struct acpi_processor_cx *lower = NULL; - struct acpi_processor_cx *higher = NULL; - struct acpi_processor_cx *cx; - - - if (!pr) - return -EINVAL; - - /* - * This function sets the default Cx state policy (OS idle handler). - * Our scheme is to promote quickly to C2 but more conservatively - * to C3. We're favoring C2 for its characteristics of low latency - * (quick response), good power savings, and ability to allow bus - * mastering activity. Note that the Cx state policy is completely - * customizable and can be altered dynamically. 
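The promotion path above counts consecutive long sleeps before moving to a deeper C-state, and the demotion path falls back after short ones. A condensed user-space model of that bookkeeping; the threshold values are illustrative stand-ins for the defaults programmed below in acpi_processor_set_power_policy():

#include <stdio.h>

int main(void)
{
        /* invented thresholds for one C2-type state */
        const int promote_ticks = 100;  /* sleeps longer than this count */
        const int promote_count = 4;    /* longs needed before promoting */
        int count = 0, state = 2;
        int sleep_ticks[] = { 120, 130, 90, 150, 160, 170, 180 };
        int i;

        for (i = 0; i < 7; i++) {
                if (sleep_ticks[i] > promote_ticks) {
                        if (++count >= promote_count) {
                                state++;        /* promote, e.g. C2 -> C3 */
                                count = 0;
                        }
                } else {
                        count = 0;
                        if (state > 1)
                                state--;        /* one short sleep demotes */
                }
                printf("slept %d ticks -> C%d\n", sleep_ticks[i], state);
        }
        return 0;
}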
- */ - - /* startup state */ - for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { - cx = &pr->power.states[i]; - if (!cx->valid) - continue; - - if (!state_is_set) - pr->power.state = cx; - state_is_set++; - break; - } - - if (!state_is_set) - return -ENODEV; - - /* demotion */ - for (i = 1; i < ACPI_PROCESSOR_MAX_POWER; i++) { - cx = &pr->power.states[i]; - if (!cx->valid) - continue; - - if (lower) { - cx->demotion.state = lower; - cx->demotion.threshold.ticks = cx->latency_ticks; - cx->demotion.threshold.count = 1; - if (cx->type == ACPI_STATE_C3) - cx->demotion.threshold.bm = bm_history; - } - - lower = cx; - } - - /* promotion */ - for (i = (ACPI_PROCESSOR_MAX_POWER - 1); i > 0; i--) { - cx = &pr->power.states[i]; - if (!cx->valid) - continue; - - if (higher) { - cx->promotion.state = higher; - cx->promotion.threshold.ticks = cx->latency_ticks; - if (cx->type >= ACPI_STATE_C2) - cx->promotion.threshold.count = 4; - else - cx->promotion.threshold.count = 10; - if (higher->type == ACPI_STATE_C3) - cx->promotion.threshold.bm = bm_history; - } - - higher = cx; - } - - return 0; -} -#endif /* !CONFIG_CPU_IDLE */ - static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr) { @@ -1047,11 +541,7 @@ static void acpi_processor_power_verify_c2(struct acpi_processor_cx *cx) */ cx->valid = 1; -#ifndef CONFIG_CPU_IDLE - cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); -#else cx->latency_ticks = cx->latency; -#endif return; } @@ -1121,7 +611,6 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr, " for C3 to be enabled on SMP systems\n")); return; } - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); } /* @@ -1132,11 +621,16 @@ static void acpi_processor_power_verify_c3(struct acpi_processor *pr, */ cx->valid = 1; -#ifndef CONFIG_CPU_IDLE - cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency); -#else cx->latency_ticks = cx->latency; -#endif + /* + * On older chipsets, BM_RLD needs to be set + * in order for Bus Master activity to wake the + * system from C3. Newer chipsets handle DMA + * during C3 automatically and BM_RLD is a NOP. + * In either case, the proper way to + * handle BM_RLD is to set it and leave it set. + */ + acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); return; } @@ -1201,20 +695,6 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr) pr->power.count = acpi_processor_power_verify(pr); -#ifndef CONFIG_CPU_IDLE - /* - * Set Default Policy - * ------------------ - * Now that we know which states are supported, set the default - * policy. Note that this policy can be changed dynamically - * (e.g. encourage deeper sleeps to conserve battery life when - * not on AC). - */ - result = acpi_processor_set_power_policy(pr); - if (result) - return result; -#endif - /* * if one state of type C2 or C3 is available, mark this * CPU as being "idle manageable" @@ -1312,69 +792,6 @@ static const struct file_operations acpi_processor_power_fops = { .release = single_release, }; -#ifndef CONFIG_CPU_IDLE - -int acpi_processor_cst_has_changed(struct acpi_processor *pr) -{ - int result = 0; - - if (boot_option_idle_override) - return 0; - - if (!pr) - return -EINVAL; - - if (nocst) { - return -ENODEV; - } - - if (!pr->flags.power_setup_done) - return -ENODEV; - - /* - * Fall back to the default idle loop, when pm_idle_save had - * been initialized. - */ - if (pm_idle_save) { - pm_idle = pm_idle_save; - /* Relies on interrupts forcing exit from idle. 
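With cpuidle, latency_ticks now simply holds the latency in microseconds; the retired US_TO_PM_TIMER_TICKS() round trip was lossy. A demonstration, assuming the inverse macro had the obvious form (PM_TIMER_TICKS_TO_US() is quoted from above):

#include <stdio.h>

#define PM_TIMER_FREQUENCY 3579545      /* 3.579545 MHz ACPI PM timer */
/* inverse of PM_TIMER_TICKS_TO_US; exact kernel form assumed */
#define US_TO_PM_TIMER_TICKS(t) (((t) * (PM_TIMER_FREQUENCY / 1000)) / 1000)
#define PM_TIMER_TICKS_TO_US(p) (((p) * 1000) / (PM_TIMER_FREQUENCY / 1000))

int main(void)
{
        unsigned int latency_us = 100;
        unsigned int ticks = US_TO_PM_TIMER_TICKS(latency_us);

        /* 100us -> 357 ticks -> 99us: a lossy round trip */
        printf("%uus -> %u ticks -> %uus\n",
               latency_us, ticks, PM_TIMER_TICKS_TO_US(ticks));
        return 0;
}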
*/ - synchronize_sched(); - } - - pr->flags.power = 0; - result = acpi_processor_get_power_info(pr); - if ((pr->flags.power == 1) && (pr->flags.power_setup_done)) - pm_idle = acpi_processor_idle; - - return result; -} - -#ifdef CONFIG_SMP -static void smp_callback(void *v) -{ - /* we already woke the CPU up, nothing more to do */ -} - -/* - * This function gets called when a part of the kernel has a new latency - * requirement. This means we need to get all processors out of their C-state, - * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that - * wakes them all right up. - */ -static int acpi_processor_latency_notify(struct notifier_block *b, - unsigned long l, void *v) -{ - smp_call_function(smp_callback, NULL, 1); - return NOTIFY_OK; -} - -static struct notifier_block acpi_processor_latency_notifier = { - .notifier_call = acpi_processor_latency_notify, -}; - -#endif - -#else /* CONFIG_CPU_IDLE */ /** * acpi_idle_bm_check - checks if bus master activity was detected @@ -1383,7 +800,7 @@ static int acpi_idle_bm_check(void) { u32 bm_status = 0; - acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); + acpi_get_register_unlocked(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status); if (bm_status) acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1); /* @@ -1400,25 +817,6 @@ static int acpi_idle_bm_check(void) } /** - * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state - * @pr: the processor - * @target: the new target state - */ -static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr, - struct acpi_processor_cx *target) -{ - if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) { - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0); - pr->flags.bm_rld_set = 0; - } - - if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) { - acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1); - pr->flags.bm_rld_set = 1; - } -} - -/** * acpi_idle_do_entry - a helper function that does C2 and C3 type entry * @cx: cstate data * @@ -1473,9 +871,6 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev, return 0; } - if (pr->flags.bm_check) - acpi_idle_update_bm_rld(pr, cx); - t1 = inl(acpi_gbl_FADT.xpm_timer_block.address); acpi_idle_do_entry(cx); t2 = inl(acpi_gbl_FADT.xpm_timer_block.address); @@ -1527,9 +922,6 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev, */ acpi_state_timer_broadcast(pr, cx, 1); - if (pr->flags.bm_check) - acpi_idle_update_bm_rld(pr, cx); - if (cx->type == ACPI_STATE_C3) ACPI_FLUSH_CPU_CACHE(); @@ -1621,8 +1013,6 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev, */ acpi_state_timer_broadcast(pr, cx, 1); - acpi_idle_update_bm_rld(pr, cx); - /* * disable bus master * bm_check implies we need ARB_DIS @@ -1795,8 +1185,6 @@ int acpi_processor_cst_has_changed(struct acpi_processor *pr) return ret; } -#endif /* CONFIG_CPU_IDLE */ - int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, struct acpi_device *device) { @@ -1825,10 +1213,6 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, "ACPI: processor limited to max C-state %d\n", max_cstate); first_run++; -#if !defined(CONFIG_CPU_IDLE) && defined(CONFIG_SMP) - pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, - &acpi_processor_latency_notifier); -#endif } if (!pr) @@ -1852,11 +1236,9 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, * platforms that only support C1. 
*/ if (pr->flags.power) { -#ifdef CONFIG_CPU_IDLE acpi_processor_setup_cpuidle(pr); if (cpuidle_register_device(&pr->power.dev)) return -EIO; -#endif printk(KERN_INFO PREFIX "CPU%d (power states:", pr->id); for (i = 1; i <= pr->power.count; i++) @@ -1864,13 +1246,6 @@ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr, printk(" C%d[C%d]", i, pr->power.states[i].type); printk(")\n"); - -#ifndef CONFIG_CPU_IDLE - if (pr->id == 0) { - pm_idle_save = pm_idle; - pm_idle = acpi_processor_idle; - } -#endif } /* 'power' [R] */ @@ -1889,34 +1264,12 @@ int acpi_processor_power_exit(struct acpi_processor *pr, if (boot_option_idle_override) return 0; -#ifdef CONFIG_CPU_IDLE cpuidle_unregister_device(&pr->power.dev); -#endif pr->flags.power_setup_done = 0; if (acpi_device_dir(device)) remove_proc_entry(ACPI_PROCESSOR_FILE_POWER, acpi_device_dir(device)); -#ifndef CONFIG_CPU_IDLE - - /* Unregister the idle handler when processor #0 is removed. */ - if (pr->id == 0) { - if (pm_idle_save) - pm_idle = pm_idle_save; - - /* - * We are about to unload the current idle thread pm callback - * (pm_idle), Wait for all processors to update cached/local - * copies of pm_idle before proceeding. - */ - cpu_idle_wait(); -#ifdef CONFIG_SMP - pm_qos_remove_notifier(PM_QOS_CPU_DMA_LATENCY, - &acpi_processor_latency_notifier); -#endif - } -#endif - return 0; } diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 846e227592d..9cc769b587f 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -31,14 +31,6 @@ #include <linux/init.h> #include <linux/cpufreq.h> -#ifdef CONFIG_X86_ACPI_CPUFREQ_PROC_INTF -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/mutex.h> - -#include <asm/uaccess.h> -#endif - #ifdef CONFIG_X86 #include <asm/cpufeature.h> #endif @@ -434,96 +426,6 @@ int acpi_processor_notify_smm(struct module *calling_module) EXPORT_SYMBOL(acpi_processor_notify_smm); -#ifdef CONFIG_X86_ACPI_CPUFREQ_PROC_INTF -/* /proc/acpi/processor/../performance interface (DEPRECATED) */ - -static int acpi_processor_perf_open_fs(struct inode *inode, struct file *file); -static struct file_operations acpi_processor_perf_fops = { - .owner = THIS_MODULE, - .open = acpi_processor_perf_open_fs, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int acpi_processor_perf_seq_show(struct seq_file *seq, void *offset) -{ - struct acpi_processor *pr = seq->private; - int i; - - - if (!pr) - goto end; - - if (!pr->performance) { - seq_puts(seq, "<not supported>\n"); - goto end; - } - - seq_printf(seq, "state count: %d\n" - "active state: P%d\n", - pr->performance->state_count, pr->performance->state); - - seq_puts(seq, "states:\n"); - for (i = 0; i < pr->performance->state_count; i++) - seq_printf(seq, - " %cP%d: %d MHz, %d mW, %d uS\n", - (i == pr->performance->state ? 
'*' : ' '), i, - (u32) pr->performance->states[i].core_frequency, - (u32) pr->performance->states[i].power, - (u32) pr->performance->states[i].transition_latency); - - end: - return 0; -} - -static int acpi_processor_perf_open_fs(struct inode *inode, struct file *file) -{ - return single_open(file, acpi_processor_perf_seq_show, - PDE(inode)->data); -} - -static void acpi_cpufreq_add_file(struct acpi_processor *pr) -{ - struct acpi_device *device = NULL; - - - if (acpi_bus_get_device(pr->handle, &device)) - return; - - /* add file 'performance' [R/W] */ - proc_create_data(ACPI_PROCESSOR_FILE_PERFORMANCE, S_IFREG | S_IRUGO, - acpi_device_dir(device), - &acpi_processor_perf_fops, acpi_driver_data(device)); - return; -} - -static void acpi_cpufreq_remove_file(struct acpi_processor *pr) -{ - struct acpi_device *device = NULL; - - - if (acpi_bus_get_device(pr->handle, &device)) - return; - - /* remove file 'performance' */ - remove_proc_entry(ACPI_PROCESSOR_FILE_PERFORMANCE, - acpi_device_dir(device)); - - return; -} - -#else -static void acpi_cpufreq_add_file(struct acpi_processor *pr) -{ - return; -} -static void acpi_cpufreq_remove_file(struct acpi_processor *pr) -{ - return; -} -#endif /* CONFIG_X86_ACPI_CPUFREQ_PROC_INTF */ - static int acpi_processor_get_psd(struct acpi_processor *pr) { int result = 0; @@ -747,14 +649,12 @@ err_ret: } EXPORT_SYMBOL(acpi_processor_preregister_performance); - int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu) { struct acpi_processor *pr; - if (!(acpi_processor_ppc_status & PPC_REGISTERED)) return -EINVAL; @@ -781,8 +681,6 @@ acpi_processor_register_performance(struct acpi_processor_performance return -EIO; } - acpi_cpufreq_add_file(pr); - mutex_unlock(&performance_mutex); return 0; } @@ -795,7 +693,6 @@ acpi_processor_unregister_performance(struct acpi_processor_performance { struct acpi_processor *pr; - mutex_lock(&performance_mutex); pr = per_cpu(processors, cpu); @@ -808,8 +705,6 @@ acpi_processor_unregister_performance(struct acpi_processor_performance kfree(pr->performance->states); pr->performance = NULL; - acpi_cpufreq_remove_file(pr); - mutex_unlock(&performance_mutex); return; diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 7e3c609cbef..519266654f0 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -90,31 +90,6 @@ void __init acpi_old_suspend_ordering(void) old_suspend_ordering = true; } -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; -/* - * The ACPI specification wants us to save NVS memory regions during hibernation - * and to restore them during the subsequent resume. However, it is not certain - * if this mechanism is going to work on all machines, so we allow the user to - * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line - * option. - */ -static bool s4_no_nvs; - -void __init acpi_s4_no_nvs(void) -{ - s4_no_nvs = true; -} - /** * acpi_pm_disable_gpes - Disable the GPEs. 
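For reference, the retired /proc performance dump above printed one line per P-state in the format its seq_printf() calls show; a user-space rendering of the same layout with invented P-state data:

#include <stdio.h>

int main(void)
{
        struct { int mhz, mw, us; } states[] = {
                { 1833, 31000, 10 },    /* invented P-state data */
                { 1333, 23000, 10 },
                { 1000, 18000, 10 },
        };
        int state_count = 3, active = 1, i;

        printf("state count: %d\nactive state: P%d\n", state_count, active);
        printf("states:\n");
        for (i = 0; i < state_count; i++)
                printf(" %cP%d: %d MHz, %d mW, %d uS\n",
                       i == active ? '*' : ' ', i,
                       states[i].mhz, states[i].mw, states[i].us);
        return 0;
}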
*/ @@ -193,6 +168,18 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { @@ -396,6 +383,20 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { #endif /* CONFIG_SUSPEND */ #ifdef CONFIG_HIBERNATION +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * and to restore them during the subsequent resume. However, it is not certain + * if this mechanism is going to work on all machines, so we allow the user to + * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line + * option. + */ +static bool s4_no_nvs; + +void __init acpi_s4_no_nvs(void) +{ + s4_no_nvs = true; +} + static unsigned long s4_hardware_signature; static struct acpi_table_facs *facs; static bool nosigcheck; @@ -679,7 +680,7 @@ static void acpi_power_off_prepare(void) static void acpi_power_off(void) { /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ - printk("%s called\n", __func__); + printk(KERN_DEBUG "%s called\n", __func__); local_irq_disable(); acpi_enable_wakeup_device(ACPI_STATE_S5); acpi_enter_sleep_state(ACPI_STATE_S5); diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 775c97a282b..a8852952fac 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -293,7 +293,12 @@ static void __init check_multiple_madt(void) int __init acpi_table_init(void) { - acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0); + acpi_status status; + + status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0); + if (ACPI_FAILURE(status)) + return 1; + check_multiple_madt(); return 0; } diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index f261737636d..bb5ed059114 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1020,7 +1020,7 @@ acpi_video_device_brightness_seq_show(struct seq_file *seq, void *offset) } seq_printf(seq, "levels: "); - for (i = 0; i < dev->brightness->count; i++) + for (i = 2; i < dev->brightness->count; i++) seq_printf(seq, " %d", dev->brightness->levels[i]); seq_printf(seq, "\ncurrent: %d\n", dev->brightness->curr); @@ -1059,7 +1059,7 @@ acpi_video_device_write_brightness(struct file *file, return -EFAULT; /* validate through the list of available levels */ - for (i = 0; i < dev->brightness->count; i++) + for (i = 2; i < dev->brightness->count; i++) if (level == dev->brightness->levels[i]) { if (ACPI_SUCCESS (acpi_video_device_lcd_set_level(dev, level))) @@ -1260,7 +1260,7 @@ static int acpi_video_bus_POST_info_seq_show(struct seq_file *seq, void *offset) printk(KERN_WARNING PREFIX "This indicates a BIOS bug. 
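The brightness loops in video.c now start at index 2 because, per the _BCL package layout, the first two entries are the full-power and battery default levels rather than selectable levels. A sketch with an invented _BCL-style array:

#include <stdio.h>

int main(void)
{
        /* invented _BCL package: [ac_default, battery_default, levels...] */
        int levels[] = { 100, 50, 20, 40, 60, 80, 100 };
        int count = sizeof(levels) / sizeof(levels[0]);
        int i;

        printf("levels: ");
        for (i = 2; i < count; i++)     /* skip the two default entries */
                printf(" %d", levels[i]);
        printf("\n");
        return 0;
}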
Please contact the manufacturer.\n"); } - printk("%llx\n", options); + printk(KERN_WARNING "%llx\n", options); seq_printf(seq, "can POST: <integrated video>"); if (options & 2) seq_printf(seq, " <PCI video>"); @@ -1712,7 +1712,7 @@ acpi_video_get_next_level(struct acpi_video_device *device, max = max_below = 0; min = min_above = 255; /* Find closest level to level_current */ - for (i = 0; i < device->brightness->count; i++) { + for (i = 2; i < device->brightness->count; i++) { l = device->brightness->levels[i]; if (abs(l - level_current) < abs(delta)) { delta = l - level_current; @@ -1722,7 +1722,7 @@ acpi_video_get_next_level(struct acpi_video_device *device, } /* Ajust level_current to closest available level */ level_current += delta; - for (i = 0; i < device->brightness->count; i++) { + for (i = 2; i < device->brightness->count; i++) { l = device->brightness->levels[i]; if (l < min) min = l; @@ -2006,6 +2006,12 @@ static int acpi_video_bus_add(struct acpi_device *device) device->pnp.bus_id[3] = '0' + instance; instance ++; } + /* a hack to fix the duplicate name "VGA" problem on Pa 3553 */ + if (!strcmp(device->pnp.bus_id, "VGA")) { + if (instance) + device->pnp.bus_id[3] = '0' + instance; + instance++; + } video->device = device; strcpy(acpi_device_name(device), ACPI_VIDEO_BUS_NAME); diff --git a/drivers/char/sx.c b/drivers/char/sx.c index b60be7b0dec..f146e90404f 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -1713,8 +1713,8 @@ static long sx_fw_ioctl(struct file *filp, unsigned int cmd, for (i = 0; i < SX_NBOARDS; i++) sx_dprintk(SX_DEBUG_FIRMWARE, "<%x> ", boards[i].flags); sx_dprintk(SX_DEBUG_FIRMWARE, "\n"); - unlock_kernel(); - return -EIO; + rc = -EIO; + goto out; } switch (cmd) { @@ -1747,7 +1747,8 @@ static long sx_fw_ioctl(struct file *filp, unsigned int cmd, break; case SXIO_DO_RAMTEST: if (sx_initialized) /* Already initialized: better not ramtest the board. 
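The sx_fw_ioctl() fix here converts early returns, which leaked the big kernel lock, into jumps to a single unlock site. The bare shape of the idiom, with stand-in lock helpers:

#include <stdio.h>

static void lock_kernel_stub(void)   { puts("locked"); }
static void unlock_kernel_stub(void) { puts("unlocked"); }

static long fw_ioctl(int bad_request)
{
        long rc = 0;

        lock_kernel_stub();
        if (bad_request) {
                rc = -5;        /* stands in for -EIO; was an early
                                   return that skipped the unlock */
                goto out;
        }
        /* ... normal command handling would go here ... */
out:
        unlock_kernel_stub();
        return rc;
}

int main(void)
{
        return fw_ioctl(1) == -5 ? 0 : 1;
}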
*/ - return -EPERM; + rc = -EPERM; + break; if (IS_SX_BOARD(board)) { rc = do_memtest(board, 0, 0x7000); if (!rc) @@ -1844,6 +1845,7 @@ static long sx_fw_ioctl(struct file *filp, unsigned int cmd, rc = -ENOTTY; break; } +out: unlock_kernel(); func_exit(); return rc; diff --git a/drivers/firewire/fw-card.c b/drivers/firewire/fw-card.c index 7be2cf3514e..a5dd7a665aa 100644 --- a/drivers/firewire/fw-card.c +++ b/drivers/firewire/fw-card.c @@ -412,6 +412,7 @@ fw_card_add(struct fw_card *card, { u32 *config_rom; size_t length; + int err; card->max_receive = max_receive; card->link_speed = link_speed; @@ -422,7 +423,13 @@ fw_card_add(struct fw_card *card, list_add_tail(&card->link, &card_list); mutex_unlock(&card_mutex); - return card->driver->enable(card, config_rom, length); + err = card->driver->enable(card, config_rom, length); + if (err < 0) { + mutex_lock(&card_mutex); + list_del(&card->link); + mutex_unlock(&card_mutex); + } + return err; } EXPORT_SYMBOL(fw_card_add); diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index 03705240000..abf4dfc8ec2 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -153,7 +153,10 @@ static struct axis_conversion lis3lv02d_axis_y_inverted = {1, -2, 3}; static struct axis_conversion lis3lv02d_axis_x_inverted = {-1, 2, 3}; static struct axis_conversion lis3lv02d_axis_z_inverted = {1, 2, -3}; static struct axis_conversion lis3lv02d_axis_xy_rotated_left = {-2, 1, 3}; +static struct axis_conversion lis3lv02d_axis_xy_rotated_left_usd = {-2, 1, -3}; static struct axis_conversion lis3lv02d_axis_xy_swap_inverted = {-2, -1, 3}; +static struct axis_conversion lis3lv02d_axis_xy_rotated_right = {2, -1, 3}; +static struct axis_conversion lis3lv02d_axis_xy_swap_yz_inverted = {2, -1, -3}; #define AXIS_DMI_MATCH(_ident, _name, _axis) { \ .ident = _ident, \ @@ -172,10 +175,12 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { AXIS_DMI_MATCH("NC2510", "HP Compaq 2510", y_inverted), AXIS_DMI_MATCH("NC8510", "HP Compaq 8510", xy_swap_inverted), AXIS_DMI_MATCH("HP2133", "HP 2133", xy_rotated_left), + AXIS_DMI_MATCH("NC653x", "HP Compaq 653", xy_rotated_left_usd), + AXIS_DMI_MATCH("NC673x", "HP Compaq 673", xy_rotated_left_usd), + AXIS_DMI_MATCH("NC651xx", "HP Compaq 651", xy_rotated_right), + AXIS_DMI_MATCH("NC671xx", "HP Compaq 671", xy_swap_yz_inverted), { NULL, } /* Laptop models without axis info (yet): - * "NC651xx" "HP Compaq 651" - * "NC671xx" "HP Compaq 671" * "NC6910" "HP Compaq 6910" * HP Compaq 8710x Notebook PC / Mobile Workstation * "NC2400" "HP Compaq nc2400" diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c index a329e6bd5d2..3838bc4acab 100644 --- a/drivers/ieee1394/dv1394.c +++ b/drivers/ieee1394/dv1394.c @@ -1823,6 +1823,10 @@ static int dv1394_open(struct inode *inode, struct file *file) #endif + printk(KERN_INFO "%s: NOTE, the dv1394 interface is unsupported " + "and will not be available in the new firewire driver stack. " + "Try libraw1394 based programs instead.\n", current->comm); + return 0; } @@ -2567,10 +2571,6 @@ static int __init dv1394_init_module(void) { int ret; - printk(KERN_WARNING - "NOTE: The dv1394 driver is unsupported and may be removed in a " - "future Linux release. 
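In the lis3lv02d tables just above, each axis_conversion field selects a source axis by 1-based index, and a negative value inverts it. A sketch of applying one triplet to a raw sample (the remap helper is illustrative, not the driver's):

#include <stdio.h>
#include <stdlib.h>

struct axis_conversion { int x, y, z; };

static int remap(const int *raw, int sel)
{
        int v = raw[abs(sel) - 1];      /* 1-based axis index */

        return sel < 0 ? -v : v;        /* negative selector inverts */
}

int main(void)
{
        /* xy rotated left, upside down: {-2, 1, -3} from the table above */
        struct axis_conversion conv = { -2, 1, -3 };
        int raw[3] = { 10, 20, 30 };

        printf("(%d, %d, %d)\n",
               remap(raw, conv.x), remap(raw, conv.y), remap(raw, conv.z));
        return 0;
}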
Use raw1394 instead.\n"); - cdev_init(&dv1394_cdev, &dv1394_fops); dv1394_cdev.owner = THIS_MODULE; ret = cdev_add(&dv1394_cdev, IEEE1394_DV1394_DEV, 16); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 1e3aea9eecf..09658b21847 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -25,13 +25,13 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector) { dev_info_t *hash; linear_conf_t *conf = mddev_to_conf(mddev); + sector_t idx = sector >> conf->sector_shift; /* * sector_div(a,b) returns the remainer and sets a to a/b */ - sector >>= conf->sector_shift; - (void)sector_div(sector, conf->spacing); - hash = conf->hash_table[sector]; + (void)sector_div(idx, conf->spacing); + hash = conf->hash_table[idx]; while (sector >= hash->num_sectors + hash->start_sector) hash++; diff --git a/drivers/md/md.c b/drivers/md/md.c index 41e2509bf89..4495104f6c9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1481,6 +1481,11 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) if (find_rdev_nr(mddev, rdev->desc_nr)) return -EBUSY; } + if (mddev->max_disks && rdev->desc_nr >= mddev->max_disks) { + printk(KERN_WARNING "md: %s: array is limited to %d devices\n", + mdname(mddev), mddev->max_disks); + return -EBUSY; + } bdevname(rdev->bdev,b); while ( (s=strchr(b, '/')) != NULL) *s = '!'; @@ -2441,6 +2446,15 @@ static void analyze_sbs(mddev_t * mddev) i = 0; rdev_for_each(rdev, tmp, mddev) { + if (rdev->desc_nr >= mddev->max_disks || + i > mddev->max_disks) { + printk(KERN_WARNING + "md: %s: %s: only %d devices permitted\n", + mdname(mddev), bdevname(rdev->bdev, b), + mddev->max_disks); + kick_rdev_from_array(rdev); + continue; + } if (rdev != freshest) if (super_types[mddev->major_version]. validate_super(mddev, rdev)) { @@ -4614,13 +4628,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) * noticed in interrupt contexts ... 
*/ - if (rdev->desc_nr == mddev->max_disks) { - printk(KERN_WARNING "%s: can not hot-add to full array!\n", - mdname(mddev)); - err = -EBUSY; - goto abort_unbind_export; - } - rdev->raid_disk = -1; md_update_sb(mddev, 1); @@ -4634,9 +4641,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) md_new_event(mddev); return 0; -abort_unbind_export: - unbind_rdev_from_array(rdev); - abort_export: export_rdev(rdev); return err; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7b4f5f7155d..01e3cffd03b 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1640,7 +1640,8 @@ static void raid1d(mddev_t *mddev) } bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1) { + if ((disk=read_balance(conf, r1_bio)) == -1 || + disk == r1_bio->read_disk) { printk(KERN_ALERT "raid1: %s: unrecoverable I/O" " read error for block %llu\n", bdevname(bio->bi_bdev,b), diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 56073199ceb..c64e6798878 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -217,6 +217,7 @@ config DELL_LAPTOP depends on EXPERIMENTAL depends on BACKLIGHT_CLASS_DEVICE depends on RFKILL + depends on POWER_SUPPLY default n ---help--- This driver adds support for rfkill and backlight control to Dell diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c index bf5e4d06543..558bf3f2c27 100644 --- a/drivers/misc/atmel-ssc.c +++ b/drivers/misc/atmel-ssc.c @@ -35,7 +35,7 @@ struct ssc_device *ssc_request(unsigned int ssc_num) if (!ssc_valid) { spin_unlock(&user_lock); - dev_dbg(&ssc->pdev->dev, "could not find requested device\n"); + pr_err("ssc: ssc%d platform device is missing\n", ssc_num); return ERR_PTR(-ENODEV); } diff --git a/drivers/misc/hpilo.c b/drivers/misc/hpilo.c index 10c421b73ea..f26667a7abf 100644 --- a/drivers/misc/hpilo.c +++ b/drivers/misc/hpilo.c @@ -207,7 +207,7 @@ static void ilo_ccb_close(struct pci_dev *pdev, struct ccb_data *data) &device_ccb->recv_ctrl); /* give iLO some time to process stop request */ - for (retries = 1000; retries > 0; retries--) { + for (retries = MAX_WAIT; retries > 0; retries--) { doorbell_set(driver_ccb); udelay(1); if (!(ioread32(&device_ccb->send_ctrl) & (1 << CTRL_BITPOS_A)) @@ -309,7 +309,7 @@ static int ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) doorbell_clr(driver_ccb); /* make sure iLO is really handling requests */ - for (i = 1000; i > 0; i--) { + for (i = MAX_WAIT; i > 0; i--) { if (ilo_pkt_dequeue(hw, driver_ccb, SENDQ, &pkt_id, NULL, NULL)) break; udelay(1); @@ -326,7 +326,7 @@ static int ilo_ccb_open(struct ilo_hwinfo *hw, struct ccb_data *data, int slot) return 0; free: - pci_free_consistent(pdev, data->dma_size, data->dma_va, data->dma_pa); + ilo_ccb_close(pdev, data); out: return error; } diff --git a/drivers/misc/hpilo.h b/drivers/misc/hpilo.h index a281207696c..b64a20ef07e 100644 --- a/drivers/misc/hpilo.h +++ b/drivers/misc/hpilo.h @@ -19,6 +19,8 @@ #define MAX_ILO_DEV 1 /* max number of files */ #define MAX_OPEN (MAX_CCB * MAX_ILO_DEV) +/* spin counter for open/close delay */ +#define MAX_WAIT 10000 /* * Per device, used to track global memory allocations. diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h index a5bd658c2e8..275b78896a7 100644 --- a/drivers/misc/sgi-xp/xpc.h +++ b/drivers/misc/sgi-xp/xpc.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. 
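The xpc hunks just below turn recv_msg_slots into a plain pointer because each slot is a fixed header plus a variable user payload, so slots must be addressed by byte offset rather than by typed array index. A portable sketch (the kernel version leans on GCC's void-pointer arithmetic):

#include <stdio.h>
#include <stdlib.h>

struct msg_hdr { int slot_number; };    /* stand-in for the notify header */

int main(void)
{
        size_t payload = 24;            /* user payload bytes per slot */
        size_t entry_size = sizeof(struct msg_hdr) + payload;
        int nentries = 4, entry;
        void *slots = calloc(nentries, entry_size);
        struct msg_hdr *h;

        if (!slots)
                return 1;
        for (entry = 0; entry < nentries; entry++) {
                h = (struct msg_hdr *)((char *)slots + entry * entry_size);
                h->slot_number = entry;
        }
        h = (struct msg_hdr *)((char *)slots + 2 * entry_size);
        printf("slot 2 -> %d\n", h->slot_number);
        free(slots);
        return 0;
}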
+ * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -514,7 +514,8 @@ struct xpc_channel_uv { /* partition's notify mq */ struct xpc_send_msg_slot_uv *send_msg_slots; - struct xpc_notify_mq_msg_uv *recv_msg_slots; + void *recv_msg_slots; /* each slot will hold a xpc_notify_mq_msg_uv */ + /* structure plus the user's payload */ struct xpc_fifo_head_uv msg_slot_free_list; struct xpc_fifo_head_uv recv_msg_list; /* deliverable payloads */ diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index f17f7d40ea2..29c0502a96b 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -1010,8 +1010,8 @@ xpc_allocate_recv_msg_slot_uv(struct xpc_channel *ch) continue; for (entry = 0; entry < nentries; entry++) { - msg_slot = ch_uv->recv_msg_slots + entry * - ch->entry_size; + msg_slot = ch_uv->recv_msg_slots + + entry * ch->entry_size; msg_slot->hdr.msg_slot_number = entry; } @@ -1308,9 +1308,8 @@ xpc_handle_notify_mq_msg_uv(struct xpc_partition *part, /* we're dealing with a normal message sent via the notify_mq */ ch_uv = &ch->sn.uv; - msg_slot = (struct xpc_notify_mq_msg_uv *)((u64)ch_uv->recv_msg_slots + - (msg->hdr.msg_slot_number % ch->remote_nentries) * - ch->entry_size); + msg_slot = ch_uv->recv_msg_slots + + (msg->hdr.msg_slot_number % ch->remote_nentries) * ch->entry_size; BUG_ON(msg->hdr.msg_slot_number != msg_slot->hdr.msg_slot_number); BUG_ON(msg_slot->hdr.size != 0); diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 379a1324db4..d31791f6029 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -2276,8 +2276,7 @@ no_mem: } else if ((len = ntohl(r->len_cq)) != 0) { struct sge_fl *fl; - if (eth) - lro = qs->lro_enabled && is_eth_tcp(rss_hi); + lro &= eth && is_eth_tcp(rss_hi); fl = (len & F_RSPD_FLQ) ? 
&qs->fl[1] : &qs->fl[0]; if (fl->use_pages) { diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 3f7eab42aef..acae2d8cd68 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -351,6 +351,9 @@ static int gfar_probe(struct of_device *ofdev, /* Reset MAC layer */ gfar_write(&priv->regs->maccfg1, MACCFG1_SOFT_RESET); + /* We need to delay at least 3 TX clocks */ + udelay(2); + tempval = (MACCFG1_TX_FLOW | MACCFG1_RX_FLOW); gfar_write(&priv->regs->maccfg1, tempval); diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h index b1a83344acc..eaa86897f5c 100644 --- a/drivers/net/gianfar.h +++ b/drivers/net/gianfar.h @@ -312,7 +312,7 @@ extern const char gfar_driver_version[]; #define ATTRELI_EI(x) (x) #define BD_LFLAG(flags) ((flags) << 16) -#define BD_LENGTH_MASK 0x00ff +#define BD_LENGTH_MASK 0x0000ffff /* TxBD status field bits */ #define TXBD_READY 0x8000 diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index b35c8813bef..c01ea48da5f 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -4042,6 +4042,7 @@ static int iwl_pci_suspend(struct pci_dev *pdev, pm_message_t state) priv->is_open = 1; } + pci_save_state(pdev); pci_set_power_state(pdev, PCI_D3hot); return 0; @@ -4052,6 +4053,7 @@ static int iwl_pci_resume(struct pci_dev *pdev) struct iwl_priv *priv = pci_get_drvdata(pdev); pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); if (priv->is_open) iwl_mac_start(priv->hw); diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 412f66bac1a..70a8b21ca39 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -480,6 +480,9 @@ void iwl_clear_stations_table(struct iwl_priv *priv) priv->num_stations = 0; memset(priv->stations, 0, sizeof(priv->stations)); + /* clean ucode key table bit map */ + priv->ucode_key_table = 0; + spin_unlock_irqrestore(&priv->sta_lock, flags); } EXPORT_SYMBOL(iwl_clear_stations_table); diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 95d01984c80..5b44d322b99 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -8143,6 +8143,7 @@ static int iwl3945_pci_suspend(struct pci_dev *pdev, pm_message_t state) priv->is_open = 1; } + pci_save_state(pdev); pci_set_power_state(pdev, PCI_D3hot); return 0; @@ -8153,6 +8154,7 @@ static int iwl3945_pci_resume(struct pci_dev *pdev) struct iwl3945_priv *priv = pci_get_drvdata(pdev); pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); if (priv->is_open) iwl3945_mac_start(priv->hw); diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index e76d715e434..f0e99d4c066 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -161,7 +161,7 @@ struct op_sample { entry->event = ring_buffer_lock_reserve (op_ring_buffer_write, sizeof(struct op_sample) + - size * sizeof(entry->sample->data[0]), &entry->irq_flags); + size * sizeof(entry->sample->data[0])); if (entry->event) entry->sample = ring_buffer_event_data(entry->event); else @@ -178,8 +178,7 @@ struct op_sample int op_cpu_buffer_write_commit(struct op_entry *entry) { - return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, - entry->irq_flags); + return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event); } struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) diff 
--git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index ab1d615425a..93eac142358 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -355,6 +355,8 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) int i = 0; if (drv && drv->suspend) { + pci_power_t prev = pci_dev->current_state; + pci_dev->state_saved = false; i = drv->suspend(pci_dev, state); @@ -365,12 +367,16 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) if (pci_dev->state_saved) goto Fixup; - if (WARN_ON_ONCE(pci_dev->current_state != PCI_D0)) + if (pci_dev->current_state != PCI_D0 + && pci_dev->current_state != PCI_UNKNOWN) { + WARN_ONCE(pci_dev->current_state != prev, + "PCI PM: Device state not saved by %pF\n", + drv->suspend); goto Fixup; + } } pci_save_state(pci_dev); - pci_dev->state_saved = true; /* * This is for compatibility with existing code with legacy PM support. */ @@ -424,35 +430,20 @@ static void pci_pm_default_resume_noirq(struct pci_dev *pci_dev) pci_fixup_device(pci_fixup_resume_early, pci_dev); } -static int pci_pm_default_resume(struct pci_dev *pci_dev) +static void pci_pm_default_resume(struct pci_dev *pci_dev) { pci_fixup_device(pci_fixup_resume, pci_dev); if (!pci_is_bridge(pci_dev)) pci_enable_wake(pci_dev, PCI_D0, false); - - return pci_pm_reenable_device(pci_dev); -} - -static void pci_pm_default_suspend_generic(struct pci_dev *pci_dev) -{ - /* If device is enabled at this point, disable it */ - pci_disable_enabled_device(pci_dev); - /* - * Save state with interrupts enabled, because in principle the bus the - * device is on may be put into a low power state after this code runs. - */ - pci_save_state(pci_dev); } static void pci_pm_default_suspend(struct pci_dev *pci_dev) { - pci_pm_default_suspend_generic(pci_dev); - + /* Disable non-bridge devices without PM support */ if (!pci_is_bridge(pci_dev)) - pci_prepare_to_sleep(pci_dev); - - pci_fixup_device(pci_fixup_suspend, pci_dev); + pci_disable_enabled_device(pci_dev); + pci_save_state(pci_dev); } static bool pci_has_legacy_pm_support(struct pci_dev *pci_dev) @@ -497,21 +488,49 @@ static void pci_pm_complete(struct device *dev) static int pci_pm_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; - int error = 0; + struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); - if (drv && drv->pm && drv->pm->suspend) { - error = drv->pm->suspend(dev); - suspend_report_result(drv->pm->suspend, error); + if (!pm) { + pci_pm_default_suspend(pci_dev); + goto Fixup; } - if (!error) - pci_pm_default_suspend(pci_dev); + pci_dev->state_saved = false; - return error; + if (pm->suspend) { + pci_power_t prev = pci_dev->current_state; + int error; + + error = pm->suspend(dev); + suspend_report_result(pm->suspend, error); + if (error) + return error; + + if (pci_dev->state_saved) + goto Fixup; + + if (pci_dev->current_state != PCI_D0 + && pci_dev->current_state != PCI_UNKNOWN) { + WARN_ONCE(pci_dev->current_state != prev, + "PCI PM: State of device not saved by %pF\n", + pm->suspend); + goto Fixup; + } + } + + if (!pci_dev->state_saved) { + pci_save_state(pci_dev); + if (!pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); + } + + Fixup: + pci_fixup_device(pci_fixup_suspend, pci_dev); + + return 0; } static int pci_pm_suspend_noirq(struct device *dev) @@ -554,7 +573,7 @@ static int pci_pm_resume_noirq(struct device *dev) static int pci_pm_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; + struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -567,12 +586,16 @@ static int pci_pm_resume(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - error = pci_pm_default_resume(pci_dev); + pci_pm_default_resume(pci_dev); - if (!error && drv && drv->pm && drv->pm->resume) - error = drv->pm->resume(dev); + if (pm) { + if (pm->resume) + error = pm->resume(dev); + } else { + pci_pm_reenable_device(pci_dev); + } - return error; + return 0; } #else /* !CONFIG_SUSPEND */ @@ -589,21 +612,31 @@ static int pci_pm_resume(struct device *dev) static int pci_pm_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; - int error = 0; + struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); - if (drv && drv->pm && drv->pm->freeze) { - error = drv->pm->freeze(dev); - suspend_report_result(drv->pm->freeze, error); + if (!pm) { + pci_pm_default_suspend(pci_dev); + return 0; } - if (!error) - pci_pm_default_suspend_generic(pci_dev); + pci_dev->state_saved = false; - return error; + if (pm->freeze) { + int error; + + error = pm->freeze(dev); + suspend_report_result(pm->freeze, error); + if (error) + return error; + } + + if (!pci_dev->state_saved) + pci_save_state(pci_dev); + + return 0; } static int pci_pm_freeze_noirq(struct device *dev) @@ -646,16 +679,18 @@ static int pci_pm_thaw_noirq(struct device *dev) static int pci_pm_thaw(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; + struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - pci_pm_reenable_device(pci_dev); - - if (drv && drv->pm && drv->pm->thaw) - error = drv->pm->thaw(dev); + if (pm) { + if (pm->thaw) + error = pm->thaw(dev); + } else { + pci_pm_reenable_device(pci_dev); + } return error; } @@ -663,22 +698,29 @@ static int pci_pm_thaw(struct device *dev) static int pci_pm_poweroff(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; + struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); - if (!drv || !drv->pm) - return 0; + if (!pm) { + pci_pm_default_suspend(pci_dev); + goto Fixup; + } + + pci_dev->state_saved = false; - if (drv->pm->poweroff) { - error = drv->pm->poweroff(dev); - suspend_report_result(drv->pm->poweroff, error); + if (pm->poweroff) { + error = pm->poweroff(dev); + suspend_report_result(pm->poweroff, error); } - if (!error) - pci_pm_default_suspend(pci_dev); + if (!pci_dev->state_saved && !pci_is_bridge(pci_dev)) + pci_prepare_to_sleep(pci_dev); + + Fixup: + pci_fixup_device(pci_fixup_suspend, pci_dev); return error; } @@ -719,7 +761,7 @@ static int pci_pm_restore_noirq(struct device *dev) static int pci_pm_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct device_driver *drv = dev->driver; + struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -732,10 +774,14 @@ static int pci_pm_restore(struct device *dev) if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_resume(dev); - error = pci_pm_default_resume(pci_dev); + pci_pm_default_resume(pci_dev); - if (!error && drv && drv->pm && drv->pm->restore) - error = drv->pm->restore(dev); + if (pm) { + if (pm->restore) + error = pm->restore(dev); + } else { + pci_pm_reenable_device(pci_dev); + } return error; } diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index db7ec14fa71..dfc4e0ddf24 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -768,8 +768,8 @@ pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr, return -EINVAL; rom = pci_map_rom(pdev, &size); /* size starts out as PCI window size */ - if (!rom) - return 0; + if (!rom || !size) + return -EIO; if (off >= size) count = 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 48807556b47..e3efe6b19ee 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1418,10 +1418,10 @@ int pci_restore_standard_config(struct pci_dev *dev) break; } - dev->current_state = PCI_D0; + pci_update_current_state(dev, PCI_D0); Restore: - return pci_restore_state(dev); + return dev->state_saved ? pci_restore_state(dev) : 0; } /** diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 586b6f75910..b0367f168af 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -718,9 +718,9 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev) /* * All PCIe functions are in one slot, remove one function will remove - * the the whole slot, so just wait + * the whole slot, so just wait until we are the last function left. 
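The pci-driver.c rework above keys each phase off dev->driver->pm and falls back to a PCI-core default when the driver supplies no dev_pm_ops. The dispatch shape, condensed with stand-in types and names:

#include <stdio.h>

struct pm_ops_sketch { int (*suspend)(void); };
struct driver_sketch { struct pm_ops_sketch *pm; };

static int core_default_suspend(void) { puts("core default"); return 0; }
static int drv_suspend(void)          { puts("driver suspend"); return 0; }

static int pm_suspend(struct driver_sketch *drv)
{
        struct pm_ops_sketch *pm = drv ? drv->pm : NULL;

        if (!pm || !pm->suspend)        /* no dev_pm_ops: core handles it */
                return core_default_suspend();
        return pm->suspend();
}

int main(void)
{
        struct pm_ops_sketch ops = { .suspend = drv_suspend };
        struct driver_sketch with = { .pm = &ops }, without = { .pm = NULL };

        pm_suspend(&with);
        pm_suspend(&without);
        return 0;
}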
*/ - if (!list_empty(&parent->subordinate->devices)) + if (!list_is_last(&pdev->bus_list, &parent->subordinate->devices)) goto out; /* All functions are removed, so just disable ASPM for the link */ diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 99a914a027f..f9b874eaeb9 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -55,25 +55,13 @@ static int pcie_portdrv_suspend(struct pci_dev *dev, pm_message_t state) } -static int pcie_portdrv_suspend_late(struct pci_dev *dev, pm_message_t state) -{ - return pci_save_state(dev); -} - -static int pcie_portdrv_resume_early(struct pci_dev *dev) -{ - return pci_restore_state(dev); -} - static int pcie_portdrv_resume(struct pci_dev *dev) { - pcie_portdrv_restore_config(dev); + pci_set_master(dev); return pcie_port_device_resume(dev); } #else #define pcie_portdrv_suspend NULL -#define pcie_portdrv_suspend_late NULL -#define pcie_portdrv_resume_early NULL #define pcie_portdrv_resume NULL #endif @@ -292,8 +280,6 @@ static struct pci_driver pcie_portdriver = { .remove = pcie_portdrv_remove, .suspend = pcie_portdrv_suspend, - .suspend_late = pcie_portdrv_suspend_late, - .resume_early = pcie_portdrv_resume_early, .resume = pcie_portdrv_resume, .err_handler = &pcie_portdrv_err_handler, diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c index 132a78159b6..29cbe47f219 100644 --- a/drivers/pci/rom.c +++ b/drivers/pci/rom.c @@ -63,7 +63,7 @@ void pci_disable_rom(struct pci_dev *pdev) * The PCI window size could be much larger than the * actual image size. */ -size_t pci_get_rom_size(void __iomem *rom, size_t size) +size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size) { void __iomem *image; int last_image; @@ -72,8 +72,10 @@ size_t pci_get_rom_size(void __iomem *rom, size_t size) do { void __iomem *pds; /* Standard PCI ROMs start out with these bytes 55 AA */ - if (readb(image) != 0x55) + if (readb(image) != 0x55) { + dev_err(&pdev->dev, "Invalid ROM contents\n"); break; + } if (readb(image + 1) != 0xAA) break; /* get the PCI data structure and check its signature */ @@ -159,7 +161,7 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size) * size is much larger than the actual size of the ROM. * True size is important if the ROM is going to be copied. */ - *size = pci_get_rom_size(rom, *size); + *size = pci_get_rom_size(pdev, rom, *size); return rom; } diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 1a266d4ab5f..94363115a42 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -42,6 +42,7 @@ config ASUS_LAPTOP depends on LEDS_CLASS depends on NEW_LEDS depends on BACKLIGHT_CLASS_DEVICE + depends on INPUT ---help--- This is the new Linux driver for Asus laptops. It may also support some MEDION, JVC or VICTOR laptops. 
It makes all the extra buttons generate diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index 8fb8b359104..56af6cf385b 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -46,6 +46,7 @@ #include <acpi/acpi_drivers.h> #include <acpi/acpi_bus.h> #include <asm/uaccess.h> +#include <linux/input.h> #define ASUS_LAPTOP_VERSION "0.42" @@ -181,6 +182,8 @@ struct asus_hotk { u8 light_level; //light sensor level u8 light_switch; //light sensor switch value u16 event_count[128]; //count for each event TODO make this better + struct input_dev *inputdev; + u16 *keycode_map; }; /* @@ -250,6 +253,37 @@ ASUS_LED(rled, "record"); ASUS_LED(pled, "phone"); ASUS_LED(gled, "gaming"); +struct key_entry { + char type; + u8 code; + u16 keycode; +}; + +enum { KE_KEY, KE_END }; + +static struct key_entry asus_keymap[] = { + {KE_KEY, 0x30, KEY_VOLUMEUP}, + {KE_KEY, 0x31, KEY_VOLUMEDOWN}, + {KE_KEY, 0x32, KEY_MUTE}, + {KE_KEY, 0x33, KEY_SWITCHVIDEOMODE}, + {KE_KEY, 0x34, KEY_SWITCHVIDEOMODE}, + {KE_KEY, 0x40, KEY_PREVIOUSSONG}, + {KE_KEY, 0x41, KEY_NEXTSONG}, + {KE_KEY, 0x43, KEY_STOP}, + {KE_KEY, 0x45, KEY_PLAYPAUSE}, + {KE_KEY, 0x50, KEY_EMAIL}, + {KE_KEY, 0x51, KEY_WWW}, + {KE_KEY, 0x5C, BTN_EXTRA}, /* Performance */ + {KE_KEY, 0x5D, KEY_WLAN}, + {KE_KEY, 0x61, KEY_SWITCHVIDEOMODE}, + {KE_KEY, 0x6B, BTN_TOUCH}, /* Lock Mouse */ + {KE_KEY, 0x82, KEY_CAMERA}, + {KE_KEY, 0x8A, KEY_TV}, + {KE_KEY, 0x95, KEY_MEDIA}, + {KE_KEY, 0x99, KEY_PHONE}, + {KE_END, 0}, +}; + /* * This function evaluates an ACPI method, given an int as parameter, the * method is searched within the scope of the handle, can be NULL. The output @@ -720,8 +754,68 @@ static ssize_t store_gps(struct device *dev, struct device_attribute *attr, return store_status(buf, count, NULL, GPS_ON); } +/* + * Hotkey functions + */ +static struct key_entry *asus_get_entry_by_scancode(int code) +{ + struct key_entry *key; + + for (key = asus_keymap; key->type != KE_END; key++) + if (code == key->code) + return key; + + return NULL; +} + +static struct key_entry *asus_get_entry_by_keycode(int code) +{ + struct key_entry *key; + + for (key = asus_keymap; key->type != KE_END; key++) + if (code == key->keycode && key->type == KE_KEY) + return key; + + return NULL; +} + +static int asus_getkeycode(struct input_dev *dev, int scancode, int *keycode) +{ + struct key_entry *key = asus_get_entry_by_scancode(scancode); + + if (key && key->type == KE_KEY) { + *keycode = key->keycode; + return 0; + } + + return -EINVAL; +} + +static int asus_setkeycode(struct input_dev *dev, int scancode, int keycode) +{ + struct key_entry *key; + int old_keycode; + + if (keycode < 0 || keycode > KEY_MAX) + return -EINVAL; + + key = asus_get_entry_by_scancode(scancode); + if (key && key->type == KE_KEY) { + old_keycode = key->keycode; + key->keycode = keycode; + set_bit(keycode, dev->keybit); + if (!asus_get_entry_by_keycode(old_keycode)) + clear_bit(old_keycode, dev->keybit); + return 0; + } + + return -EINVAL; +} + static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) { + static struct key_entry *key; + /* TODO Find a better way to handle events count. 
*/ if (!hotk) return; @@ -738,10 +832,24 @@ static void asus_hotk_notify(acpi_handle handle, u32 event, void *data) lcd_blank(FB_BLANK_POWERDOWN); } - acpi_bus_generate_proc_event(hotk->device, event, - hotk->event_count[event % 128]++); - - return; + acpi_bus_generate_netlink_event(hotk->device->pnp.device_class, + dev_name(&hotk->device->dev), event, + hotk->event_count[event % 128]++); + + if (hotk->inputdev) { + key = asus_get_entry_by_scancode(event); + if (!key) + return ; + + switch (key->type) { + case KE_KEY: + input_report_key(hotk->inputdev, key->keycode, 1); + input_sync(hotk->inputdev); + input_report_key(hotk->inputdev, key->keycode, 0); + input_sync(hotk->inputdev); + break; + } + } } #define ASUS_CREATE_DEVICE_ATTR(_name) \ @@ -959,6 +1067,38 @@ static int asus_hotk_get_info(void) return AE_OK; } +static int asus_input_init(void) +{ + const struct key_entry *key; + int result; + + hotk->inputdev = input_allocate_device(); + if (!hotk->inputdev) { + printk(ASUS_INFO "Unable to allocate input device\n"); + return 0; + } + hotk->inputdev->name = "Asus Laptop extra buttons"; + hotk->inputdev->phys = ASUS_HOTK_FILE "/input0"; + hotk->inputdev->id.bustype = BUS_HOST; + hotk->inputdev->getkeycode = asus_getkeycode; + hotk->inputdev->setkeycode = asus_setkeycode; + + for (key = asus_keymap; key->type != KE_END; key++) { + switch (key->type) { + case KE_KEY: + set_bit(EV_KEY, hotk->inputdev->evbit); + set_bit(key->keycode, hotk->inputdev->keybit); + break; + } + } + result = input_register_device(hotk->inputdev); + if (result) { + printk(ASUS_INFO "Unable to register input device\n"); + input_free_device(hotk->inputdev); + } + return result; +} + static int asus_hotk_check(void) { int result = 0; @@ -1044,7 +1184,7 @@ static int asus_hotk_add(struct acpi_device *device) /* GPS is on by default */ write_status(NULL, 1, GPS_ON); - end: +end: if (result) { kfree(hotk->name); kfree(hotk); @@ -1091,10 +1231,17 @@ static void asus_led_exit(void) ASUS_LED_UNREGISTER(gled); } +static void asus_input_exit(void) +{ + if (hotk->inputdev) + input_unregister_device(hotk->inputdev); +} + static void __exit asus_laptop_exit(void) { asus_backlight_exit(); asus_led_exit(); + asus_input_exit(); acpi_bus_unregister_driver(&asus_hotk_driver); sysfs_remove_group(&asuspf_device->dev.kobj, &asuspf_attribute_group); @@ -1216,6 +1363,10 @@ static int __init asus_laptop_init(void) printk(ASUS_INFO "Brightness ignored, must be controlled by " "ACPI video driver\n"); + result = asus_input_init(); + if (result) + goto fail_input; + result = asus_led_init(dev); if (result) goto fail_led; @@ -1242,22 +1393,25 @@ static int __init asus_laptop_init(void) return 0; - fail_sysfs: +fail_sysfs: platform_device_del(asuspf_device); - fail_platform_device2: +fail_platform_device2: platform_device_put(asuspf_device); - fail_platform_device1: +fail_platform_device1: platform_driver_unregister(&asuspf_driver); - fail_platform_driver: +fail_platform_driver: asus_led_exit(); - fail_led: +fail_led: + asus_input_exit(); + +fail_input: asus_backlight_exit(); - fail_backlight: +fail_backlight: return result; } diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c index 1e74988c7b2..d63f26e666a 100644 --- a/drivers/platform/x86/asus_acpi.c +++ b/drivers/platform/x86/asus_acpi.c @@ -143,6 +143,7 @@ struct asus_hotk { S1300N, S5200N*/ A4S, /* Z81sp */ F3Sa, /* (Centrino) */ + R1F, END_MODEL } model; /* Models currently supported */ u16 event_count[128]; /* Count for each event TODO make this better */ @@ 
-420,7 +421,18 @@ static struct model_data model_conf[END_MODEL] = { .display_get = "\\ADVG", .display_set = "SDSP", }, - + { + .name = "R1F", + .mt_bt_switch = "BLED", + .mt_mled = "MLED", + .mt_wled = "WLED", + .mt_lcd_switch = "\\Q10", + .lcd_status = "\\GP06", + .brightness_set = "SPLV", + .brightness_get = "GPLV", + .display_set = "SDSP", + .display_get = "\\INFB" + } }; /* procdir we use */ @@ -1165,6 +1177,8 @@ static int asus_model_match(char *model) return W3V; else if (strncmp(model, "W5A", 3) == 0) return W5A; + else if (strncmp(model, "R1F", 3) == 0) + return R1F; else if (strncmp(model, "A4S", 3) == 0) return A4S; else if (strncmp(model, "F3Sa", 4) == 0) diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 9d93cb971e5..786ed8661cb 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -30,6 +30,7 @@ #include <linux/uaccess.h> #include <linux/input.h> #include <linux/rfkill.h> +#include <linux/pci.h> #define EEEPC_LAPTOP_VERSION "0.1" @@ -161,6 +162,10 @@ static struct key_entry eeepc_keymap[] = { {KE_KEY, 0x13, KEY_MUTE }, {KE_KEY, 0x14, KEY_VOLUMEDOWN }, {KE_KEY, 0x15, KEY_VOLUMEUP }, + {KE_KEY, 0x1a, KEY_COFFEE }, + {KE_KEY, 0x1b, KEY_ZOOM }, + {KE_KEY, 0x1c, KEY_PROG2 }, + {KE_KEY, 0x1d, KEY_PROG3 }, {KE_KEY, 0x30, KEY_SWITCHVIDEOMODE }, {KE_KEY, 0x31, KEY_SWITCHVIDEOMODE }, {KE_KEY, 0x32, KEY_SWITCHVIDEOMODE }, @@ -510,7 +515,43 @@ static int eeepc_hotk_check(void) static void notify_brn(void) { struct backlight_device *bd = eeepc_backlight_device; - bd->props.brightness = read_brightness(bd); + if (bd) + bd->props.brightness = read_brightness(bd); +} + +static void eeepc_rfkill_notify(acpi_handle handle, u32 event, void *data) +{ + struct pci_dev *dev; + struct pci_bus *bus = pci_find_bus(0, 1); + + if (event != ACPI_NOTIFY_BUS_CHECK) + return; + + if (!bus) { + printk(EEEPC_WARNING "Unable to find PCI bus 1?\n"); + return; + } + + if (get_acpi(CM_ASL_WLAN) == 1) { + dev = pci_get_slot(bus, 0); + if (dev) { + /* Device already present */ + pci_dev_put(dev); + return; + } + dev = pci_scan_single_device(bus, 0); + if (dev) { + pci_bus_assign_resources(bus); + if (pci_bus_add_device(dev)) + printk(EEEPC_ERR "Unable to hotplug wifi\n"); + } + } else { + dev = pci_get_slot(bus, 0); + if (dev) { + pci_remove_bus_device(dev); + pci_dev_put(dev); + } + } } static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) @@ -520,8 +561,9 @@ static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) return; if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX) notify_brn(); - acpi_bus_generate_proc_event(ehotk->device, event, - ehotk->event_count[event % 128]++); + acpi_bus_generate_netlink_event(ehotk->device->pnp.device_class, + dev_name(&ehotk->device->dev), event, + ehotk->event_count[event % 128]++); if (ehotk->inputdev) { key = eepc_get_entry_by_scancode(event); if (key) { @@ -539,6 +581,45 @@ static void eeepc_hotk_notify(acpi_handle handle, u32 event, void *data) } } +static int eeepc_register_rfkill_notifier(char *node) +{ + acpi_status status = AE_OK; + acpi_handle handle; + + status = acpi_get_handle(NULL, node, &handle); + + if (ACPI_SUCCESS(status)) { + status = acpi_install_notify_handler(handle, + ACPI_SYSTEM_NOTIFY, + eeepc_rfkill_notify, + NULL); + if (ACPI_FAILURE(status)) + printk(EEEPC_WARNING + "Failed to register notify on %s\n", node); + } else + return -ENODEV; + + return 0; +} + +static void eeepc_unregister_rfkill_notifier(char *node) +{ + acpi_status 
status = AE_OK; + acpi_handle handle; + + status = acpi_get_handle(NULL, node, &handle); + + if (ACPI_SUCCESS(status)) { + status = acpi_remove_notify_handler(handle, + ACPI_SYSTEM_NOTIFY, + eeepc_rfkill_notify); + if (ACPI_FAILURE(status)) + printk(EEEPC_ERR + "Error removing rfkill notify handler %s\n", + node); + } +} + static int eeepc_hotk_add(struct acpi_device *device) { acpi_status status = AE_OK; @@ -558,7 +639,7 @@ static int eeepc_hotk_add(struct acpi_device *device) ehotk->device = device; result = eeepc_hotk_check(); if (result) - goto end; + goto ehotk_fail; status = acpi_install_notify_handler(ehotk->handle, ACPI_SYSTEM_NOTIFY, eeepc_hotk_notify, ehotk); if (ACPI_FAILURE(status)) @@ -569,18 +650,25 @@ static int eeepc_hotk_add(struct acpi_device *device) RFKILL_TYPE_WLAN); if (!ehotk->eeepc_wlan_rfkill) - goto end; + goto wlan_fail; ehotk->eeepc_wlan_rfkill->name = "eeepc-wlan"; ehotk->eeepc_wlan_rfkill->toggle_radio = eeepc_wlan_rfkill_set; ehotk->eeepc_wlan_rfkill->get_state = eeepc_wlan_rfkill_state; - if (get_acpi(CM_ASL_WLAN) == 1) + if (get_acpi(CM_ASL_WLAN) == 1) { ehotk->eeepc_wlan_rfkill->state = RFKILL_STATE_UNBLOCKED; - else + rfkill_set_default(RFKILL_TYPE_WLAN, + RFKILL_STATE_UNBLOCKED); + } else { ehotk->eeepc_wlan_rfkill->state = RFKILL_STATE_SOFT_BLOCKED; - rfkill_register(ehotk->eeepc_wlan_rfkill); + rfkill_set_default(RFKILL_TYPE_WLAN, + RFKILL_STATE_SOFT_BLOCKED); + } + result = rfkill_register(ehotk->eeepc_wlan_rfkill); + if (result) + goto wlan_fail; } if (get_acpi(CM_ASL_BLUETOOTH) != -1) { @@ -588,27 +676,47 @@ static int eeepc_hotk_add(struct acpi_device *device) rfkill_allocate(&device->dev, RFKILL_TYPE_BLUETOOTH); if (!ehotk->eeepc_bluetooth_rfkill) - goto end; + goto bluetooth_fail; ehotk->eeepc_bluetooth_rfkill->name = "eeepc-bluetooth"; ehotk->eeepc_bluetooth_rfkill->toggle_radio = eeepc_bluetooth_rfkill_set; ehotk->eeepc_bluetooth_rfkill->get_state = eeepc_bluetooth_rfkill_state; - if (get_acpi(CM_ASL_BLUETOOTH) == 1) + if (get_acpi(CM_ASL_BLUETOOTH) == 1) { ehotk->eeepc_bluetooth_rfkill->state = RFKILL_STATE_UNBLOCKED; - else + rfkill_set_default(RFKILL_TYPE_BLUETOOTH, + RFKILL_STATE_UNBLOCKED); + } else { ehotk->eeepc_bluetooth_rfkill->state = RFKILL_STATE_SOFT_BLOCKED; - rfkill_register(ehotk->eeepc_bluetooth_rfkill); - } + rfkill_set_default(RFKILL_TYPE_BLUETOOTH, + RFKILL_STATE_SOFT_BLOCKED); + } - end: - if (result) { - kfree(ehotk); - ehotk = NULL; + result = rfkill_register(ehotk->eeepc_bluetooth_rfkill); + if (result) + goto bluetooth_fail; } + + eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P6"); + eeepc_register_rfkill_notifier("\\_SB.PCI0.P0P7"); + + return 0; + + bluetooth_fail: + if (ehotk->eeepc_bluetooth_rfkill) + rfkill_free(ehotk->eeepc_bluetooth_rfkill); + rfkill_unregister(ehotk->eeepc_wlan_rfkill); + ehotk->eeepc_wlan_rfkill = NULL; + wlan_fail: + if (ehotk->eeepc_wlan_rfkill) + rfkill_free(ehotk->eeepc_wlan_rfkill); + ehotk_fail: + kfree(ehotk); + ehotk = NULL; + return result; } @@ -622,6 +730,10 @@ static int eeepc_hotk_remove(struct acpi_device *device, int type) eeepc_hotk_notify); if (ACPI_FAILURE(status)) printk(EEEPC_ERR "Error removing notify handler\n"); + + eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P6"); + eeepc_unregister_rfkill_notifier("\\_SB.PCI0.P0P7"); + kfree(ehotk); return 0; } @@ -737,13 +849,21 @@ static void eeepc_backlight_exit(void) { if (eeepc_backlight_device) backlight_device_unregister(eeepc_backlight_device); - if (ehotk->inputdev) - input_unregister_device(ehotk->inputdev); + 
eeepc_backlight_device = NULL; +} + +static void eeepc_rfkill_exit(void) +{ if (ehotk->eeepc_wlan_rfkill) rfkill_unregister(ehotk->eeepc_wlan_rfkill); if (ehotk->eeepc_bluetooth_rfkill) rfkill_unregister(ehotk->eeepc_bluetooth_rfkill); - eeepc_backlight_device = NULL; +} + +static void eeepc_input_exit(void) +{ + if (ehotk->inputdev) + input_unregister_device(ehotk->inputdev); } static void eeepc_hwmon_exit(void) @@ -762,6 +882,8 @@ static void eeepc_hwmon_exit(void) static void __exit eeepc_laptop_exit(void) { eeepc_backlight_exit(); + eeepc_rfkill_exit(); + eeepc_input_exit(); eeepc_hwmon_exit(); acpi_bus_unregister_driver(&eeepc_hotk_driver); sysfs_remove_group(&platform_device->dev.kobj, @@ -865,6 +987,8 @@ fail_platform_driver: fail_hwmon: eeepc_backlight_exit(); fail_backlight: + eeepc_input_exit(); + eeepc_rfkill_exit(); return result; } diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index de91ddab0a8..f41135f2fb2 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -463,9 +463,11 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) return 0; register_wwan_err: - rfkill_unregister(bluetooth_rfkill); + if (bluetooth_rfkill) + rfkill_unregister(bluetooth_rfkill); register_bluetooth_error: - rfkill_unregister(wifi_rfkill); + if (wifi_rfkill) + rfkill_unregister(wifi_rfkill); add_sysfs_error: cleanup_sysfs(device); return err; diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index f30db367c82..c47a44dcb70 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -507,7 +507,7 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) hkey_num = result & 0xf; - if (hkey_num < 0 || hkey_num > ARRAY_SIZE(pcc->keymap)) { + if (hkey_num < 0 || hkey_num >= ARRAY_SIZE(pcc->keymap)) { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "hotkey number out of range: %d\n", hkey_num)); diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index cced4d10831..81450fbd8b1 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -241,6 +241,12 @@ config RTC_DRV_M41T80_WDT If you say Y here you will get support for the watchdog timer in the ST M41T60 and M41T80 RTC chips series. +config RTC_DRV_DM355EVM + tristate "TI DaVinci DM355 EVM RTC" + depends on MFD_DM355EVM_MSP + help + Supports the RTC firmware in the MSP430 on the DM355 EVM. 
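The panasonic-laptop hunk above is a classic off-by-one: valid indices into pcc->keymap run from 0 to ARRAY_SIZE(pcc->keymap) - 1, so rejecting only hkey_num > ARRAY_SIZE(pcc->keymap) still lets hkey_num == ARRAY_SIZE(pcc->keymap) read one element past the end of the array. A standalone sketch of the corrected bounds check:

	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	static const int keymap[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	static int keymap_lookup(unsigned int i)
	{
		/* '>=' is required: i == ARRAY_SIZE(keymap) is already invalid */
		if (i >= ARRAY_SIZE(keymap))
			return -1;
		return keymap[i];
	}

	int main(void)
	{
		printf("%d\n", keymap_lookup(8));	/* prints -1, not garbage */
		return 0;
	}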
+ config RTC_DRV_TWL92330 boolean "TI TWL92330/Menelaus" depends on MENELAUS diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 6e28021abb9..0e697aa51ca 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_RTC_DRV_AT91SAM9) += rtc-at91sam9.o obj-$(CONFIG_RTC_DRV_AU1XXX) += rtc-au1xxx.o obj-$(CONFIG_RTC_DRV_BFIN) += rtc-bfin.o obj-$(CONFIG_RTC_DRV_CMOS) += rtc-cmos.o +obj-$(CONFIG_RTC_DRV_DM355EVM) += rtc-dm355evm.o obj-$(CONFIG_RTC_DRV_DS1216) += rtc-ds1216.o obj-$(CONFIG_RTC_DRV_DS1286) += rtc-ds1286.o obj-$(CONFIG_RTC_DRV_DS1302) += rtc-ds1302.o diff --git a/drivers/rtc/rtc-dm355evm.c b/drivers/rtc/rtc-dm355evm.c new file mode 100644 index 00000000000..58d4e18530d --- /dev/null +++ b/drivers/rtc/rtc-dm355evm.c @@ -0,0 +1,175 @@ +/* + * rtc-dm355evm.c - access battery-backed counter in MSP430 firmware + * + * Copyright (c) 2008 by David Brownell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/rtc.h> +#include <linux/platform_device.h> + +#include <linux/i2c/dm355evm_msp.h> + + +/* + * The MSP430 firmware on the DM355 EVM uses a watch crystal to feed + * a 1 Hz counter. When a backup battery is supplied, that makes a + * reasonable RTC for applications where alarms and non-NTP drift + * compensation aren't important. + * + * The only real glitch is the inability to read or write all four + * counter bytes atomically: the count may increment in the middle + * of an operation, causing trouble when the LSB rolls over. + * + * This driver was tested with firmware revision A4. + */ +union evm_time { + u8 bytes[4]; + u32 value; +}; + +static int dm355evm_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + union evm_time time; + int status; + int tries = 0; + + do { + /* + * Read LSB(0) to MSB(3) bytes. Defend against the counter + * rolling over by re-reading until the value is stable, + * and assuming the four reads take at most a few seconds. + */ + status = dm355evm_msp_read(DM355EVM_MSP_RTC_0); + if (status < 0) + return status; + if (tries && time.bytes[0] == status) + break; + time.bytes[0] = status; + + status = dm355evm_msp_read(DM355EVM_MSP_RTC_1); + if (status < 0) + return status; + if (tries && time.bytes[1] == status) + break; + time.bytes[1] = status; + + status = dm355evm_msp_read(DM355EVM_MSP_RTC_2); + if (status < 0) + return status; + if (tries && time.bytes[2] == status) + break; + time.bytes[2] = status; + + status = dm355evm_msp_read(DM355EVM_MSP_RTC_3); + if (status < 0) + return status; + if (tries && time.bytes[3] == status) + break; + time.bytes[3] = status; + + } while (++tries < 5); + + dev_dbg(dev, "read timestamp %08x\n", time.value); + + rtc_time_to_tm(le32_to_cpu(time.value), tm); + return 0; +} + +static int dm355evm_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + union evm_time time; + unsigned long value; + int status; + + rtc_tm_to_time(tm, &value); + time.value = cpu_to_le32(value); + + dev_dbg(dev, "write timestamp %08x\n", time.value); + + /* + * REVISIT handle non-atomic writes ... maybe just retry until + * byte[1] sticks (no rollover)? 
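The read path in rtc-dm355evm.c above is the interesting part of the driver: the four counter bytes cannot be read atomically, so it re-reads until the value stops moving. A distilled, slightly stricter sketch of the same idea (it demands that a whole pass repeat byte-for-byte rather than breaking on the first matching byte; read_byte() is a hypothetical accessor standing in for dm355evm_msp_read()):

	/* read_byte(i) returns byte i of the counter (LSB first) or -errno */
	static int read_counter_stable(int (*read_byte)(int), unsigned char out[4])
	{
		unsigned char prev[4] = { 0, 0, 0, 0 };
		int tries, i;

		for (tries = 0; tries < 5; tries++) {
			int stable = (tries > 0);

			for (i = 0; i < 4; i++) {
				int v = read_byte(i);

				if (v < 0)
					return v;	/* propagate I/O errors */
				if (v != prev[i])
					stable = 0;	/* counter moved, retry */
				prev[i] = v;
			}
			if (stable)
				break;	/* two consecutive passes agreed */
		}
		for (i = 0; i < 4; i++)
			out[i] = prev[i];	/* best value after <= 5 passes */
		return 0;
	}

Like the driver, the sketch gives up after five passes and returns whatever the last pass saw.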
+ */ + status = dm355evm_msp_write(time.bytes[0], DM355EVM_MSP_RTC_0); + if (status < 0) + return status; + + status = dm355evm_msp_write(time.bytes[1], DM355EVM_MSP_RTC_1); + if (status < 0) + return status; + + status = dm355evm_msp_write(time.bytes[2], DM355EVM_MSP_RTC_2); + if (status < 0) + return status; + + status = dm355evm_msp_write(time.bytes[3], DM355EVM_MSP_RTC_3); + if (status < 0) + return status; + + return 0; +} + +static struct rtc_class_ops dm355evm_rtc_ops = { + .read_time = dm355evm_rtc_read_time, + .set_time = dm355evm_rtc_set_time, +}; + +/*----------------------------------------------------------------------*/ + +static int __devinit dm355evm_rtc_probe(struct platform_device *pdev) +{ + struct rtc_device *rtc; + + rtc = rtc_device_register(pdev->name, + &pdev->dev, &dm355evm_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc)) { + dev_err(&pdev->dev, "can't register RTC device, err %ld\n", + PTR_ERR(rtc)); + return PTR_ERR(rtc); + } + platform_set_drvdata(pdev, rtc); + + return 0; +} + +static int __devexit dm355evm_rtc_remove(struct platform_device *pdev) +{ + struct rtc_device *rtc = platform_get_drvdata(pdev); + + rtc_device_unregister(rtc); + platform_set_drvdata(pdev, NULL); + return 0; +} + +/* + * I2C is used to talk to the MSP430, but this platform device is + * exposed by an MFD driver that manages I2C communications. + */ +static struct platform_driver rtc_dm355evm_driver = { + .probe = dm355evm_rtc_probe, + .remove = __devexit_p(dm355evm_rtc_remove), + .driver = { + .owner = THIS_MODULE, + .name = "rtc-dm355evm", + }, +}; + +static int __init dm355evm_rtc_init(void) +{ + return platform_driver_register(&rtc_dm355evm_driver); +} +module_init(dm355evm_rtc_init); + +static void __exit dm355evm_rtc_exit(void) +{ + platform_driver_unregister(&rtc_dm355evm_driver); +} +module_exit(dm355evm_rtc_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/rtc/rtc-ds1390.c b/drivers/rtc/rtc-ds1390.c index e54b5c619bd..e01b955db07 100644 --- a/drivers/rtc/rtc-ds1390.c +++ b/drivers/rtc/rtc-ds1390.c @@ -122,7 +122,6 @@ static const struct rtc_class_ops ds1390_rtc_ops = { static int __devinit ds1390_probe(struct spi_device *spi) { - struct rtc_device *rtc; unsigned char tmp; struct ds1390 *chip; int res; diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index db16112cf19..e6e299feb51 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -1475,7 +1475,7 @@ static int aty128fb_set_par(struct fb_info *info) aty128_set_pll(&par->pll, par); aty128_set_fifo(&par->fifo_reg, par); - config = aty_ld_le32(CONFIG_CNTL) & ~3; + config = aty_ld_le32(CNFG_CNTL) & ~3; #if defined(__BIG_ENDIAN) if (par->crtc.bpp == 32) @@ -1484,7 +1484,7 @@ static int aty128fb_set_par(struct fb_info *info) config |= 1; /* make aperture do 16 bit swapping */ #endif - aty_st_le32(CONFIG_CNTL, config); + aty_st_le32(CNFG_CNTL, config); aty_st_8(CRTC_EXT_CNTL + 1, 0); /* turn the video back on */ info->fix.line_length = (par->crtc.vxres * par->crtc.bpp) >> 3; @@ -1875,7 +1875,7 @@ static int __devinit aty128_init(struct pci_dev *pdev, const struct pci_device_i u32 dac; /* Get the chip revision */ - chip_rev = (aty_ld_le32(CONFIG_CNTL) >> 16) & 0x1F; + chip_rev = (aty_ld_le32(CNFG_CNTL) >> 16) & 0x1F; strcpy(video_card, "Rage128 XX "); video_card[8] = ent->device >> 8; @@ -2057,7 +2057,7 @@ static int __devinit aty128_probe(struct pci_dev *pdev, const struct pci_device_ /* Grab memory size from the card */ // How does this relate to the resource length from the PCI 
hardware? - par->vram_size = aty_ld_le32(CONFIG_MEMSIZE) & 0x03FFFFFF; + par->vram_size = aty_ld_le32(CNFG_MEMSIZE) & 0x03FFFFFF; /* Virtualize the framebuffer */ info->screen_base = ioremap(fb_addr, par->vram_size); @@ -2374,6 +2374,8 @@ static void aty128_set_suspend(struct aty128fb_par *par, int suspend) /* Set the chip into the appropriate suspend mode (we use D2, * D3 would require a complete re-initialisation of the chip, * including PCI config registers, clocks, AGP configuration, ...) + * + * For resume, the core will have already brought us back to D0 */ if (suspend) { /* Make sure CRTC2 is reset. Remove that the day we decide to @@ -2391,17 +2393,9 @@ static void aty128_set_suspend(struct aty128fb_par *par, int suspend) aty_st_le32(BUS_CNTL1, 0x00000010); aty_st_le32(MEM_POWER_MISC, 0x0c830000); mdelay(100); - pci_read_config_word(pdev, par->pm_reg+PCI_PM_CTRL, &pwr_command); + /* Switch PCI power management to D2 */ - pci_write_config_word(pdev, par->pm_reg+PCI_PM_CTRL, - (pwr_command & ~PCI_PM_CTRL_STATE_MASK) | 2); - pci_read_config_word(pdev, par->pm_reg+PCI_PM_CTRL, &pwr_command); - } else { - /* Switch back PCI power management to D0 */ - mdelay(100); - pci_write_config_word(pdev, par->pm_reg+PCI_PM_CTRL, 0); - pci_read_config_word(pdev, par->pm_reg+PCI_PM_CTRL, &pwr_command); - mdelay(100); + pci_set_power_state(pdev, PCI_D2); } } @@ -2410,6 +2404,12 @@ static int aty128_pci_suspend(struct pci_dev *pdev, pm_message_t state) struct fb_info *info = pci_get_drvdata(pdev); struct aty128fb_par *par = info->par; + /* Because we may change PCI D state ourselves, we need to + * first save the config space content so the core can + * restore it properly on resume. + */ + pci_save_state(pdev); + /* We don't do anything but D2, for now we return 0, but * we may want to change that. How do we know if the BIOS * can properly take care of D3 ? 
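Both framebuffer drivers in this series move from hand-written PCI_PM_CTRL register pokes to the PCI core's power-management API. A minimal sketch of the suspend-side pattern the aty128fb hunks converge on (the names and the choice of D2 mirror the driver; the body is illustrative, not the actual code):

	#include <linux/pci.h>

	static int example_fb_suspend(struct pci_dev *pdev, pm_message_t mesg)
	{
		/*
		 * Save config space first: the driver changes the D-state
		 * itself below, and the core restores this snapshot on resume.
		 */
		pci_save_state(pdev);

		/* ... blank the display and quiesce the engine here ... */

		/* let the PCI layer drive PCI_PM_CTRL and the required delays */
		pci_set_power_state(pdev, PCI_D2);
		return 0;
	}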
Also, with swsusp, we @@ -2476,6 +2476,11 @@ static int aty128_do_resume(struct pci_dev *pdev) if (pdev->dev.power.power_state.event == PM_EVENT_ON) return 0; + /* PCI state will have been restored by the core, so + * we should be in D0 now with our config space fully + * restored + */ + /* Wakeup chip */ aty128_set_suspend(par, 0); par->asleep = 0; diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index cc6b470073d..1207c208a30 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -135,7 +135,7 @@ #if defined(CONFIG_PM) || defined(CONFIG_PMAC_BACKLIGHT) || \ defined (CONFIG_FB_ATY_GENERIC_LCD) || defined(CONFIG_FB_ATY_BACKLIGHT) static const u32 lt_lcd_regs[] = { - CONFIG_PANEL_LG, + CNFG_PANEL_LG, LCD_GEN_CNTL_LG, DSTN_CONTROL_LG, HFB_PITCH_ADDR_LG, @@ -446,7 +446,7 @@ static int __devinit correct_chipset(struct atyfb_par *par) par->pll_limits.ecp_max = aty_chips[i].ecp_max; par->features = aty_chips[i].features; - chip_id = aty_ld_le32(CONFIG_CHIP_ID, par); + chip_id = aty_ld_le32(CNFG_CHIP_ID, par); type = chip_id & CFG_CHIP_TYPE; rev = (chip_id & CFG_CHIP_REV) >> 24; @@ -629,7 +629,7 @@ static void aty_get_crtc(const struct atyfb_par *par, struct crtc *crtc) crtc->lcd_index = aty_ld_le32(LCD_INDEX, par); aty_st_le32(LCD_INDEX, crtc->lcd_index, par); } - crtc->lcd_config_panel = aty_ld_lcd(CONFIG_PANEL, par); + crtc->lcd_config_panel = aty_ld_lcd(CNFG_PANEL, par); crtc->lcd_gen_cntl = aty_ld_lcd(LCD_GEN_CNTL, par); @@ -676,7 +676,7 @@ static void aty_set_crtc(const struct atyfb_par *par, const struct crtc *crtc) aty_st_le32(CRTC_GEN_CNTL, crtc->gen_cntl & ~(CRTC_EXT_DISP_EN | CRTC_EN), par); /* update non-shadow registers first */ - aty_st_lcd(CONFIG_PANEL, crtc->lcd_config_panel, par); + aty_st_lcd(CNFG_PANEL, crtc->lcd_config_panel, par); aty_st_lcd(LCD_GEN_CNTL, crtc->lcd_gen_cntl & ~(CRTC_RW_SELECT | SHADOW_EN | SHADOW_RW_EN), par); @@ -858,7 +858,7 @@ static int aty_var_to_crtc(const struct fb_info *info, if (!M64_HAS(MOBIL_BUS)) crtc->lcd_index |= CRTC2_DISPLAY_DIS; - crtc->lcd_config_panel = aty_ld_lcd(CONFIG_PANEL, par) | 0x4000; + crtc->lcd_config_panel = aty_ld_lcd(CNFG_PANEL, par) | 0x4000; crtc->lcd_gen_cntl = aty_ld_lcd(LCD_GEN_CNTL, par) & ~CRTC_RW_SELECT; crtc->lcd_gen_cntl &= @@ -1978,7 +1978,7 @@ static int aty_power_mgmt(int sleep, struct atyfb_par *par) return timeout ? 0 : -EIO; } -#endif +#endif /* CONFIG_PPC_PMAC */ static int atyfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) { @@ -2002,9 +2002,15 @@ static int atyfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) par->asleep = 1; par->lock_blank = 1; + /* Because we may change PCI D state ourselves, we need to + * first save the config space content so the core can + * restore it properly on resume. 
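The resume side becomes correspondingly simpler: by the time the driver's resume hook runs, the PCI core has already restored config space and brought the device back to D0, which is why the explicit switch back to D0 disappears from these hunks. Roughly, under the same assumptions as the suspend sketch above:

	#include <linux/pci.h>

	static int example_fb_resume(struct pci_dev *pdev)
	{
		/* config space and D0 were restored by the core before this */
		if (pdev->dev.power.power_state.event == PM_EVENT_ON)
			return 0;	/* nothing was suspended */

		/* ... re-initialise the chip and unblank the display ... */
		return 0;
	}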
+ */ + pci_save_state(pdev); + #ifdef CONFIG_PPC_PMAC /* Set chip to "suspend" mode */ - if (aty_power_mgmt(1, par)) { + if (machine_is(powermac) && aty_power_mgmt(1, par)) { par->asleep = 0; par->lock_blank = 0; atyfb_blank(FB_BLANK_UNBLANK, info); @@ -2047,11 +2053,15 @@ static int atyfb_pci_resume(struct pci_dev *pdev) acquire_console_sem(); + /* PCI state will have been restored by the core, so + * we should be in D0 now with our config space fully + * restored + */ + #ifdef CONFIG_PPC_PMAC - if (pdev->dev.power.power_state.event == 2) + if (machine_is(powermac) && + pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) aty_power_mgmt(0, par); -#else - pci_set_power_state(pdev, PCI_D0); #endif aty_resume_chip(info); @@ -2254,7 +2264,7 @@ static int __devinit aty_init(struct fb_info *info) if (!M64_HAS(INTEGRATED)) { u32 stat0; u8 dac_type, dac_subtype, clk_type; - stat0 = aty_ld_le32(CONFIG_STAT0, par); + stat0 = aty_ld_le32(CNFG_STAT0, par); par->bus_type = (stat0 >> 0) & 0x07; par->ram_type = (stat0 >> 3) & 0x07; ramname = aty_gx_ram[par->ram_type]; @@ -2324,7 +2334,7 @@ static int __devinit aty_init(struct fb_info *info) par->dac_ops = &aty_dac_ct; par->pll_ops = &aty_pll_ct; par->bus_type = PCI; - par->ram_type = (aty_ld_le32(CONFIG_STAT0, par) & 0x07); + par->ram_type = (aty_ld_le32(CNFG_STAT0, par) & 0x07); ramname = aty_ct_ram[par->ram_type]; /* for many chips, the mclk is 67 MHz for SDRAM, 63 MHz otherwise */ if (par->pll_limits.mclk == 67 && par->ram_type < SDRAM) @@ -2433,7 +2443,7 @@ static int __devinit aty_init(struct fb_info *info) } if (M64_HAS(MAGIC_VRAM_SIZE)) { - if (aty_ld_le32(CONFIG_STAT1, par) & 0x40000000) + if (aty_ld_le32(CNFG_STAT1, par) & 0x40000000) info->fix.smem_len += 0x400000; } @@ -2946,7 +2956,7 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev, * Fix PROMs idea of MEM_CNTL settings... */ mem = aty_ld_le32(MEM_CNTL, par); - chip_id = aty_ld_le32(CONFIG_CHIP_ID, par); + chip_id = aty_ld_le32(CNFG_CHIP_ID, par); if (((chip_id & CFG_CHIP_TYPE) == VT_CHIP_ID) && !((chip_id >> 24) & 1)) { switch (mem & 0x0f) { case 3: @@ -2964,7 +2974,7 @@ static int __devinit atyfb_setup_sparc(struct pci_dev *pdev, default: break; } - if ((aty_ld_le32(CONFIG_STAT0, par) & 7) >= SDRAM) + if ((aty_ld_le32(CNFG_STAT0, par) & 7) >= SDRAM) mem &= ~(0x00700000); } mem &= ~(0xcf80e000); /* Turn off all undocumented bits. 
*/ @@ -3572,7 +3582,7 @@ static int __init atyfb_atari_probe(void) } /* Fake pci_id for correct_chipset() */ - switch (aty_ld_le32(CONFIG_CHIP_ID, par) & CFG_CHIP_TYPE) { + switch (aty_ld_le32(CNFG_CHIP_ID, par) & CFG_CHIP_TYPE) { case 0x00d7: par->pci_id = PCI_CHIP_MACH64GX; break; diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index d0f1a7fc2c9..16bb7e3c031 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -1936,8 +1936,8 @@ static void fixup_memory_mappings(struct radeonfb_info *rinfo) OUTREG(CRTC_GEN_CNTL, save_crtc_gen_cntl | CRTC_DISP_REQ_EN_B); mdelay(100); - aper_base = INREG(CONFIG_APER_0_BASE); - aper_size = INREG(CONFIG_APER_SIZE); + aper_base = INREG(CNFG_APER_0_BASE); + aper_size = INREG(CNFG_APER_SIZE); #ifdef SET_MC_FB_FROM_APERTURE /* Set framebuffer to be at the same address as set in PCI BAR */ @@ -2024,11 +2024,11 @@ static void radeon_identify_vram(struct radeonfb_info *rinfo) ~CRTC_H_CUTOFF_ACTIVE_EN); } } else { - tmp = INREG(CONFIG_MEMSIZE); + tmp = INREG(CNFG_MEMSIZE); } /* mem size is bits [28:0], mask off the rest */ - rinfo->video_ram = tmp & CONFIG_MEMSIZE_MASK; + rinfo->video_ram = tmp & CNFG_MEMSIZE_MASK; /* * Hack to get around some busted production M6's @@ -2228,7 +2228,7 @@ static int __devinit radeonfb_pci_register (struct pci_dev *pdev, */ rinfo->errata = 0; if (rinfo->family == CHIP_FAMILY_R300 && - (INREG(CONFIG_CNTL) & CFG_ATI_REV_ID_MASK) + (INREG(CNFG_CNTL) & CFG_ATI_REV_ID_MASK) == CFG_ATI_REV_A11) rinfo->errata |= CHIP_ERRATA_R300_CG; diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index 675abdafc2d..ca5f0dc2854 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -333,7 +333,7 @@ static void radeon_pm_enable_dynamic_mode(struct radeonfb_info *rinfo) if (!rinfo->has_CRTC2) { tmp = INPLL(pllSCLK_CNTL); - if ((INREG(CONFIG_CNTL) & CFG_ATI_REV_ID_MASK) > CFG_ATI_REV_A13) + if ((INREG(CNFG_CNTL) & CFG_ATI_REV_ID_MASK) > CFG_ATI_REV_A13) tmp &= ~(SCLK_CNTL__FORCE_CP | SCLK_CNTL__FORCE_RB); tmp &= ~(SCLK_CNTL__FORCE_HDP | SCLK_CNTL__FORCE_DISP1 | SCLK_CNTL__FORCE_TOP | SCLK_CNTL__FORCE_SE | @@ -468,9 +468,9 @@ static void radeon_pm_enable_dynamic_mode(struct radeonfb_info *rinfo) /*RAGE_6::A11 A12 A12N1 A13, RV250::A11 A12, R300*/ if ((rinfo->family == CHIP_FAMILY_RV250 && - ((INREG(CONFIG_CNTL) & CFG_ATI_REV_ID_MASK) < CFG_ATI_REV_A13)) || + ((INREG(CNFG_CNTL) & CFG_ATI_REV_ID_MASK) < CFG_ATI_REV_A13)) || ((rinfo->family == CHIP_FAMILY_RV100) && - ((INREG(CONFIG_CNTL) & CFG_ATI_REV_ID_MASK) <= CFG_ATI_REV_A13))) { + ((INREG(CNFG_CNTL) & CFG_ATI_REV_ID_MASK) <= CFG_ATI_REV_A13))) { tmp |= SCLK_CNTL__FORCE_CP; tmp |= SCLK_CNTL__FORCE_VIP; } @@ -486,7 +486,7 @@ static void radeon_pm_enable_dynamic_mode(struct radeonfb_info *rinfo) /* RV200::A11 A12 RV250::A11 A12 */ if (((rinfo->family == CHIP_FAMILY_RV200) || (rinfo->family == CHIP_FAMILY_RV250)) && - ((INREG(CONFIG_CNTL) & CFG_ATI_REV_ID_MASK) < CFG_ATI_REV_A13)) + ((INREG(CNFG_CNTL) & CFG_ATI_REV_ID_MASK) < CFG_ATI_REV_A13)) tmp |= SCLK_MORE_CNTL__FORCEON; OUTPLL(pllSCLK_MORE_CNTL, tmp); @@ -497,7 +497,7 @@ static void radeon_pm_enable_dynamic_mode(struct radeonfb_info *rinfo) /* RV200::A11 A12, RV250::A11 A12 */ if (((rinfo->family == CHIP_FAMILY_RV200) || (rinfo->family == CHIP_FAMILY_RV250)) && - ((INREG(CONFIG_CNTL) & CFG_ATI_REV_ID_MASK) < CFG_ATI_REV_A13)) { + ((INREG(CNFG_CNTL) & CFG_ATI_REV_ID_MASK) < CFG_ATI_REV_A13)) { tmp = INPLL(pllPLL_PWRMGT_CNTL); tmp |= 
PLL_PWRMGT_CNTL__TCL_BYPASS_DISABLE; OUTPLL(pllPLL_PWRMGT_CNTL, tmp); @@ -702,7 +702,7 @@ static void radeon_pm_restore_regs(struct radeonfb_info *rinfo) OUTREG(DISPLAY_BASE_ADDR, rinfo->save_regs[31]); OUTREG(MC_AGP_LOCATION, rinfo->save_regs[32]); OUTREG(CRTC2_DISPLAY_BASE_ADDR, rinfo->save_regs[33]); - OUTREG(CONFIG_MEMSIZE, rinfo->video_ram); + OUTREG(CNFG_MEMSIZE, rinfo->video_ram); OUTREG(DISP_MISC_CNTL, rinfo->save_regs[9]); OUTREG(DISP_PWR_MAN, rinfo->save_regs[10]); @@ -1723,7 +1723,7 @@ static void radeon_reinitialize_M10(struct radeonfb_info *rinfo) OUTREG(CRTC2_DISPLAY_BASE_ADDR, rinfo->save_regs[33]); OUTREG(MC_FB_LOCATION, rinfo->save_regs[30]); OUTREG(OV0_BASE_ADDR, rinfo->save_regs[80]); - OUTREG(CONFIG_MEMSIZE, rinfo->video_ram); + OUTREG(CNFG_MEMSIZE, rinfo->video_ram); OUTREG(BUS_CNTL, rinfo->save_regs[36]); OUTREG(BUS_CNTL1, rinfo->save_regs[14]); OUTREG(MPP_TB_CONFIG, rinfo->save_regs[37]); @@ -1961,7 +1961,7 @@ static void radeon_pm_m9p_reconfigure_mc(struct radeonfb_info *rinfo) OUTMC(rinfo, ixMC_CHP_IO_CNTL_B1, rinfo->save_regs[68] /*0x141555ff*/); OUTMC(rinfo, ixMC_IMP_CNTL_0, rinfo->save_regs[71] /*0x00009249*/); OUTREG(MC_IND_INDEX, 0); - OUTREG(CONFIG_MEMSIZE, rinfo->video_ram); + OUTREG(CNFG_MEMSIZE, rinfo->video_ram); mdelay(20); } @@ -2361,7 +2361,7 @@ static void radeon_reinitialize_QW(struct radeonfb_info *rinfo) OUTMC(rinfo, ixMC_IMP_CNTL_0, 0x00009249); OUTREG(MC_IND_INDEX, 0); - OUTREG(CONFIG_MEMSIZE, rinfo->video_ram); + OUTREG(CNFG_MEMSIZE, rinfo->video_ram); radeon_pm_full_reset_sdram(rinfo); @@ -2509,9 +2509,7 @@ static void radeon_reinitialize_QW(struct radeonfb_info *rinfo) static void radeon_set_suspend(struct radeonfb_info *rinfo, int suspend) { - u16 pwr_cmd; u32 tmp; - int i; if (!rinfo->pm_reg) return; @@ -2557,32 +2555,14 @@ static void radeon_set_suspend(struct radeonfb_info *rinfo, int suspend) } } - for (i = 0; i < 64; ++i) - pci_read_config_dword(rinfo->pdev, i * 4, - &rinfo->cfg_save[i]); - /* Switch PCI power management to D2. 
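The pervasive CONFIG_* to CNFG_* register renames in aty128fb, atyfb and radeonfb deserve a note: they are not churn. The CONFIG_ prefix is reserved for Kconfig symbols, and build tooling such as fixdep keys on that prefix when computing dependencies, so register names like CONFIG_MEMSIZE are a namespace violation (this rationale is inferred from kernel convention; the hunks themselves only show the rename). In the register headers the change amounts to, for example:

	-#define CONFIG_MEMSIZE	0x00f8
	+#define CNFG_MEMSIZE	0x00f8

with every use site updated to match, as seen throughout these files.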
*/ pci_disable_device(rinfo->pdev); - for (;;) { - pci_read_config_word( - rinfo->pdev, rinfo->pm_reg+PCI_PM_CTRL, - &pwr_cmd); - if (pwr_cmd & 2) - break; - pci_write_config_word( - rinfo->pdev, rinfo->pm_reg+PCI_PM_CTRL, - (pwr_cmd & ~PCI_PM_CTRL_STATE_MASK) | 2); - mdelay(500); - } + pci_save_state(rinfo->pdev); + pci_set_power_state(rinfo->pdev, PCI_D2); } else { printk(KERN_DEBUG "radeonfb (%s): switching to D0 state...\n", pci_name(rinfo->pdev)); - /* Switch back PCI powermanagment to D0 */ - mdelay(200); - pci_write_config_word(rinfo->pdev, rinfo->pm_reg+PCI_PM_CTRL, 0); - mdelay(500); - if (rinfo->family <= CHIP_FAMILY_RV250) { /* Reset the SDRAM controller */ radeon_pm_full_reset_sdram(rinfo); @@ -2598,37 +2578,10 @@ static void radeon_set_suspend(struct radeonfb_info *rinfo, int suspend) } } -static int radeon_restore_pci_cfg(struct radeonfb_info *rinfo) -{ - int i; - static u32 radeon_cfg_after_resume[64]; - - for (i = 0; i < 64; ++i) - pci_read_config_dword(rinfo->pdev, i * 4, - &radeon_cfg_after_resume[i]); - - if (radeon_cfg_after_resume[PCI_BASE_ADDRESS_0/4] - == rinfo->cfg_save[PCI_BASE_ADDRESS_0/4]) - return 0; /* assume everything is ok */ - - for (i = PCI_BASE_ADDRESS_0/4; i < 64; ++i) { - if (radeon_cfg_after_resume[i] != rinfo->cfg_save[i]) - pci_write_config_dword(rinfo->pdev, i * 4, - rinfo->cfg_save[i]); - } - pci_write_config_word(rinfo->pdev, PCI_CACHE_LINE_SIZE, - rinfo->cfg_save[PCI_CACHE_LINE_SIZE/4]); - pci_write_config_word(rinfo->pdev, PCI_COMMAND, - rinfo->cfg_save[PCI_COMMAND/4]); - return 1; -} - - int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) { struct fb_info *info = pci_get_drvdata(pdev); struct radeonfb_info *rinfo = info->par; - int i; if (mesg.event == pdev->dev.power.power_state.event) return 0; @@ -2674,6 +2627,11 @@ int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) pmac_suspend_agp_for_card(pdev); #endif /* CONFIG_PPC_PMAC */ + /* It's unclear whether or when the generic code will do that, so let's + * do it ourselves. 
We save state before we do any power management + */ + pci_save_state(pdev); + /* If we support wakeup from poweroff, we save all regs we can including cfg * space */ @@ -2698,9 +2656,6 @@ int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) mdelay(20); OUTREG(LVDS_GEN_CNTL, INREG(LVDS_GEN_CNTL) & ~(LVDS_DIGON)); } - // FIXME: Use PCI layer - for (i = 0; i < 64; ++i) - pci_read_config_dword(pdev, i * 4, &rinfo->cfg_save[i]); pci_disable_device(pdev); } /* If we support D2, we go to it (should be fixed later with a flag forcing @@ -2717,6 +2672,13 @@ int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t mesg) return 0; } +static int radeon_check_power_loss(struct radeonfb_info *rinfo) +{ + return rinfo->save_regs[4] != INPLL(CLK_PIN_CNTL) || + rinfo->save_regs[2] != INPLL(MCLK_CNTL) || + rinfo->save_regs[3] != INPLL(SCLK_CNTL); +} + int radeonfb_pci_resume(struct pci_dev *pdev) { struct fb_info *info = pci_get_drvdata(pdev); @@ -2735,20 +2697,13 @@ int radeonfb_pci_resume(struct pci_dev *pdev) printk(KERN_DEBUG "radeonfb (%s): resuming from state: %d...\n", pci_name(pdev), pdev->dev.power.power_state.event); - - if (pci_enable_device(pdev)) { - rc = -ENODEV; - printk(KERN_ERR "radeonfb (%s): can't enable PCI device !\n", - pci_name(pdev)); - goto bail; - } - pci_set_master(pdev); - + /* PCI state will have been restored by the core, so + * we should be in D0 now with our config space fully + * restored + */ if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) { - /* Wakeup chip. Check from config space if we were powered off - * (todo: additionally, check CLK_PIN_CNTL too) - */ - if ((rinfo->pm_mode & radeon_pm_off) && radeon_restore_pci_cfg(rinfo)) { + /* Wakeup chip */ + if ((rinfo->pm_mode & radeon_pm_off) && radeon_check_power_loss(rinfo)) { if (rinfo->reinit_func != NULL) rinfo->reinit_func(rinfo); else { diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h index 3ea1b00fdd2..7351e66c7f5 100644 --- a/drivers/video/aty/radeonfb.h +++ b/drivers/video/aty/radeonfb.h @@ -361,8 +361,6 @@ struct radeonfb_info { #ifdef CONFIG_FB_RADEON_I2C struct radeon_i2c_chan i2c[4]; #endif - - u32 cfg_save[64]; }; diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile index 363b3cb2f01..63d75949816 100644 --- a/drivers/video/backlight/Makefile +++ b/drivers/video/backlight/Makefile @@ -18,7 +18,7 @@ obj-$(CONFIG_BACKLIGHT_OMAP1) += omap1_bl.o obj-$(CONFIG_BACKLIGHT_PROGEAR) += progear_bl.o obj-$(CONFIG_BACKLIGHT_CARILLO_RANCH) += cr_bllcd.o obj-$(CONFIG_BACKLIGHT_PWM) += pwm_bl.o -obj-$(CONFIG_BACKLIGHT_DA903X) += da903x.o +obj-$(CONFIG_BACKLIGHT_DA903X) += da903x_bl.o obj-$(CONFIG_BACKLIGHT_MBP_NVIDIA) += mbp_nvidia_bl.o obj-$(CONFIG_BACKLIGHT_TOSA) += tosa_bl.o obj-$(CONFIG_BACKLIGHT_SAHARA) += kb3886_bl.o diff --git a/drivers/video/backlight/da903x.c b/drivers/video/backlight/da903x_bl.c index 93bb4340cc6..93bb4340cc6 100644 --- a/drivers/video/backlight/da903x.c +++ b/drivers/video/backlight/da903x_bl.c diff --git a/drivers/video/fbcmap.c b/drivers/video/fbcmap.c index 91b78e69150..f53b9f1d6ab 100644 --- a/drivers/video/fbcmap.c +++ b/drivers/video/fbcmap.c @@ -250,10 +250,6 @@ int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) int rc, size = cmap->len * sizeof(u16); struct fb_cmap umap; - if (cmap->start < 0 || (!info->fbops->fb_setcolreg && - !info->fbops->fb_setcmap)) - return -EINVAL; - memset(&umap, 0, sizeof(struct fb_cmap)); rc = fb_alloc_cmap(&umap, cmap->len, cmap->transp != NULL); if (rc) @@ -262,11 
+258,23 @@ int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) copy_from_user(umap.green, cmap->green, size) || copy_from_user(umap.blue, cmap->blue, size) || (cmap->transp && copy_from_user(umap.transp, cmap->transp, size))) { - fb_dealloc_cmap(&umap); - return -EFAULT; + rc = -EFAULT; + goto out; } umap.start = cmap->start; + if (!lock_fb_info(info)) { + rc = -ENODEV; + goto out; + } + if (cmap->start < 0 || (!info->fbops->fb_setcolreg && + !info->fbops->fb_setcmap)) { + rc = -EINVAL; + goto out1; + } rc = fb_set_cmap(&umap, info); +out1: + unlock_fb_info(info); +out: fb_dealloc_cmap(&umap); return rc; } diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 756efeb91ab..cfd9dce1ce0 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1013,132 +1013,139 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, struct fb_var_screeninfo var; struct fb_fix_screeninfo fix; struct fb_con2fbmap con2fb; + struct fb_cmap cmap_from; struct fb_cmap_user cmap; struct fb_event event; void __user *argp = (void __user *)arg; long ret = 0; - fb = info->fbops; - if (!fb) - return -ENODEV; - switch (cmd) { case FBIOGET_VSCREENINFO: - ret = copy_to_user(argp, &info->var, - sizeof(var)) ? -EFAULT : 0; + if (!lock_fb_info(info)) + return -ENODEV; + var = info->var; + unlock_fb_info(info); + + ret = copy_to_user(argp, &var, sizeof(var)) ? -EFAULT : 0; break; case FBIOPUT_VSCREENINFO: - if (copy_from_user(&var, argp, sizeof(var))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&var, argp, sizeof(var))) + return -EFAULT; + if (!lock_fb_info(info)) + return -ENODEV; acquire_console_sem(); info->flags |= FBINFO_MISC_USEREVENT; ret = fb_set_var(info, &var); info->flags &= ~FBINFO_MISC_USEREVENT; release_console_sem(); - if (ret == 0 && copy_to_user(argp, &var, sizeof(var))) + unlock_fb_info(info); + if (!ret && copy_to_user(argp, &var, sizeof(var))) ret = -EFAULT; break; case FBIOGET_FSCREENINFO: - ret = copy_to_user(argp, &info->fix, - sizeof(fix)) ? -EFAULT : 0; + if (!lock_fb_info(info)) + return -ENODEV; + fix = info->fix; + unlock_fb_info(info); + + ret = copy_to_user(argp, &fix, sizeof(fix)) ? -EFAULT : 0; break; case FBIOPUTCMAP: if (copy_from_user(&cmap, argp, sizeof(cmap))) - ret = -EFAULT; - else - ret = fb_set_user_cmap(&cmap, info); + return -EFAULT; + ret = fb_set_user_cmap(&cmap, info); break; case FBIOGETCMAP: if (copy_from_user(&cmap, argp, sizeof(cmap))) - ret = -EFAULT; - else - ret = fb_cmap_to_user(&info->cmap, &cmap); + return -EFAULT; + if (!lock_fb_info(info)) + return -ENODEV; + cmap_from = info->cmap; + unlock_fb_info(info); + ret = fb_cmap_to_user(&cmap_from, &cmap); break; case FBIOPAN_DISPLAY: - if (copy_from_user(&var, argp, sizeof(var))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&var, argp, sizeof(var))) + return -EFAULT; + if (!lock_fb_info(info)) + return -ENODEV; acquire_console_sem(); ret = fb_pan_display(info, &var); release_console_sem(); + unlock_fb_info(info); if (ret == 0 && copy_to_user(argp, &var, sizeof(var))) - ret = -EFAULT; + return -EFAULT; break; case FBIO_CURSOR: ret = -EINVAL; break; case FBIOGET_CON2FBMAP: if (copy_from_user(&con2fb, argp, sizeof(con2fb))) - ret = -EFAULT; - else if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) - ret = -EINVAL; - else { - con2fb.framebuffer = -1; - event.info = info; - event.data = &con2fb; - fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, - &event); - ret = copy_to_user(argp, &con2fb, - sizeof(con2fb)) ? 
-EFAULT : 0; - } + return -EFAULT; + if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) + return -EINVAL; + con2fb.framebuffer = -1; + event.data = &con2fb; + + if (!lock_fb_info(info)) + return -ENODEV; + event.info = info; + fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); + unlock_fb_info(info); + + ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? -EFAULT : 0; break; case FBIOPUT_CON2FBMAP: - if (copy_from_user(&con2fb, argp, sizeof(con2fb))) { - ret = -EFAULT; - break; - } - if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) { - ret = -EINVAL; - break; - } - if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) { - ret = -EINVAL; - break; - } + if (copy_from_user(&con2fb, argp, sizeof(con2fb))) + return -EFAULT; + if (con2fb.console < 1 || con2fb.console > MAX_NR_CONSOLES) + return -EINVAL; + if (con2fb.framebuffer < 0 || con2fb.framebuffer >= FB_MAX) + return -EINVAL; if (!registered_fb[con2fb.framebuffer]) request_module("fb%d", con2fb.framebuffer); if (!registered_fb[con2fb.framebuffer]) { ret = -EINVAL; break; } - event.info = info; event.data = &con2fb; + if (!lock_fb_info(info)) + return -ENODEV; + event.info = info; ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); + unlock_fb_info(info); break; case FBIOBLANK: + if (!lock_fb_info(info)) + return -ENODEV; acquire_console_sem(); info->flags |= FBINFO_MISC_USEREVENT; ret = fb_blank(info, arg); info->flags &= ~FBINFO_MISC_USEREVENT; release_console_sem(); - break;; + unlock_fb_info(info); + break; default: - if (fb->fb_ioctl == NULL) - ret = -ENOTTY; - else + if (!lock_fb_info(info)) + return -ENODEV; + fb = info->fbops; + if (fb->fb_ioctl) ret = fb->fb_ioctl(info, cmd, arg); + else + ret = -ENOTTY; + unlock_fb_info(info); } return ret; } static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -__acquires(&info->lock) -__releases(&info->lock) { struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); - struct fb_info *info; - long ret; + struct fb_info *info = registered_fb[fbidx]; - info = registered_fb[fbidx]; - mutex_lock(&info->lock); - ret = do_fb_ioctl(info, cmd, arg); - mutex_unlock(&info->lock); - return ret; + return do_fb_ioctl(info, cmd, arg); } #ifdef CONFIG_COMPAT @@ -1257,8 +1264,6 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, static long fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -__acquires(&info->lock) -__releases(&info->lock) { struct inode *inode = file->f_path.dentry->d_inode; int fbidx = iminor(inode); @@ -1266,7 +1271,6 @@ __releases(&info->lock) struct fb_ops *fb = info->fbops; long ret = -ENOIOCTLCMD; - mutex_lock(&info->lock); switch(cmd) { case FBIOGET_VSCREENINFO: case FBIOPUT_VSCREENINFO: @@ -1292,7 +1296,6 @@ __releases(&info->lock) ret = fb->fb_compat_ioctl(info, cmd, arg); break; } - mutex_unlock(&info->lock); return ret; } #endif diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e3ff2b9e602..33b7235f853 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1208,9 +1208,11 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, * check for an ELF header. If we find one, dump the first page to * aid in determining what was mapped here. */ - if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) { + if (FILTER(ELF_HEADERS) && + vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) { u32 __user *header = (u32 __user *) vma->vm_start; u32 word; + mm_segment_t fs = get_fs(); /* * Doing it this way gets the constant folded by GCC. 
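The net effect of the do_fb_ioctl() rework above: info->lock is no longer held across the whole ioctl (the mutex_lock/mutex_unlock pair vanishes from fb_ioctl()), and each command instead takes it just around the device access via lock_fb_info(), which doubles as a check that the framebuffer has not been unregistered. The shape every branch now follows, distilled into one handler:

	#include <linux/fb.h>
	#include <linux/uaccess.h>

	static long example_getvar(struct fb_info *info, void __user *argp)
	{
		struct fb_var_screeninfo var;

		if (!lock_fb_info(info))
			return -ENODEV;	/* fb was unregistered under us */
		var = info->var;	/* snapshot state under the lock */
		unlock_fb_info(info);

		/* copy after unlocking: copy_to_user() can fault and sleep */
		return copy_to_user(argp, &var, sizeof(var)) ? -EFAULT : 0;
	}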
*/ @@ -1223,7 +1225,15 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma, magic.elfmag[EI_MAG1] = ELFMAG1; magic.elfmag[EI_MAG2] = ELFMAG2; magic.elfmag[EI_MAG3] = ELFMAG3; - if (get_user(word, header) == 0 && word == magic.cmp) + /* + * Switch to the user "segment" for get_user(), + * then put back what elf_core_dump() had in place. + */ + set_fs(USER_DS); + if (unlikely(get_user(word, header))) + word = 0; + set_fs(fs); + if (word == magic.cmp) return PAGE_SIZE; } diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index f8fcf999ea1..7bb3c020e57 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -16,3 +16,16 @@ config BTRFS_FS module will be called btrfs. If unsure, say N. + +config BTRFS_FS_POSIX_ACL + bool "Btrfs POSIX Access Control Lists" + depends on BTRFS_FS + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. + + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8e2fec05dbe..c84ca1f5259 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -16,11 +16,11 @@ * Boston, MA 021110-1307, USA. */ -#include <linux/version.h> #include <linux/kthread.h> #include <linux/list.h> #include <linux/spinlock.h> -# include <linux/freezer.h> +#include <linux/freezer.h> +#include <linux/ftrace.h> #include "async-thread.h" #define WORK_QUEUED_BIT 0 @@ -143,6 +143,7 @@ static int worker_loop(void *arg) struct btrfs_work *work; do { spin_lock_irq(&worker->lock); +again_locked: while (!list_empty(&worker->pending)) { cur = worker->pending.next; work = list_entry(cur, struct btrfs_work, list); @@ -165,14 +166,50 @@ static int worker_loop(void *arg) check_idle_worker(worker); } - worker->working = 0; if (freezing(current)) { + worker->working = 0; + spin_unlock_irq(&worker->lock); refrigerator(); } else { - set_current_state(TASK_INTERRUPTIBLE); spin_unlock_irq(&worker->lock); - if (!kthread_should_stop()) + if (!kthread_should_stop()) { + cpu_relax(); + /* + * we've dropped the lock, did someone else + * jump_in? 
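Back in the binfmt_elf.c hunk above, the subtle point is address limits: get_user() validates its pointer against the current segment, and the core-dump path runs with a kernel segment in place, so a user-space pointer has to be probed under USER_DS. The dance, isolated (2.6-era segment API):

	#include <linux/uaccess.h>

	static u32 probe_user_word(u32 __user *p)
	{
		mm_segment_t old_fs = get_fs();
		u32 word;

		set_fs(USER_DS);	/* check p against user-space limits */
		if (get_user(word, p))
			word = 0;	/* unreadable: treat as no ELF magic */
		set_fs(old_fs);		/* put back what the dumper had */
		return word;
	}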
+ */ + smp_mb(); + if (!list_empty(&worker->pending)) + continue; + + /* + * this short schedule allows more work to + * come in without the queue functions + * needing to go through wake_up_process() + * + * worker->working is still 1, so nobody + * is going to try and wake us up + */ + schedule_timeout(1); + smp_mb(); + if (!list_empty(&worker->pending)) + continue; + + /* still no more work?, sleep for real */ + spin_lock_irq(&worker->lock); + set_current_state(TASK_INTERRUPTIBLE); + if (!list_empty(&worker->pending)) + goto again_locked; + + /* + * this makes sure we get a wakeup when someone + * adds something new to the queue + */ + worker->working = 0; + spin_unlock_irq(&worker->lock); + schedule(); + } __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); @@ -350,13 +387,14 @@ int btrfs_requeue_work(struct btrfs_work *work) { struct btrfs_worker_thread *worker = work->worker; unsigned long flags; + int wake = 0; if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) goto out; spin_lock_irqsave(&worker->lock, flags); - atomic_inc(&worker->num_pending); list_add_tail(&work->list, &worker->pending); + atomic_inc(&worker->num_pending); /* by definition we're busy, take ourselves off the idle * list @@ -368,10 +406,16 @@ int btrfs_requeue_work(struct btrfs_work *work) &worker->workers->worker_list); spin_unlock_irqrestore(&worker->workers->lock, flags); } + if (!worker->working) { + wake = 1; + worker->working = 1; + } spin_unlock_irqrestore(&worker->lock, flags); - + if (wake) + wake_up_process(worker->task); out: + return 0; } @@ -398,9 +442,10 @@ int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) } spin_lock_irqsave(&worker->lock, flags); + + list_add_tail(&work->list, &worker->pending); atomic_inc(&worker->num_pending); check_busy_worker(worker); - list_add_tail(&work->list, &worker->pending); /* * avoid calling into wake_up_process if this thread has already diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ee848d8585d..ab07627084f 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -32,7 +32,6 @@ #include <linux/swap.h> #include <linux/writeback.h> #include <linux/bit_spinlock.h> -#include <linux/version.h> #include <linux/pagevec.h> #include "compat.h" #include "ctree.h" diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 9e46c077681..551177c0011 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -54,6 +54,31 @@ struct btrfs_path *btrfs_alloc_path(void) return path; } +/* + * set all locked nodes in the path to blocking locks. This should + * be done before scheduling + */ +noinline void btrfs_set_path_blocking(struct btrfs_path *p) +{ + int i; + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + if (p->nodes[i] && p->locks[i]) + btrfs_set_lock_blocking(p->nodes[i]); + } +} + +/* + * reset all the locked nodes in the patch to spinning locks. 
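btrfs_set_path_blocking() and its inverse are the pivot of the locking rework in this series: tree blocks are normally held with cheap spinning locks, and a path is flipped wholesale to blocking locks before any operation that may schedule. The call pattern, as it appears around split_node() and balance_level() further down:

	btrfs_set_path_blocking(p);	/* about to do work that can sleep */
	sret = split_node(trans, root, p, level);
	btrfs_clear_path_blocking(p);	/* back to spinning locks */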
+ */ +noinline void btrfs_clear_path_blocking(struct btrfs_path *p) +{ + int i; + for (i = 0; i < BTRFS_MAX_LEVEL; i++) { + if (p->nodes[i] && p->locks[i]) + btrfs_clear_lock_blocking(p->nodes[i]); + } +} + /* this also releases the path */ void btrfs_free_path(struct btrfs_path *p) { @@ -272,6 +297,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, if (IS_ERR(cow)) return PTR_ERR(cow); + /* cow is set to blocking by btrfs_init_new_buffer */ + copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); @@ -388,17 +415,20 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(1); } - spin_lock(&root->fs_info->hash_lock); if (btrfs_header_generation(buf) == trans->transid && btrfs_header_owner(buf) == root->root_key.objectid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { *cow_ret = buf; - spin_unlock(&root->fs_info->hash_lock); WARN_ON(prealloc_dest); return 0; } - spin_unlock(&root->fs_info->hash_lock); + search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); + + if (parent) + btrfs_set_lock_blocking(parent); + btrfs_set_lock_blocking(buf); + ret = __btrfs_cow_block(trans, root, buf, parent, parent_slot, cow_ret, search_start, 0, prealloc_dest); @@ -504,6 +534,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, if (parent_nritems == 1) return 0; + btrfs_set_lock_blocking(parent); + for (i = start_slot; i < end_slot; i++) { int close = 1; @@ -564,6 +596,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, search_start = last_block; btrfs_tree_lock(cur); + btrfs_set_lock_blocking(cur); err = __btrfs_cow_block(trans, root, cur, parent, i, &cur, search_start, min(16 * blocksize, @@ -862,6 +895,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, return 0; mid = path->nodes[level]; + WARN_ON(!path->locks[level]); WARN_ON(btrfs_header_generation(mid) != trans->transid); @@ -884,6 +918,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* promote the child to a root */ child = read_node_slot(root, mid, 0); btrfs_tree_lock(child); + btrfs_set_lock_blocking(child); BUG_ON(!child); ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); BUG_ON(ret); @@ -900,6 +935,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, add_root_to_dirty_list(root); btrfs_tree_unlock(child); + path->locks[level] = 0; path->nodes[level] = NULL; clean_tree_block(trans, root, mid); @@ -924,6 +960,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, left = read_node_slot(root, parent, pslot - 1); if (left) { btrfs_tree_lock(left); + btrfs_set_lock_blocking(left); wret = btrfs_cow_block(trans, root, left, parent, pslot - 1, &left, 0); if (wret) { @@ -934,6 +971,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, right = read_node_slot(root, parent, pslot + 1); if (right) { btrfs_tree_lock(right); + btrfs_set_lock_blocking(right); wret = btrfs_cow_block(trans, root, right, parent, pslot + 1, &right, 0); if (wret) { @@ -1109,6 +1147,8 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, u32 left_nr; btrfs_tree_lock(left); + btrfs_set_lock_blocking(left); + left_nr = btrfs_header_nritems(left); if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1155,7 +1195,10 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, */ if (right) { u32 right_nr; + btrfs_tree_lock(right); + 
btrfs_set_lock_blocking(right); + right_nr = btrfs_header_nritems(right); if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) { wret = 1; @@ -1210,8 +1253,7 @@ static noinline void reada_for_search(struct btrfs_root *root, struct btrfs_disk_key disk_key; u32 nritems; u64 search; - u64 lowest_read; - u64 highest_read; + u64 target; u64 nread = 0; int direction = path->reada; struct extent_buffer *eb; @@ -1235,8 +1277,7 @@ static noinline void reada_for_search(struct btrfs_root *root, return; } - highest_read = search; - lowest_read = search; + target = search; nritems = btrfs_header_nritems(node); nr = slot; @@ -1256,27 +1297,80 @@ static noinline void reada_for_search(struct btrfs_root *root, break; } search = btrfs_node_blockptr(node, nr); - if ((search >= lowest_read && search <= highest_read) || - (search < lowest_read && lowest_read - search <= 16384) || - (search > highest_read && search - highest_read <= 16384)) { + if ((search <= target && target - search <= 65536) || + (search > target && search - target <= 65536)) { readahead_tree_block(root, search, blocksize, btrfs_node_ptr_generation(node, nr)); nread += blocksize; } nscan++; - if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32)) + if ((nread > 65536 || nscan > 32)) break; + } +} - if (nread > (256 * 1024) || nscan > 128) - break; +/* + * returns -EAGAIN if it had to drop the path, or zero if everything was in + * cache + */ +static noinline int reada_for_balance(struct btrfs_root *root, + struct btrfs_path *path, int level) +{ + int slot; + int nritems; + struct extent_buffer *parent; + struct extent_buffer *eb; + u64 gen; + u64 block1 = 0; + u64 block2 = 0; + int ret = 0; + int blocksize; - if (search < lowest_read) - lowest_read = search; - if (search > highest_read) - highest_read = search; + parent = path->nodes[level - 1]; + if (!parent) + return 0; + + nritems = btrfs_header_nritems(parent); + slot = path->slots[level]; + blocksize = btrfs_level_size(root, level); + + if (slot > 0) { + block1 = btrfs_node_blockptr(parent, slot - 1); + gen = btrfs_node_ptr_generation(parent, slot - 1); + eb = btrfs_find_tree_block(root, block1, blocksize); + if (eb && btrfs_buffer_uptodate(eb, gen)) + block1 = 0; + free_extent_buffer(eb); + } + if (slot < nritems) { + block2 = btrfs_node_blockptr(parent, slot + 1); + gen = btrfs_node_ptr_generation(parent, slot + 1); + eb = btrfs_find_tree_block(root, block2, blocksize); + if (eb && btrfs_buffer_uptodate(eb, gen)) + block2 = 0; + free_extent_buffer(eb); + } + if (block1 || block2) { + ret = -EAGAIN; + btrfs_release_path(root, path); + if (block1) + readahead_tree_block(root, block1, blocksize, 0); + if (block2) + readahead_tree_block(root, block2, blocksize, 0); + + if (block1) { + eb = read_tree_block(root, block1, blocksize, 0); + free_extent_buffer(eb); + } + if (block1) { + eb = read_tree_block(root, block2, blocksize, 0); + free_extent_buffer(eb); + } } + return ret; } + /* * when we walk down the tree, it is usually safe to unlock the higher layers * in the tree. The exceptions are when our path goes through slot 0, because @@ -1328,6 +1422,32 @@ static noinline void unlock_up(struct btrfs_path *path, int level, } /* + * This releases any locks held in the path starting at level and + * going all the way up to the root. + * + * btrfs_search_slot will keep the lock held on higher nodes in a few + * corner cases, such as COW of the block at slot zero in the node. 
This + * ignores those rules, and it should only be called when there are no + * more updates to be done higher up in the tree. + */ +noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) +{ + int i; + + if (path->keep_locks || path->lowest_level) + return; + + for (i = level; i < BTRFS_MAX_LEVEL; i++) { + if (!path->nodes[i]) + continue; + if (!path->locks[i]) + continue; + btrfs_tree_unlock(path->nodes[i]); + path->locks[i] = 0; + } +} + +/* * look for key in the tree. path is filled in with nodes along the way * if key is found, we return zero and you can find the item in the leaf * level of the path (level 0) @@ -1387,31 +1507,30 @@ again: int wret; /* is a cow on this block not required */ - spin_lock(&root->fs_info->hash_lock); if (btrfs_header_generation(b) == trans->transid && btrfs_header_owner(b) == root->root_key.objectid && !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { - spin_unlock(&root->fs_info->hash_lock); goto cow_done; } - spin_unlock(&root->fs_info->hash_lock); /* ok, we have to cow, is our old prealloc the right * size? */ if (prealloc_block.objectid && prealloc_block.offset != b->len) { + btrfs_release_path(root, p); btrfs_free_reserved_extent(root, prealloc_block.objectid, prealloc_block.offset); prealloc_block.objectid = 0; + goto again; } /* * for higher level blocks, try not to allocate blocks * with the block and the parent locks held. */ - if (level > 1 && !prealloc_block.objectid && + if (level > 0 && !prealloc_block.objectid && btrfs_path_lock_waiting(p, level)) { u32 size = b->len; u64 hint = b->start; @@ -1425,6 +1544,8 @@ again: goto again; } + btrfs_set_path_blocking(p); + wret = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], @@ -1446,6 +1567,22 @@ cow_done: if (!p->skip_locking) p->locks[level] = 1; + btrfs_clear_path_blocking(p); + + /* + * we have a lock on b and as long as we aren't changing + * the tree, there is no way to for the items in b to change. + * It is safe to drop the lock on our parent before we + * go through the expensive btree search on b. + * + * If cow is true, then we might be changing slot zero, + * which may require changing the parent. So, we can't + * drop the lock until after we know which slot we're + * operating on. + */ + if (!cow) + btrfs_unlock_up_safe(p, level + 1); + ret = check_block(root, p, level); if (ret) { ret = -1; @@ -1453,6 +1590,7 @@ cow_done: } ret = bin_search(b, key, level, &slot); + if (level != 0) { if (ret && slot > 0) slot -= 1; @@ -1460,7 +1598,16 @@ cow_done: if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { - int sret = split_node(trans, root, p, level); + int sret; + + sret = reada_for_balance(root, p, level); + if (sret) + goto again; + + btrfs_set_path_blocking(p); + sret = split_node(trans, root, p, level); + btrfs_clear_path_blocking(p); + BUG_ON(sret > 0); if (sret) { ret = sret; @@ -1468,9 +1615,19 @@ cow_done: } b = p->nodes[level]; slot = p->slots[level]; - } else if (ins_len < 0) { - int sret = balance_level(trans, root, p, - level); + } else if (ins_len < 0 && + btrfs_header_nritems(b) < + BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { + int sret; + + sret = reada_for_balance(root, p, level); + if (sret) + goto again; + + btrfs_set_path_blocking(p); + sret = balance_level(trans, root, p, level); + btrfs_clear_path_blocking(p); + if (sret) { ret = sret; goto done; @@ -1504,7 +1661,7 @@ cow_done: * of the btree by dropping locks before * we read. 
*/ - if (level > 1) { + if (level > 0) { btrfs_release_path(NULL, p); if (tmp) free_extent_buffer(tmp); @@ -1519,6 +1676,7 @@ cow_done: free_extent_buffer(tmp); goto again; } else { + btrfs_set_path_blocking(p); if (tmp) free_extent_buffer(tmp); if (should_reada) @@ -1528,14 +1686,29 @@ cow_done: b = read_node_slot(root, b, slot); } } - if (!p->skip_locking) - btrfs_tree_lock(b); + if (!p->skip_locking) { + int lret; + + btrfs_clear_path_blocking(p); + lret = btrfs_try_spin_lock(b); + + if (!lret) { + btrfs_set_path_blocking(p); + btrfs_tree_lock(b); + btrfs_clear_path_blocking(p); + } + } } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < ins_len) { - int sret = split_leaf(trans, root, key, + int sret; + + btrfs_set_path_blocking(p); + sret = split_leaf(trans, root, key, p, ins_len, ret == 0); + btrfs_clear_path_blocking(p); + BUG_ON(sret > 0); if (sret) { ret = sret; @@ -1549,12 +1722,16 @@ cow_done: } ret = 1; done: + /* + * we don't really know what they plan on doing with the path + * from here on, so for now just mark it as blocking + */ + btrfs_set_path_blocking(p); if (prealloc_block.objectid) { btrfs_free_reserved_extent(root, prealloc_block.objectid, prealloc_block.offset); } - return ret; } @@ -1578,6 +1755,8 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); BUG_ON(ret); + btrfs_set_lock_blocking(eb); + parent = eb; while (1) { level = btrfs_header_level(parent); @@ -1602,6 +1781,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, eb = read_tree_block(root, bytenr, blocksize, generation); btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); } /* @@ -1626,6 +1806,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, eb = read_tree_block(root, bytenr, blocksize, generation); btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); } ret = btrfs_cow_block(trans, root, eb, parent, slot, @@ -2172,6 +2353,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root right = read_node_slot(root, upper, slot + 1); btrfs_tree_lock(right); + btrfs_set_lock_blocking(right); + free_space = btrfs_leaf_free_space(root, right); if (free_space < data_size) goto out_unlock; @@ -2367,6 +2550,8 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root left = read_node_slot(root, path->nodes[1], slot - 1); btrfs_tree_lock(left); + btrfs_set_lock_blocking(left); + free_space = btrfs_leaf_free_space(root, left); if (free_space < data_size) { ret = 1; @@ -2825,6 +3010,12 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, path->keep_locks = 0; BUG_ON(ret); + /* + * make sure any changes to the path from split_leaf leave it + * in a blocking state + */ + btrfs_set_path_blocking(path); + leaf = path->nodes[0]; BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); @@ -3354,6 +3545,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, BUG(); } out: + btrfs_unlock_up_safe(path, 1); return ret; } @@ -3441,15 +3633,22 @@ noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, { int ret; u64 root_gen = btrfs_header_generation(path->nodes[1]); + u64 parent_start = path->nodes[1]->start; + u64 parent_owner = btrfs_header_owner(path->nodes[1]); ret = del_ptr(trans, root, path, 1, path->slots[1]); if (ret) return ret; + /* + * btrfs_free_extent is expensive, we want to make sure we + * aren't holding any locks when we call it + */ + btrfs_unlock_up_safe(path, 0); + ret = btrfs_free_extent(trans, root, bytenr, 
btrfs_level_size(root, 0), - path->nodes[1]->start, - btrfs_header_owner(path->nodes[1]), + parent_start, parent_owner, root_gen, 0, 1); return ret; } @@ -3721,12 +3920,14 @@ find_next_key: */ if (slot >= nritems) { path->slots[level] = slot; + btrfs_set_path_blocking(path); sret = btrfs_find_next_key(root, path, min_key, level, cache_only, min_trans); if (sret == 0) { btrfs_release_path(root, path); goto again; } else { + btrfs_clear_path_blocking(path); goto out; } } @@ -3738,16 +3939,20 @@ find_next_key: unlock_up(path, level, 1); goto out; } + btrfs_set_path_blocking(path); cur = read_node_slot(root, cur, slot); btrfs_tree_lock(cur); + path->locks[level - 1] = 1; path->nodes[level - 1] = cur; unlock_up(path, level, 1); + btrfs_clear_path_blocking(path); } out: if (ret == 0) memcpy(min_key, &found_key, sizeof(found_key)); + btrfs_set_path_blocking(path); return ret; } @@ -3843,6 +4048,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (ret < 0) return ret; + btrfs_set_path_blocking(path); nritems = btrfs_header_nritems(path->nodes[0]); /* * by releasing the path above we dropped all our locks. A balance @@ -3873,6 +4079,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) free_extent_buffer(next); } + /* the path was set to blocking above */ if (level == 1 && (path->locks[1] || path->skip_locking) && path->reada) reada_for_search(root, path, level, slot, 0); @@ -3881,6 +4088,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) if (!path->skip_locking) { WARN_ON(!btrfs_tree_locked(c)); btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); } break; } @@ -3897,12 +4105,15 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) path->locks[level] = 1; if (!level) break; + + btrfs_set_path_blocking(path); if (level == 1 && path->locks[1] && path->reada) reada_for_search(root, path, level, slot, 0); next = read_node_slot(root, next, 0); if (!path->skip_locking) { WARN_ON(!btrfs_tree_locked(path->nodes[level])); btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); } } done: @@ -3927,6 +4138,7 @@ int btrfs_previous_item(struct btrfs_root *root, while (1) { if (path->slots[0] == 0) { + btrfs_set_path_blocking(path); ret = btrfs_prev_leaf(root, path); if (ret != 0) return ret; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eee060f8811..531db112c8b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -454,17 +454,11 @@ struct btrfs_timespec { __le32 nsec; } __attribute__ ((__packed__)); -typedef enum { +enum btrfs_compression_type { BTRFS_COMPRESS_NONE = 0, BTRFS_COMPRESS_ZLIB = 1, BTRFS_COMPRESS_LAST = 2, -} btrfs_compression_type; - -/* we don't understand any encryption methods right now */ -typedef enum { - BTRFS_ENCRYPTION_NONE = 0, - BTRFS_ENCRYPTION_LAST = 1, -} btrfs_encryption_type; +}; struct btrfs_inode_item { /* nfs style generation number */ @@ -701,9 +695,7 @@ struct btrfs_fs_info { struct btrfs_transaction *running_transaction; wait_queue_head_t transaction_throttle; wait_queue_head_t transaction_wait; - wait_queue_head_t async_submit_wait; - wait_queue_head_t tree_log_wait; struct btrfs_super_block super_copy; struct btrfs_super_block super_for_commit; @@ -711,7 +703,6 @@ struct btrfs_fs_info { struct super_block *sb; struct inode *btree_inode; struct backing_dev_info bdi; - spinlock_t hash_lock; struct mutex trans_mutex; struct mutex tree_log_mutex; struct mutex transaction_kthread_mutex; @@ -730,10 +721,6 @@ struct btrfs_fs_info { atomic_t async_submit_draining; atomic_t 
nr_async_bios; atomic_t async_delalloc_pages; - atomic_t tree_log_writers; - atomic_t tree_log_commit; - unsigned long tree_log_batch; - u64 tree_log_transid; /* * this is used by the balancing code to wait for all the pending @@ -833,7 +820,14 @@ struct btrfs_root { struct kobject root_kobj; struct completion kobj_unregister; struct mutex objectid_mutex; + struct mutex log_mutex; + wait_queue_head_t log_writer_wait; + wait_queue_head_t log_commit_wait[2]; + atomic_t log_writers; + atomic_t log_commit[2]; + unsigned long log_transid; + unsigned long log_batch; u64 objectid; u64 last_trans; @@ -1841,6 +1835,10 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_init_path(struct btrfs_path *p); +void btrfs_set_path_blocking(struct btrfs_path *p); +void btrfs_clear_path_blocking(struct btrfs_path *p); +void btrfs_unlock_up_safe(struct btrfs_path *p, int level); + int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, int slot, int nr); int btrfs_del_leaf(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81a313874ae..5aebddd7119 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -16,7 +16,6 @@ * Boston, MA 021110-1307, USA. */ -#include <linux/version.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/scatterlist.h> @@ -800,7 +799,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - buf->flags |= EXTENT_UPTODATE; + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); else WARN_ON(1); return buf; @@ -814,6 +813,10 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, if (btrfs_header_generation(buf) == root->fs_info->running_transaction->transid) { WARN_ON(!btrfs_tree_locked(buf)); + + /* ugh, clear_extent_buffer_dirty can be expensive */ + btrfs_set_lock_blocking(buf); + clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); } @@ -850,6 +853,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->list_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); + init_waitqueue_head(&root->log_writer_wait); + init_waitqueue_head(&root->log_commit_wait[0]); + init_waitqueue_head(&root->log_commit_wait[1]); + atomic_set(&root->log_commit[0], 0); + atomic_set(&root->log_commit[1], 0); + atomic_set(&root->log_writers, 0); + root->log_batch = 0; + root->log_transid = 0; extent_io_tree_init(&root->dirty_log_pages, fs_info->btree_inode->i_mapping, GFP_NOFS); @@ -934,15 +945,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, return 0; } -int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info) +static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) { struct btrfs_root *root; struct btrfs_root *tree_root = fs_info->tree_root; + struct extent_buffer *leaf; root = kzalloc(sizeof(*root), GFP_NOFS); if (!root) - return -ENOMEM; + return ERR_PTR(-ENOMEM); __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, @@ -951,12 +963,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID; root->root_key.type = BTRFS_ROOT_ITEM_KEY; root->root_key.offset = 
BTRFS_TREE_LOG_OBJECTID; + /* + * log trees do not get reference counted because they go away + * before a real commit is actually done. They do store pointers + * to file data extents, and those reference counts still get + * updated (along with back refs to the log tree). + */ root->ref_cows = 0; - root->node = btrfs_alloc_free_block(trans, root, root->leafsize, - 0, BTRFS_TREE_LOG_OBJECTID, - trans->transid, 0, 0, 0); + leaf = btrfs_alloc_free_block(trans, root, root->leafsize, + 0, BTRFS_TREE_LOG_OBJECTID, + trans->transid, 0, 0, 0); + if (IS_ERR(leaf)) { + kfree(root); + return ERR_CAST(leaf); + } + root->node = leaf; btrfs_set_header_nritems(root->node, 0); btrfs_set_header_level(root->node, 0); btrfs_set_header_bytenr(root->node, root->node->start); @@ -968,7 +991,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, BTRFS_FSID_SIZE); btrfs_mark_buffer_dirty(root->node); btrfs_tree_unlock(root->node); - fs_info->log_root_tree = root; + return root; +} + +int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *log_root; + + log_root = alloc_log_tree(trans, fs_info); + if (IS_ERR(log_root)) + return PTR_ERR(log_root); + WARN_ON(fs_info->log_root_tree); + fs_info->log_root_tree = log_root; + return 0; +} + +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root *log_root; + struct btrfs_inode_item *inode_item; + + log_root = alloc_log_tree(trans, root->fs_info); + if (IS_ERR(log_root)) + return PTR_ERR(log_root); + + log_root->last_trans = trans->transid; + log_root->root_key.offset = root->root_key.objectid; + + inode_item = &log_root->root_item.inode; + inode_item->generation = cpu_to_le64(1); + inode_item->size = cpu_to_le64(3); + inode_item->nlink = cpu_to_le32(1); + inode_item->nbytes = cpu_to_le64(root->leafsize); + inode_item->mode = cpu_to_le32(S_IFDIR | 0755); + + btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start); + btrfs_set_root_generation(&log_root->root_item, trans->transid); + + WARN_ON(root->log_root); + root->log_root = log_root; + root->log_transid = 0; return 0; } @@ -1136,7 +1200,6 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) { struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data; int ret = 0; - struct list_head *cur; struct btrfs_device *device; struct backing_dev_info *bdi; #if 0 @@ -1144,8 +1207,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) btrfs_congested_async(info, 0)) return 1; #endif - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); @@ -1163,13 +1225,11 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) */ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page) { - struct list_head *cur; struct btrfs_device *device; struct btrfs_fs_info *info; info = (struct btrfs_fs_info *)bdi->unplug_io_data; - list_for_each(cur, &info->fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &info->fs_devices->devices, dev_list) { if (!device->bdev) continue; @@ -1447,7 +1507,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->delalloc_inodes); - 
spin_lock_init(&fs_info->hash_lock); spin_lock_init(&fs_info->delalloc_lock); spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->ref_cache_lock); @@ -1535,10 +1594,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); - init_waitqueue_head(&fs_info->tree_log_wait); - atomic_set(&fs_info->tree_log_commit, 0); - atomic_set(&fs_info->tree_log_writers, 0); - fs_info->tree_log_transid = 0; __setup_root(4096, 4096, 4096, 4096, tree_root, fs_info, BTRFS_ROOT_TREE_OBJECTID); @@ -1627,6 +1682,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, * low idle thresh */ fs_info->endio_workers.idle_thresh = 4; + fs_info->endio_meta_workers.idle_thresh = 4; + fs_info->endio_write_workers.idle_thresh = 64; fs_info->endio_meta_write_workers.idle_thresh = 64; @@ -1740,13 +1797,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, "btrfs-cleaner"); - if (!fs_info->cleaner_kthread) + if (IS_ERR(fs_info->cleaner_kthread)) goto fail_csum_root; fs_info->transaction_kthread = kthread_run(transaction_kthread, tree_root, "btrfs-transaction"); - if (!fs_info->transaction_kthread) + if (IS_ERR(fs_info->transaction_kthread)) goto fail_cleaner; if (btrfs_super_log_root(disk_super) != 0) { @@ -1828,13 +1885,14 @@ fail_sb_buffer: fail_iput: invalidate_inode_pages2(fs_info->btree_inode->i_mapping); iput(fs_info->btree_inode); -fail: + btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); + bdi_destroy(&fs_info->bdi); +fail: kfree(extent_root); kfree(tree_root); - bdi_destroy(&fs_info->bdi); kfree(fs_info); kfree(chunk_root); kfree(dev_root); @@ -1995,7 +2053,6 @@ static int write_dev_supers(struct btrfs_device *device, int write_all_supers(struct btrfs_root *root, int max_mirrors) { - struct list_head *cur; struct list_head *head = &root->fs_info->fs_devices->devices; struct btrfs_device *dev; struct btrfs_super_block *sb; @@ -2011,8 +2068,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) sb = &root->fs_info->super_for_commit; dev_item = &sb->dev_item; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; @@ -2045,8 +2101,7 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) } total_errors = 0; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; if (!dev->in_fs_metadata || !dev->writeable) @@ -2260,6 +2315,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) u64 transid = btrfs_header_generation(buf); struct inode *btree_inode = root->fs_info->btree_inode; + btrfs_set_lock_blocking(buf); + WARN_ON(!btrfs_tree_locked(buf)); if (transid != root->fs_info->generation) { printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " @@ -2302,14 +2359,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) int ret; ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); if (ret == 0) - buf->flags |= EXTENT_UPTODATE; + set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); return ret; } int btree_lock_page_hook(struct page *page) { struct inode *inode = page->mapping->host; - struct btrfs_root *root = 
BTRFS_I(inode)->root; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_buffer *eb; unsigned long len; @@ -2324,9 +2380,7 @@ int btree_lock_page_hook(struct page *page) goto out; btrfs_tree_lock(eb); - spin_lock(&root->fs_info->hash_lock); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); - spin_unlock(&root->fs_info->hash_lock); btrfs_tree_unlock(eb); free_extent_buffer(eb); out: diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c0ff404c31b..494a56eb298 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -98,5 +98,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); +int btrfs_add_log_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root); int btree_lock_page_hook(struct page *page); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 293da650873..7527523c2d2 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -19,7 +19,7 @@ #include <linux/pagemap.h> #include <linux/writeback.h> #include <linux/blkdev.h> -#include <linux/version.h> +#include <linux/sort.h> #include "compat.h" #include "hash.h" #include "crc32c.h" @@ -30,7 +30,6 @@ #include "volumes.h" #include "locking.h" #include "ref-cache.h" -#include "compat.h" #define PENDING_EXTENT_INSERT 0 #define PENDING_EXTENT_DELETE 1 @@ -326,10 +325,8 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, u64 flags) { struct list_head *head = &info->space_info; - struct list_head *cur; struct btrfs_space_info *found; - list_for_each(cur, head) { - found = list_entry(cur, struct btrfs_space_info, list); + list_for_each_entry(found, head, list) { if (found->flags == flags) return found; } @@ -1525,15 +1522,55 @@ out: return ret; } -int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *orig_buf, struct extent_buffer *buf, - u32 *nr_extents) +/* when a block goes through cow, we update the reference counts of + * everything that block points to. The internal pointers of the block + * can be in just about any order, and it is likely to have clusters of + * things that are close together and clusters of things that are not. + * + * To help reduce the seeks that come with updating all of these reference + * counts, sort them by byte number before actual updates are done. + * + * struct refsort is used to match byte number to slot in the btree block. + * we sort based on the byte number and then use the slot to actually + * find the item. + * + * struct refsort is smaller than struct btrfs_item and smaller than + * struct btrfs_key_ptr. Since we're currently limited to the page size + * for a btree block, there's no way for a kmalloc of refsorts for a + * single node to be bigger than a page.
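Because lib/sort's sort() uses the same comparator contract as qsort, the refsort idea is easy to exercise in a freestanding program. A minimal userspace sketch (the names here are invented for illustration; only the collect/sort/process shape comes from the patch):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct refsort_demo {
	uint64_t bytenr;	/* disk byte number, the sort key */
	uint32_t slot;		/* where the item lives in the block */
};

static int demo_cmp(const void *a_void, const void *b_void)
{
	const struct refsort_demo *a = a_void;
	const struct refsort_demo *b = b_void;

	/* explicit compares: subtracting 64-bit keys and truncating
	 * the result to int could flip the sign and wreck the order */
	if (a->bytenr < b->bytenr)
		return -1;
	if (a->bytenr > b->bytenr)
		return 1;
	return 0;
}

int main(void)
{
	struct refsort_demo d[] = {
		{ 8192, 0 }, { 1024, 1 }, { 4096, 2 },
	};
	int i;

	qsort(d, 3, sizeof(d[0]), demo_cmp);
	for (i = 0; i < 3; i++)	/* visits slots 1, 2, 0 */
		printf("bytenr %llu came from slot %u\n",
		       (unsigned long long)d[i].bytenr,
		       (unsigned)d[i].slot);
	return 0;
}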
+ */ +struct refsort { + u64 bytenr; + u32 slot; +}; + +/* + * for passing into sort() + */ +static int refsort_cmp(const void *a_void, const void *b_void) +{ + const struct refsort *a = a_void; + const struct refsort *b = b_void; + + if (a->bytenr < b->bytenr) + return -1; + if (a->bytenr > b->bytenr) + return 1; + return 0; +} + + +noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *orig_buf, + struct extent_buffer *buf, u32 *nr_extents) { u64 bytenr; u64 ref_root; u64 orig_root; u64 ref_generation; u64 orig_generation; + struct refsort *sorted; u32 nritems; u32 nr_file_extents = 0; struct btrfs_key key; @@ -1542,6 +1579,8 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int level; int ret = 0; int faili = 0; + int refi = 0; + int slot; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, u64, u64, u64, u64, u64, u64, u64, u64); @@ -1553,6 +1592,9 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, nritems = btrfs_header_nritems(buf); level = btrfs_header_level(buf); + sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); + BUG_ON(!sorted); + if (root->ref_cows) { process_func = __btrfs_inc_extent_ref; } else { @@ -1565,6 +1607,11 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, process_func = __btrfs_update_extent_ref; } + /* + * we make two passes through the items. In the first pass we + * only record the byte number and slot. Then we sort based on + * byte number and do the actual work based on the sorted results + */ for (i = 0; i < nritems; i++) { cond_resched(); if (level == 0) { @@ -1581,6 +1628,32 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, continue; nr_file_extents++; + sorted[refi].bytenr = bytenr; + sorted[refi].slot = i; + refi++; + } else { + bytenr = btrfs_node_blockptr(buf, i); + sorted[refi].bytenr = bytenr; + sorted[refi].slot = i; + refi++; + } + } + /* + * if refi == 0, we didn't actually put anything into the sorted + * array and we're done + */ + if (refi == 0) + goto out; + + sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); + + for (i = 0; i < refi; i++) { + cond_resched(); + slot = sorted[i].slot; + bytenr = sorted[i].bytenr; + + if (level == 0) { + btrfs_item_key_to_cpu(buf, &key, slot); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, @@ -1589,25 +1662,25 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, key.objectid); if (ret) { - faili = i; + faili = slot; WARN_ON(1); goto fail; } } else { - bytenr = btrfs_node_blockptr(buf, i); ret = process_func(trans, root, bytenr, orig_buf->start, buf->start, orig_root, ref_root, orig_generation, ref_generation, level - 1); if (ret) { - faili = i; + faili = slot; WARN_ON(1); goto fail; } } } out: + kfree(sorted); if (nr_extents) { if (level == 0) *nr_extents = nr_file_extents; @@ -1616,6 +1689,7 @@ out: } return 0; fail: + kfree(sorted); WARN_ON(1); return ret; } @@ -2159,7 +2233,8 @@ again: ret = find_first_extent_bit(&info->extent_ins, search, &start, &end, EXTENT_WRITEBACK); if (ret) { - if (skipped && all && !num_inserts) { + if (skipped && all && !num_inserts && + list_empty(&update_list)) { skipped = 0; search = 0; continue; @@ -2547,6 +2622,7 @@ again: if (ret) { if (all && skipped && !nr) { search = 0; + skipped = 0; continue; } mutex_unlock(&info->extent_ins_mutex); @@ -2700,13 +2776,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle 
*trans, /* if metadata always pin */ if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { - struct btrfs_block_group_cache *cache; - - /* btrfs_free_reserved_extent */ - cache = btrfs_lookup_block_group(root->fs_info, bytenr); - BUG_ON(!cache); - btrfs_add_free_space(cache, bytenr, num_bytes); - put_block_group(cache); + mutex_lock(&root->fs_info->pinned_mutex); + btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); + mutex_unlock(&root->fs_info->pinned_mutex); update_reserved_extents(root, bytenr, num_bytes, 0); return 0; } @@ -3014,7 +3086,6 @@ loop_check: static void dump_space_info(struct btrfs_space_info *info, u64 bytes) { struct btrfs_block_group_cache *cache; - struct list_head *l; printk(KERN_INFO "space_info has %llu free, is %sfull\n", (unsigned long long)(info->total_bytes - info->bytes_used - @@ -3022,8 +3093,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes) (info->full) ? "" : "not "); down_read(&info->groups_sem); - list_for_each(l, &info->block_groups) { - cache = list_entry(l, struct btrfs_block_group_cache, list); + list_for_each_entry(cache, &info->block_groups, list) { spin_lock(&cache->lock); printk(KERN_INFO "block group %llu has %llu bytes, %llu used " "%llu pinned %llu reserved\n", @@ -3342,7 +3412,10 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, btrfs_set_header_generation(buf, trans->transid); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); + + btrfs_set_lock_blocking(buf); btrfs_set_buffer_uptodate(buf); + if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { set_extent_dirty(&root->dirty_log_pages, buf->start, buf->start + buf->len - 1, GFP_NOFS); @@ -3351,6 +3424,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, buf->start + buf->len - 1, GFP_NOFS); } trans->blocks_used++; + /* this returns a buffer locked for blocking */ return buf; } @@ -3388,36 +3462,73 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, { u64 leaf_owner; u64 leaf_generation; + struct refsort *sorted; struct btrfs_key key; struct btrfs_file_extent_item *fi; int i; int nritems; int ret; + int refi = 0; + int slot; BUG_ON(!btrfs_is_leaf(leaf)); nritems = btrfs_header_nritems(leaf); leaf_owner = btrfs_header_owner(leaf); leaf_generation = btrfs_header_generation(leaf); + sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); + /* we do this loop twice. The first time we build a list + * of the extents we have a reference on, then we sort the list + * by bytenr. The second time around we actually do the + * extent freeing. 
+ */ for (i = 0; i < nritems; i++) { u64 disk_bytenr; cond_resched(); btrfs_item_key_to_cpu(leaf, &key, i); + + /* only extents have references, skip everything else */ if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) continue; + fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); + + /* inline extents live in the btree, they don't have refs */ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) continue; - /* - * FIXME make sure to insert a trans record that - * repeats the snapshot del on crash - */ + disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + + /* holes don't have refs */ if (disk_bytenr == 0) continue; + sorted[refi].bytenr = disk_bytenr; + sorted[refi].slot = i; + refi++; + } + + if (refi == 0) + goto out; + + sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); + + for (i = 0; i < refi; i++) { + u64 disk_bytenr; + + disk_bytenr = sorted[i].bytenr; + slot = sorted[i].slot; + + cond_resched(); + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) + continue; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + ret = __btrfs_free_extent(trans, root, disk_bytenr, btrfs_file_extent_disk_num_bytes(leaf, fi), leaf->start, leaf_owner, leaf_generation, @@ -3428,6 +3539,8 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, wake_up(&root->fs_info->transaction_throttle); cond_resched(); } +out: + kfree(sorted); return 0; } @@ -3437,9 +3550,25 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, { int i; int ret; - struct btrfs_extent_info *info = ref->extents; + struct btrfs_extent_info *info; + struct refsort *sorted; + + if (ref->nritems == 0) + return 0; + sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); for (i = 0; i < ref->nritems; i++) { + sorted[i].bytenr = ref->extents[i].bytenr; + sorted[i].slot = i; + } + sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); + + /* + * the items in the ref were sorted when the ref was inserted + * into the ref cache, so this is already in order + */ + for (i = 0; i < ref->nritems; i++) { + info = ref->extents + sorted[i].slot; ret = __btrfs_free_extent(trans, root, info->bytenr, info->num_bytes, ref->bytenr, ref->owner, ref->generation, @@ -3453,6 +3582,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, info++; } + kfree(sorted); return 0; } @@ -3497,6 +3627,152 @@ static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, } /* + * this is used while deleting old snapshots, and it drops the refs + * on a whole subtree starting from a level 1 node. + * + * The idea is to sort all the leaf pointers, and then drop the + * ref on all the leaves in order. Most of the time the leaves + * will have ref cache entries, so no leaf IOs will be required to + * find the extents they have references on. + * + * For each leaf, any references it has are also dropped in order + * + * This ends up dropping the references in something close to optimal + * order for reading and modifying the extent allocation tree. 
+ */ +static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path) +{ + u64 bytenr; + u64 root_owner; + u64 root_gen; + struct extent_buffer *eb = path->nodes[1]; + struct extent_buffer *leaf; + struct btrfs_leaf_ref *ref; + struct refsort *sorted = NULL; + int nritems = btrfs_header_nritems(eb); + int ret; + int i; + int refi = 0; + int slot = path->slots[1]; + u32 blocksize = btrfs_level_size(root, 0); + u32 refs; + + if (nritems == 0) + goto out; + + root_owner = btrfs_header_owner(eb); + root_gen = btrfs_header_generation(eb); + sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); + + /* + * step one, sort all the leaf pointers so we don't scribble + * randomly into the extent allocation tree + */ + for (i = slot; i < nritems; i++) { + sorted[refi].bytenr = btrfs_node_blockptr(eb, i); + sorted[refi].slot = i; + refi++; + } + + /* + * nritems won't be zero, but if we're picking up drop_snapshot + * after a crash, slot might be > 0, so double check things + * just in case. + */ + if (refi == 0) + goto out; + + sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); + + /* + * the first loop frees everything the leaves point to + */ + for (i = 0; i < refi; i++) { + u64 ptr_gen; + + bytenr = sorted[i].bytenr; + + /* + * check the reference count on this leaf. If it is > 1 + * we just decrement it below and don't update any + * of the refs the leaf points to. + */ + ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); + BUG_ON(ret); + if (refs != 1) + continue; + + ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); + + /* + * the leaf only had one reference, which means the + * only thing pointing to this leaf is the snapshot + * we're deleting. It isn't possible for the reference + * count to increase again later + * + * The reference cache is checked for the leaf, + * and if found we'll be able to drop any refs held by + * the leaf without needing to read it in. + */ + ref = btrfs_lookup_leaf_ref(root, bytenr); + if (ref && ref->generation != ptr_gen) { + btrfs_free_leaf_ref(root, ref); + ref = NULL; + } + if (ref) { + ret = cache_drop_leaf_ref(trans, root, ref); + BUG_ON(ret); + btrfs_remove_leaf_ref(root, ref); + btrfs_free_leaf_ref(root, ref); + } else { + /* + * the leaf wasn't in the reference cache, so + * we have to read it. + */ + leaf = read_tree_block(root, bytenr, blocksize, + ptr_gen); + ret = btrfs_drop_leaf_ref(trans, root, leaf); + BUG_ON(ret); + free_extent_buffer(leaf); + } + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + cond_resched(); + } + + /* + * run through the loop again to free the refs on the leaves. + * This is faster than doing it in the loop above because + * the leaves are likely to be clustered together. We end up + * working in nice chunks on the extent allocation tree. + */ + for (i = 0; i < refi; i++) { + bytenr = sorted[i].bytenr; + ret = __btrfs_free_extent(trans, root, bytenr, + blocksize, eb->start, + root_owner, root_gen, 0, 1); + BUG_ON(ret); + + atomic_inc(&root->fs_info->throttle_gen); + wake_up(&root->fs_info->transaction_throttle); + cond_resched(); + } +out: + kfree(sorted); + + /* + * update the path to show we've processed the entire level 1 + * node. This will get saved into the root's drop_snapshot_progress + * field so these drops are not repeated again if this transaction + * commits. 
+ */ + path->slots[1] = nritems; + return 0; +} + +/* * helper function for drop_snapshot, this walks down the tree dropping ref * counts as it goes. */ @@ -3511,7 +3787,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, struct extent_buffer *next; struct extent_buffer *cur; struct extent_buffer *parent; - struct btrfs_leaf_ref *ref; u32 blocksize; int ret; u32 refs; @@ -3538,17 +3813,46 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, if (path->slots[*level] >= btrfs_header_nritems(cur)) break; + + /* the new code goes down to level 1 and does all the + * leaves pointed to by that node in bulk. So, this check + * for level 0 will always be false. + * + * But, the disk format allows the drop_snapshot_progress + * field in the root to leave things in a state where + * a leaf will need cleaning up here. If someone crashes + * with the old code and then boots with the new code, + * we might find a leaf here. + */ if (*level == 0) { ret = btrfs_drop_leaf_ref(trans, root, cur); BUG_ON(ret); break; } + + /* + * once we get to level one, process the whole node + * at once, including everything below it. + */ + if (*level == 1) { + ret = drop_level_one_refs(trans, root, path); + BUG_ON(ret); + break; + } + bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); blocksize = btrfs_level_size(root, *level - 1); ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); BUG_ON(ret); + + /* + * if there is more than one reference, we don't need + * to read that node to drop any references it has. We + * just drop the ref we hold on that node and move on to the + * next slot in this level. + */ if (refs != 1) { parent = path->nodes[*level]; root_owner = btrfs_header_owner(parent); @@ -3567,46 +3871,12 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, continue; } + /* - * at this point, we have a single ref, and since the - * only place referencing this extent is a dead root - * the reference count should never go higher. - * So, we don't need to check it again + * we need to keep freeing things in the next level down.
+ * read the block and loop around to process it */ - if (*level == 1) { - ref = btrfs_lookup_leaf_ref(root, bytenr); - if (ref && ref->generation != ptr_gen) { - btrfs_free_leaf_ref(root, ref); - ref = NULL; - } - if (ref) { - ret = cache_drop_leaf_ref(trans, root, ref); - BUG_ON(ret); - btrfs_remove_leaf_ref(root, ref); - btrfs_free_leaf_ref(root, ref); - *level = 0; - break; - } - } - next = btrfs_find_tree_block(root, bytenr, blocksize); - if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) { - free_extent_buffer(next); - - next = read_tree_block(root, bytenr, blocksize, - ptr_gen); - cond_resched(); -#if 0 - /* - * this is a debugging check and can go away - * the ref should never go all the way down to 1 - * at this point - */ - ret = lookup_extent_ref(NULL, root, bytenr, blocksize, - &refs); - BUG_ON(ret); - WARN_ON(refs != 1); -#endif - } + next = read_tree_block(root, bytenr, blocksize, ptr_gen); WARN_ON(*level <= 0); if (path->nodes[*level-1]) free_extent_buffer(path->nodes[*level-1]); @@ -3631,11 +3901,16 @@ out: root_owner = btrfs_header_owner(parent); root_gen = btrfs_header_generation(parent); + /* + * cleanup and free the reference on the last node + * we processed + */ ret = __btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, root_owner, root_gen, *level, 1); free_extent_buffer(path->nodes[*level]); path->nodes[*level] = NULL; + *level += 1; BUG_ON(ret); @@ -3687,6 +3962,7 @@ static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, next = read_tree_block(root, bytenr, blocksize, ptr_gen); btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, &refs); @@ -3754,6 +4030,13 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { struct extent_buffer *node; struct btrfs_disk_key disk_key; + + /* + * there is more work to do in this level. 
+ * Update the drop_progress marker to reflect + * the work we've done so far, and then bump + * the slot number + */ node = path->nodes[i]; path->slots[i]++; *level = i; @@ -3765,6 +4048,11 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, return 0; } else { struct extent_buffer *parent; + + /* + * this whole node is done, free our reference + * on it and go up one level + */ if (path->nodes[*level] == root->node) parent = path->nodes[*level]; else @@ -4444,7 +4732,7 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, u64 lock_end = 0; u64 num_bytes; u64 ext_offset; - u64 first_pos; + u64 search_end = (u64)-1; u32 nritems; int nr_scaned = 0; int extent_locked = 0; @@ -4452,7 +4740,6 @@ static noinline int replace_one_extent(struct btrfs_trans_handle *trans, int ret; memcpy(&key, leaf_key, sizeof(key)); - first_pos = INT_LIMIT(loff_t) - extent_key->offset; if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { if (key.objectid < ref_path->owner_objectid || (key.objectid == ref_path->owner_objectid && @@ -4501,7 +4788,7 @@ next: if ((key.objectid > ref_path->owner_objectid) || (key.objectid == ref_path->owner_objectid && key.type > BTRFS_EXTENT_DATA_KEY) || - (key.offset >= first_pos + extent_key->offset)) + key.offset >= search_end) break; } @@ -4534,8 +4821,10 @@ next: num_bytes = btrfs_file_extent_num_bytes(leaf, fi); ext_offset = btrfs_file_extent_offset(leaf, fi); - if (first_pos > key.offset - ext_offset) - first_pos = key.offset - ext_offset; + if (search_end == (u64)-1) { + search_end = key.offset - ext_offset + + btrfs_file_extent_ram_bytes(leaf, fi); + } if (!extent_locked) { lock_start = key.offset; @@ -4724,7 +5013,7 @@ next: } skip: if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && - key.offset >= first_pos + extent_key->offset) + key.offset >= search_end) break; cond_resched(); @@ -4778,6 +5067,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, ref->bytenr = buf->start; ref->owner = btrfs_header_owner(buf); ref->generation = btrfs_header_generation(buf); + ret = btrfs_add_leaf_ref(root, ref, 0); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); @@ -5957,9 +6247,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); BUG_ON(!path); - btrfs_remove_free_space_cache(block_group); + spin_lock(&root->fs_info->block_group_cache_lock); rb_erase(&block_group->cache_node, &root->fs_info->block_group_cache_tree); + spin_unlock(&root->fs_info->block_group_cache_lock); + btrfs_remove_free_space_cache(block_group); down_write(&block_group->space_info->groups_sem); list_del(&block_group->list); up_write(&block_group->space_info->groups_sem); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e086d407f1f..37d43b516b7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -9,7 +9,6 @@ #include <linux/spinlock.h> #include <linux/blkdev.h> #include <linux/swap.h> -#include <linux/version.h> #include <linux/writeback.h> #include <linux/pagevec.h> #include "extent_io.h" @@ -31,7 +30,7 @@ static LIST_HEAD(buffers); static LIST_HEAD(states); #define LEAK_DEBUG 0 -#ifdef LEAK_DEBUG +#if LEAK_DEBUG static DEFINE_SPINLOCK(leak_lock); #endif @@ -120,7 +119,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, static struct extent_state *alloc_extent_state(gfp_t mask) { struct extent_state *state; -#ifdef LEAK_DEBUG +#if LEAK_DEBUG unsigned long flags; #endif @@ -130,7 +129,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) state->state = 0; 
state->private = 0; state->tree = NULL; -#ifdef LEAK_DEBUG +#if LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&state->leak_list, &states); spin_unlock_irqrestore(&leak_lock, flags); @@ -145,11 +144,11 @@ static void free_extent_state(struct extent_state *state) if (!state) return; if (atomic_dec_and_test(&state->refs)) { -#ifdef LEAK_DEBUG +#if LEAK_DEBUG unsigned long flags; #endif WARN_ON(state->tree); -#ifdef LEAK_DEBUG +#if LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_del(&state->leak_list); spin_unlock_irqrestore(&leak_lock, flags); @@ -2378,11 +2377,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, int scanned = 0; int range_whole = 0; - if (wbc->nonblocking && bdi_write_congested(bdi)) { - wbc->encountered_congestion = 1; - return 0; - } - pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ @@ -2855,6 +2849,98 @@ out: return sector; } +int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len, get_extent_t *get_extent) +{ + int ret; + u64 off = start; + u64 max = start + len; + u32 flags = 0; + u64 disko = 0; + struct extent_map *em = NULL; + int end = 0; + u64 em_start = 0, em_len = 0; + unsigned long emflags; + ret = 0; + + if (len == 0) + return -EINVAL; + + lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, + GFP_NOFS); + em = get_extent(inode, NULL, 0, off, max - off, 0); + if (!em) + goto out; + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + while (!end) { + off = em->start + em->len; + if (off >= max) + end = 1; + + em_start = em->start; + em_len = em->len; + + disko = 0; + flags = 0; + + switch (em->block_start) { + case EXTENT_MAP_LAST_BYTE: + end = 1; + flags |= FIEMAP_EXTENT_LAST; + break; + case EXTENT_MAP_HOLE: + flags |= FIEMAP_EXTENT_UNWRITTEN; + break; + case EXTENT_MAP_INLINE: + flags |= (FIEMAP_EXTENT_DATA_INLINE | + FIEMAP_EXTENT_NOT_ALIGNED); + break; + case EXTENT_MAP_DELALLOC: + flags |= (FIEMAP_EXTENT_DELALLOC | + FIEMAP_EXTENT_UNKNOWN); + break; + default: + disko = em->block_start; + break; + } + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) + flags |= FIEMAP_EXTENT_ENCODED; + + emflags = em->flags; + free_extent_map(em); + em = NULL; + + if (!end) { + em = get_extent(inode, NULL, 0, off, max - off, 0); + if (!em) + goto out; + if (IS_ERR(em)) { + ret = PTR_ERR(em); + goto out; + } + emflags = em->flags; + } + if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) { + flags |= FIEMAP_EXTENT_LAST; + end = 1; + } + + ret = fiemap_fill_next_extent(fieinfo, em_start, disko, + em_len, flags); + if (ret) + goto out_free; + } +out_free: + free_extent_map(em); +out: + unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, + GFP_NOFS); + return ret; +} + static inline struct page *extent_buffer_page(struct extent_buffer *eb, unsigned long i) { @@ -2892,15 +2978,17 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, gfp_t mask) { struct extent_buffer *eb = NULL; -#ifdef LEAK_DEBUG +#if LEAK_DEBUG unsigned long flags; #endif eb = kmem_cache_zalloc(extent_buffer_cache, mask); eb->start = start; eb->len = len; - mutex_init(&eb->mutex); -#ifdef LEAK_DEBUG + spin_lock_init(&eb->lock); + init_waitqueue_head(&eb->lock_wq); + +#if LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); list_add(&eb->leak_list, &buffers); spin_unlock_irqrestore(&leak_lock, flags); @@ -2912,7 +3000,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, static void 
__free_extent_buffer(struct extent_buffer *eb) { -#ifdef LEAK_DEBUG +#if LEAK_DEBUG unsigned long flags; spin_lock_irqsave(&leak_lock, flags); list_del(&eb->leak_list); @@ -2980,8 +3068,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, unlock_page(p); } if (uptodate) - eb->flags |= EXTENT_UPTODATE; - eb->flags |= EXTENT_BUFFER_FILLED; + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); spin_lock(&tree->buffer_lock); exists = buffer_tree_insert(tree, start, &eb->rb_node); @@ -3135,7 +3222,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, unsigned long num_pages; num_pages = num_extent_pages(eb->start, eb->len); - eb->flags &= ~EXTENT_UPTODATE; + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); @@ -3206,7 +3293,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, struct page *page; int pg_uptodate = 1; - if (eb->flags & EXTENT_UPTODATE) + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 1; ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, @@ -3242,7 +3329,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, struct bio *bio = NULL; unsigned long bio_flags = 0; - if (eb->flags & EXTENT_UPTODATE) + if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, @@ -3273,7 +3360,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (all_uptodate) { if (start_i == 0) - eb->flags |= EXTENT_UPTODATE; + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); goto unlock_exit; } @@ -3309,7 +3396,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } if (!ret) - eb->flags |= EXTENT_UPTODATE; + set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); return ret; unlock_exit: @@ -3406,7 +3493,6 @@ int map_extent_buffer(struct extent_buffer *eb, unsigned long start, unmap_extent_buffer(eb, eb->map_token, km); eb->map_token = NULL; save = 1; - WARN_ON(!mutex_is_locked(&eb->mutex)); } err = map_private_extent_buffer(eb, start, min_len, token, map, map_start, map_len, km); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c5b483a7913..1f9df88afbf 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -22,6 +22,10 @@ /* flags for bio submission */ #define EXTENT_BIO_COMPRESSED 1 +/* these are bit numbers for test/set bit */ +#define EXTENT_BUFFER_UPTODATE 0 +#define EXTENT_BUFFER_BLOCKING 1 + /* * page->private values. Every page that is controlled by the extent * map has page->private set to one. 
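Note that EXTENT_BUFFER_UPTODATE and EXTENT_BUFFER_BLOCKING are bit numbers, not masks, so they are meant for the atomic bitop helpers rather than bitwise AND/OR. A short usage sketch (assuming a struct extent_buffer *eb as declared in the hunk below):

	/* publish that the buffer contents are valid */
	set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);

	/* readers check the same bit atomically */
	if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
		/* safe to use the cached contents */;

	/* drop the claim again, e.g. after an IO error */
	clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);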
@@ -95,11 +99,19 @@ struct extent_buffer { unsigned long map_start; unsigned long map_len; struct page *first_page; + unsigned long bflags; atomic_t refs; - int flags; struct list_head leak_list; struct rb_node rb_node; - struct mutex mutex; + + /* the spinlock is used to protect most operations */ + spinlock_t lock; + + /* + * when we keep the lock held while blocking, waiters go onto + * the wq + */ + wait_queue_head_t lock_wq; }; struct extent_map_tree; @@ -193,6 +205,8 @@ int extent_commit_write(struct extent_io_tree *tree, unsigned from, unsigned to); sector_t extent_bmap(struct address_space *mapping, sector_t iblock, get_extent_t *get_extent); +int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len, get_extent_t *get_extent); int set_range_dirty(struct extent_io_tree *tree, u64 start, u64 end); int set_state_private(struct extent_io_tree *tree, u64 start, u64 private); int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 4a83e33ada3..50da69da20c 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -3,7 +3,6 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/spinlock.h> -#include <linux/version.h> #include <linux/hardirq.h> #include "extent_map.h" diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 90268334145..3e8023efaff 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -29,7 +29,6 @@ #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> -#include <linux/version.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -1215,10 +1214,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) } mutex_unlock(&root->fs_info->trans_mutex); - root->fs_info->tree_log_batch++; + root->log_batch++; filemap_fdatawrite(inode->i_mapping); btrfs_wait_ordered_range(inode, 0, (u64)-1); - root->fs_info->tree_log_batch++; + root->log_batch++; /* * ok we haven't committed the transaction yet, lets do a commit diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8adfe059ab4..8f0706210a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -34,7 +34,6 @@ #include <linux/statfs.h> #include <linux/compat.h> #include <linux/bit_spinlock.h> -#include <linux/version.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <linux/falloc.h> @@ -51,6 +50,7 @@ #include "tree-log.h" #include "ref-cache.h" #include "compression.h" +#include "locking.h" struct btrfs_iget_args { u64 ino; @@ -91,6 +91,16 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); +static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) +{ + int err; + + err = btrfs_init_acl(inode, dir); + if (!err) + err = btrfs_xattr_security_init(inode, dir); + return err; +} + /* * a very lame attempt at stopping writes when the FS is 85% full. There * are countless ways this is incorrect, but it is better than nothing. @@ -350,6 +360,19 @@ again: nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); + /* + * we don't want to send crud past the end of i_size through + * compression, that's just a waste of CPU time. So, if the + * end of the file is before the start of our current + * requested range of bytes, we bail out to the uncompressed + * cleanup code that can deal with all of this. 
+ * + * It isn't really the fastest way to fix things, but this is a + * very uncommon corner. + */ + if (actual_end <= start) + goto cleanup_and_bail_uncompressed; + total_compressed = actual_end - start; /* we want to make sure that amount of ram required to uncompress @@ -494,6 +517,7 @@ again: goto again; } } else { +cleanup_and_bail_uncompressed: /* * No compression, but we still need to write the pages in * the file we've been given so far. redirty the locked @@ -1324,12 +1348,11 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_offset, struct list_head *list) { - struct list_head *cur; struct btrfs_ordered_sum *sum; btrfs_set_trans_block_group(trans, inode); - list_for_each(cur, list) { - sum = list_entry(cur, struct btrfs_ordered_sum, list); + + list_for_each_entry(sum, list, list) { btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root->fs_info->csum_root, sum); } @@ -2013,6 +2036,7 @@ void btrfs_read_locked_inode(struct inode *inode) BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); alloc_group_block = btrfs_inode_block_group(leaf, inode_item); + BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_group_block, 0); btrfs_free_path(path); @@ -2039,6 +2063,7 @@ void btrfs_read_locked_inode(struct inode *inode) inode->i_mapping->backing_dev_info = &root->fs_info->bdi; break; default: + inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); break; } @@ -2108,6 +2133,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, goto failed; } + btrfs_unlock_up_safe(path, 1); leaf = path->nodes[0]; inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); @@ -2429,6 +2455,8 @@ next_node: ref->generation = leaf_gen; ref->nritems = 0; + btrfs_sort_leaf_ref(ref); + ret = btrfs_add_leaf_ref(root, ref, 0); WARN_ON(ret); btrfs_free_leaf_ref(root, ref); @@ -2476,7 +2504,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_path *path; struct btrfs_key key; struct btrfs_key found_key; - u32 found_type; + u32 found_type = (u8)-1; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; u64 extent_start = 0; @@ -2663,6 +2691,8 @@ next: if (pending_del_nr) goto del_pending; btrfs_release_path(root, path); + if (found_type == BTRFS_INODE_ITEM_KEY) + break; goto search_again; } @@ -2679,6 +2709,8 @@ del_pending: BUG_ON(ret); pending_del_nr = 0; btrfs_release_path(root, path); + if (found_type == BTRFS_INODE_ITEM_KEY) + break; goto search_again; } } @@ -3265,7 +3297,7 @@ skip: /* Reached end of directory/root. Bump pos past the last item. 
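For the f_pos clamp in the hunk just below: INT_LIMIT(t) is the include/linux/fs.h helper for the largest positive value of a signed integer type t. Quoted from memory, so treat the exact definition as an assumption:

/* include/linux/fs.h: maximum positive value of signed type x */
#define INT_LIMIT(x)	(~((x)1 << (sizeof(x)*8 - 1)))

/* with a 64-bit off_t this is 0x7fffffffffffffff, an f_pos safely
 * past every real directory index */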
*/ if (key_type == BTRFS_DIR_INDEX_KEY) - filp->f_pos = INT_LIMIT(typeof(filp->f_pos)); + filp->f_pos = INT_LIMIT(off_t); else filp->f_pos++; nopos: @@ -3458,7 +3490,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, root->highest_inode = objectid; inode->i_uid = current_fsuid(); - inode->i_gid = current_fsgid(); + + if (dir && (dir->i_mode & S_ISGID)) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current_fsgid(); + inode->i_mode = mode; inode->i_ino = objectid; inode_set_bytes(inode, 0); @@ -3586,7 +3625,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_inode_security(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -3649,7 +3688,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_inode_security(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -3772,7 +3811,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_inode_security(inode, dir); if (err) goto out_fail; @@ -4158,9 +4197,10 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, return -EINVAL; } -static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock) +static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) { - return extent_bmap(mapping, iblock, btrfs_get_extent); + return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent); } int btrfs_readpage(struct file *file, struct page *page) @@ -4733,7 +4773,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_inode_security(inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4987,13 +5027,24 @@ static struct extent_io_ops btrfs_extent_io_ops = { .clear_bit_hook = btrfs_clear_bit_hook, }; +/* + * btrfs doesn't support the bmap operation because swapfiles + * use bmap to make a mapping of extents in the file. They assume + * these extents won't change over the life of the file and they + * use the bmap result to do IO directly to the drive. + * + * the btrfs bmap call would return logical addresses that aren't + * suitable for IO and they also will change frequently as COW + * operations happen. So, swapfile + btrfs == corruption. + * + * For now we're avoiding this by dropping bmap. 
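Applications that used bmap to inspect file layout can switch to the fiemap ioctl wired up elsewhere in this patch set. A minimal userspace sketch of dumping a file's first extents (error handling trimmed; the 32-extent buffer size is arbitrary):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	unsigned int i;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* header plus room for 32 extent records */
	fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;		/* map the whole file */
	fm->fm_extent_count = 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("fiemap");
		return 1;
	}
	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("logical %llu physical %llu len %llu flags 0x%x\n",
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_physical,
		       (unsigned long long)fm->fm_extents[i].fe_length,
		       fm->fm_extents[i].fe_flags);
	free(fm);
	return 0;
}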
+ */ static struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, .readpages = btrfs_readpages, .sync_page = block_sync_page, - .bmap = btrfs_bmap, .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, @@ -5017,6 +5068,7 @@ static struct inode_operations btrfs_file_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, .fallocate = btrfs_fallocate, + .fiemap = btrfs_fiemap, }; static struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -5032,4 +5084,8 @@ static struct inode_operations btrfs_symlink_inode_operations = { .follow_link = page_follow_link_light, .put_link = page_put_link, .permission = btrfs_permission, + .setxattr = btrfs_setxattr, + .getxattr = btrfs_getxattr, + .listxattr = btrfs_listxattr, + .removexattr = btrfs_removexattr, }; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c2aa33e3feb..988fdc8b49e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -38,7 +38,6 @@ #include <linux/compat.h> #include <linux/bit_spinlock.h> #include <linux/security.h> -#include <linux/version.h> #include <linux/xattr.h> #include <linux/vmalloc.h> #include "compat.h" diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 39bae7761db..68fd9ccf180 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -26,45 +26,215 @@ #include "locking.h" /* - * locks the per buffer mutex in an extent buffer. This uses adaptive locks - * and the spin is not tuned very extensively. The spinning does make a big - * difference in almost every workload, but spinning for the right amount of - * time needs some help. - * - * In general, we want to spin as long as the lock holder is doing btree - * searches, and we should give up if they are in more expensive code. + * btrfs_header_level() isn't free, so don't call it when lockdep isn't + * on */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void spin_nested(struct extent_buffer *eb) +{ + spin_lock_nested(&eb->lock, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); +} +#else +static inline void spin_nested(struct extent_buffer *eb) +{ + spin_lock(&eb->lock); +} +#endif -int btrfs_tree_lock(struct extent_buffer *eb) +/* + * Setting a lock to blocking will drop the spinlock and set the + * flag that forces other procs who want the lock to wait. After + * this you can safely schedule with the lock held. + */ +void btrfs_set_lock_blocking(struct extent_buffer *eb) { - int i; + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { + set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); + spin_unlock(&eb->lock); + } + /* exit with the spin lock released and the bit set */ +} - if (mutex_trylock(&eb->mutex)) - return 0; +/* + * clearing the blocking flag will take the spinlock again. + * After this you can't safely schedule + */ +void btrfs_clear_lock_blocking(struct extent_buffer *eb) +{ + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { + spin_nested(eb); + clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); + smp_mb__after_clear_bit(); + } + /* exit with the spin lock held */ +} + +/* + * unfortunately, many of the places that currently set a lock to blocking + * don't end up blocking for very long, and often they don't block + * at all. For a dbench 50 run, if we don't spin on the blocking bit + * at all, the context switch rate can jump up to 400,000/sec or more.
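That spin-before-sleep trade-off can be modelled outside the kernel. A rough userspace analogue using C11 atomics (illustrative only; the real code couples this flag with eb->lock and a wait queue, and cpu_relax() has no exact userspace equivalent):

#include <stdatomic.h>
#include <sched.h>

/* spin briefly on the "blocking" flag; returns 1 if it cleared while
 * we spun (caller should retry the lock), 0 if the caller should
 * give up and sleep instead */
static int spin_on_blocking(const atomic_int *blocking)
{
	int i;

	for (i = 0; i < 512; i++) {
		if (!atomic_load_explicit(blocking, memory_order_acquire))
			return 1;
		sched_yield();	/* crude stand-in for cpu_relax() */
	}
	return 0;
}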
+ * + * So, we're still stuck with this crummy spin on the blocking bit, + * at least until the most common causes of the short blocks + * can be dealt with. + */ +static int btrfs_spin_on_block(struct extent_buffer *eb) +{ + int i; for (i = 0; i < 512; i++) { cpu_relax(); - if (mutex_trylock(&eb->mutex)) + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + return 1; + if (need_resched()) + break; + } + return 0; +} + +/* + * This is somewhat different from trylock. It will take the + * spinlock but if it finds the lock is set to blocking, it will + * return without the lock held. + * + * returns 1 if it was able to take the lock and zero otherwise + * + * After this call, scheduling is not safe without first calling + * btrfs_set_lock_blocking() + */ +int btrfs_try_spin_lock(struct extent_buffer *eb) +{ + int i; + + spin_nested(eb); + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + return 1; + spin_unlock(&eb->lock); + + /* spin for a bit on the BLOCKING flag */ + for (i = 0; i < 2; i++) { + if (!btrfs_spin_on_block(eb)) + break; + + spin_nested(eb); + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + return 1; + spin_unlock(&eb->lock); + } + return 0; +} + +/* + * the autoremove wake function will return 0 if it tried to wake up + * a process that was already awake, which means that process won't + * count as an exclusive wakeup. The waitq code will continue waking + * procs until it finds one that was actually sleeping. + * + * For btrfs, this isn't quite what we want. We want a single proc + * to be notified that the lock is ready for taking. If that proc + * already happens to be awake, great, it will loop around and try for + * the lock. + * + * So, btrfs_wake_function always returns 1, even when the proc that we + * tried to wake up was already awake. + */ +static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, + int sync, void *key) +{ + autoremove_wake_function(wait, mode, sync, key); + return 1; +} + +/* + * returns with the extent buffer spinlocked. + * + * This will spin and/or wait as required to take the lock, and then + * return with the spinlock held. + * + * After this call, scheduling is not safe without first calling + * btrfs_set_lock_blocking() + */ +int btrfs_tree_lock(struct extent_buffer *eb) +{ + DEFINE_WAIT(wait); + wait.func = btrfs_wake_function; + + while (1) { + spin_nested(eb); + + /* nobody is blocking, exit with the spinlock held */ + if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) return 0; + + /* + * we have the spinlock, but the real owner is blocking. + * wait for them + */ + spin_unlock(&eb->lock); + + /* + * spin for a bit, and if the blocking flag goes away, + * loop around + */ + if (btrfs_spin_on_block(eb)) + continue; + + prepare_to_wait_exclusive(&eb->lock_wq, &wait, + TASK_UNINTERRUPTIBLE); + + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + schedule(); + + finish_wait(&eb->lock_wq, &wait); } - cpu_relax(); - mutex_lock_nested(&eb->mutex, BTRFS_MAX_LEVEL - btrfs_header_level(eb)); return 0; } +/* + * Very quick trylock, this does not spin or schedule. It returns + * 1 with the spinlock held if it was able to take the lock, or it + * returns zero if it was unable to take the lock.
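Why returning 1 from btrfs_wake_function matters: the waker's loop only decrements its exclusive budget when the wake function reports success. A simplified sketch of that loop, modelled on __wake_up_common() from kernel/sched.c of this era (an illustration, not part of the patch; assumes <linux/wait.h> and <linux/sched.h>):

static void wake_up_sketch(wait_queue_head_t *q, int nr_exclusive)
{
        wait_queue_t *curr, *next;

        list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
                unsigned flags = curr->flags;

                /* btrfs_wake_function always returns 1 here, so one
                 * notification always consumes the one exclusive slot */
                if (curr->func(curr, TASK_NORMAL, 0, NULL) &&
                    (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                        break;
        }
}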
+ * + * After this call, scheduling is not safe without first calling + * btrfs_set_lock_blocking() + */ int btrfs_try_tree_lock(struct extent_buffer *eb) { - return mutex_trylock(&eb->mutex); + if (spin_trylock(&eb->lock)) { + if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { + /* + * we've got the spinlock, but the real owner is + * blocking. Drop the spinlock and return failure + */ + spin_unlock(&eb->lock); + return 0; + } + return 1; + } + /* someone else has the spinlock, give up */ + return 0; } int btrfs_tree_unlock(struct extent_buffer *eb) { - mutex_unlock(&eb->mutex); + /* + * if we were a blocking owner, we don't have the spinlock held + * just clear the bit and look for waiters + */ + if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) + smp_mb__after_clear_bit(); + else + spin_unlock(&eb->lock); + + if (waitqueue_active(&eb->lock_wq)) + wake_up(&eb->lock_wq); return 0; } int btrfs_tree_locked(struct extent_buffer *eb) { - return mutex_is_locked(&eb->mutex); + return test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags) || + spin_is_locked(&eb->lock); } /* @@ -75,12 +245,14 @@ int btrfs_path_lock_waiting(struct btrfs_path *path, int level) { int i; struct extent_buffer *eb; + for (i = level; i <= level + 1 && i < BTRFS_MAX_LEVEL; i++) { eb = path->nodes[i]; if (!eb) break; smp_mb(); - if (!list_empty(&eb->mutex.wait_list)) + if (spin_is_contended(&eb->lock) || + waitqueue_active(&eb->lock_wq)) return 1; } return 0; diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index bc1faef1251..d92e707f587 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -22,6 +22,12 @@ int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_tree_locked(struct extent_buffer *eb); + int btrfs_try_tree_lock(struct extent_buffer *eb); +int btrfs_try_spin_lock(struct extent_buffer *eb); + int btrfs_path_lock_waiting(struct btrfs_path *path, int level); + +void btrfs_set_lock_blocking(struct extent_buffer *eb); +void btrfs_clear_lock_blocking(struct extent_buffer *eb); #endif diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a2094017027..77c2411a5f0 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -613,7 +613,6 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, struct btrfs_sector_sum *sector_sums; struct btrfs_ordered_extent *ordered; struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; - struct list_head *cur; unsigned long num_sectors; unsigned long i; u32 sectorsize = BTRFS_I(inode)->root->sectorsize; @@ -624,8 +623,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, return 1; mutex_lock(&tree->mutex); - list_for_each_prev(cur, &ordered->list) { - ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); + list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { if (disk_bytenr >= ordered_sum->bytenr) { num_sectors = ordered_sum->len / sectorsize; sector_sums = ordered_sum->sums; diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index 6f0acc4c9ea..d0cc62bccb9 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c @@ -17,6 +17,7 @@ */ #include <linux/sched.h> +#include <linux/sort.h> #include "ctree.h" #include "ref-cache.h" #include "transaction.h" diff --git a/fs/btrfs/ref-cache.h b/fs/btrfs/ref-cache.h index 16f3183d7c5..bc283ad2db7 100644 --- a/fs/btrfs/ref-cache.h +++ b/fs/btrfs/ref-cache.h @@ -73,5 +73,4 @@ int btrfs_add_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref, int
btrfs_remove_leaf_refs(struct btrfs_root *root, u64 max_root_gen, int shared); int btrfs_remove_leaf_ref(struct btrfs_root *root, struct btrfs_leaf_ref *ref); - #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index db9fb3bc1e3..f3fd7e2cbc3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -37,7 +37,6 @@ #include <linux/ctype.h> #include <linux/namei.h> #include <linux/miscdevice.h> -#include <linux/version.h> #include <linux/magic.h> #include "compat.h" #include "ctree.h" @@ -583,17 +582,18 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, struct btrfs_ioctl_vol_args *vol; struct btrfs_fs_devices *fs_devices; int ret = -ENOTTY; - int len; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol = kmalloc(sizeof(*vol), GFP_KERNEL); + if (!vol) + return -ENOMEM; + if (copy_from_user(vol, (void __user *)arg, sizeof(*vol))) { ret = -EFAULT; goto out; } - len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); switch (cmd) { case BTRFS_IOC_SCAN_DEV: diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8a08f944334..919172de5c9 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -852,11 +852,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, { struct btrfs_pending_snapshot *pending; struct list_head *head = &trans->transaction->pending_snapshots; - struct list_head *cur; int ret; - list_for_each(cur, head) { - pending = list_entry(cur, struct btrfs_pending_snapshot, list); + list_for_each_entry(pending, head, list) { ret = create_pending_snapshot(trans, fs_info, pending); BUG_ON(ret); } diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 3e8358c3616..98d25fa4570 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -74,6 +74,7 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, u32 nritems; root_node = btrfs_lock_root_node(root); + btrfs_set_lock_blocking(root_node); nritems = btrfs_header_nritems(root_node); root->defrag_max.objectid = 0; /* from above we know this is not a leaf */ diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d81cda2e077..20794290256 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -78,104 +78,6 @@ static int link_to_fixup_dir(struct btrfs_trans_handle *trans, */ /* - * btrfs_add_log_tree adds a new per-subvolume log tree into the - * tree of log tree roots. This must be called with a tree log transaction - * running (see start_log_trans). 
- */ -static int btrfs_add_log_tree(struct btrfs_trans_handle *trans, - struct btrfs_root *root) -{ - struct btrfs_key key; - struct btrfs_root_item root_item; - struct btrfs_inode_item *inode_item; - struct extent_buffer *leaf; - struct btrfs_root *new_root = root; - int ret; - u64 objectid = root->root_key.objectid; - - leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0, - BTRFS_TREE_LOG_OBJECTID, - trans->transid, 0, 0, 0); - if (IS_ERR(leaf)) { - ret = PTR_ERR(leaf); - return ret; - } - - btrfs_set_header_nritems(leaf, 0); - btrfs_set_header_level(leaf, 0); - btrfs_set_header_bytenr(leaf, leaf->start); - btrfs_set_header_generation(leaf, trans->transid); - btrfs_set_header_owner(leaf, BTRFS_TREE_LOG_OBJECTID); - - write_extent_buffer(leaf, root->fs_info->fsid, - (unsigned long)btrfs_header_fsid(leaf), - BTRFS_FSID_SIZE); - btrfs_mark_buffer_dirty(leaf); - - inode_item = &root_item.inode; - memset(inode_item, 0, sizeof(*inode_item)); - inode_item->generation = cpu_to_le64(1); - inode_item->size = cpu_to_le64(3); - inode_item->nlink = cpu_to_le32(1); - inode_item->nbytes = cpu_to_le64(root->leafsize); - inode_item->mode = cpu_to_le32(S_IFDIR | 0755); - - btrfs_set_root_bytenr(&root_item, leaf->start); - btrfs_set_root_generation(&root_item, trans->transid); - btrfs_set_root_level(&root_item, 0); - btrfs_set_root_refs(&root_item, 0); - btrfs_set_root_used(&root_item, 0); - - memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); - root_item.drop_level = 0; - - btrfs_tree_unlock(leaf); - free_extent_buffer(leaf); - leaf = NULL; - - btrfs_set_root_dirid(&root_item, 0); - - key.objectid = BTRFS_TREE_LOG_OBJECTID; - key.offset = objectid; - btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); - ret = btrfs_insert_root(trans, root->fs_info->log_root_tree, &key, - &root_item); - if (ret) - goto fail; - - new_root = btrfs_read_fs_root_no_radix(root->fs_info->log_root_tree, - &key); - BUG_ON(!new_root); - - WARN_ON(root->log_root); - root->log_root = new_root; - - /* - * log trees do not get reference counted because they go away - * before a real commit is actually done. They do store pointers - * to file data extents, and those reference counts still get - * updated (along with back refs to the log tree). - */ - new_root->ref_cows = 0; - new_root->last_trans = trans->transid; - - /* - * we need to make sure the root block for this new tree - * is marked as dirty in the dirty_log_pages tree. This - * is how it gets flushed down to disk at tree log commit time. 
- * - * the tree logging mutex keeps others from coming in and changing - * the new_root->node, so we can safely access it here - */ - set_extent_dirty(&new_root->dirty_log_pages, new_root->node->start, - new_root->node->start + new_root->node->len - 1, - GFP_NOFS); - -fail: - return ret; -} - -/* * start a sub transaction and setup the log tree * this increments the log tree writer count to make the people * syncing the tree wait for us to finish @@ -184,6 +86,14 @@ static int start_log_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { int ret; + + mutex_lock(&root->log_mutex); + if (root->log_root) { + root->log_batch++; + atomic_inc(&root->log_writers); + mutex_unlock(&root->log_mutex); + return 0; + } mutex_lock(&root->fs_info->tree_log_mutex); if (!root->fs_info->log_root_tree) { ret = btrfs_init_log_root_tree(trans, root->fs_info); @@ -193,9 +103,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans, ret = btrfs_add_log_tree(trans, root); BUG_ON(ret); } - atomic_inc(&root->fs_info->tree_log_writers); - root->fs_info->tree_log_batch++; mutex_unlock(&root->fs_info->tree_log_mutex); + root->log_batch++; + atomic_inc(&root->log_writers); + mutex_unlock(&root->log_mutex); return 0; } @@ -212,13 +123,12 @@ static int join_running_log_trans(struct btrfs_root *root) if (!root->log_root) return -ENOENT; - mutex_lock(&root->fs_info->tree_log_mutex); + mutex_lock(&root->log_mutex); if (root->log_root) { ret = 0; - atomic_inc(&root->fs_info->tree_log_writers); - root->fs_info->tree_log_batch++; + atomic_inc(&root->log_writers); } - mutex_unlock(&root->fs_info->tree_log_mutex); + mutex_unlock(&root->log_mutex); return ret; } @@ -228,10 +138,11 @@ static int join_running_log_trans(struct btrfs_root *root) */ static int end_log_trans(struct btrfs_root *root) { - atomic_dec(&root->fs_info->tree_log_writers); - smp_mb(); - if (waitqueue_active(&root->fs_info->tree_log_wait)) - wake_up(&root->fs_info->tree_log_wait); + if (atomic_dec_and_test(&root->log_writers)) { + smp_mb(); + if (waitqueue_active(&root->log_writer_wait)) + wake_up(&root->log_writer_wait); + } return 0; } @@ -1704,6 +1615,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, root, next); + btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1750,6 +1662,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[*level]; btrfs_tree_lock(next); clean_tree_block(trans, root, next); + btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1807,6 +1720,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, root, next); + btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1879,6 +1793,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, btrfs_tree_lock(next); clean_tree_block(trans, log, next); + btrfs_set_lock_blocking(next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1902,26 +1817,65 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, } } btrfs_free_path(path); - if (wc->free) - free_extent_buffer(log->node); return ret; } -static int wait_log_commit(struct btrfs_root *log) +/* + * helper function to update the item for a given subvolumes log root + * in the tree of log roots + */ +static int update_log_root(struct btrfs_trans_handle *trans, + struct 
btrfs_root *log) +{ + int ret; + + if (log->log_transid == 1) { + /* insert root item on the first sync */ + ret = btrfs_insert_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + } else { + ret = btrfs_update_root(trans, log->fs_info->log_root_tree, + &log->root_key, &log->root_item); + } + return ret; +} + +static int wait_log_commit(struct btrfs_root *root, unsigned long transid) { DEFINE_WAIT(wait); - u64 transid = log->fs_info->tree_log_transid; + int index = transid % 2; + /* + * we only allow two pending log transactions at a time, + * so we know that if ours is more than 2 older than the + * current transaction, we're done + */ do { - prepare_to_wait(&log->fs_info->tree_log_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&log->fs_info->tree_log_mutex); - if (atomic_read(&log->fs_info->tree_log_commit)) + prepare_to_wait(&root->log_commit_wait[index], + &wait, TASK_UNINTERRUPTIBLE); + mutex_unlock(&root->log_mutex); + if (root->log_transid < transid + 2 && + atomic_read(&root->log_commit[index])) schedule(); - finish_wait(&log->fs_info->tree_log_wait, &wait); - mutex_lock(&log->fs_info->tree_log_mutex); - } while (transid == log->fs_info->tree_log_transid && - atomic_read(&log->fs_info->tree_log_commit)); + finish_wait(&root->log_commit_wait[index], &wait); + mutex_lock(&root->log_mutex); + } while (root->log_transid < transid + 2 && + atomic_read(&root->log_commit[index])); + return 0; +} + +static int wait_for_writer(struct btrfs_root *root) +{ + DEFINE_WAIT(wait); + while (atomic_read(&root->log_writers)) { + prepare_to_wait(&root->log_writer_wait, + &wait, TASK_UNINTERRUPTIBLE); + mutex_unlock(&root->log_mutex); + if (atomic_read(&root->log_writers)) + schedule(); + mutex_lock(&root->log_mutex); + finish_wait(&root->log_writer_wait, &wait); + } return 0; } @@ -1933,57 +1887,114 @@ static int wait_log_commit(struct btrfs_root *log) int btrfs_sync_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) { + int index1; + int index2; int ret; - unsigned long batch; struct btrfs_root *log = root->log_root; + struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; - mutex_lock(&log->fs_info->tree_log_mutex); - if (atomic_read(&log->fs_info->tree_log_commit)) { - wait_log_commit(log); - goto out; + mutex_lock(&root->log_mutex); + index1 = root->log_transid % 2; + if (atomic_read(&root->log_commit[index1])) { + wait_log_commit(root, root->log_transid); + mutex_unlock(&root->log_mutex); + return 0; } - atomic_set(&log->fs_info->tree_log_commit, 1); + atomic_set(&root->log_commit[index1], 1); + + /* wait for previous tree log sync to complete */ + if (atomic_read(&root->log_commit[(index1 + 1) % 2])) + wait_log_commit(root, root->log_transid - 1); while (1) { - batch = log->fs_info->tree_log_batch; - mutex_unlock(&log->fs_info->tree_log_mutex); + unsigned long batch = root->log_batch; + mutex_unlock(&root->log_mutex); schedule_timeout_uninterruptible(1); - mutex_lock(&log->fs_info->tree_log_mutex); - - while (atomic_read(&log->fs_info->tree_log_writers)) { - DEFINE_WAIT(wait); - prepare_to_wait(&log->fs_info->tree_log_wait, &wait, - TASK_UNINTERRUPTIBLE); - mutex_unlock(&log->fs_info->tree_log_mutex); - if (atomic_read(&log->fs_info->tree_log_writers)) - schedule(); - mutex_lock(&log->fs_info->tree_log_mutex); - finish_wait(&log->fs_info->tree_log_wait, &wait); - } - if (batch == log->fs_info->tree_log_batch) + mutex_lock(&root->log_mutex); + wait_for_writer(root); + if (batch == root->log_batch) break; } ret = 
btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); BUG_ON(ret); - ret = btrfs_write_and_wait_marked_extents(root->fs_info->log_root_tree, - &root->fs_info->log_root_tree->dirty_log_pages); + + btrfs_set_root_bytenr(&log->root_item, log->node->start); + btrfs_set_root_generation(&log->root_item, trans->transid); + btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); + + root->log_batch = 0; + root->log_transid++; + log->log_transid = root->log_transid; + smp_mb(); + /* + * log tree has been flushed to disk, new modifications of + * the log will be written to new positions. so it's safe to + * allow log writers to go in. + */ + mutex_unlock(&root->log_mutex); + + mutex_lock(&log_root_tree->log_mutex); + log_root_tree->log_batch++; + atomic_inc(&log_root_tree->log_writers); + mutex_unlock(&log_root_tree->log_mutex); + + ret = update_log_root(trans, log); + BUG_ON(ret); + + mutex_lock(&log_root_tree->log_mutex); + if (atomic_dec_and_test(&log_root_tree->log_writers)) { + smp_mb(); + if (waitqueue_active(&log_root_tree->log_writer_wait)) + wake_up(&log_root_tree->log_writer_wait); + } + + index2 = log_root_tree->log_transid % 2; + if (atomic_read(&log_root_tree->log_commit[index2])) { + wait_log_commit(log_root_tree, log_root_tree->log_transid); + mutex_unlock(&log_root_tree->log_mutex); + goto out; + } + atomic_set(&log_root_tree->log_commit[index2], 1); + + if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) + wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); + + wait_for_writer(log_root_tree); + + ret = btrfs_write_and_wait_marked_extents(log_root_tree, + &log_root_tree->dirty_log_pages); BUG_ON(ret); btrfs_set_super_log_root(&root->fs_info->super_for_commit, - log->fs_info->log_root_tree->node->start); + log_root_tree->node->start); btrfs_set_super_log_root_level(&root->fs_info->super_for_commit, - btrfs_header_level(log->fs_info->log_root_tree->node)); + btrfs_header_level(log_root_tree->node)); + + log_root_tree->log_batch = 0; + log_root_tree->log_transid++; + smp_mb(); + + mutex_unlock(&log_root_tree->log_mutex); + + /* + * nobody else is going to jump in and write the ctree + * super here because the log_commit atomic below is protecting + * us. We must be called with a transaction handle pinning + * the running transaction open, so a full commit can't hop + * in and cause problems either.
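The index1/index2 arithmetic is easier to see with numbers. Two hypothetical helpers restate the invariant used by wait_log_commit() above:

/*
 * Worked example (hypothetical values): log_transid 7 commits in slot
 * 7 % 2 == 1 while slot 0 may still be finishing transid 6. A waiter
 * on transid 7 can stop once log_transid reaches 9, because at most
 * two log transactions are ever in flight per root.
 */
static inline int log_commit_slot(unsigned long transid)
{
        return transid % 2;
}

static inline int log_commit_finished(unsigned long current_transid,
                                      unsigned long waited_transid)
{
        return current_transid >= waited_transid + 2;
}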
+ */ + write_ctree_super(trans, root->fs_info->tree_root, 2); - write_ctree_super(trans, log->fs_info->tree_root, 2); - log->fs_info->tree_log_transid++; - log->fs_info->tree_log_batch = 0; - atomic_set(&log->fs_info->tree_log_commit, 0); + atomic_set(&log_root_tree->log_commit[index2], 0); smp_mb(); - if (waitqueue_active(&log->fs_info->tree_log_wait)) - wake_up(&log->fs_info->tree_log_wait); + if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) + wake_up(&log_root_tree->log_commit_wait[index2]); out: - mutex_unlock(&log->fs_info->tree_log_mutex); + atomic_set(&root->log_commit[index1], 0); + smp_mb(); + if (waitqueue_active(&root->log_commit_wait[index1])) + wake_up(&root->log_commit_wait[index1]); return 0; } @@ -2019,38 +2030,18 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) start, end, GFP_NOFS); } - log = root->log_root; - ret = btrfs_del_root(trans, root->fs_info->log_root_tree, - &log->root_key); - BUG_ON(ret); + if (log->log_transid > 0) { + ret = btrfs_del_root(trans, root->fs_info->log_root_tree, + &log->root_key); + BUG_ON(ret); + } root->log_root = NULL; - kfree(root->log_root); + free_extent_buffer(log->node); + kfree(log); return 0; } /* - * helper function to update the item for a given subvolumes log root - * in the tree of log roots - */ -static int update_log_root(struct btrfs_trans_handle *trans, - struct btrfs_root *log) -{ - u64 bytenr = btrfs_root_bytenr(&log->root_item); - int ret; - - if (log->node->start == bytenr) - return 0; - - btrfs_set_root_bytenr(&log->root_item, log->node->start); - btrfs_set_root_generation(&log->root_item, trans->transid); - btrfs_set_root_level(&log->root_item, btrfs_header_level(log->node)); - ret = btrfs_update_root(trans, log->fs_info->log_root_tree, - &log->root_key, &log->root_item); - BUG_ON(ret); - return ret; -} - -/* * If both a file and directory are logged, and unlinks or renames are * mixed in, we have a few interesting corners: * @@ -2711,11 +2702,6 @@ next_slot: btrfs_free_path(path); btrfs_free_path(dst_path); - - mutex_lock(&root->fs_info->tree_log_mutex); - ret = update_log_root(trans, log); - BUG_ON(ret); - mutex_unlock(&root->fs_info->tree_log_mutex); out: return 0; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3451e1cca2b..bcd14ebccae 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -20,7 +20,6 @@ #include <linux/buffer_head.h> #include <linux/blkdev.h> #include <linux/random.h> -#include <linux/version.h> #include <asm/div64.h> #include "compat.h" #include "ctree.h" @@ -104,10 +103,8 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { struct btrfs_device *dev; - struct list_head *cur; - list_for_each(cur, head) { - dev = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(dev, head, dev_list) { if (dev->devid == devid && (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) { return dev; @@ -118,11 +115,9 @@ static noinline struct btrfs_device *__find_device(struct list_head *head, static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) { - struct list_head *cur; struct btrfs_fs_devices *fs_devices; - list_for_each(cur, &fs_uuids) { - fs_devices = list_entry(cur, struct btrfs_fs_devices, list); + list_for_each_entry(fs_devices, &fs_uuids, list) { if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0) return fs_devices; } @@ -159,6 +154,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) loop: spin_lock(&device->io_lock); +loop_lock: /* take all the bios 
off the list at once and process them * later on (without the lock held). But, remember the * tail and other pointers so the bios can be properly reinserted @@ -208,7 +204,7 @@ loop: * is now congested. Back off and let other work structs * run instead */ - if (pending && bdi_write_congested(bdi) && + if (pending && bdi_write_congested(bdi) && num_run > 16 && fs_info->fs_devices->open_devices > 1) { struct bio *old_head; @@ -220,7 +216,8 @@ loop: tail->bi_next = old_head; else device->pending_bio_tail = tail; - device->running_pending = 0; + + device->running_pending = 1; spin_unlock(&device->io_lock); btrfs_requeue_work(&device->work); @@ -229,6 +226,11 @@ loop: } if (again) goto loop; + + spin_lock(&device->io_lock); + if (device->pending_bios) + goto loop_lock; + spin_unlock(&device->io_lock); done: return 0; } @@ -345,14 +347,11 @@ error: int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *tmp; - struct list_head *cur; - struct btrfs_device *device; + struct btrfs_device *device, *next; mutex_lock(&uuid_mutex); again: - list_for_each_safe(cur, tmp, &fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { if (device->in_fs_metadata) continue; @@ -383,14 +382,12 @@ again: static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *cur; struct btrfs_device *device; if (--fs_devices->opened > 0) return 0; - list_for_each(cur, &fs_devices->devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, &fs_devices->devices, dev_list) { if (device->bdev) { close_bdev_exclusive(device->bdev, device->mode); fs_devices->open_devices--; @@ -439,7 +436,6 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, { struct block_device *bdev; struct list_head *head = &fs_devices->devices; - struct list_head *cur; struct btrfs_device *device; struct block_device *latest_bdev = NULL; struct buffer_head *bh; @@ -450,8 +446,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, int seeding = 1; int ret = 0; - list_for_each(cur, head) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, head, dev_list) { if (device->bdev) continue; if (!device->name) @@ -578,7 +573,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, *(unsigned long long *)disk_super->fsid, *(unsigned long long *)(disk_super->fsid + 8)); } - printk(KERN_INFO "devid %llu transid %llu %s\n", + printk(KERN_CONT "devid %llu transid %llu %s\n", (unsigned long long)devid, (unsigned long long)transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); @@ -1017,14 +1012,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } if (strcmp(device_path, "missing") == 0) { - struct list_head *cur; struct list_head *devices; struct btrfs_device *tmp; device = NULL; devices = &root->fs_info->fs_devices->devices; - list_for_each(cur, devices) { - tmp = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(tmp, devices, dev_list) { if (tmp->in_fs_metadata && !tmp->bdev) { device = tmp; break; @@ -1280,7 +1273,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; - struct list_head *cur; struct list_head *devices; struct super_block *sb = root->fs_info->sb; u64 total_bytes; @@ -1304,8 +1296,7 @@ int 
btrfs_init_new_device(struct btrfs_root *root, char *device_path) mutex_lock(&root->fs_info->volume_mutex); devices = &root->fs_info->fs_devices->devices; - list_for_each(cur, devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, devices, dev_list) { if (device->bdev == bdev) { ret = -EEXIST; goto error; @@ -1704,7 +1695,6 @@ static u64 div_factor(u64 num, int factor) int btrfs_balance(struct btrfs_root *dev_root) { int ret; - struct list_head *cur; struct list_head *devices = &dev_root->fs_info->fs_devices->devices; struct btrfs_device *device; u64 old_size; @@ -1723,8 +1713,7 @@ int btrfs_balance(struct btrfs_root *dev_root) dev_root = dev_root->fs_info->dev_root; /* step one make some room on all the devices */ - list_for_each(cur, devices) { - device = list_entry(cur, struct btrfs_device, dev_list); + list_for_each_entry(device, devices, dev_list) { old_size = device->total_bytes; size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 7f332e27089..a9d3bf4d268 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/rwsem.h> #include <linux/xattr.h> +#include <linux/security.h> #include "ctree.h" #include "btrfs_inode.h" #include "transaction.h" @@ -45,9 +46,12 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, /* lookup the xattr by name */ di = btrfs_lookup_xattr(NULL, root, path, inode->i_ino, name, strlen(name), 0); - if (!di || IS_ERR(di)) { + if (!di) { ret = -ENODATA; goto out; + } else if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; } leaf = path->nodes[0]; @@ -62,6 +66,14 @@ ssize_t __btrfs_getxattr(struct inode *inode, const char *name, ret = -ERANGE; goto out; } + + /* + * The way things are packed into the leaf is like this + * |struct btrfs_dir_item|name|data| + * where name is the xattr name, so security.foo, and data is the + * content of the xattr. 
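A worked instance of that packing, for a hypothetical lookup of "security.selinux" (offsets illustrative; the data_ptr arithmetic just below computes exactly this):

/*
 * leaf data, one xattr item starting at offset X:
 *
 *   X                       struct btrfs_dir_item    (header)
 *   X + sizeof(*di)         "security.selinux"       (name, 16 bytes)
 *   X + sizeof(*di) + 16    value bytes              (data_len bytes)
 *
 * (char *)(di + 1) skips the header, + name_len skips the name, and
 * what remains is the xattr value.
 */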
data_ptr points to the location in memory + * where the data starts in the in memory leaf + */ data_ptr = (unsigned long)((char *)(di + 1) + btrfs_dir_name_len(leaf, di)); read_extent_buffer(leaf, buffer, data_ptr, @@ -86,7 +98,7 @@ int __btrfs_setxattr(struct inode *inode, const char *name, if (!path) return -ENOMEM; - trans = btrfs_start_transaction(root, 1); + trans = btrfs_join_transaction(root, 1); btrfs_set_trans_block_group(trans, inode); /* first lets see if we already have this xattr */ @@ -176,7 +188,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto err; - ret = 0; advance = 0; while (1) { leaf = path->nodes[0]; @@ -320,3 +331,34 @@ int btrfs_removexattr(struct dentry *dentry, const char *name) return -EOPNOTSUPP; return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE); } + +int btrfs_xattr_security_init(struct inode *inode, struct inode *dir) +{ + int err; + size_t len; + void *value; + char *suffix; + char *name; + + err = security_inode_init_security(inode, dir, &suffix, &value, &len); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + name = kmalloc(XATTR_SECURITY_PREFIX_LEN + strlen(suffix) + 1, + GFP_NOFS); + if (!name) { + err = -ENOMEM; + } else { + strcpy(name, XATTR_SECURITY_PREFIX); + strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix); + err = __btrfs_setxattr(inode, name, value, len, 0); + kfree(name); + } + + kfree(suffix); + kfree(value); + return err; +} diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 5b1d08f8e68..c71e9c3cf3f 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h @@ -36,4 +36,6 @@ extern int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); extern int btrfs_removexattr(struct dentry *dentry, const char *name); +extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir); + #endif /* __XATTR__ */ diff --git a/fs/buffer.c b/fs/buffer.c index b58208f1640..665d446b25b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2688,7 +2688,7 @@ int nobh_write_end(struct file *file, struct address_space *mapping, struct buffer_head *bh; BUG_ON(fsdata != NULL && page_has_buffers(page)); - if (unlikely(copied < len) && !page_has_buffers(page)) + if (unlikely(copied < len) && head) attach_nobh_buffers(page, head); if (page_has_buffers(page)) return generic_write_end(file, mapping, pos, len, diff --git a/fs/compat.c b/fs/compat.c index 65a070e705a..d0145ca2757 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1407,7 +1407,7 @@ int compat_do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + check_unsafe_exec(bprm, current->files); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c8f8d5904f5..9c6d815dd19 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -785,7 +785,7 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd, unsigned long arg) if (copy_in_user(&sgio->status, &sgio32->status, (4 * sizeof(unsigned char)) + - (2 * sizeof(unsigned (short))) + + (2 * sizeof(unsigned short)) + (3 * sizeof(int)))) return -EFAULT; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 9c235839114..8e93341f3e8 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -553,24 +553,12 @@ static void detach_groups(struct config_group *group) child = sd->s_dentry; - /* - * Note: we hide this from lockdep since we have no way - * to teach 
lockdep about recursive - * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path - * in an inode tree, which are valid as soon as - * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a - * parent inode to one of its children. - */ - lockdep_off(); mutex_lock(&child->d_inode->i_mutex); - lockdep_on(); configfs_detach_group(sd->s_element); child->d_inode->i_flags |= S_DEAD; - lockdep_off(); mutex_unlock(&child->d_inode->i_mutex); - lockdep_on(); d_delete(child); dput(child); @@ -760,22 +748,11 @@ static int configfs_attach_item(struct config_item *parent_item, * We are going to remove an inode and its dentry but * the VFS may already have hit and used them. Thus, * we must lock them as rmdir() would. - * - * Note: we hide this from lockdep since we have no way - * to teach lockdep about recursive - * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path - * in an inode tree, which are valid as soon as - * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a - * parent inode to one of its children. */ - lockdep_off(); mutex_lock(&dentry->d_inode->i_mutex); - lockdep_on(); configfs_remove_dir(item); dentry->d_inode->i_flags |= S_DEAD; - lockdep_off(); mutex_unlock(&dentry->d_inode->i_mutex); - lockdep_on(); d_delete(dentry); } } @@ -810,25 +787,14 @@ static int configfs_attach_group(struct config_item *parent_item, * * We must also lock the inode to remove it safely in case of * error, as rmdir() would. - * - * Note: we hide this from lockdep since we have no way - * to teach lockdep about recursive - * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path - * in an inode tree, which are valid as soon as - * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a - * parent inode to one of its children. */ - lockdep_off(); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); - lockdep_on(); ret = populate_groups(to_config_group(item)); if (ret) { configfs_detach_item(item); dentry->d_inode->i_flags |= S_DEAD; } - lockdep_off(); mutex_unlock(&dentry->d_inode->i_mutex); - lockdep_on(); if (ret) d_delete(dentry); } @@ -990,17 +956,7 @@ static int configfs_depend_prep(struct dentry *origin, BUG_ON(!origin || !sd); /* Lock this guy on the way down */ - /* - * Note: we hide this from lockdep since we have no way - * to teach lockdep about recursive - * I_MUTEX_PARENT -> I_MUTEX_CHILD patterns along a path - * in an inode tree, which are valid as soon as - * I_MUTEX_PARENT -> I_MUTEX_CHILD is valid from a - * parent inode to one of its children. 
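The lockdep_off()/lockdep_on() deletions above and below work because the surviving mutex_lock_nested() calls hand lockdep exactly the ordering the old comments claimed it couldn't learn. The VFS convention being relied on, sketched with hypothetical parent and child dentries (assumes <linux/fs.h> for the I_MUTEX_* subclasses):

static void demo_lock_parent_child(struct dentry *parent, struct dentry *child)
{
        /* lock parent then child, each with its own lockdep subclass */
        mutex_lock_nested(&parent->d_inode->i_mutex, I_MUTEX_PARENT);
        mutex_lock_nested(&child->d_inode->i_mutex, I_MUTEX_CHILD);
        /* ... rmdir-style work on both inodes ... */
        mutex_unlock(&child->d_inode->i_mutex);
        mutex_unlock(&parent->d_inode->i_mutex);
}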
- */ - lockdep_off(); mutex_lock(&sd->s_dentry->d_inode->i_mutex); - lockdep_on(); if (sd->s_element == target) /* Boo-yah */ goto out; @@ -1014,9 +970,7 @@ static int configfs_depend_prep(struct dentry *origin, } /* We looped all our children and didn't find target */ - lockdep_off(); mutex_unlock(&sd->s_dentry->d_inode->i_mutex); - lockdep_on(); ret = -ENOENT; out: @@ -1036,16 +990,11 @@ static void configfs_depend_rollback(struct dentry *origin, struct dentry *dentry = item->ci_dentry; while (dentry != origin) { - /* See comments in configfs_depend_prep() */ - lockdep_off(); mutex_unlock(&dentry->d_inode->i_mutex); - lockdep_on(); dentry = dentry->d_parent; } - lockdep_off(); mutex_unlock(&origin->d_inode->i_mutex); - lockdep_on(); } int configfs_depend_item(struct configfs_subsystem *subsys, @@ -1380,16 +1329,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) } /* Wait until the racing operation terminates */ - /* - * Note: we hide this from lockdep since we are locked - * with subclass I_MUTEX_NORMAL from vfs_rmdir() (why - * not I_MUTEX_CHILD?), and I_MUTEX_XATTR or - * I_MUTEX_QUOTA are not relevant for the locked inode. - */ - lockdep_off(); mutex_lock(wait_mutex); mutex_unlock(wait_mutex); - lockdep_on(); } } while (ret == -EAGAIN); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index c01e043670e..f6caeb1d110 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1716,7 +1716,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size, { int rc = 0; - (*copied_name) = kmalloc((name_size + 2), GFP_KERNEL); + (*copied_name) = kmalloc((name_size + 1), GFP_KERNEL); if (!(*copied_name)) { rc = -ENOMEM; goto out; @@ -1726,7 +1726,7 @@ static int ecryptfs_copy_filename(char **copied_name, size_t *copied_name_size, * in printing out the * string in debug * messages */ - (*copied_name_size) = (name_size + 1); + (*copied_name_size) = name_size; out: return rc; } diff --git a/fs/exec.c b/fs/exec.c index 0dd60a01f1b..929b58004b7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1049,16 +1049,32 @@ EXPORT_SYMBOL(install_exec_creds); * - the caller must hold current->cred_exec_mutex to protect against * PTRACE_ATTACH */ -void check_unsafe_exec(struct linux_binprm *bprm) +void check_unsafe_exec(struct linux_binprm *bprm, struct files_struct *files) { - struct task_struct *p = current; + struct task_struct *p = current, *t; + unsigned long flags; + unsigned n_fs, n_files, n_sighand; bprm->unsafe = tracehook_unsafe_exec(p); - if (atomic_read(&p->fs->count) > 1 || - atomic_read(&p->files->count) > 1 || - atomic_read(&p->sighand->count) > 1) + n_fs = 1; + n_files = 1; + n_sighand = 1; + lock_task_sighand(p, &flags); + for (t = next_thread(p); t != p; t = next_thread(t)) { + if (t->fs == p->fs) + n_fs++; + if (t->files == files) + n_files++; + n_sighand++; + } + + if (atomic_read(&p->fs->count) > n_fs || + atomic_read(&p->files->count) > n_files || + atomic_read(&p->sighand->count) > n_sighand) bprm->unsafe |= LSM_UNSAFE_SHARE; + + unlock_task_sighand(p, &flags); } /* @@ -1273,7 +1289,7 @@ int do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + check_unsafe_exec(bprm, displaced); file = open_exec(filename); retval = PTR_ERR(file); diff --git a/fs/internal.h b/fs/internal.h index 53af885f173..0d8ac497b3d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -43,7 +43,7 @@ extern void __init chrdev_init(void); /* * exec.c */ -extern void check_unsafe_exec(struct linux_binprm 
*); +extern void check_unsafe_exec(struct linux_binprm *, struct files_struct *); /* * namespace.c diff --git a/fs/seq_file.c b/fs/seq_file.c index b569ff1c4dc..5267098532b 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -54,6 +54,64 @@ int seq_open(struct file *file, const struct seq_operations *op) } EXPORT_SYMBOL(seq_open); +static int traverse(struct seq_file *m, loff_t offset) +{ + loff_t pos = 0, index; + int error = 0; + void *p; + + m->version = 0; + index = 0; + m->count = m->from = 0; + if (!offset) { + m->index = index; + return 0; + } + if (!m->buf) { + m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); + if (!m->buf) + return -ENOMEM; + } + p = m->op->start(m, &index); + while (p) { + error = PTR_ERR(p); + if (IS_ERR(p)) + break; + error = m->op->show(m, p); + if (error < 0) + break; + if (unlikely(error)) { + error = 0; + m->count = 0; + } + if (m->count == m->size) + goto Eoverflow; + if (pos + m->count > offset) { + m->from = offset - pos; + m->count -= m->from; + m->index = index; + break; + } + pos += m->count; + m->count = 0; + if (pos == offset) { + index++; + m->index = index; + break; + } + p = m->op->next(m, p, &index); + } + m->op->stop(m, p); + m->index = index; + return error; + +Eoverflow: + m->op->stop(m, p); + kfree(m->buf); + m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); + return !m->buf ? -ENOMEM : -EAGAIN; +} + /** * seq_read - ->read() method for sequential files. * @file: the file to read from @@ -186,63 +244,6 @@ Efault: } EXPORT_SYMBOL(seq_read); -static int traverse(struct seq_file *m, loff_t offset) -{ - loff_t pos = 0, index; - int error = 0; - void *p; - - m->version = 0; - index = 0; - m->count = m->from = 0; - if (!offset) { - m->index = index; - return 0; - } - if (!m->buf) { - m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL); - if (!m->buf) - return -ENOMEM; - } - p = m->op->start(m, &index); - while (p) { - error = PTR_ERR(p); - if (IS_ERR(p)) - break; - error = m->op->show(m, p); - if (error < 0) - break; - if (unlikely(error)) { - error = 0; - m->count = 0; - } - if (m->count == m->size) - goto Eoverflow; - if (pos + m->count > offset) { - m->from = offset - pos; - m->count -= m->from; - m->index = index; - break; - } - pos += m->count; - m->count = 0; - if (pos == offset) { - index++; - m->index = index; - break; - } - p = m->op->next(m, p, &index); - } - m->op->stop(m, p); - return error; - -Eoverflow: - m->op->stop(m, p); - kfree(m->buf); - m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); - return !m->buf ? -ENOMEM : -EAGAIN; -} - /** * seq_lseek - ->llseek() method for sequential files. 
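For readers tracing the relocated traverse(): it drives an ordinary seq_operations iterator. A minimal, hypothetical three-record iterator shows what start/next/stop/show must do (not part of the patch; assumes <linux/seq_file.h>):

static void *demo_start(struct seq_file *m, loff_t *pos)
{
        return *pos < 3 ? pos : NULL;   /* records 0, 1, 2 */
}

static void *demo_next(struct seq_file *m, void *v, loff_t *pos)
{
        ++*pos;
        return *pos < 3 ? pos : NULL;
}

static void demo_stop(struct seq_file *m, void *v)
{
}

static int demo_show(struct seq_file *m, void *v)
{
        return seq_printf(m, "record %lld\n", *(loff_t *)v);
}

static const struct seq_operations demo_seq_ops = {
        .start = demo_start,
        .next  = demo_next,
        .stop  = demo_stop,
        .show  = demo_show,
};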
* @file: the file in question diff --git a/fs/super.c b/fs/super.c index 645e5403f2a..61dce001dd5 100644 --- a/fs/super.c +++ b/fs/super.c @@ -301,7 +301,7 @@ void generic_shutdown_super(struct super_block *sb) /* * wait for asynchronous fs operations to finish before going further */ - async_synchronize_full_special(&sb->s_async_list); + async_synchronize_full_domain(&sb->s_async_list); /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); @@ -470,7 +470,7 @@ restart: sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); - async_synchronize_full_special(&sb->s_async_list); + async_synchronize_full_domain(&sb->s_async_list); if (sb->s_root && (wait || sb->s_dirt)) sb->s_op->sync_fs(sb, wait); up_read(&sb->s_umount); diff --git a/include/acpi/pdc_intel.h b/include/acpi/pdc_intel.h index e72bfdd887f..552637b0d05 100644 --- a/include/acpi/pdc_intel.h +++ b/include/acpi/pdc_intel.h @@ -14,6 +14,7 @@ #define ACPI_PDC_SMP_T_SWCOORD (0x0080) #define ACPI_PDC_C_C1_FFH (0x0100) #define ACPI_PDC_C_C2C3_FFH (0x0200) +#define ACPI_PDC_SMP_P_HWCOORD (0x0800) #define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \ ACPI_PDC_C_C1_HALT | \ @@ -22,6 +23,7 @@ #define ACPI_PDC_EST_CAPABILITY_SWSMP (ACPI_PDC_SMP_C1PT | \ ACPI_PDC_C_C1_HALT | \ ACPI_PDC_SMP_P_SWCOORD | \ + ACPI_PDC_SMP_P_HWCOORD | \ ACPI_PDC_P_FFH) #define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \ diff --git a/include/linux/async.h b/include/linux/async.h index c4ecacd0b32..68a9530196f 100644 --- a/include/linux/async.h +++ b/include/linux/async.h @@ -17,9 +17,11 @@ typedef u64 async_cookie_t; typedef void (async_func_ptr) (void *data, async_cookie_t cookie); extern async_cookie_t async_schedule(async_func_ptr *ptr, void *data); -extern async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *list); +extern async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data, + struct list_head *list); extern void async_synchronize_full(void); -extern void async_synchronize_full_special(struct list_head *list); +extern void async_synchronize_full_domain(struct list_head *list); extern void async_synchronize_cookie(async_cookie_t cookie); -extern void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *list); +extern void async_synchronize_cookie_domain(async_cookie_t cookie, + struct list_head *list); diff --git a/include/linux/fb.h b/include/linux/fb.h index 818fe21257e..31527e17076 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -960,6 +960,21 @@ extern struct fb_info *registered_fb[FB_MAX]; extern int num_registered_fb; extern struct class *fb_class; +static inline int lock_fb_info(struct fb_info *info) +{ + mutex_lock(&info->lock); + if (!info->fbops) { + mutex_unlock(&info->lock); + return 0; + } + return 1; +} + +static inline void unlock_fb_info(struct fb_info *info) +{ + mutex_unlock(&info->lock); +} + static inline void __fb_pad_aligned_buffer(u8 *dst, u32 d_pitch, u8 *src, u32 s_pitch, u32 height) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 343df9ef241..7fa371898e3 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -480,7 +480,8 @@ static inline char *pack_hex_byte(char *buf, u8 byte) /* * swap - swap value of @a and @b */ -#define swap(a, b) ({ typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; }) +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) /** * container_of - cast a member of a structure out to the containing structure diff --git 
a/include/linux/module.h b/include/linux/module.h index f3b8329eb5b..145a75528cc 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -407,7 +407,6 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - BUG_ON(module_refcount(module) == 0); local_inc(__module_ref_addr(module, get_cpu())); put_cpu(); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 48890cf3f96..7bd624bfdcf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -684,7 +684,7 @@ int pci_enable_rom(struct pci_dev *pdev); void pci_disable_rom(struct pci_dev *pdev); void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); -size_t pci_get_rom_size(void __iomem *rom, size_t size); +size_t pci_get_rom_size(struct pci_dev *pdev, void __iomem *rom, size_t size); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index ac94c066f6e..3c103d636da 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -74,13 +74,10 @@ void ring_buffer_free(struct ring_buffer *buffer); int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); -struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, - unsigned long length, - unsigned long *flags); +struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, + unsigned long length); int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags); + struct ring_buffer_event *event); int ring_buffer_write(struct ring_buffer *buffer, unsigned long length, void *data); diff --git a/include/linux/sched.h b/include/linux/sched.h index f3c23cf11ab..699edb8e185 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -445,6 +445,7 @@ struct pacct_struct { * @utime: time spent in user mode, in &cputime_t units * @stime: time spent in kernel mode, in &cputime_t units * @sum_exec_runtime: total time spent on the CPU, in nanoseconds + * @lock: lock for fields in this struct * * This structure groups together three kinds of CPU time that are * tracked for threads and thread groups. 
Most things considering diff --git a/include/linux/wait.h b/include/linux/wait.h index ef609f842fa..a210ede73b5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -132,6 +132,8 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } +void __wake_up_common(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, int sync, void *key); void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); @@ -333,16 +335,19 @@ do { \ for (;;) { \ prepare_to_wait_exclusive(&wq, &__wait, \ TASK_INTERRUPTIBLE); \ - if (condition) \ + if (condition) { \ + finish_wait(&wq, &__wait); \ break; \ + } \ if (!signal_pending(current)) { \ schedule(); \ continue; \ } \ ret = -ERESTARTSYS; \ + abort_exclusive_wait(&wq, &__wait, \ + TASK_INTERRUPTIBLE, NULL); \ break; \ } \ - finish_wait(&wq, &__wait); \ } while (0) #define wait_event_interruptible_exclusive(wq, condition) \ @@ -431,6 +436,8 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); +void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, + unsigned int mode, void *key); int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); diff --git a/include/video/aty128.h b/include/video/aty128.h index 7079beb005e..51ac69f05bd 100644 --- a/include/video/aty128.h +++ b/include/video/aty128.h @@ -21,9 +21,9 @@ #define I2C_CNTL_1 0x0094 #define PALETTE_INDEX 0x00b0 #define PALETTE_DATA 0x00b4 -#define CONFIG_CNTL 0x00e0 +#define CNFG_CNTL 0x00e0 #define GEN_RESET_CNTL 0x00f0 -#define CONFIG_MEMSIZE 0x00f8 +#define CNFG_MEMSIZE 0x00f8 #define MEM_CNTL 0x0140 #define MEM_POWER_MISC 0x015c #define AGP_BASE 0x0170 diff --git a/include/video/mach64.h b/include/video/mach64.h index a8332e528ec..89e91c0cb73 100644 --- a/include/video/mach64.h +++ b/include/video/mach64.h @@ -103,7 +103,7 @@ #define CUR_HORZ_VERT_OFF 0x0070 /* Dword offset 0_1C */ #define CUR2_HORZ_VERT_OFF 0x0070 /* Dword offset 0_1C */ -#define CONFIG_PANEL_LG 0x0074 /* Dword offset 0_1D (LG) */ +#define CNFG_PANEL_LG 0x0074 /* Dword offset 0_1D (LG) */ /* General I/O Control */ #define GP_IO 0x0078 /* Dword offset 0_1E */ @@ -146,8 +146,8 @@ #define CLOCK_SEL_CNTL 0x0090 /* Dword offset 0_24 */ /* Configuration */ -#define CONFIG_STAT1 0x0094 /* Dword offset 0_25 */ -#define CONFIG_STAT2 0x0098 /* Dword offset 0_26 */ +#define CNFG_STAT1 0x0094 /* Dword offset 0_25 */ +#define CNFG_STAT2 0x0098 /* Dword offset 0_26 */ /* Bus Control */ #define BUS_CNTL 0x00A0 /* Dword offset 0_28 */ @@ -190,9 +190,9 @@ #define POWER_MANAGEMENT_LG 0x00D8 /* Dword offset 0_36 (LG) */ /* Configuration */ -#define CONFIG_CNTL 0x00DC /* Dword offset 0_37 (CT, ET, VT) */ -#define CONFIG_CHIP_ID 0x00E0 /* Dword offset 0_38 */ -#define CONFIG_STAT0 0x00E4 /* Dword offset 0_39 */ +#define CNFG_CNTL 0x00DC /* Dword offset 0_37 (CT, ET, VT) */ +#define CNFG_CHIP_ID 0x00E0 /* Dword offset 0_38 */ +#define CNFG_STAT0 0x00E4 /* Dword offset 0_39 */ /* Test and Debug */ #define CRC_SIG 0x00E8 /* Dword offset 0_3A */ @@ -851,17 +851,17 @@ #define PLL_YCLK_CNTL 0x29 #define 
PM_DYN_CLK_CNTL 0x2A -/* CONFIG_CNTL register constants */ +/* CNFG_CNTL register constants */ #define APERTURE_4M_ENABLE 1 #define APERTURE_8M_ENABLE 2 #define VGA_APERTURE_ENABLE 4 -/* CONFIG_STAT0 register constants (GX, CX) */ +/* CNFG_STAT0 register constants (GX, CX) */ #define CFG_BUS_TYPE 0x00000007 #define CFG_MEM_TYPE 0x00000038 #define CFG_INIT_DAC_TYPE 0x00000e00 -/* CONFIG_STAT0 register constants (CT, ET, VT) */ +/* CNFG_STAT0 register constants (CT, ET, VT) */ #define CFG_MEM_TYPE_xT 0x00000007 #define ISA 0 @@ -942,7 +942,7 @@ #define PCI_ATI_VENDOR_ID 0x1002 -/* CONFIG_CHIP_ID register constants */ +/* CNFG_CHIP_ID register constants */ #define CFG_CHIP_TYPE 0x0000FFFF #define CFG_CHIP_CLASS 0x00FF0000 #define CFG_CHIP_REV 0xFF000000 @@ -951,7 +951,7 @@ #define CFG_CHIP_MINOR 0xC0000000 -/* Chip IDs read from CONFIG_CHIP_ID */ +/* Chip IDs read from CNFG_CHIP_ID */ /* mach64GX family */ #define GX_CHIP_ID 0xD7 /* mach64GX (ATI888GX00) */ @@ -1254,7 +1254,7 @@ #define CRTC2_DISPLAY_DIS 0x00000400 /* LCD register indices */ -#define CONFIG_PANEL 0x00 +#define CNFG_PANEL 0x00 #define LCD_GEN_CNTL 0x01 #define DSTN_CONTROL 0x02 #define HFB_PITCH_ADDR 0x03 diff --git a/include/video/radeon.h b/include/video/radeon.h index 1cd09cc5b16..e072b16b39a 100644 --- a/include/video/radeon.h +++ b/include/video/radeon.h @@ -11,13 +11,13 @@ #define HI_STAT 0x004C #define BUS_CNTL1 0x0034 #define I2C_CNTL_1 0x0094 -#define CONFIG_CNTL 0x00E0 -#define CONFIG_MEMSIZE 0x00F8 -#define CONFIG_APER_0_BASE 0x0100 -#define CONFIG_APER_1_BASE 0x0104 -#define CONFIG_APER_SIZE 0x0108 -#define CONFIG_REG_1_BASE 0x010C -#define CONFIG_REG_APER_SIZE 0x0110 +#define CNFG_CNTL 0x00E0 +#define CNFG_MEMSIZE 0x00F8 +#define CNFG_APER_0_BASE 0x0100 +#define CNFG_APER_1_BASE 0x0104 +#define CNFG_APER_SIZE 0x0108 +#define CNFG_REG_1_BASE 0x010C +#define CNFG_REG_APER_SIZE 0x0110 #define PAD_AGPINPUT_DELAY 0x0164 #define PAD_CTLR_STRENGTH 0x0168 #define PAD_CTLR_UPDATE 0x016C @@ -509,7 +509,7 @@ /* CLOCK_CNTL_INDEX bit constants */ #define PLL_WR_EN 0x00000080 -/* CONFIG_CNTL bit constants */ +/* CNFG_CNTL bit constants */ #define CFG_VGA_RAM_EN 0x00000100 #define CFG_ATI_REV_ID_MASK (0xf << 16) #define CFG_ATI_REV_A11 (0 << 16) @@ -980,7 +980,7 @@ /* masks */ -#define CONFIG_MEMSIZE_MASK 0x1f000000 +#define CNFG_MEMSIZE_MASK 0x1f000000 #define MEM_CFG_TYPE 0x40000000 #define DST_OFFSET_MASK 0x003fffff #define DST_PITCH_MASK 0x3fc00000 diff --git a/ipc/shm.c b/ipc/shm.c index c0a021f7f41..f8f69fad3a2 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -565,11 +565,15 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss, struct hstate *h = hstate_file(shp->shm_file); *rss += pages_per_huge_page(h) * mapping->nrpages; } else { +#ifdef CONFIG_SHMEM struct shmem_inode_info *info = SHMEM_I(inode); spin_lock(&info->lock); *rss += inode->i_mapping->nrpages; *swp += info->swapped; spin_unlock(&info->lock); +#else + *rss += inode->i_mapping->nrpages; +#endif } total++; diff --git a/kernel/async.c b/kernel/async.c index 608b32b4281..f565891f2c9 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -54,6 +54,7 @@ asynchronous and synchronous parts of the kernel. 
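On the CNFG_ renames in the aty128/mach64/radeon headers above: the CONFIG_ prefix belongs to Kconfig, so a register macro such as CONFIG_CNTL can be shadowed by a build option of the same name (this rationale is an assumption based on kernel naming convention). The rename keeps the offsets and vacates the namespace, illustration only:

/* before: indistinguishable from a Kconfig switch */
#define CONFIG_CNTL     0x00E0

/* after: same register offset, a prefix Kconfig will never generate */
#define CNFG_CNTL       0x00E0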
#include <linux/sched.h> #include <linux/init.h> #include <linux/kthread.h> +#include <linux/delay.h> #include <asm/atomic.h> static async_cookie_t next_cookie = 1; @@ -132,21 +133,23 @@ static void run_one_entry(void) entry = list_first_entry(&async_pending, struct async_entry, list); /* 2) move it to the running queue */ - list_del(&entry->list); - list_add_tail(&entry->list, &async_running); + list_move_tail(&entry->list, entry->running); spin_unlock_irqrestore(&async_lock, flags); /* 3) run it (and print duration) */ if (initcall_debug && system_state == SYSTEM_BOOTING) { - printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current)); + printk("calling %lli_%pF @ %i\n", (long long)entry->cookie, + entry->func, task_pid_nr(current)); calltime = ktime_get(); } entry->func(entry->data, entry->cookie); if (initcall_debug && system_state == SYSTEM_BOOTING) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); - printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie, - entry->func, ktime_to_ns(delta) >> 10); + printk("initcall %lli_%pF returned 0 after %lld usecs\n", + (long long)entry->cookie, + entry->func, + (long long)ktime_to_ns(delta) >> 10); } /* 4) remove it from the running queue */ @@ -205,18 +208,44 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l return newcookie; } +/** + * async_schedule - schedule a function for asynchronous execution + * @ptr: function to execute asynchronously + * @data: data pointer to pass to the function + * + * Returns an async_cookie_t that may be used for checkpointing later. + * Note: This function may be called from atomic or non-atomic contexts. + */ async_cookie_t async_schedule(async_func_ptr *ptr, void *data) { - return __async_schedule(ptr, data, &async_pending); + return __async_schedule(ptr, data, &async_running); } EXPORT_SYMBOL_GPL(async_schedule); -async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running) +/** + * async_schedule_domain - schedule a function for asynchronous execution within a certain domain + * @ptr: function to execute asynchronously + * @data: data pointer to pass to the function + * @running: running list for the domain + * + * Returns an async_cookie_t that may be used for checkpointing later. + * @running may be used in the async_synchronize_*_domain() functions + * to wait within a certain synchronization domain rather than globally. + * A synchronization domain is specified via the running queue @running to use. + * Note: This function may be called from atomic or non-atomic contexts. + */ +async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data, + struct list_head *running) { return __async_schedule(ptr, data, running); } -EXPORT_SYMBOL_GPL(async_schedule_special); +EXPORT_SYMBOL_GPL(async_schedule_domain); +/** + * async_synchronize_full - synchronize all asynchronous function calls + * + * This function waits until all asynchronous function calls have been done. + */ void async_synchronize_full(void) { do { @@ -225,13 +254,30 @@ void async_synchronize_full(void) } EXPORT_SYMBOL_GPL(async_synchronize_full); -void async_synchronize_full_special(struct list_head *list) +/** + * async_synchronize_full_domain - synchronize all asynchronous function calls within a certain domain + * @list: running list to synchronize on + * + * This function waits until all asynchronous function calls for the + * synchronization domain specified by the running list @list have been done.
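A driver-side sketch of the renamed domain API (the mydrv_* names are hypothetical): a private running list keeps synchronization from waiting on unrelated asynchronous work.

static LIST_HEAD(mydrv_async_running);          /* the domain */

static void mydrv_init_one(void *data, async_cookie_t cookie)
{
        /* slow per-device init, run in parallel with the rest of boot */
}

static void mydrv_init_all(void)
{
        int i;

        for (i = 0; i < 4; i++)
                async_schedule_domain(mydrv_init_one, NULL,
                                      &mydrv_async_running);

        /* waits only for this driver's domain, not the global queue */
        async_synchronize_full_domain(&mydrv_async_running);
}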
+ */ +void async_synchronize_full_domain(struct list_head *list) { - async_synchronize_cookie_special(next_cookie, list); + async_synchronize_cookie_domain(next_cookie, list); } -EXPORT_SYMBOL_GPL(async_synchronize_full_special); +EXPORT_SYMBOL_GPL(async_synchronize_full_domain); -void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running) +/** + * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing + * @cookie: async_cookie_t to use as checkpoint + * @running: running list to synchronize on + * + * This function waits until all asynchronous function calls for the + * synchronization domain specified by the running list @running submitted + * prior to @cookie have been done. + */ +void async_synchronize_cookie_domain(async_cookie_t cookie, + struct list_head *running) { ktime_t starttime, delta, endtime; @@ -247,14 +293,22 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r delta = ktime_sub(endtime, starttime); printk("async_continuing @ %i after %lli usec\n", - task_pid_nr(current), ktime_to_ns(delta) >> 10); + task_pid_nr(current), + (long long)ktime_to_ns(delta) >> 10); } } -EXPORT_SYMBOL_GPL(async_synchronize_cookie_special); +EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain); +/** + * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing + * @cookie: async_cookie_t to use as checkpoint + * + * This function waits until all asynchronous function calls prior to @cookie + * have been done. + */ void async_synchronize_cookie(async_cookie_t cookie) { - async_synchronize_cookie_special(cookie, &async_running); + async_synchronize_cookie_domain(cookie, &async_running); } EXPORT_SYMBOL_GPL(async_synchronize_cookie); @@ -315,7 +369,11 @@ static int async_manager_thread(void *unused) ec = atomic_read(&entry_count); while (tc < ec && tc < MAX_THREADS) { - kthread_run(async_thread, NULL, "async/%i", tc); + if (IS_ERR(kthread_run(async_thread, NULL, "async/%i", + tc))) { + msleep(100); + continue; + } atomic_inc(&thread_count); tc++; } @@ -330,7 +388,9 @@ static int async_manager_thread(void *unused) static int __init async_init(void) { if (async_enabled) - kthread_run(async_manager_thread, NULL, "async/mgr"); + if (IS_ERR(kthread_run(async_manager_thread, NULL, + "async/mgr"))) + async_enabled = 0; return 0; } diff --git a/kernel/fork.c b/kernel/fork.c index 242a706e772..6d5dbb7a13e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1005,6 +1005,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, * triggers too late. This doesn't hurt, the check is only there * to stop root fork bombs.
*/ + retval = -EAGAIN; if (nr_threads >= max_threads) goto bad_fork_cleanup_count; diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index ecf765c6a77..acd88356ac7 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -71,7 +71,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, desc = irq_desc_ptrs[irq]; if (desc && old_desc != desc) - goto out_unlock; + goto out_unlock; node = cpu_to_node(cpu); desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); @@ -84,10 +84,15 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, init_copy_one_irq_desc(irq, old_desc, desc, cpu); irq_desc_ptrs[irq] = desc; + spin_unlock_irqrestore(&sparse_irq_lock, flags); /* free the old one */ free_one_irq_desc(old_desc, desc); + spin_unlock(&old_desc->lock); kfree(old_desc); + spin_lock(&desc->lock); + + return desc; out_unlock: spin_unlock_irqrestore(&sparse_irq_lock, flags); diff --git a/kernel/power/main.c b/kernel/power/main.c index 23998887397..b4d219016b6 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -57,16 +57,6 @@ int pm_notifier_call_chain(unsigned long val) #ifdef CONFIG_PM_DEBUG int pm_test_level = TEST_NONE; -static int suspend_test(int level) -{ - if (pm_test_level == level) { - printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); - mdelay(5000); - return 1; - } - return 0; -} - static const char * const pm_tests[__TEST_AFTER_LAST] = { [TEST_NONE] = "none", [TEST_CORE] = "core", @@ -125,14 +115,24 @@ static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, } power_attr(pm_test); -#else /* !CONFIG_PM_DEBUG */ -static inline int suspend_test(int level) { return 0; } -#endif /* !CONFIG_PM_DEBUG */ +#endif /* CONFIG_PM_DEBUG */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND +static int suspend_test(int level) +{ +#ifdef CONFIG_PM_DEBUG + if (pm_test_level == level) { + printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); + mdelay(5000); + return 1; + } +#endif /* !CONFIG_PM_DEBUG */ + return 0; +} + #ifdef CONFIG_PM_TEST_SUSPEND /* diff --git a/kernel/sched.c b/kernel/sched.c index 566c8c9e3a6..1ffb8951487 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4697,8 +4697,8 @@ EXPORT_SYMBOL(default_wake_function); * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * zero in this (rare) case, and we handle it by continuing to scan the queue. 
*/ -static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, - int nr_exclusive, int sync, void *key) +void __wake_up_common(wait_queue_head_t *q, unsigned int mode, + int nr_exclusive, int sync, void *key) { wait_queue_t *curr, *next; diff --git a/kernel/sys.c b/kernel/sys.c index e7dc0e10a48..f145c415bc1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1525,22 +1525,14 @@ SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) return -EINVAL; if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) return -EFAULT; + if (new_rlim.rlim_cur > new_rlim.rlim_max) + return -EINVAL; old_rlim = current->signal->rlim + resource; if ((new_rlim.rlim_max > old_rlim->rlim_max) && !capable(CAP_SYS_RESOURCE)) return -EPERM; - - if (resource == RLIMIT_NOFILE) { - if (new_rlim.rlim_max == RLIM_INFINITY) - new_rlim.rlim_max = sysctl_nr_open; - if (new_rlim.rlim_cur == RLIM_INFINITY) - new_rlim.rlim_cur = sysctl_nr_open; - if (new_rlim.rlim_max > sysctl_nr_open) - return -EPERM; - } - - if (new_rlim.rlim_cur > new_rlim.rlim_max) - return -EINVAL; + if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open) + return -EPERM; retval = security_task_setrlimit(resource, &new_rlim); if (retval) diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c index f04c0625f1c..ae201b3eda8 100644 --- a/kernel/trace/kmemtrace.c +++ b/kernel/trace/kmemtrace.c @@ -272,19 +272,16 @@ void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, struct ring_buffer_event *event; struct kmemtrace_alloc_entry *entry; struct trace_array *tr = kmemtrace_array; - unsigned long irq_flags; if (!kmem_tracing_enabled) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_KMEM_ALLOC, + sizeof(*entry), 0, 0); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_KMEM_ALLOC; entry->call_site = call_site; entry->ptr = ptr; entry->bytes_req = bytes_req; @@ -292,9 +289,7 @@ void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id, entry->gfp_flags = gfp_flags; entry->node = node; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, 0); } EXPORT_SYMBOL(kmemtrace_mark_alloc_node); @@ -305,26 +300,20 @@ void kmemtrace_mark_free(enum kmemtrace_type_id type_id, struct ring_buffer_event *event; struct kmemtrace_free_entry *entry; struct trace_array *tr = kmemtrace_array; - unsigned long irq_flags; if (!kmem_tracing_enabled) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_KMEM_FREE, + sizeof(*entry), 0, 0); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - - entry->ent.type = TRACE_KMEM_FREE; entry->type_id = type_id; entry->call_site = call_site; entry->ptr = ptr; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, 0); } EXPORT_SYMBOL(kmemtrace_mark_free); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 5ee344417cd..53ba3a6d16d 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1284,7 +1284,6 @@ static DEFINE_PER_CPU(int, rb_need_resched); * ring_buffer_lock_reserve - reserve a part of the buffer * @buffer: the ring buffer to reserve from * @length: the length of the data to reserve (excluding event 
header) - * @flags: a pointer to save the interrupt flags * * Returns a reseverd event on the ring buffer to copy directly to. * The user of this interface will need to get the body to write into @@ -1297,9 +1296,7 @@ static DEFINE_PER_CPU(int, rb_need_resched); * If NULL is returned, then nothing has been allocated or locked. */ struct ring_buffer_event * -ring_buffer_lock_reserve(struct ring_buffer *buffer, - unsigned long length, - unsigned long *flags) +ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) { struct ring_buffer_per_cpu *cpu_buffer; struct ring_buffer_event *event; @@ -1366,15 +1363,13 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, * ring_buffer_unlock_commit - commit a reserved * @buffer: The buffer to commit to * @event: The event pointer to commit. - * @flags: the interrupt flags received from ring_buffer_lock_reserve. * * This commits the data to the ring buffer, and releases any locks held. * * Must be paired with ring_buffer_lock_reserve. */ int ring_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags) + struct ring_buffer_event *event) { struct ring_buffer_per_cpu *cpu_buffer; int cpu = raw_smp_processor_id(); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bd4d9f8818f..03fbd4c20bc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -522,23 +522,24 @@ int register_tracer(struct tracer *type) tracing_selftest_running = false; mutex_unlock(&trace_types_lock); - if (!ret && default_bootup_tracer) { - if (!strncmp(default_bootup_tracer, type->name, - BOOTUP_TRACER_SIZE)) { - printk(KERN_INFO "Starting tracer '%s'\n", - type->name); - /* Do we want this tracer to start on bootup? */ - tracing_set_tracer(type->name); - default_bootup_tracer = NULL; - /* disable other selftests, since this will break it. */ - tracing_selftest_disabled = 1; + if (ret || !default_bootup_tracer) + goto out_unlock; + + if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) + goto out_unlock; + + printk(KERN_INFO "Starting tracer '%s'\n", type->name); + /* Do we want this tracer to start on bootup? */ + tracing_set_tracer(type->name); + default_bootup_tracer = NULL; + /* disable other selftests, since this will break it. */ + tracing_selftest_disabled = 1; #ifdef CONFIG_FTRACE_STARTUP_TEST - printk(KERN_INFO "Disabling FTRACE selftests due" - " to running tracer '%s'\n", type->name); + printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", + type->name); #endif - } - } + out_unlock: lock_kernel(); return ret; } @@ -775,78 +776,100 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, (need_resched() ? 
TRACE_FLAG_NEED_RESCHED : 0); } +struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, + unsigned char type, + unsigned long len, + unsigned long flags, int pc) +{ + struct ring_buffer_event *event; + + event = ring_buffer_lock_reserve(tr->buffer, len); + if (event != NULL) { + struct trace_entry *ent = ring_buffer_event_data(event); + + tracing_generic_entry_update(ent, flags, pc); + ent->type = type; + } + + return event; +} +static void ftrace_trace_stack(struct trace_array *tr, + unsigned long flags, int skip, int pc); +static void ftrace_trace_userstack(struct trace_array *tr, + unsigned long flags, int pc); + +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int pc) +{ + ring_buffer_unlock_commit(tr->buffer, event); + + ftrace_trace_stack(tr, flags, 6, pc); + ftrace_trace_userstack(tr, flags, pc); + trace_wake_up(); +} + void -trace_function(struct trace_array *tr, struct trace_array_cpu *data, +trace_function(struct trace_array *tr, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc) { struct ring_buffer_event *event; struct ftrace_entry *entry; - unsigned long irq_flags; /* If we are reading the ring buffer, don't trace */ if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), + flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_FN; entry->ip = ip; entry->parent_ip = parent_ip; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); } #ifdef CONFIG_FUNCTION_GRAPH_TRACER static void __trace_graph_entry(struct trace_array *tr, - struct trace_array_cpu *data, struct ftrace_graph_ent *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; struct ftrace_graph_ent_entry *entry; - unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_ENT; entry->graph_ent = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); + ring_buffer_unlock_commit(global_trace.buffer, event); } static void __trace_graph_return(struct trace_array *tr, - struct trace_array_cpu *data, struct ftrace_graph_ret *trace, unsigned long flags, int pc) { struct ring_buffer_event *event; struct ftrace_graph_ret_entry *entry; - unsigned long irq_flags; if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) return; - event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_GRAPH_RET; entry->ret = *trace; - ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); + ring_buffer_unlock_commit(global_trace.buffer, event); } #endif @@ -856,11 +879,10 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data, int pc) { if 
(likely(!atomic_read(&data->disabled))) - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); } static void __ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { @@ -868,16 +890,12 @@ static void __ftrace_trace_stack(struct trace_array *tr, struct ring_buffer_event *event; struct stack_entry *entry; struct stack_trace trace; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_STACK, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_STACK; - memset(&entry->caller, 0, sizeof(entry->caller)); trace.nr_entries = 0; @@ -886,49 +904,43 @@ static void __ftrace_trace_stack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace(&trace); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); #endif } static void ftrace_trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { if (!(trace_flags & TRACE_ITER_STACKTRACE)) return; - __ftrace_trace_stack(tr, data, flags, skip, pc); + __ftrace_trace_stack(tr, flags, skip, pc); } void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc) { - __ftrace_trace_stack(tr, data, flags, skip, pc); + __ftrace_trace_stack(tr, flags, skip, pc); } static void ftrace_trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags, int pc) + unsigned long flags, int pc) { #ifdef CONFIG_STACKTRACE struct ring_buffer_event *event; struct userstack_entry *entry; struct stack_trace trace; - unsigned long irq_flags; if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) return; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_USER_STACK; memset(&entry->caller, 0, sizeof(entry->caller)); @@ -938,70 +950,56 @@ static void ftrace_trace_userstack(struct trace_array *tr, trace.entries = entry->caller; save_stack_trace_user(&trace); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); #endif } -void __trace_userstack(struct trace_array *tr, - struct trace_array_cpu *data, - unsigned long flags) +void __trace_userstack(struct trace_array *tr, unsigned long flags) { - ftrace_trace_userstack(tr, data, flags, preempt_count()); + ftrace_trace_userstack(tr, flags, preempt_count()); } static void -ftrace_trace_special(void *__tr, void *__data, +ftrace_trace_special(void *__tr, unsigned long arg1, unsigned long arg2, unsigned long arg3, int pc) { struct ring_buffer_event *event; - struct trace_array_cpu *data = __data; struct trace_array *tr = __tr; struct special_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, + sizeof(*entry), 0, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, pc); - entry->ent.type = TRACE_SPECIAL; entry->arg1 = arg1; entry->arg2 = arg2; entry->arg3 = arg3; 
- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, irq_flags, 4, pc); - ftrace_trace_userstack(tr, data, irq_flags, pc); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, pc); } void __trace_special(void *__tr, void *__data, unsigned long arg1, unsigned long arg2, unsigned long arg3) { - ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); + ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count()); } void tracing_sched_switch_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc) { struct ring_buffer_event *event; struct ctx_switch_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_CTX, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_CTX; entry->prev_pid = prev->pid; entry->prev_prio = prev->prio; entry->prev_state = prev->state; @@ -1009,29 +1007,23 @@ tracing_sched_switch_trace(struct trace_array *tr, entry->next_prio = next->prio; entry->next_state = next->state; entry->next_cpu = task_cpu(next); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, flags, 5, pc); - ftrace_trace_userstack(tr, data, flags, pc); + trace_buffer_unlock_commit(tr, event, flags, pc); } void tracing_sched_wakeup_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *curr, unsigned long flags, int pc) { struct ring_buffer_event *event; struct ctx_switch_entry *entry; - unsigned long irq_flags; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_WAKE, + sizeof(*entry), flags, pc); if (!event) return; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_WAKE; entry->prev_pid = curr->pid; entry->prev_prio = curr->prio; entry->prev_state = curr->state; @@ -1039,11 +1031,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr, entry->next_prio = wakee->prio; entry->next_state = wakee->state; entry->next_cpu = task_cpu(wakee); - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - ftrace_trace_stack(tr, data, flags, 6, pc); - ftrace_trace_userstack(tr, data, flags, pc); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, flags, pc); } void @@ -1064,7 +1052,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) data = tr->data[cpu]; if (likely(atomic_inc_return(&data->disabled) == 1)) - ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); + ftrace_trace_special(tr, arg1, arg2, arg3, pc); atomic_dec(&data->disabled); local_irq_restore(flags); @@ -1092,7 +1080,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_graph_entry(tr, data, trace, flags, pc); + __trace_graph_entry(tr, trace, flags, pc); } /* Only do the atomic if it is not already set */ if (!test_tsk_trace_graph(current)) @@ -1118,7 +1106,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) { pc = preempt_count(); - __trace_graph_return(tr, data, trace, flags, pc); + __trace_graph_return(tr, trace, flags, 
pc); } if (!trace->depth) clear_tsk_trace_graph(current); @@ -1412,7 +1400,7 @@ static enum print_line_t print_lat_fmt(struct trace_iterator *iter) goto partial; } - if (event && event->latency_trace) + if (event) return event->latency_trace(iter, sym_flags); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) @@ -1441,7 +1429,7 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter) goto partial; } - if (event && event->trace) + if (event) return event->trace(iter, sym_flags); if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) @@ -1467,7 +1455,7 @@ static enum print_line_t print_raw_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - if (event && event->raw) + if (event) return event->raw(iter, 0); if (!trace_seq_printf(s, "%d ?\n", entry->type)) @@ -1494,7 +1482,7 @@ static enum print_line_t print_hex_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - if (event && event->hex) { + if (event) { enum print_line_t ret = event->hex(iter, 0); if (ret != TRACE_TYPE_HANDLED) return ret; @@ -1536,10 +1524,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) } event = ftrace_find_event(entry->type); - if (event && event->binary) - return event->binary(iter, 0); - - return TRACE_TYPE_HANDLED; + return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED; } static int trace_empty(struct trace_iterator *iter) @@ -2186,6 +2171,12 @@ tracing_set_trace_read(struct file *filp, char __user *ubuf, return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); } +int tracer_init(struct tracer *t, struct trace_array *tr) +{ + tracing_reset_online_cpus(tr); + return t->init(tr); +} + static int tracing_set_tracer(const char *buf) { struct trace_array *tr = &global_trace; @@ -2210,7 +2201,7 @@ static int tracing_set_tracer(const char *buf) current_trace = t; if (t->init) { - ret = t->init(tr); + ret = tracer_init(t, tr); if (ret) goto out; } @@ -2854,18 +2845,16 @@ int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) trace_buf[len] = 0; size = sizeof(*entry) + len + 1; - event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); if (!event) goto out_unlock; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, irq_flags, pc); - entry->ent.type = TRACE_PRINT; entry->ip = ip; entry->depth = depth; memcpy(&entry->buf, trace_buf, len); entry->buf[len] = 0; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); out_unlock: spin_unlock_irqrestore(&trace_buf_lock, irq_flags); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f92aba52a89..b9838f4a692 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -394,6 +394,7 @@ struct trace_iterator { cpumask_var_t started; }; +int tracer_init(struct tracer *t, struct trace_array *tr); int tracing_is_enabled(void); void trace_wake_up(void); void tracing_reset(struct trace_array *tr, int cpu); @@ -402,6 +403,17 @@ int tracing_open_generic(struct inode *inode, struct file *filp); struct dentry *tracing_init_dentry(void); void init_tracer_sysprof_debugfs(struct dentry *d_tracer); +struct ring_buffer_event; + +struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, + unsigned char type, + unsigned long len, + unsigned long flags, + int pc); +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer_event *event, + unsigned long flags, int 
pc); + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data); @@ -418,14 +430,12 @@ void ftrace(struct trace_array *tr, unsigned long parent_ip, unsigned long flags, int pc); void tracing_sched_switch_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *prev, struct task_struct *next, unsigned long flags, int pc); void tracing_record_cmdline(struct task_struct *tsk); void tracing_sched_wakeup_trace(struct trace_array *tr, - struct trace_array_cpu *data, struct task_struct *wakee, struct task_struct *cur, unsigned long flags, int pc); @@ -435,7 +445,6 @@ void trace_special(struct trace_array *tr, unsigned long arg2, unsigned long arg3, int pc); void trace_function(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long ip, unsigned long parent_ip, unsigned long flags, int pc); @@ -461,7 +470,6 @@ void update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu); void __trace_stack(struct trace_array *tr, - struct trace_array_cpu *data, unsigned long flags, int skip, int pc); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 1f07895977a..7a30fc4c364 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -132,7 +132,6 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { struct ring_buffer_event *event; struct trace_boot_call *entry; - unsigned long irq_flags; struct trace_array *tr = boot_trace; if (!tr || !pre_initcalls_finished) @@ -144,18 +143,13 @@ void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT_CALL; entry->boot_call = *bt; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } @@ -164,7 +158,6 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { struct ring_buffer_event *event; struct trace_boot_ret *entry; - unsigned long irq_flags; struct trace_array *tr = boot_trace; if (!tr || !pre_initcalls_finished) @@ -173,18 +166,13 @@ void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) sprint_symbol(bt->func, (unsigned long)fn); preempt_disable(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_BOOT_RET; entry->boot_ret = *bt; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 7ac72a44b2d..f8ae2c50e01 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -33,7 +33,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) struct trace_array *tr = branch_tracer; struct ring_buffer_event *event; struct trace_branch *entry; - unsigned long flags, irq_flags; + unsigned long flags; int cpu, pc; const char *p; @@ -52,15 +52,13 @@ probe_likely_condition(struct ftrace_branch_data *f, 
int val, int expect) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + pc = preempt_count(); + event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, + sizeof(*entry), flags, pc); if (!event) goto out; - pc = preempt_count(); entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, flags, pc); - entry->ent.type = TRACE_BRANCH; /* Strip off the path, only save the file */ p = f->file + strlen(f->file); @@ -75,7 +73,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect) entry->line = f->line; entry->correct = val == expect; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); + ring_buffer_unlock_commit(tr->buffer, event); out: atomic_dec(&tr->data[cpu]->disabled); @@ -133,7 +131,6 @@ static void stop_branch_trace(struct trace_array *tr) static int branch_trace_init(struct trace_array *tr) { - tracing_reset_online_cpus(tr); start_branch_trace(tr); return 0; } @@ -143,23 +140,6 @@ static void branch_trace_reset(struct trace_array *tr) stop_branch_trace(tr); } -static int -trace_print_print(struct trace_seq *s, struct trace_entry *entry, int flags) -{ - struct print_entry *field; - - trace_assign_type(field, entry); - - if (seq_print_ip_sym(s, field->ip, flags)) - goto partial; - - if (trace_seq_printf(s, ": %s", field->buf)) - goto partial; - - partial: - return TRACE_TYPE_PARTIAL_LINE; -} - static enum print_line_t trace_branch_print(struct trace_iterator *iter, int flags) { @@ -182,9 +162,6 @@ static struct trace_event trace_branch_event = { .type = TRACE_BRANCH, .trace = trace_branch_print, .latency_trace = trace_branch_print, - .raw = trace_nop_print, - .hex = trace_nop_print, - .binary = trace_nop_print, }; static struct tracer branch_trace __read_mostly = diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index b3a320f8aba..36bf9568ccd 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -24,32 +24,21 @@ static struct trace_array *func_trace; static void tracing_start_function_trace(void); static void tracing_stop_function_trace(void); -static void start_function_trace(struct trace_array *tr) +static int function_trace_init(struct trace_array *tr) { func_trace = tr; tr->cpu = get_cpu(); - tracing_reset_online_cpus(tr); put_cpu(); tracing_start_cmdline_record(); tracing_start_function_trace(); -} - -static void stop_function_trace(struct trace_array *tr) -{ - tracing_stop_function_trace(); - tracing_stop_cmdline_record(); -} - -static int function_trace_init(struct trace_array *tr) -{ - start_function_trace(tr); return 0; } static void function_trace_reset(struct trace_array *tr) { - stop_function_trace(tr); + tracing_stop_function_trace(); + tracing_stop_cmdline_record(); } static void function_trace_start(struct trace_array *tr) @@ -78,7 +67,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); atomic_dec(&data->disabled); ftrace_preempt_enable(resched); @@ -108,7 +97,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) if (likely(disabled == 1)) { pc = preempt_count(); - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); } atomic_dec(&data->disabled); @@ -139,7 +128,7 @@ function_stack_trace_call(unsigned long ip, unsigned 
long parent_ip) if (likely(disabled == 1)) { pc = preempt_count(); - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); /* * skip over 5 funcs: * __ftrace_trace_stack, @@ -148,7 +137,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip) * ftrace_list_func * ftrace_call */ - __trace_stack(tr, data, flags, 5, pc); + __trace_stack(tr, flags, 5, pc); } atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index c97594d826b..222f97d336a 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -56,7 +56,6 @@ static int graph_trace_init(struct trace_array *tr) &trace_graph_entry); if (ret) return ret; - tracing_reset_online_cpus(tr); tracing_start_cmdline_record(); return 0; diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 549238a9b13..e3e7db61c06 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c @@ -132,7 +132,6 @@ static int bts_trace_init(struct trace_array *tr) hw_branch_trace = tr; register_hotcpu_notifier(&bts_hotcpu_notifier); - tracing_reset_online_cpus(tr); bts_trace_start(tr); return 0; @@ -175,7 +174,7 @@ void trace_hw_branch(u64 from, u64 to) struct trace_array *tr = hw_branch_trace; struct ring_buffer_event *event; struct hw_branch_entry *entry; - unsigned long irq1, irq2; + unsigned long irq1; int cpu; if (unlikely(!tr)) @@ -189,7 +188,8 @@ void trace_hw_branch(u64 from, u64 to) if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) goto out; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq2); + event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); @@ -197,7 +197,7 @@ void trace_hw_branch(u64 from, u64 to) entry->ent.type = TRACE_HW_BRANCHES; entry->from = from; entry->to = to; - ring_buffer_unlock_commit(tr->buffer, event, irq2); + trace_buffer_unlock_commit(tr, event, 0, 0); out: atomic_dec(&tr->data[cpu]->disabled); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index ed344b022a1..c6b442d88de 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -95,7 +95,7 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip) disabled = atomic_inc_return(&data->disabled); if (likely(disabled == 1)) - trace_function(tr, data, ip, parent_ip, flags, preempt_count()); + trace_function(tr, ip, parent_ip, flags, preempt_count()); atomic_dec(&data->disabled); } @@ -153,7 +153,7 @@ check_critical_timing(struct trace_array *tr, if (!report_latency(delta)) goto out_unlock; - trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); + trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); latency = nsecs_to_usecs(delta); @@ -177,7 +177,7 @@ out: data->critical_sequence = max_sequence; data->preempt_timestamp = ftrace_now(cpu); tracing_reset(tr, cpu); - trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); + trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); } static inline void @@ -210,7 +210,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip) local_save_flags(flags); - trace_function(tr, data, ip, parent_ip, flags, preempt_count()); + trace_function(tr, ip, parent_ip, flags, preempt_count()); per_cpu(tracing_cpu, cpu) = 1; @@ -244,7 +244,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip) atomic_inc(&data->disabled); local_save_flags(flags); - 
trace_function(tr, data, ip, parent_ip, flags, preempt_count()); + trace_function(tr, ip, parent_ip, flags, preempt_count()); check_critical_timing(tr, data, parent_ip ? : ip, cpu); data->critical_start = 0; atomic_dec(&data->disabled); diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index ec78e244242..c401b908e80 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -307,21 +307,17 @@ static void __trace_mmiotrace_rw(struct trace_array *tr, { struct ring_buffer_event *event; struct trace_mmiotrace_rw *entry; - unsigned long irq_flags; + int pc = preempt_count(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, + sizeof(*entry), 0, pc); if (!event) { atomic_inc(&dropped_count); return; } entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, preempt_count()); - entry->ent.type = TRACE_MMIO_RW; entry->rw = *rw; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, pc); } void mmio_trace_rw(struct mmiotrace_rw *rw) @@ -337,21 +333,17 @@ static void __trace_mmiotrace_map(struct trace_array *tr, { struct ring_buffer_event *event; struct trace_mmiotrace_map *entry; - unsigned long irq_flags; + int pc = preempt_count(); - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, + sizeof(*entry), 0, pc); if (!event) { atomic_inc(&dropped_count); return; } entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, preempt_count()); - entry->ent.type = TRACE_MMIO_MAP; entry->map = *map; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); + trace_buffer_unlock_commit(tr, event, 0, pc); } void mmio_trace_mapping(struct mmiotrace_map *map) diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 087b6cbf4ea..9aa84bde23c 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -48,7 +48,6 @@ static void stop_nop_trace(struct trace_array *tr) static int nop_trace_init(struct trace_array *tr) { ctx_trace = tr; - tracing_reset_online_cpus(tr); start_nop_trace(tr); return 0; } diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 463a310b1d3..9fc815031b0 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -435,6 +435,17 @@ int register_ftrace_event(struct trace_event *event) if (ftrace_find_event(event->type)) goto out; + if (event->trace == NULL) + event->trace = trace_nop_print; + if (event->latency_trace == NULL) + event->latency_trace = trace_nop_print; + if (event->raw == NULL) + event->raw = trace_nop_print; + if (event->hex == NULL) + event->hex = trace_nop_print; + if (event->binary == NULL) + event->binary = trace_nop_print; + key = event->type & (EVENT_HASHSIZE - 1); hlist_add_head_rcu(&event->node, &event_hash[key]); @@ -874,8 +885,6 @@ static struct trace_event trace_print_event = { .trace = trace_print_print, .latency_trace = trace_print_print, .raw = trace_print_raw, - .hex = trace_nop_print, - .binary = trace_nop_print, }; static struct trace_event *events[] __initdata = { diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c index faa6ab7a1f5..bfc21f8079a 100644 --- a/kernel/trace/trace_power.c +++ b/kernel/trace/trace_power.c @@ -115,7 +115,6 @@ void trace_power_end(struct power_trace *it) struct ring_buffer_event *event; struct 
trace_power *entry; struct trace_array_cpu *data; - unsigned long irq_flags; struct trace_array *tr = power_trace; if (!trace_power_enabled) @@ -125,18 +124,13 @@ void trace_power_end(struct power_trace *it) it->end = ktime_get(); data = tr->data[smp_processor_id()]; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_POWER, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_POWER; entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } @@ -148,7 +142,6 @@ void trace_power_mark(struct power_trace *it, unsigned int type, struct ring_buffer_event *event; struct trace_power *entry; struct trace_array_cpu *data; - unsigned long irq_flags; struct trace_array *tr = power_trace; if (!trace_power_enabled) @@ -162,18 +155,13 @@ void trace_power_mark(struct power_trace *it, unsigned int type, it->end = it->stamp; data = tr->data[smp_processor_id()]; - event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), - &irq_flags); + event = trace_buffer_lock_reserve(tr, TRACE_POWER, + sizeof(*entry), 0, 0); if (!event) goto out; entry = ring_buffer_event_data(event); - tracing_generic_entry_update(&entry->ent, 0, 0); - entry->ent.type = TRACE_POWER; entry->state_data = *it; - ring_buffer_unlock_commit(tr->buffer, event, irq_flags); - - trace_wake_up(); - + trace_buffer_unlock_commit(tr, event, 0, 0); out: preempt_enable(); } diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index df175cb4564..30e14fe8589 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -43,7 +43,7 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev, data = ctx_trace->data[cpu]; if (likely(!atomic_read(&data->disabled))) - tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); + tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); local_irq_restore(flags); } @@ -66,7 +66,7 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) data = ctx_trace->data[cpu]; if (likely(!atomic_read(&data->disabled))) - tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, + tracing_sched_wakeup_trace(ctx_trace, wakee, current, flags, pc); local_irq_restore(flags); @@ -185,12 +185,6 @@ void tracing_sched_switch_assign_trace(struct trace_array *tr) ctx_trace = tr; } -static void start_sched_trace(struct trace_array *tr) -{ - tracing_reset_online_cpus(tr); - tracing_start_sched_switch_record(); -} - static void stop_sched_trace(struct trace_array *tr) { tracing_stop_sched_switch_record(); @@ -199,7 +193,7 @@ static void stop_sched_trace(struct trace_array *tr) static int sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; - start_sched_trace(tr); + tracing_start_sched_switch_record(); return 0; } diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index a48c9b4b0c8..96d71648589 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -72,7 +72,7 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip) if (task_cpu(wakeup_task) != cpu) goto unlock; - trace_function(tr, data, ip, parent_ip, flags, pc); + trace_function(tr, ip, parent_ip, flags, pc); unlock: __raw_spin_unlock(&wakeup_lock); @@ -152,8 +152,8 @@ probe_wakeup_sched_switch(struct 
rq *rq, struct task_struct *prev, if (unlikely(!tracer_enabled || next != wakeup_task)) goto out_unlock; - trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); - tracing_sched_switch_trace(wakeup_trace, data, prev, next, flags, pc); + trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); /* * usecs conversion is slow so we try to delay the conversion @@ -254,10 +254,8 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) data = wakeup_trace->data[wakeup_cpu]; data->preempt_timestamp = ftrace_now(cpu); - tracing_sched_wakeup_trace(wakeup_trace, data, p, current, - flags, pc); - trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, - flags, pc); + tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); + trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); out_locked: __raw_spin_unlock(&wakeup_lock); diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 5013812578b..445700e51f6 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -115,7 +115,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); goto out; @@ -189,7 +189,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); goto out; @@ -236,7 +236,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -290,7 +290,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -344,7 +344,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); goto out; @@ -476,7 +476,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -537,7 +537,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; @@ -569,7 +569,7 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return 0; @@ -596,7 +596,7 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - ret = trace->init(tr); + ret = tracer_init(trace, tr); if (ret) { warn_failed_init_tracer(trace, ret); return ret; diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index eaca5ad803f..84ca9d81e74 100644 --- 
a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -226,15 +226,6 @@ static void stop_stack_timers(void) stop_stack_timer(cpu); } -static void start_stack_trace(struct trace_array *tr) -{ - mutex_lock(&sample_timer_lock); - tracing_reset_online_cpus(tr); - start_stack_timers(); - tracer_enabled = 1; - mutex_unlock(&sample_timer_lock); -} - static void stop_stack_trace(struct trace_array *tr) { mutex_lock(&sample_timer_lock); @@ -247,7 +238,10 @@ static int stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; - start_stack_trace(tr); + mutex_lock(&sample_timer_lock); + start_stack_timers(); + tracer_enabled = 1; + mutex_unlock(&sample_timer_lock); return 0; } diff --git a/kernel/wait.c b/kernel/wait.c index cd87131f2fc..42a2dbc181c 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -91,6 +91,15 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state) } EXPORT_SYMBOL(prepare_to_wait_exclusive); +/* + * finish_wait - clean up after waiting in a queue + * @q: waitqueue waited on + * @wait: wait descriptor + * + * Sets current thread back to running state and removes + * the wait descriptor from the given waitqueue if still + * queued. + */ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) { unsigned long flags; @@ -117,6 +126,39 @@ void finish_wait(wait_queue_head_t *q, wait_queue_t *wait) } EXPORT_SYMBOL(finish_wait); +/* + * abort_exclusive_wait - abort exclusive waiting in a queue + * @q: waitqueue waited on + * @wait: wait descriptor + * @mode: runstate of the waiter to be woken + * @key: key to identify a wait bit queue or %NULL + * + * Sets current thread back to running state and removes + * the wait descriptor from the given waitqueue if still + * queued. + * + * Wakes up the next waiter if the caller is concurrently + * woken up through the queue. + * + * This prevents waiter starvation where an exclusive waiter + * aborts and is woken up concurrently and no one wakes up + * the next waiter. + */ +void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, + unsigned int mode, void *key) +{ + unsigned long flags; + + __set_current_state(TASK_RUNNING); + spin_lock_irqsave(&q->lock, flags); + if (!list_empty(&wait->task_list)) + list_del_init(&wait->task_list); + else if (waitqueue_active(q)) + __wake_up_common(q, mode, 1, 0, key); + spin_unlock_irqrestore(&q->lock, flags); +} +EXPORT_SYMBOL(abort_exclusive_wait); + int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key) { int ret = default_wake_function(wait, mode, sync, key); @@ -177,17 +219,20 @@ int __sched __wait_on_bit_lock(wait_queue_head_t *wq, struct wait_bit_queue *q, int (*action)(void *), unsigned mode) { - int ret = 0; - do { + int ret; + prepare_to_wait_exclusive(wq, &q->wait, mode); - if (test_bit(q->key.bit_nr, q->key.flags)) { - if ((ret = (*action)(q->key.flags))) - break; - } + if (!test_bit(q->key.bit_nr, q->key.flags)) + continue; + ret = action(q->key.flags); + if (!ret) + continue; + abort_exclusive_wait(wq, &q->wait, mode, &q->key); + return ret; } while (test_and_set_bit(q->key.bit_nr, q->key.flags)); finish_wait(wq, &q->wait); - return ret; + return 0; } EXPORT_SYMBOL(__wait_on_bit_lock); diff --git a/mm/memory.c b/mm/memory.c index 22bfa7a47a0..baa999e87cd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1999,7 +1999,7 @@ gotten: * Don't let another task, with possibly unlocked vma, * keep the mlocked page.
*/ - if (vma->vm_flags & VM_LOCKED) { + if ((vma->vm_flags & VM_LOCKED) && old_page) { lock_page(old_page); /* for LRU manipulation */ clear_page_mlock(old_page); unlock_page(old_page); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 557fe16cbfb..dda42f0bd7a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -663,14 +663,10 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = 0; /* The urg_mode check is necessary during a below snd_una win probe */ - if (unlikely(tcp_urg_mode(tp))) { - if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) { - th->urg_ptr = htons(tp->snd_up - tcb->seq); - th->urg = 1; - } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { - th->urg_ptr = 0xFFFF; - th->urg = 1; - } + if (unlikely(tcp_urg_mode(tp) && + between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { + th->urg_ptr = htons(tp->snd_up - tcb->seq); + th->urg = 1; } tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1ab180bad72..cc3a0a06c00 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1231,7 +1231,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { struct sock *sk; - struct udphdr *uh = udp_hdr(skb); + struct udphdr *uh; unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; __be32 saddr = ip_hdr(skb)->saddr; @@ -1244,6 +1244,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto drop; /* No space for header. */ + uh = udp_hdr(skb); ulen = ntohs(uh->len); if (ulen > skb->len) goto short_packet; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4b15938bef4..9fb49c3b518 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1105,6 +1105,18 @@ static inline int ip6_ufo_append_data(struct sock *sk, return err; } +static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, + gfp_t gfp) +{ + return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; +} + +static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, + gfp_t gfp) +{ + return src ? 
kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -1130,17 +1142,37 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * setup for corking */ if (opt) { - if (np->cork.opt == NULL) { - np->cork.opt = kmalloc(opt->tot_len, - sk->sk_allocation); - if (unlikely(np->cork.opt == NULL)) - return -ENOBUFS; - } else if (np->cork.opt->tot_len < opt->tot_len) { - printk(KERN_DEBUG "ip6_append_data: invalid option length\n"); + if (WARN_ON(np->cork.opt)) return -EINVAL; - } - memcpy(np->cork.opt, opt, opt->tot_len); - inet->cork.flags |= IPCORK_OPT; + + np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(np->cork.opt == NULL)) + return -ENOBUFS; + + np->cork.opt->tot_len = opt->tot_len; + np->cork.opt->opt_flen = opt->opt_flen; + np->cork.opt->opt_nflen = opt->opt_nflen; + + np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, + sk->sk_allocation); + if (opt->dst0opt && !np->cork.opt->dst0opt) + return -ENOBUFS; + + np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, + sk->sk_allocation); + if (opt->dst1opt && !np->cork.opt->dst1opt) + return -ENOBUFS; + + np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, + sk->sk_allocation); + if (opt->hopopt && !np->cork.opt->hopopt) + return -ENOBUFS; + + np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, + sk->sk_allocation); + if (opt->srcrt && !np->cork.opt->srcrt) + return -ENOBUFS; + /* need source address above miyazawa*/ } dst_hold(&rt->u.dst); @@ -1167,8 +1199,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, } else { rt = (struct rt6_info *)inet->cork.dst; fl = &inet->cork.fl; - if (inet->cork.flags & IPCORK_OPT) - opt = np->cork.opt; + opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; mtu = inet->cork.fragsize; @@ -1407,9 +1438,15 @@ error: static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) { - inet->cork.flags &= ~IPCORK_OPT; - kfree(np->cork.opt); - np->cork.opt = NULL; + if (np->cork.opt) { + kfree(np->cork.opt->dst0opt); + kfree(np->cork.opt->dst1opt); + kfree(np->cork.opt->hopopt); + kfree(np->cork.opt->srcrt); + kfree(np->cork.opt); + np->cork.opt = NULL; + } + if (inet->cork.dst) { dst_release(inet->cork.dst); inet->cork.dst = NULL; diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index e17836680f4..0a1798eafb0 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -1767,7 +1767,7 @@ static int snd_pcm_oss_get_formats(struct snd_pcm_oss_file *pcm_oss_file) AFMT_S8 | AFMT_U16_LE | AFMT_U16_BE | AFMT_S32_LE | AFMT_S32_BE | - AFMT_S24_LE | AFMT_S24_LE | + AFMT_S24_LE | AFMT_S24_BE | AFMT_S24_PACKED; params = kmalloc(sizeof(*params), GFP_KERNEL); if (!params) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index b7bba7dc7cf..0b708134d12 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -487,7 +487,6 @@ int /*__devinit*/ snd_hda_bus_new(struct snd_card *card, { struct hda_bus *bus; int err; - char qname[8]; static struct snd_device_ops dev_ops = { .dev_register = snd_hda_bus_dev_register, .dev_free = snd_hda_bus_dev_free, @@ -517,10 +516,12 @@ int /*__devinit*/ snd_hda_bus_new(struct snd_card *card, mutex_init(&bus->cmd_mutex); INIT_LIST_HEAD(&bus->codec_list); - snprintf(qname, sizeof(qname), "hda%d", card->number); - bus->workq = create_workqueue(qname); + snprintf(bus->workq_name, sizeof(bus->workq_name), + "hd-audio%d", 
card->number); + bus->workq = create_singlethread_workqueue(bus->workq_name); if (!bus->workq) { - snd_printk(KERN_ERR "cannot create workqueue %s\n", qname); + snd_printk(KERN_ERR "cannot create workqueue %s\n", + bus->workq_name); kfree(bus); return -ENOMEM; } diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index 5810ef58840..09a332ada0c 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -614,6 +614,7 @@ struct hda_bus { /* unsolicited event queue */ struct hda_bus_unsolicited *unsol; + char workq_name[16]; struct workqueue_struct *workq; /* common workqueue for codecs */ /* assigned PCMs */ diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c index 7ca66d65414..144b85276d5 100644 --- a/sound/pci/hda/hda_proc.c +++ b/sound/pci/hda/hda_proc.c @@ -399,7 +399,8 @@ static void print_conn_list(struct snd_info_buffer *buffer, { int c, curr = -1; - if (conn_len > 1 && wid_type != AC_WID_AUD_MIX) + if (conn_len > 1 && wid_type != AC_WID_AUD_MIX && + wid_type != AC_WID_VOL_KNB) curr = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_CONNECT_SEL, 0); snd_iprintf(buffer, " Connection: %d\n", conn_len); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 7884a4e0706..ae5c8a0d147 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1037,6 +1037,7 @@ do_sku: case 0x10ec0267: case 0x10ec0268: case 0x10ec0269: + case 0x10ec0272: case 0x10ec0660: case 0x10ec0662: case 0x10ec0663: @@ -1065,6 +1066,7 @@ do_sku: case 0x10ec0882: case 0x10ec0883: case 0x10ec0885: + case 0x10ec0887: case 0x10ec0889: snd_hda_codec_write(codec, 0x20, 0, AC_VERB_SET_COEF_INDEX, 7); @@ -7012,6 +7014,7 @@ static int patch_alc882(struct hda_codec *codec) break; case 0x106b1000: /* iMac 24 */ case 0x106b2800: /* AppleTV */ + case 0x106b3e00: /* iMac 24 Aluminium */ board_config = ALC885_IMAC24; break; case 0x106b00a1: /* Macbook (might be wrong - PCI SSID?) */ @@ -8514,6 +8517,8 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = { SND_PCI_QUIRK(0x1558, 0, "Clevo laptop", ALC883_LAPTOP_EAPD), SND_PCI_QUIRK(0x15d9, 0x8780, "Supermicro PDSBA", ALC883_3ST_6ch), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_MEDION), + SND_PCI_QUIRK(0x1734, 0x1107, "FSC AMILO Xi2550", + ALC883_FUJITSU_PI2515), SND_PCI_QUIRK(0x1734, 0x1108, "Fujitsu AMILO Pi2515", ALC883_FUJITSU_PI2515), SND_PCI_QUIRK(0x1734, 0x113d, "Fujitsu AMILO Xa3530", ALC888_FUJITSU_XA3530), diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index b787b3cc096..38428e22428 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1804,6 +1804,8 @@ static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = { "HP dv4", STAC_HP_DV5), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x30fc, "HP dv7", STAC_HP_M4), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3600, + "HP dv5", STAC_HP_DV5), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3603, "HP dv5", STAC_HP_DV5), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x361a, diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 19d3391e229..e900cdc8484 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -617,7 +617,7 @@ static int snd_intel8x0_ali_codec_semaphore(struct intel8x0 *chip) int time = 100; if (chip->buggy_semaphore) return 0; /* just ignore ... */ - while (time-- && (igetdword(chip, ICHREG(ALI_CAS)) & ALI_CAS_SEM_BUSY)) + while (--time && (igetdword(chip, ICHREG(ALI_CAS)) & ALI_CAS_SEM_BUSY)) udelay(1); if (! time && ! 
chip->in_ac97_init) snd_printk(KERN_WARNING "ali_codec_semaphore timeout\n"); diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index c5d67900d66..ff0054b7650 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -10,7 +10,7 @@ * Based on at91-ssc.c by * Frank Mandarino <fmandarino@endrelia.com> * Based on pxa2xx Platform drivers by - * Liam Girdwood <liam.girdwood@wolfsonmicro.com> + * Liam Girdwood <lrg@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/sound/soc/atmel/atmel_ssc_dai.h b/sound/soc/atmel/atmel_ssc_dai.h index a828746e8a2..391135f9c6c 100644 --- a/sound/soc/atmel/atmel_ssc_dai.h +++ b/sound/soc/atmel/atmel_ssc_dai.h @@ -10,7 +10,7 @@ * Based on at91-ssc.c by * Frank Mandarino <fmandarino@endrelia.com> * Based on pxa2xx Platform drivers by - * Liam Girdwood <liam.girdwood@wolfsonmicro.com> + * Liam Girdwood <lrg@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c index e3989d406f5..35d99750c38 100644 --- a/sound/soc/codecs/wm8350.c +++ b/sound/soc/codecs/wm8350.c @@ -3,7 +3,7 @@ * * Copyright (C) 2007, 2008 Wolfson Microelectronics PLC. * - * Author: Liam Girdwood <lg@opensource.wolfsonmicro.com> + * Author: Liam Girdwood <lrg@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/sound/soc/codecs/wm8990.c b/sound/soc/codecs/wm8990.c index 5b5afc14447..1cbb7b9b51c 100644 --- a/sound/soc/codecs/wm8990.c +++ b/sound/soc/codecs/wm8990.c @@ -2,8 +2,7 @@ * wm8990.c -- WM8990 ALSA Soc Audio driver * * Copyright 2008 Wolfson Microelectronics PLC. - * Author: Liam Girdwood - * lg@opensource.wolfsonmicro.com or linux@wolfsonmicro.com + * Author: Liam Girdwood <lrg@slimlogic.co.uk> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c index b0362dfd5b7..dd3bb293376 100644 --- a/sound/soc/omap/omap-pcm.c +++ b/sound/soc/omap/omap-pcm.c @@ -175,9 +175,10 @@ static int omap_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_pcm_runtime *runtime = substream->runtime; struct omap_runtime_data *prtd = runtime->private_data; + unsigned long flags; int ret = 0; - spin_lock_irq(&prtd->lock); + spin_lock_irqsave(&prtd->lock, flags); switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: @@ -195,7 +196,7 @@ static int omap_pcm_trigger(struct snd_pcm_substream *substream, int cmd) default: ret = -EINVAL; } - spin_unlock_irq(&prtd->lock); + spin_unlock_irqrestore(&prtd->lock, flags); return ret; } diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index c709b956322..2ab83129d9b 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -2966,6 +2966,7 @@ static int create_fixed_stream_quirk(struct snd_usb_audio *chip, return -EINVAL; } alts = &iface->altsetting[fp->altset_idx]; + fp->maxpacksize = le16_to_cpu(get_endpoint(alts, 0)->wMaxPacketSize); usb_set_interface(chip->dev, fp->iface, 0); init_usb_pitch(chip->dev, fp->iface, alts, fp); init_usb_sample_rate(chip->dev, fp->iface, alts, fp, fp->rate_max);
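
The kernel/async.c hunks rename the `_special' entry points to `_domain': a synchronization domain is named by the running list passed in, so a caller can wait for its own asynchronous work without blocking on unrelated global async work. A minimal sketch of such a caller, assuming only the renamed API shown in the patch; my_domain, my_async_probe() and my_init() are hypothetical names, not part of this patch:

#include <linux/async.h>
#include <linux/init.h>
#include <linux/list.h>

/* The running list both names the domain and tracks its in-flight work. */
static LIST_HEAD(my_domain);

static void my_async_probe(void *data, async_cookie_t cookie)
{
	/* slow, independent initialisation work runs here */
}

static int __init my_init(void)
{
	int i;

	for (i = 0; i < 4; i++)
		async_schedule_domain(my_async_probe, NULL, &my_domain);

	/* Waits only for work queued on &my_domain, not for global
	 * async work scheduled via async_schedule(). */
	async_synchronize_full_domain(&my_domain);
	return 0;
}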
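
The kernel/trace hunks replace the open-coded reserve/update/commit sequence (ring_buffer_lock_reserve(), tracing_generic_entry_update(), setting entry->ent.type, ring_buffer_unlock_commit(), trace_wake_up()) with the trace_buffer_lock_reserve()/trace_buffer_unlock_commit() pair, which also removes the irq_flags plumbing. A sketch of the resulting pattern for a tracer emitting one record, mirroring the converted call sites above; TRACE_MY_EVENT and struct my_entry are hypothetical stand-ins for a real entry type:

/* Hypothetical entry type; real ones embed struct trace_entry first,
 * as struct ftrace_entry and friends in kernel/trace do. */
struct my_entry {
	struct trace_entry	ent;
	unsigned long		value;
};

static void my_trace(struct trace_array *tr, unsigned long value,
		     unsigned long flags, int pc)
{
	struct ring_buffer_event *event;
	struct my_entry *entry;

	/* Reserves buffer space and fills entry->ent (flags, pc, type). */
	event = trace_buffer_lock_reserve(tr, TRACE_MY_EVENT,
					  sizeof(*entry), flags, pc);
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->value = value;

	/* Commits, records stack/userstack traces when those trace
	 * options are enabled, and wakes up readers. */
	trace_buffer_unlock_commit(tr, event, flags, pc);
}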
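
The kernel/wait.c hunks fix exclusive-waiter starvation in __wait_on_bit_lock(): when the caller-supplied action fails, the new abort_exclusive_wait() passes an already-consumed exclusive wakeup on to the next waiter instead of dropping it. Callers of wait_on_bit_lock() need no changes; a sketch of an interruptible action whose failure now exercises that path, with my_flags, my_wait_action() and my_lock() as hypothetical names:

#include <linux/wait.h>
#include <linux/sched.h>

static unsigned long my_flags;		/* bit 0 serves as a lock bit */

static int my_wait_action(void *word)
{
	/* A non-zero return aborts the wait; with the fix above the
	 * abort hands a concurrent wakeup to the next exclusive waiter. */
	if (signal_pending(current))
		return -EINTR;
	schedule();
	return 0;
}

static int my_lock(void)
{
	/* Returns 0 with bit 0 set, or my_wait_action()'s error. */
	return wait_on_bit_lock(&my_flags, 0, my_wait_action,
				TASK_INTERRUPTIBLE);
}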