33 files changed, 1315 insertions, 613 deletions
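The first hunk below wires the on-chip R8A66597 USB host controller into the SE7724 board file as a platform device with one MEM and one IRQ resource (and a NULL dma_mask, since the controller is not used with DMA here). For readers unfamiliar with the pattern, here is a minimal sketch of registering such a device — illustrative only; the register window and IRQ number are taken from the hunk, but the surrounding names are invented, and the real board code below also sets trigger flags and platform data:

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>

/* Hypothetical example of the platform-device pattern used below. */
static struct resource example_usb_resources[] = {
	[0] = {
		.start	= 0xa4d80000,	/* first byte of the register block */
		.end	= 0xa4d800ff,	/* last byte, inclusive */
		.flags	= IORESOURCE_MEM,
	},
	[1] = {
		.start	= 65,		/* interrupt line */
		.end	= 65,
		.flags	= IORESOURCE_IRQ,
	},
};

static struct platform_device example_usb_device = {
	.name		= "r8a66597_hcd",	/* drivers bind by this name */
	.id		= 0,
	.num_resources	= ARRAY_SIZE(example_usb_resources),
	.resource	= example_usb_resources,
};

static int __init example_board_setup(void)
{
	/* Hand the device to the driver core; the r8a66597_hcd driver
	 * binds to it by name once both are registered. */
	return platform_device_register(&example_usb_device);
}
device_initcall(example_board_setup);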
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index c050a8d76df..8fed45a2fb8 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -19,6 +19,7 @@ #include <linux/smc91x.h> #include <linux/gpio.h> #include <linux/input.h> +#include <linux/usb/r8a66597.h> #include <video/sh_mobile_lcdc.h> #include <media/sh_mobile_ceu.h> #include <asm/io.h> @@ -302,6 +303,34 @@ static struct platform_device sh_eth_device = { .resource = sh_eth_resources, }; +static struct r8a66597_platdata sh7724_usb0_host_data = { +}; + +static struct resource sh7724_usb0_host_resources[] = { + [0] = { + .start = 0xa4d80000, + .end = 0xa4d800ff, + .flags = IORESOURCE_MEM, + }, + [1] = { + .start = 65, + .end = 65, + .flags = IORESOURCE_IRQ | IRQF_TRIGGER_LOW, + }, +}; + +static struct platform_device sh7724_usb0_host_device = { + .name = "r8a66597_hcd", + .id = 0, + .dev = { + .dma_mask = NULL, /* not use dma */ + .coherent_dma_mask = 0xffffffff, + .platform_data = &sh7724_usb0_host_data, + }, + .num_resources = ARRAY_SIZE(sh7724_usb0_host_resources), + .resource = sh7724_usb0_host_resources, +}; + static struct platform_device *ms7724se_devices[] __initdata = { &heartbeat_device, &smc91x_eth_device, @@ -311,6 +340,7 @@ static struct platform_device *ms7724se_devices[] __initdata = { &ceu1_device, &keysc_device, &sh_eth_device, + &sh7724_usb0_host_device, }; #define EEPROM_OP 0xBA206000 @@ -364,6 +394,7 @@ static void __init sh_eth_init(void) #define SW4140 0xBA201000 #define FPGA_OUT 0xBA200400 #define PORT_HIZA 0xA4050158 +#define PORT_MSELCRB 0xA4050182 #define SW41_A 0x0100 #define SW41_B 0x0200 @@ -373,6 +404,7 @@ static void __init sh_eth_init(void) #define SW41_F 0x2000 #define SW41_G 0x4000 #define SW41_H 0x8000 + static int __init devices_setup(void) { u16 sw = ctrl_inw(SW4140); /* select camera, monitor */ @@ -385,6 +417,12 @@ static int __init devices_setup(void) (1 << 14)), /* RMII */ FPGA_OUT); + /* turn on USB clocks, use external clock */ + ctrl_outw((ctrl_inw(PORT_MSELCRB) & ~0xc000) | 0x8000, PORT_MSELCRB); + + /* enable USB0 port */ + ctrl_outw(0x0600, 0xa40501d4); + /* enable IRQ 0,1,2 */ gpio_request(GPIO_FN_INTC_IRQ0, NULL); gpio_request(GPIO_FN_INTC_IRQ1, NULL); diff --git a/arch/sh/include/asm/hwblk.h b/arch/sh/include/asm/hwblk.h new file mode 100644 index 00000000000..51a46f49663 --- /dev/null +++ b/arch/sh/include/asm/hwblk.h @@ -0,0 +1,61 @@ +#ifndef __ASM_SH_HWBLK_H +#define __ASM_SH_HWBLK_H + +#include <asm/clock.h> +#include <asm/io.h> + +#define HWBLK_AREA_FLAG_PARENT (1 << 0) /* valid parent */ + +#define HWBLK_AREA(_flags, _parent) \ +{ \ + .flags = _flags, \ + .parent = _parent, \ +} + +struct hwblk_area { + unsigned long cnt; + unsigned char parent; + unsigned char flags; +}; + +#define HWBLK(_mstp, _bit, _area) \ +{ \ + .mstp = (void __iomem *)_mstp, \ + .bit = _bit, \ + .area = _area, \ +} + +struct hwblk { + void __iomem *mstp; + unsigned char bit; + unsigned char area; + unsigned long cnt; +}; + +struct hwblk_info { + struct hwblk_area *areas; + int nr_areas; + struct hwblk *hwblks; + int nr_hwblks; +}; + +/* Should be defined by processor-specific code */ +int arch_hwblk_init(void); +int arch_hwblk_sleep_mode(void); + +int hwblk_register(struct hwblk_info *info); +int hwblk_init(void); + +/* allow clocks to enable and disable hardware blocks */ +#define SH_HWBLK_CLK(_name, _id, _parent, _hwblk, _flags) \ +{ \ + .name = _name, \ + .id = _id, \ + .parent = _parent, \ + .arch_flags = _hwblk, \ + .flags = 
_flags, \ +} + +int sh_hwblk_clk_register(struct clk *clks, int nr); + +#endif /* __ASM_SH_HWBLK_H */ diff --git a/arch/sh/include/asm/suspend.h b/arch/sh/include/asm/suspend.h index b1b995370e7..5c8ea28ff7a 100644 --- a/arch/sh/include/asm/suspend.h +++ b/arch/sh/include/asm/suspend.h @@ -10,6 +10,15 @@ struct swsusp_arch_regs { struct pt_regs user_regs; unsigned long bank1_regs[8]; }; + +void sh_mobile_call_standby(unsigned long mode); + +#ifdef CONFIG_CPU_IDLE +void sh_mobile_setup_cpuidle(void); +#else +static inline void sh_mobile_setup_cpuidle(void) {} +#endif + #endif /* flags passed to assembly suspend code */ diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h index 738ea43c503..48560407cbe 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7722.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h @@ -221,4 +221,18 @@ enum { GPIO_FN_KEYOUT3, GPIO_FN_KEYOUT4_IN6, GPIO_FN_KEYOUT5_IN5, }; +enum { + HWBLK_UNKNOWN = 0, + HWBLK_TLB, HWBLK_IC, HWBLK_OC, HWBLK_URAM, HWBLK_XYMEM, + HWBLK_INTC, HWBLK_DMAC, HWBLK_SHYWAY, HWBLK_HUDI, + HWBLK_UBC, HWBLK_TMU, HWBLK_CMT, HWBLK_RWDT, HWBLK_FLCTL, + HWBLK_SCIF0, HWBLK_SCIF1, HWBLK_SCIF2, HWBLK_SIO, + HWBLK_SIOF0, HWBLK_SIOF1, HWBLK_IIC, HWBLK_RTC, + HWBLK_TPU, HWBLK_IRDA, HWBLK_SDHI, HWBLK_SIM, HWBLK_KEYSC, + HWBLK_TSIF, HWBLK_USBF, HWBLK_2DG, HWBLK_SIU, HWBLK_VOU, + HWBLK_JPU, HWBLK_BEU, HWBLK_CEU, HWBLK_VEU, HWBLK_VPU, + HWBLK_LCDC, + HWBLK_NR, +}; + #endif /* __ASM_SH7722_H__ */ diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile index eecad7cbd61..3d6b9312dc4 100644 --- a/arch/sh/kernel/cpu/Makefile +++ b/arch/sh/kernel/cpu/Makefile @@ -19,4 +19,4 @@ obj-$(CONFIG_UBC_WAKEUP) += ubc.o obj-$(CONFIG_SH_ADC) += adc.o obj-$(CONFIG_SH_CLK_CPG) += clock-cpg.o -obj-y += irq/ init.o clock.o +obj-y += irq/ init.o clock.o hwblk.o diff --git a/arch/sh/kernel/cpu/hwblk.c b/arch/sh/kernel/cpu/hwblk.c new file mode 100644 index 00000000000..7c3a73deff2 --- /dev/null +++ b/arch/sh/kernel/cpu/hwblk.c @@ -0,0 +1,130 @@ +#include <linux/clk.h> +#include <linux/compiler.h> +#include <linux/slab.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <asm/suspend.h> +#include <asm/hwblk.h> +#include <asm/clock.h> + +static DEFINE_SPINLOCK(hwblk_lock); + +static void hwblk_area_inc(struct hwblk_info *info, int area) +{ + struct hwblk_area *hap = info->areas + area; + + hap->cnt++; + if (hap->cnt == 1) + if (hap->flags & HWBLK_AREA_FLAG_PARENT) + hwblk_area_inc(info, hap->parent); +} + +static void hwblk_area_dec(struct hwblk_info *info, int area) +{ + struct hwblk_area *hap = info->areas + area; + + if (hap->cnt == 1) + if (hap->flags & HWBLK_AREA_FLAG_PARENT) + hwblk_area_dec(info, hap->parent); + hap->cnt--; +} + +static void hwblk_enable(struct hwblk_info *info, int hwblk) +{ + struct hwblk *hp = info->hwblks + hwblk; + unsigned long tmp; + unsigned long flags; + + spin_lock_irqsave(&hwblk_lock, flags); + + hp->cnt++; + if (hp->cnt == 1) { + hwblk_area_inc(info, hp->area); + + tmp = __raw_readl(hp->mstp); + tmp &= ~(1 << hp->bit); + __raw_writel(tmp, hp->mstp); + } + + spin_unlock_irqrestore(&hwblk_lock, flags); +} + +static void hwblk_disable(struct hwblk_info *info, int hwblk) +{ + struct hwblk *hp = info->hwblks + hwblk; + unsigned long tmp; + unsigned long flags; + + spin_lock_irqsave(&hwblk_lock, flags); + + if (hp->cnt == 1) { + hwblk_area_dec(info, hp->area); + + tmp = __raw_readl(hp->mstp); + tmp |= 1 << hp->bit; + __raw_writel(tmp, hp->mstp); + } + hp->cnt--; + + spin_unlock_irqrestore(&hwblk_lock, 
flags); +} + +static struct hwblk_info *hwblk_info; + +int __init hwblk_register(struct hwblk_info *info) +{ + hwblk_info = info; + return 0; +} + +int __init __weak arch_hwblk_init(void) +{ + return 0; +} + +int __weak arch_hwblk_sleep_mode(void) +{ + return SUSP_SH_SLEEP; +} + +int __init hwblk_init(void) +{ + return arch_hwblk_init(); +} + +/* allow clocks to enable and disable hardware blocks */ +static int sh_hwblk_clk_enable(struct clk *clk) +{ + if (!hwblk_info) + return -ENOENT; + + hwblk_enable(hwblk_info, clk->arch_flags); + return 0; +} + +static void sh_hwblk_clk_disable(struct clk *clk) +{ + if (hwblk_info) + hwblk_disable(hwblk_info, clk->arch_flags); +} + +static struct clk_ops sh_hwblk_clk_ops = { + .enable = sh_hwblk_clk_enable, + .disable = sh_hwblk_clk_disable, + .recalc = followparent_recalc, +}; + +int __init sh_hwblk_clk_register(struct clk *clks, int nr) +{ + struct clk *clkp; + int ret = 0; + int k; + + for (k = 0; !ret && (k < nr); k++) { + clkp = clks + k; + clkp->ops = &sh_hwblk_clk_ops; + ret |= clk_register(clkp); + } + + return ret; +} diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile index ebdd391d5f4..3cafda69637 100644 --- a/arch/sh/kernel/cpu/sh4a/Makefile +++ b/arch/sh/kernel/cpu/sh4a/Makefile @@ -25,7 +25,7 @@ clock-$(CONFIG_CPU_SUBTYPE_SH7780) := clock-sh7780.o clock-$(CONFIG_CPU_SUBTYPE_SH7785) := clock-sh7785.o clock-$(CONFIG_CPU_SUBTYPE_SH7786) := clock-sh7786.o clock-$(CONFIG_CPU_SUBTYPE_SH7343) := clock-sh7343.o -clock-$(CONFIG_CPU_SUBTYPE_SH7722) := clock-sh7722.o +clock-$(CONFIG_CPU_SUBTYPE_SH7722) := clock-sh7722.o hwblk-sh7722.o clock-$(CONFIG_CPU_SUBTYPE_SH7723) := clock-sh7723.o clock-$(CONFIG_CPU_SUBTYPE_SH7724) := clock-sh7724.o clock-$(CONFIG_CPU_SUBTYPE_SH7366) := clock-sh7366.o diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c index 40f859354f7..1fa9e1dd1cc 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7722.c @@ -22,6 +22,8 @@ #include <linux/kernel.h> #include <linux/io.h> #include <asm/clock.h> +#include <asm/hwblk.h> +#include <cpu/sh7722.h> /* SH7722 registers */ #define FRQCR 0xa4150000 @@ -140,35 +142,37 @@ struct clk div6_clks[] = { SH_CLK_DIV6("video_clk", &pll_clk, VCLKCR, 0), }; -#define MSTP(_str, _parent, _reg, _bit, _flags) \ - SH_CLK_MSTP32(_str, -1, _parent, _reg, _bit, _flags) +#define R_CLK &r_clk +#define P_CLK &div4_clks[DIV4_P] +#define B_CLK &div4_clks[DIV4_B] +#define U_CLK &div4_clks[DIV4_U] static struct clk mstp_clks[] = { - MSTP("uram0", &div4_clks[DIV4_U], MSTPCR0, 28, CLK_ENABLE_ON_INIT), - MSTP("xymem0", &div4_clks[DIV4_B], MSTPCR0, 26, CLK_ENABLE_ON_INIT), - MSTP("tmu0", &div4_clks[DIV4_P], MSTPCR0, 15, 0), - MSTP("cmt0", &r_clk, MSTPCR0, 14, 0), - MSTP("rwdt0", &r_clk, MSTPCR0, 13, 0), - MSTP("flctl0", &div4_clks[DIV4_P], MSTPCR0, 10, 0), - MSTP("scif0", &div4_clks[DIV4_P], MSTPCR0, 7, 0), - MSTP("scif1", &div4_clks[DIV4_P], MSTPCR0, 6, 0), - MSTP("scif2", &div4_clks[DIV4_P], MSTPCR0, 5, 0), - - MSTP("i2c0", &div4_clks[DIV4_P], MSTPCR1, 9, 0), - MSTP("rtc0", &r_clk, MSTPCR1, 8, 0), - - MSTP("sdhi0", &div4_clks[DIV4_P], MSTPCR2, 18, 0), - MSTP("keysc0", &r_clk, MSTPCR2, 14, 0), - MSTP("usbf0", &div4_clks[DIV4_P], MSTPCR2, 11, 0), - MSTP("2dg0", &div4_clks[DIV4_B], MSTPCR2, 9, 0), - MSTP("siu0", &div4_clks[DIV4_B], MSTPCR2, 8, 0), - MSTP("vou0", &div4_clks[DIV4_B], MSTPCR2, 5, 0), - MSTP("jpu0", &div4_clks[DIV4_B], MSTPCR2, 6, CLK_ENABLE_ON_INIT), - MSTP("beu0", &div4_clks[DIV4_B], MSTPCR2, 4, 
0), - MSTP("ceu0", &div4_clks[DIV4_B], MSTPCR2, 3, 0), - MSTP("veu0", &div4_clks[DIV4_B], MSTPCR2, 2, CLK_ENABLE_ON_INIT), - MSTP("vpu0", &div4_clks[DIV4_B], MSTPCR2, 1, CLK_ENABLE_ON_INIT), - MSTP("lcdc0", &div4_clks[DIV4_B], MSTPCR2, 0, 0), + SH_HWBLK_CLK("uram0", -1, U_CLK, HWBLK_URAM, CLK_ENABLE_ON_INIT), + SH_HWBLK_CLK("xymem0", -1, B_CLK, HWBLK_XYMEM, CLK_ENABLE_ON_INIT), + SH_HWBLK_CLK("tmu0", -1, P_CLK, HWBLK_TMU, 0), + SH_HWBLK_CLK("cmt0", -1, R_CLK, HWBLK_CMT, 0), + SH_HWBLK_CLK("rwdt0", -1, R_CLK, HWBLK_RWDT, 0), + SH_HWBLK_CLK("flctl0", -1, P_CLK, HWBLK_FLCTL, 0), + SH_HWBLK_CLK("scif0", -1, P_CLK, HWBLK_SCIF0, 0), + SH_HWBLK_CLK("scif1", -1, P_CLK, HWBLK_SCIF1, 0), + SH_HWBLK_CLK("scif2", -1, P_CLK, HWBLK_SCIF2, 0), + + SH_HWBLK_CLK("i2c0", -1, P_CLK, HWBLK_IIC, 0), + SH_HWBLK_CLK("rtc0", -1, R_CLK, HWBLK_RTC, 0), + + SH_HWBLK_CLK("sdhi0", -1, P_CLK, HWBLK_SDHI, 0), + SH_HWBLK_CLK("keysc0", -1, R_CLK, HWBLK_KEYSC, 0), + SH_HWBLK_CLK("usbf0", -1, P_CLK, HWBLK_USBF, 0), + SH_HWBLK_CLK("2dg0", -1, B_CLK, HWBLK_2DG, 0), + SH_HWBLK_CLK("siu0", -1, B_CLK, HWBLK_SIU, 0), + SH_HWBLK_CLK("vou0", -1, B_CLK, HWBLK_VOU, 0), + SH_HWBLK_CLK("jpu0", -1, B_CLK, HWBLK_JPU, CLK_ENABLE_ON_INIT), + SH_HWBLK_CLK("beu0", -1, B_CLK, HWBLK_BEU, 0), + SH_HWBLK_CLK("ceu0", -1, B_CLK, HWBLK_CEU, 0), + SH_HWBLK_CLK("veu0", -1, B_CLK, HWBLK_VEU, CLK_ENABLE_ON_INIT), + SH_HWBLK_CLK("vpu0", -1, B_CLK, HWBLK_VPU, CLK_ENABLE_ON_INIT), + SH_HWBLK_CLK("lcdc0", -1, P_CLK, HWBLK_LCDC, 0), }; int __init arch_clk_init(void) @@ -191,7 +195,7 @@ int __init arch_clk_init(void) ret = sh_clk_div6_register(div6_clks, ARRAY_SIZE(div6_clks)); if (!ret) - ret = sh_clk_mstp32_register(mstp_clks, ARRAY_SIZE(mstp_clks)); + ret = sh_hwblk_clk_register(mstp_clks, ARRAY_SIZE(mstp_clks)); return ret; } diff --git a/arch/sh/kernel/cpu/sh4a/hwblk-sh7722.c b/arch/sh/kernel/cpu/sh4a/hwblk-sh7722.c new file mode 100644 index 00000000000..00a1c02d82b --- /dev/null +++ b/arch/sh/kernel/cpu/sh4a/hwblk-sh7722.c @@ -0,0 +1,106 @@ +/* + * arch/sh/kernel/cpu/sh4a/hwblk-sh7722.c + * + * SH7722 hardware block support + * + * Copyright (C) 2009 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <asm/suspend.h> +#include <asm/hwblk.h> +#include <cpu/sh7722.h> + +/* SH7722 registers */ +#define MSTPCR0 0xa4150030 +#define MSTPCR1 0xa4150034 +#define MSTPCR2 0xa4150038 + +/* SH7722 Power Domains */ +enum { CORE_AREA, SUB_AREA, CORE_AREA_BM }; +static struct hwblk_area sh7722_hwblk_area[] = { + [CORE_AREA] = HWBLK_AREA(0, 0), + [CORE_AREA_BM] = HWBLK_AREA(HWBLK_AREA_FLAG_PARENT, CORE_AREA), + [SUB_AREA] = HWBLK_AREA(0, 0), +}; + +/* Table mapping HWBLK to Module Stop Bit and Power Domain */ +static struct hwblk sh7722_hwblk[HWBLK_NR] = { + [HWBLK_TLB] = HWBLK(MSTPCR0, 31, CORE_AREA), + [HWBLK_IC] = HWBLK(MSTPCR0, 30, CORE_AREA), + [HWBLK_OC] = HWBLK(MSTPCR0, 29, CORE_AREA), + [HWBLK_URAM] = HWBLK(MSTPCR0, 28, CORE_AREA), + [HWBLK_XYMEM] = HWBLK(MSTPCR0, 26, CORE_AREA), + [HWBLK_INTC] = HWBLK(MSTPCR0, 22, CORE_AREA), + [HWBLK_DMAC] = HWBLK(MSTPCR0, 21, CORE_AREA_BM), + [HWBLK_SHYWAY] = HWBLK(MSTPCR0, 20, CORE_AREA), + [HWBLK_HUDI] = HWBLK(MSTPCR0, 19, CORE_AREA), + [HWBLK_UBC] = HWBLK(MSTPCR0, 17, CORE_AREA), + [HWBLK_TMU] = HWBLK(MSTPCR0, 15, CORE_AREA), + [HWBLK_CMT] = HWBLK(MSTPCR0, 14, SUB_AREA), + [HWBLK_RWDT] = HWBLK(MSTPCR0, 13, SUB_AREA), + [HWBLK_FLCTL] = HWBLK(MSTPCR0, 10, CORE_AREA), + [HWBLK_SCIF0] = HWBLK(MSTPCR0, 7, CORE_AREA), + [HWBLK_SCIF1] = HWBLK(MSTPCR0, 6, CORE_AREA), + [HWBLK_SCIF2] = HWBLK(MSTPCR0, 5, CORE_AREA), + [HWBLK_SIO] = HWBLK(MSTPCR0, 3, CORE_AREA), + [HWBLK_SIOF0] = HWBLK(MSTPCR0, 2, CORE_AREA), + [HWBLK_SIOF1] = HWBLK(MSTPCR0, 1, CORE_AREA), + + [HWBLK_IIC] = HWBLK(MSTPCR1, 9, CORE_AREA), + [HWBLK_RTC] = HWBLK(MSTPCR1, 8, SUB_AREA), + + [HWBLK_TPU] = HWBLK(MSTPCR2, 25, CORE_AREA), + [HWBLK_IRDA] = HWBLK(MSTPCR2, 24, CORE_AREA), + [HWBLK_SDHI] = HWBLK(MSTPCR2, 18, CORE_AREA), + [HWBLK_SIM] = HWBLK(MSTPCR2, 16, CORE_AREA), + [HWBLK_KEYSC] = HWBLK(MSTPCR2, 14, SUB_AREA), + [HWBLK_TSIF] = HWBLK(MSTPCR2, 13, SUB_AREA), + [HWBLK_USBF] = HWBLK(MSTPCR2, 11, CORE_AREA), + [HWBLK_2DG] = HWBLK(MSTPCR2, 9, CORE_AREA_BM), + [HWBLK_SIU] = HWBLK(MSTPCR2, 8, CORE_AREA), + [HWBLK_JPU] = HWBLK(MSTPCR2, 6, CORE_AREA_BM), + [HWBLK_VOU] = HWBLK(MSTPCR2, 5, CORE_AREA_BM), + [HWBLK_BEU] = HWBLK(MSTPCR2, 4, CORE_AREA_BM), + [HWBLK_CEU] = HWBLK(MSTPCR2, 3, CORE_AREA_BM), + [HWBLK_VEU] = HWBLK(MSTPCR2, 2, CORE_AREA_BM), + [HWBLK_VPU] = HWBLK(MSTPCR2, 1, CORE_AREA_BM), + [HWBLK_LCDC] = HWBLK(MSTPCR2, 0, CORE_AREA_BM), +}; + +static struct hwblk_info sh7722_hwblk_info = { + .areas = sh7722_hwblk_area, + .nr_areas = ARRAY_SIZE(sh7722_hwblk_area), + .hwblks = sh7722_hwblk, + .nr_hwblks = ARRAY_SIZE(sh7722_hwblk), +}; + +int arch_hwblk_sleep_mode(void) +{ + if (!sh7722_hwblk_area[CORE_AREA].cnt) + return SUSP_SH_STANDBY | SUSP_SH_SF; + + if (!sh7722_hwblk_area[CORE_AREA_BM].cnt) + return SUSP_SH_SLEEP | SUSP_SH_SF; + + return SUSP_SH_SLEEP; +} + +int __init arch_hwblk_init(void) +{ + return hwblk_register(&sh7722_hwblk_info); +} diff --git a/arch/sh/kernel/cpu/shmobile/Makefile b/arch/sh/kernel/cpu/shmobile/Makefile index 08bfa7c7db2..e8a5111e848 100644 --- a/arch/sh/kernel/cpu/shmobile/Makefile +++ b/arch/sh/kernel/cpu/shmobile/Makefile @@ -4,3 +4,4 @@ # Power Management & Sleep mode obj-$(CONFIG_PM) += pm.o sleep.o +obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git 
a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c new file mode 100644 index 00000000000..4afdd975cc6 --- /dev/null +++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c @@ -0,0 +1,102 @@ +/* + * arch/sh/kernel/cpu/shmobile/cpuidle.c + * + * Cpuidle support code for SuperH Mobile + * + * Copyright (C) 2009 Magnus Damm + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/io.h> +#include <linux/suspend.h> +#include <linux/cpuidle.h> +#include <asm/suspend.h> +#include <asm/uaccess.h> +#include <asm/hwblk.h> + +static unsigned long cpuidle_mode[] = { + SUSP_SH_SLEEP, /* regular sleep mode */ + SUSP_SH_SLEEP | SUSP_SH_SF, /* sleep mode + self refresh */ +}; + +static int cpuidle_sleep_enter(struct cpuidle_device *dev, + struct cpuidle_state *state) +{ + unsigned long allowed_mode = arch_hwblk_sleep_mode(); + ktime_t before, after; + int requested_state = state - &dev->states[0]; + int allowed_state; + int k; + + /* convert allowed mode to allowed state */ + for (k = ARRAY_SIZE(cpuidle_mode) - 1; k > 0; k--) + if (cpuidle_mode[k] == allowed_mode) + break; + + allowed_state = k; + + /* take the following into account for sleep mode selection: + * - allowed_state: best mode allowed by hardware (clock deps) + * - requested_state: best mode allowed by software (latencies) + */ + k = min_t(int, allowed_state, requested_state); + + dev->last_state = &dev->states[k]; + before = ktime_get(); + sh_mobile_call_standby(cpuidle_mode[k]); + after = ktime_get(); + return ktime_to_ns(ktime_sub(after, before)) >> 10; +} + +static struct cpuidle_device cpuidle_dev; +static struct cpuidle_driver cpuidle_driver = { + .name = "sh_idle", + .owner = THIS_MODULE, +}; + +void sh_mobile_setup_cpuidle(void) +{ + struct cpuidle_device *dev = &cpuidle_dev; + struct cpuidle_state *state; + int i; + + cpuidle_register_driver(&cpuidle_driver); + + for (i = 0; i < CPUIDLE_STATE_MAX; i++) { + dev->states[i].name[0] = '\0'; + dev->states[i].desc[0] = '\0'; + } + + i = CPUIDLE_DRIVER_STATE_START; + + state = &dev->states[i++]; + snprintf(state->name, CPUIDLE_NAME_LEN, "C0"); + strncpy(state->desc, "SuperH Sleep Mode", CPUIDLE_DESC_LEN); + state->exit_latency = 1; + state->target_residency = 1 * 2; + state->power_usage = 3; + state->flags = 0; + state->flags |= CPUIDLE_FLAG_SHALLOW; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->enter = cpuidle_sleep_enter; + + dev->safe_state = state; + + state = &dev->states[i++]; + snprintf(state->name, CPUIDLE_NAME_LEN, "C1"); + strncpy(state->desc, "SuperH Sleep Mode [SF]", CPUIDLE_DESC_LEN); + state->exit_latency = 100; + state->target_residency = 1 * 2; + state->power_usage = 1; + state->flags = 0; + state->flags |= CPUIDLE_FLAG_TIME_VALID; + state->enter = cpuidle_sleep_enter; + + dev->state_count = i; + + cpuidle_register_device(dev); +} diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c index 8c067adf683..de078d24ce5 100644 --- a/arch/sh/kernel/cpu/shmobile/pm.c +++ b/arch/sh/kernel/cpu/shmobile/pm.c @@ -1,5 +1,5 @@ /* - * arch/sh/kernel/cpu/sh4a/pm-sh_mobile.c + * arch/sh/kernel/cpu/shmobile/pm.c * * Power management support code for SuperH Mobile * @@ -32,20 +32,17 @@ * * R-standby mode is unsupported, but will be added in the future * U-standby mode is low priority since it needs bootloader hacks - * - * All modes should be tied in 
with cpuidle. But before that can - * happen we need to keep track of enabled hardware blocks so we - * can avoid entering sleep modes that stop clocks to hardware - * blocks that are in use even though the cpu core is idle. */ +#define ILRAM_BASE 0xe5200000 + extern const unsigned char sh_mobile_standby[]; extern const unsigned int sh_mobile_standby_size; -static void sh_mobile_call_standby(unsigned long mode) +void sh_mobile_call_standby(unsigned long mode) { extern void *vbr_base; - void *onchip_mem = (void *)0xe5200000; /* ILRAM */ + void *onchip_mem = (void *)ILRAM_BASE; void (*standby_onchip_mem)(unsigned long) = onchip_mem; /* Note: Wake up from sleep may generate exceptions! @@ -55,11 +52,6 @@ static void sh_mobile_call_standby(unsigned long mode) if (mode & SUSP_SH_SF) asm volatile("ldc %0, vbr" : : "r" (onchip_mem) : "memory"); - /* Copy the assembly snippet to the otherwise ununsed ILRAM */ - memcpy(onchip_mem, sh_mobile_standby, sh_mobile_standby_size); - wmb(); - ctrl_barrier(); - /* Let assembly snippet in on-chip memory handle the rest */ standby_onchip_mem(mode); @@ -85,7 +77,15 @@ static struct platform_suspend_ops sh_pm_ops = { static int __init sh_pm_init(void) { + void *onchip_mem = (void *)ILRAM_BASE; + + /* Copy the assembly snippet to the otherwise ununsed ILRAM */ + memcpy(onchip_mem, sh_mobile_standby, sh_mobile_standby_size); + wmb(); + ctrl_barrier(); + suspend_set_ops(&sh_pm_ops); + sh_mobile_setup_cpuidle(); return 0; } diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 9b352a1e3fb..d2424b068b7 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -21,6 +21,7 @@ #include <linux/smp.h> #include <linux/rtc.h> #include <asm/clock.h> +#include <asm/hwblk.h> #include <asm/rtc.h> /* Dummy RTC ops */ @@ -96,6 +97,7 @@ void __init time_init(void) if (board_time_init) board_time_init(); + hwblk_init(); clk_init(); rtc_sh_get_time(&xtime); diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f..c07f7220590 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1913,25 +1913,14 @@ config DMAR_DEFAULT_ON recommended you say N here while the DMAR code remains experimental. -config DMAR_GFX_WA - def_bool y - prompt "Support for Graphics workaround" - depends on DMAR - ---help--- - Current Graphics drivers tend to use physical address - for DMA and avoid using DMA APIs. Setting this config - option permits the IOMMU driver to set a unity map for - all the OS-visible memory. Hence the driver can continue - to use physical addresses for DMA. - config DMAR_FLOPPY_WA def_bool y depends on DMAR ---help--- - Floppy disk drivers are know to bypass DMA API calls + Floppy disk drivers are known to bypass DMA API calls thereby failing to work when IOMMU is enabled. This workaround will setup a 1:1 mapping for the first - 16M to make floppy (an ISA device) work. + 16MiB to make floppy (an ISA device) work. 
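Stepping back to the SuperH cpuidle code added above: cpuidle_sleep_enter() first converts the deepest sleep mode the hwblk layer will tolerate (arch_hwblk_sleep_mode()) into a state index, then enters the shallower of that and the state the cpuidle governor requested; its return value approximates microseconds slept by shifting nanoseconds right by 10. A standalone sketch of the selection rule, reusing the patch's two-entry mode table — the flag values here are illustrative, not the real asm/suspend.h constants:

#include <stdio.h>

#define SUSP_SH_SLEEP	(1 << 0)	/* illustrative flag values */
#define SUSP_SH_SF	(1 << 1)	/* self-refresh allowed */

static const unsigned long cpuidle_mode[] = {
	SUSP_SH_SLEEP,			/* regular sleep mode */
	SUSP_SH_SLEEP | SUSP_SH_SF,	/* sleep mode + self refresh */
};

#define NR_MODES (sizeof(cpuidle_mode) / sizeof(cpuidle_mode[0]))

/* Scan from the deepest mode down for the one the hardware allows,
 * then clamp by what the governor asked for. */
static int pick_state(unsigned long allowed_mode, int requested_state)
{
	int k;

	for (k = NR_MODES - 1; k > 0; k--)
		if (cpuidle_mode[k] == allowed_mode)
			break;

	return requested_state < k ? requested_state : k;
}

int main(void)
{
	/* Governor wants state 1, hardware only allows plain sleep: */
	printf("%d\n", pick_state(SUSP_SH_SLEEP, 1));			/* 0 */
	/* Both sides allow sleep with self-refresh: */
	printf("%d\n", pick_state(SUSP_SH_SLEEP | SUSP_SH_SF, 1));	/* 1 */
	return 0;
}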
config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 47630479b06..1a041bcf506 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -211,11 +211,11 @@ static __init int iommu_setup(char *p) #ifdef CONFIG_SWIOTLB if (!strncmp(p, "soft", 4)) swiotlb = 1; +#endif if (!strncmp(p, "pt", 2)) { iommu_pass_through = 1; return 1; } -#endif gart_parse_options(p); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 420afa88728..53075424a43 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -56,14 +56,32 @@ #define MAX_AGAW_WIDTH 64 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) +#define DOMAIN_MAX_PFN(gaw) ((((u64)1) << (gaw-VTD_PAGE_SHIFT)) - 1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) -#ifndef PHYSICAL_PAGE_MASK -#define PHYSICAL_PAGE_MASK PAGE_MASK -#endif + +/* VT-d pages must always be _smaller_ than MM pages. Otherwise things + are never going to work. */ +static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) +{ + return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT); +} + +static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) +{ + return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); +} +static inline unsigned long page_to_dma_pfn(struct page *pg) +{ + return mm_to_dma_pfn(page_to_pfn(pg)); +} +static inline unsigned long virt_to_dma_pfn(void *p) +{ + return page_to_dma_pfn(virt_to_page(p)); +} /* global iommu list, set NULL for ignored DMAR units */ static struct intel_iommu **g_iommus; @@ -204,12 +222,17 @@ static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot) static inline u64 dma_pte_addr(struct dma_pte *pte) { - return (pte->val & VTD_PAGE_MASK); +#ifdef CONFIG_64BIT + return pte->val & VTD_PAGE_MASK; +#else + /* Must have a full atomic 64-bit read */ + return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK; +#endif } -static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr) +static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn) { - pte->val |= (addr & VTD_PAGE_MASK); + pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT; } static inline bool dma_pte_present(struct dma_pte *pte) @@ -217,6 +240,11 @@ static inline bool dma_pte_present(struct dma_pte *pte) return (pte->val & 3) != 0; } +static inline int first_pte_in_page(struct dma_pte *pte) +{ + return !((unsigned long)pte & ~VTD_PAGE_MASK); +} + /* * This domain is a statically identity mapping domain. * 1. This domain creats a static 1:1 mapping to all usable memory. 
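Two details in the intel-iommu hunk above are easy to miss. First, VT-d page tables always use 4KiB pages, so on kernels where PAGE_SHIFT is larger the new dma_to_mm_pfn()/mm_to_dma_pfn() helpers translate between the two pfn spaces by shifting by (PAGE_SHIFT - VTD_PAGE_SHIFT). Second, on 32-bit kernels a 64-bit PTE that the IOMMU hardware may update concurrently cannot be read atomically with a plain load, so dma_pte_addr() reads it as a compare-and-exchange of 0 with 0: if the PTE is zero the no-op store succeeds, otherwise the exchange fails and returns the current value — either way the caller gets a consistent 64-bit snapshot. A userspace sketch of that read trick, using a GCC builtin in place of the kernel's __cmpxchg64():

#include <stdint.h>
#include <stdio.h>

/* Atomically snapshot a 64-bit value another agent may be updating.
 * cmpxchg(p, 0, 0) either overwrites an existing 0 with 0 (a no-op)
 * or fails and hands back the current contents. */
static uint64_t atomic_read64(volatile uint64_t *p)
{
	return __sync_val_compare_and_swap(p, (uint64_t)0, (uint64_t)0);
}

int main(void)
{
	volatile uint64_t pte = 0x12345000ULL | 3;	/* address bits | R/W */

	printf("pte = %#llx\n", (unsigned long long)atomic_read64(&pte));
	return 0;
}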
@@ -244,7 +272,6 @@ struct dmar_domain { struct iova_domain iovad; /* iova's that belong to this domain */ struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ int gaw; /* max guest address width */ /* adjusted guest address width, 0 is level 2 30-bit */ @@ -648,80 +675,78 @@ static inline int width_to_agaw(int width) static inline unsigned int level_to_offset_bits(int level) { - return (12 + (level - 1) * LEVEL_STRIDE); + return (level - 1) * LEVEL_STRIDE; } -static inline int address_level_offset(u64 addr, int level) +static inline int pfn_level_offset(unsigned long pfn, int level) { - return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK); + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; } -static inline u64 level_mask(int level) +static inline unsigned long level_mask(int level) { - return ((u64)-1 << level_to_offset_bits(level)); + return -1UL << level_to_offset_bits(level); } -static inline u64 level_size(int level) +static inline unsigned long level_size(int level) { - return ((u64)1 << level_to_offset_bits(level)); + return 1UL << level_to_offset_bits(level); } -static inline u64 align_to_level(u64 addr, int level) +static inline unsigned long align_to_level(unsigned long pfn, int level) { - return ((addr + level_size(level) - 1) & level_mask(level)); + return (pfn + level_size(level) - 1) & level_mask(level); } -static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) +static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, + unsigned long pfn) { - int addr_width = agaw_to_width(domain->agaw); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); int offset; - unsigned long flags; BUG_ON(!domain->pgd); - - addr &= (((u64)1) << addr_width) - 1; + BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; - spin_lock_irqsave(&domain->mapping_lock, flags); while (level > 0) { void *tmp_page; - offset = address_level_offset(addr, level); + offset = pfn_level_offset(pfn, level); pte = &parent[offset]; if (level == 1) break; if (!dma_pte_present(pte)) { + uint64_t pteval; + tmp_page = alloc_pgtable_page(); - if (!tmp_page) { - spin_unlock_irqrestore(&domain->mapping_lock, - flags); + if (!tmp_page) return NULL; + + domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); + pteval = (virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; + if (cmpxchg64(&pte->val, 0ULL, pteval)) { + /* Someone else set it while we were thinking; use theirs. 
*/ + free_pgtable_page(tmp_page); + } else { + dma_pte_addr(pte); + domain_flush_cache(domain, pte, sizeof(*pte)); } - domain_flush_cache(domain, tmp_page, PAGE_SIZE); - dma_set_pte_addr(pte, virt_to_phys(tmp_page)); - /* - * high level table always sets r/w, last level page - * table control read/write - */ - dma_set_pte_readable(pte); - dma_set_pte_writable(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); } parent = phys_to_virt(dma_pte_addr(pte)); level--; } - spin_unlock_irqrestore(&domain->mapping_lock, flags); return pte; } /* return address's pte at specific level */ -static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, - int level) +static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, + unsigned long pfn, + int level) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -729,7 +754,7 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, parent = domain->pgd; while (level <= total) { - offset = address_level_offset(addr, total); + offset = pfn_level_offset(pfn, total); pte = &parent[offset]; if (level == total) return pte; @@ -742,74 +767,82 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, return NULL; } -/* clear one page's page table */ -static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) -{ - struct dma_pte *pte = NULL; - - /* get last level pte */ - pte = dma_addr_level_pte(domain, addr, 1); - - if (pte) { - dma_clear_pte(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); - } -} - /* clear last level pte, a tlb flush should be followed */ -static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end) +static void dma_pte_clear_range(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw); - int npages; + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + struct dma_pte *first_pte, *pte; + + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); - start &= (((u64)1) << addr_width) - 1; - end &= (((u64)1) << addr_width) - 1; - /* in case it's partial page */ - start &= PAGE_MASK; - end = PAGE_ALIGN(end); - npages = (end - start) / VTD_PAGE_SIZE; + /* we don't need lock here; nobody else touches the iova range */ + while (start_pfn <= last_pfn) { + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + if (!pte) { + start_pfn = align_to_level(start_pfn + 1, 2); + continue; + } + do { + dma_clear_pte(pte); + start_pfn++; + pte++; + } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); - /* we don't need lock here, nobody else touches the iova range */ - while (npages--) { - dma_pte_clear_one(domain, start); - start += VTD_PAGE_SIZE; + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); } } /* free page table pages. 
last level pte should already be cleared */ static void dma_pte_free_pagetable(struct dmar_domain *domain, - u64 start, u64 end) + unsigned long start_pfn, + unsigned long last_pfn) { - int addr_width = agaw_to_width(domain->agaw); - struct dma_pte *pte; + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + struct dma_pte *first_pte, *pte; int total = agaw_to_level(domain->agaw); int level; - u64 tmp; + unsigned long tmp; - start &= (((u64)1) << addr_width) - 1; - end &= (((u64)1) << addr_width) - 1; + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); - /* we don't need lock here, nobody else touches the iova range */ + /* We don't need lock here; nobody else touches the iova range */ level = 2; while (level <= total) { - tmp = align_to_level(start, level); - if (tmp >= end || (tmp + level_size(level) > end)) + tmp = align_to_level(start_pfn, level); + + /* If we can't even clear one PTE at this level, we're done */ + if (tmp + level_size(level) - 1 > last_pfn) return; - while (tmp < end) { - pte = dma_addr_level_pte(domain, tmp, level); - if (pte) { - free_pgtable_page( - phys_to_virt(dma_pte_addr(pte))); - dma_clear_pte(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); + while (tmp + level_size(level) - 1 <= last_pfn) { + first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + if (!pte) { + tmp = align_to_level(tmp + 1, level + 1); + continue; } - tmp += level_size(level); + do { + if (dma_pte_present(pte)) { + free_pgtable_page(phys_to_virt(dma_pte_addr(pte))); + dma_clear_pte(pte); + } + pte++; + tmp += level_size(level); + } while (!first_pte_in_page(pte) && + tmp + level_size(level) - 1 <= last_pfn); + + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + } level++; } /* free pgd */ - if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) { + if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { free_pgtable_page(domain->pgd); domain->pgd = NULL; } @@ -1035,11 +1068,11 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - u64 addr, unsigned int pages) + unsigned long pfn, unsigned int pages) { unsigned int mask = ilog2(__roundup_pow_of_two(pages)); + uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; - BUG_ON(addr & (~VTD_PAGE_MASK)); BUG_ON(pages == 0); /* @@ -1054,7 +1087,12 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, else iommu->flush.flush_iotlb(iommu, did, addr, mask, DMA_TLB_PSI_FLUSH); - if (did) + + /* + * In caching mode, domain ID 0 is reserved for non-present to present + * mapping flush. Device IOTLB doesn't need to be flushed in this case. 
+ */ + if (!cap_caching_mode(iommu->cap) || did) iommu_flush_dev_iotlb(iommu->domains[did], addr, mask); } @@ -1279,7 +1317,6 @@ static void dmar_init_reserved_ranges(void) struct pci_dev *pdev = NULL; struct iova *iova; int i; - u64 addr, size; init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); @@ -1302,12 +1339,9 @@ static void dmar_init_reserved_ranges(void) r = &pdev->resource[i]; if (!r->flags || !(r->flags & IORESOURCE_MEM)) continue; - addr = r->start; - addr &= PHYSICAL_PAGE_MASK; - size = r->end - addr; - size = PAGE_ALIGN(size); - iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr), - IOVA_PFN(size + addr) - 1); + iova = reserve_iova(&reserved_iova_list, + IOVA_PFN(r->start), + IOVA_PFN(r->end)); if (!iova) printk(KERN_ERR "Reserve iova failed\n"); } @@ -1341,7 +1375,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width) unsigned long sagaw; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->mapping_lock); spin_lock_init(&domain->iommu_lock); domain_reserve_special_ranges(domain); @@ -1388,7 +1421,6 @@ static void domain_exit(struct dmar_domain *domain) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - u64 end; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1397,14 +1429,12 @@ static void domain_exit(struct dmar_domain *domain) domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); - end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~PAGE_MASK); /* clear ptes */ - dma_pte_clear_range(domain, 0, end); + dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ - dma_pte_free_pagetable(domain, 0, end); + dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); for_each_active_iommu(iommu, drhd) if (test_bit(iommu->seq_id, &domain->iommu_bmp)) @@ -1618,42 +1648,86 @@ static int domain_context_mapped(struct pci_dev *pdev) tmp->devfn); } -static int -domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) +static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long phys_pfn, + unsigned long nr_pages, int prot) { - u64 start_pfn, end_pfn; - struct dma_pte *pte; - int index; - int addr_width = agaw_to_width(domain->agaw); + struct dma_pte *first_pte = NULL, *pte = NULL; + phys_addr_t uninitialized_var(pteval); + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned long sg_res; - hpa &= (((u64)1) << addr_width) - 1; + BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0) return -EINVAL; - iova &= PAGE_MASK; - start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT; - end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT; - index = 0; - while (start_pfn < end_pfn) { - pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index); - if (!pte) - return -ENOMEM; + + prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP; + + if (sg) + sg_res = 0; + else { + sg_res = nr_pages + 1; + pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; + } + + while (nr_pages--) { + uint64_t tmp; + + if (!sg_res) { + sg_res = (sg->offset + sg->length + VTD_PAGE_SIZE - 1) >> VTD_PAGE_SHIFT; + sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; + sg->dma_length = sg->length; + pteval = page_to_phys(sg_page(sg)) | prot; + } + if (!pte) { + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + if (!pte) + return -ENOMEM; + } /* We don't need lock here, nobody else * 
touches the iova range */ - BUG_ON(dma_pte_addr(pte)); - dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT); - dma_set_pte_prot(pte, prot); - if (prot & DMA_PTE_SNP) - dma_set_pte_snp(pte); - domain_flush_cache(domain, pte, sizeof(*pte)); - start_pfn++; - index++; + tmp = cmpxchg64_local(&pte->val, 0ULL, pteval); + if (tmp) { + static int dumps = 5; + printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n", + iov_pfn, tmp, (unsigned long long)pteval); + if (dumps) { + dumps--; + debug_dma_dump_mappings(NULL); + } + WARN_ON(1); + } + pte++; + if (!nr_pages || first_pte_in_page(pte)) { + domain_flush_cache(domain, first_pte, + (void *)pte - (void *)first_pte); + pte = NULL; + } + iov_pfn++; + pteval += VTD_PAGE_SIZE; + sg_res--; + if (!sg_res) + sg = sg_next(sg); } return 0; } +static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long nr_pages, + int prot) +{ + return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); +} + +static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + unsigned long phys_pfn, unsigned long nr_pages, + int prot) +{ + return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); +} + static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn) { if (!iommu) @@ -1844,58 +1918,61 @@ error: static int iommu_identity_mapping; +static int iommu_domain_identity_map(struct dmar_domain *domain, + unsigned long long start, + unsigned long long end) +{ + unsigned long first_vpfn = start >> VTD_PAGE_SHIFT; + unsigned long last_vpfn = end >> VTD_PAGE_SHIFT; + + if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn), + dma_to_mm_pfn(last_vpfn))) { + printk(KERN_ERR "IOMMU: reserve iova failed\n"); + return -ENOMEM; + } + + pr_debug("Mapping reserved region %llx-%llx for domain %d\n", + start, end, domain->id); + /* + * RMRR range might have overlap with physical memory range, + * clear it first + */ + dma_pte_clear_range(domain, first_vpfn, last_vpfn); + + return domain_pfn_mapping(domain, first_vpfn, first_vpfn, + last_vpfn - first_vpfn + 1, + DMA_PTE_READ|DMA_PTE_WRITE); +} + static int iommu_prepare_identity_map(struct pci_dev *pdev, unsigned long long start, unsigned long long end) { struct dmar_domain *domain; - unsigned long size; - unsigned long long base; int ret; printk(KERN_INFO - "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); - if (iommu_identity_mapping) - domain = si_domain; - else - /* page table init */ - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", + pci_name(pdev), start, end); + + domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; - /* The address might not be aligned */ - base = start & PAGE_MASK; - size = end - base; - size = PAGE_ALIGN(size); - if (!reserve_iova(&domain->iovad, IOVA_PFN(base), - IOVA_PFN(base + size) - 1)) { - printk(KERN_ERR "IOMMU: reserve iova failed\n"); - ret = -ENOMEM; - goto error; - } - - pr_debug("Mapping reserved region %lx@%llx for %s\n", - size, base, pci_name(pdev)); - /* - * RMRR range might have overlap with physical memory range, - * clear it first - */ - dma_pte_clear_range(domain, base, base + size); - - ret = domain_page_mapping(domain, base, base, size, - DMA_PTE_READ|DMA_PTE_WRITE); + ret = iommu_domain_identity_map(domain, start, end); if (ret) goto error; /* context entry init */ ret = 
domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); - if (!ret) - return 0; -error: + if (ret) + goto error; + + return 0; + + error: domain_exit(domain); return ret; - } static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, @@ -1907,64 +1984,6 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, rmrr->end_address + 1); } -struct iommu_prepare_data { - struct pci_dev *pdev; - int ret; -}; - -static int __init iommu_prepare_work_fn(unsigned long start_pfn, - unsigned long end_pfn, void *datax) -{ - struct iommu_prepare_data *data; - - data = (struct iommu_prepare_data *)datax; - - data->ret = iommu_prepare_identity_map(data->pdev, - start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); - return data->ret; - -} - -static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev) -{ - int nid; - struct iommu_prepare_data data; - - data.pdev = pdev; - data.ret = 0; - - for_each_online_node(nid) { - work_with_active_regions(nid, iommu_prepare_work_fn, &data); - if (data.ret) - return data.ret; - } - return data.ret; -} - -#ifdef CONFIG_DMAR_GFX_WA -static void __init iommu_prepare_gfx_mapping(void) -{ - struct pci_dev *pdev = NULL; - int ret; - - for_each_pci_dev(pdev) { - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO || - !IS_GFX_DEVICE(pdev)) - continue; - printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n", - pci_name(pdev)); - ret = iommu_prepare_with_active_regions(pdev); - if (ret) - printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); - } -} -#else /* !CONFIG_DMAR_GFX_WA */ -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif - #ifdef CONFIG_DMAR_FLOPPY_WA static inline void iommu_prepare_isa(void) { @@ -1975,12 +1994,12 @@ static inline void iommu_prepare_isa(void) if (!pdev) return; - printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n"); + printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); if (ret) - printk(KERN_ERR "IOMMU: Failed to create 0-64M identity map, " - "floppy might not work\n"); + printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " + "floppy might not work\n"); } #else @@ -2008,16 +2027,30 @@ static int __init init_context_pass_through(void) } static int md_domain_init(struct dmar_domain *domain, int guest_width); + +static int __init si_domain_work_fn(unsigned long start_pfn, + unsigned long end_pfn, void *datax) +{ + int *ret = datax; + + *ret = iommu_domain_identity_map(si_domain, + (uint64_t)start_pfn << PAGE_SHIFT, + (uint64_t)end_pfn << PAGE_SHIFT); + return *ret; + +} + static int si_domain_init(void) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; - int ret = 0; + int nid, ret = 0; si_domain = alloc_domain(); if (!si_domain) return -EFAULT; + pr_debug("Identity mapping domain is domain %d\n", si_domain->id); for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); @@ -2034,6 +2067,12 @@ static int si_domain_init(void) si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + for_each_online_node(nid) { + work_with_active_regions(nid, si_domain_work_fn, &ret); + if (ret) + return ret; + } + return 0; } @@ -2087,13 +2126,14 @@ static int iommu_prepare_static_identity_mapping(void) if (ret) return -EFAULT; - printk(KERN_INFO "IOMMU: Setting identity map:\n"); for_each_pci_dev(pdev) { - ret = iommu_prepare_with_active_regions(pdev); - if (ret) { - printk(KERN_INFO "1:1 mapping to one domain failed.\n"); - return -EFAULT; - } + printk(KERN_INFO "IOMMU: 
identity mapping for device %s\n", + pci_name(pdev)); + + ret = domain_context_mapping(si_domain, pdev, + CONTEXT_TT_MULTI_LEVEL); + if (ret) + return ret; ret = domain_add_dev_info(si_domain, pdev); if (ret) return ret; @@ -2284,8 +2324,6 @@ int __init init_dmars(void) } } - iommu_prepare_gfx_mapping(); - iommu_prepare_isa(); } @@ -2330,50 +2368,40 @@ error: return ret; } -static inline u64 aligned_size(u64 host_addr, size_t size) -{ - u64 addr; - addr = (host_addr & (~PAGE_MASK)) + size; - return PAGE_ALIGN(addr); -} - -struct iova * -iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end) +static inline unsigned long aligned_nrpages(unsigned long host_addr, + size_t size) { - struct iova *piova; + host_addr &= ~PAGE_MASK; + host_addr += size + PAGE_SIZE - 1; - /* Make sure it's in range */ - end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end); - if (!size || (IOVA_START_ADDR + size > end)) - return NULL; - - piova = alloc_iova(&domain->iovad, - size >> PAGE_SHIFT, IOVA_PFN(end), 1); - return piova; + return host_addr >> VTD_PAGE_SHIFT; } -static struct iova * -__intel_alloc_iova(struct device *dev, struct dmar_domain *domain, - size_t size, u64 dma_mask) +static struct iova *intel_alloc_iova(struct device *dev, + struct dmar_domain *domain, + unsigned long nrpages, uint64_t dma_mask) { struct pci_dev *pdev = to_pci_dev(dev); struct iova *iova = NULL; - if (dma_mask <= DMA_BIT_MASK(32) || dmar_forcedac) - iova = iommu_alloc_iova(domain, size, dma_mask); - else { + /* Restrict dma_mask to the width that the iommu can handle */ + dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); + + if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) { /* * First try to allocate an io virtual address in * DMA_BIT_MASK(32) and if that fails then try allocating * from higher range */ - iova = iommu_alloc_iova(domain, size, DMA_BIT_MASK(32)); - if (!iova) - iova = iommu_alloc_iova(domain, size, dma_mask); - } - - if (!iova) { - printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev)); + iova = alloc_iova(&domain->iovad, nrpages, + IOVA_PFN(DMA_BIT_MASK(32)), 1); + if (iova) + return iova; + } + iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); + if (unlikely(!iova)) { + printk(KERN_ERR "Allocating %ld-page iova for %s failed", + nrpages, pci_name(pdev)); return NULL; } @@ -2476,14 +2504,12 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, return 0; iommu = domain_get_iommu(domain); - size = aligned_size((u64)paddr, size); + size = aligned_nrpages(paddr, size); - iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) goto error; - start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; - /* * Check if DMAR supports zero-length reads on write only * mappings.. @@ -2499,20 +2525,20 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, * might have two guest_addr mapping to the same host paddr, but this * is not a big problem */ - ret = domain_page_mapping(domain, start_paddr, - ((u64)paddr) & PHYSICAL_PAGE_MASK, - size, prot); + ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo), + paddr >> VTD_PAGE_SHIFT, size, prot); if (ret) goto error; /* it's a non-present to present mapping. 
Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_paddr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size); else iommu_flush_write_buffer(iommu); - return start_paddr + ((u64)paddr & (~PAGE_MASK)); + start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT; + start_paddr += paddr & ~PAGE_MASK; + return start_paddr; error: if (iova) @@ -2605,7 +2631,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, { struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; - unsigned long start_addr; + unsigned long start_pfn, last_pfn; struct iova *iova; struct intel_iommu *iommu; @@ -2618,22 +2644,25 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, iommu = domain_get_iommu(domain); iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr)); - if (!iova) + if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n", + (unsigned long long)dev_addr)) return; - start_addr = iova->pfn_lo << PAGE_SHIFT; - size = aligned_size((u64)dev_addr, size); + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; - pr_debug("Device %s unmapping: %zx@%llx\n", - pci_name(pdev), size, (unsigned long long)start_addr); + pr_debug("Device %s unmapping: pfn %lx-%lx\n", + pci_name(pdev), start_pfn, last_pfn); /* clear the whole page */ - dma_pte_clear_range(domain, start_addr, start_addr + size); + dma_pte_clear_range(domain, start_pfn, last_pfn); + /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, start_addr + size); + dma_pte_free_pagetable(domain, start_pfn, last_pfn); + if (intel_iommu_strict) { - iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, + last_pfn - start_pfn + 1); /* free iova */ __free_iova(&domain->iovad, iova); } else { @@ -2691,14 +2720,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; - unsigned long start_addr; + unsigned long start_pfn, last_pfn; struct iova *iova; - size_t size = 0; - phys_addr_t addr; - struct scatterlist *sg; struct intel_iommu *iommu; if (iommu_no_mapping(pdev)) @@ -2710,22 +2735,21 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, iommu = domain_get_iommu(domain); iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address)); - if (!iova) + if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n", + (unsigned long long)sglist[0].dma_address)) return; - for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size += aligned_size((u64)addr, sg->length); - } - start_addr = iova->pfn_lo << PAGE_SHIFT; + start_pfn = mm_to_dma_pfn(iova->pfn_lo); + last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; /* clear the whole page */ - dma_pte_clear_range(domain, start_addr, start_addr + size); + dma_pte_clear_range(domain, start_pfn, last_pfn); + /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, start_addr + size); + dma_pte_free_pagetable(domain, start_pfn, last_pfn); - iommu_flush_iotlb_psi(iommu, domain->id, start_addr, - size >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, + (last_pfn - start_pfn + 1)); /* free iova */ __free_iova(&domain->iovad, iova); @@ -2748,17 +2772,16 @@ static int 
intel_nontranslate_map_sg(struct device *hddev, static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - phys_addr_t addr; int i; struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; size_t size = 0; int prot = 0; - size_t offset = 0; + size_t offset_pfn = 0; struct iova *iova = NULL; int ret; struct scatterlist *sg; - unsigned long start_addr; + unsigned long start_vpfn; struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); @@ -2771,12 +2794,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne iommu = domain_get_iommu(domain); - for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size += aligned_size((u64)addr, sg->length); - } + for_each_sg(sglist, sg, nelems, i) + size += aligned_nrpages(sg->offset, sg->length); - iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, size, pdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; @@ -2792,35 +2813,24 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL) prot |= DMA_PTE_WRITE; - start_addr = iova->pfn_lo << PAGE_SHIFT; - offset = 0; - for_each_sg(sglist, sg, nelems, i) { - addr = page_to_phys(sg_page(sg)) + sg->offset; - size = aligned_size((u64)addr, sg->length); - ret = domain_page_mapping(domain, start_addr + offset, - ((u64)addr) & PHYSICAL_PAGE_MASK, - size, prot); - if (ret) { - /* clear the page */ - dma_pte_clear_range(domain, start_addr, - start_addr + offset); - /* free page tables */ - dma_pte_free_pagetable(domain, start_addr, - start_addr + offset); - /* free iova */ - __free_iova(&domain->iovad, iova); - return 0; - } - sg->dma_address = start_addr + offset + - ((u64)addr & (~PAGE_MASK)); - sg->dma_length = sg->length; - offset += size; + start_vpfn = mm_to_dma_pfn(iova->pfn_lo); + + ret = domain_sg_mapping(domain, start_vpfn, sglist, mm_to_dma_pfn(size), prot); + if (unlikely(ret)) { + /* clear the page */ + dma_pte_clear_range(domain, start_vpfn, + start_vpfn + size - 1); + /* free page tables */ + dma_pte_free_pagetable(domain, start_vpfn, + start_vpfn + size - 1); + /* free iova */ + __free_iova(&domain->iovad, iova); + return 0; } /* it's a non-present to present mapping. 
Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, 0, start_addr, - offset >> VTD_PAGE_SHIFT); + iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn); else iommu_flush_write_buffer(iommu); @@ -3325,7 +3335,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) int adjust_width; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->mapping_lock); spin_lock_init(&domain->iommu_lock); domain_reserve_special_ranges(domain); @@ -3379,8 +3388,6 @@ static void iommu_free_vm_domain(struct dmar_domain *domain) static void vm_domain_exit(struct dmar_domain *domain) { - u64 end; - /* Domain 0 is reserved, so dont process it */ if (!domain) return; @@ -3388,14 +3395,12 @@ static void vm_domain_exit(struct dmar_domain *domain) vm_domain_remove_all_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); - end = DOMAIN_MAX_ADDR(domain->gaw); - end = end & (~VTD_PAGE_MASK); /* clear ptes */ - dma_pte_clear_range(domain, 0, end); + dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* free page tables */ - dma_pte_free_pagetable(domain, 0, end); + dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); iommu_free_vm_domain(domain); free_domain_mem(domain); @@ -3504,7 +3509,7 @@ static int intel_iommu_map_range(struct iommu_domain *domain, if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping) prot |= DMA_PTE_SNP; - max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); + max_addr = iova + size; if (dmar_domain->max_addr < max_addr) { int min_agaw; u64 end; @@ -3522,8 +3527,11 @@ static int intel_iommu_map_range(struct iommu_domain *domain, } dmar_domain->max_addr = max_addr; } - - ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); + /* Round up size to next multiple of PAGE_SIZE, if it and + the low bits of hpa would take us onto the next page */ + size = aligned_nrpages(hpa, size); + ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT, + hpa >> VTD_PAGE_SHIFT, size, prot); return ret; } @@ -3531,15 +3539,12 @@ static void intel_iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - dma_addr_t base; - /* The address might not be aligned */ - base = iova & VTD_PAGE_MASK; - size = VTD_PAGE_ALIGN(size); - dma_pte_clear_range(dmar_domain, base, base + size); + dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, + (iova + size - 1) >> VTD_PAGE_SHIFT); - if (dmar_domain->max_addr == base + size) - dmar_domain->max_addr = base; + if (dmar_domain->max_addr == iova + size) + dmar_domain->max_addr = iova; } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -3549,7 +3554,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = addr_to_dma_pte(dmar_domain, iova); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); if (pte) phys = dma_pte_addr(pte); diff --git a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c index 74369a3f963..c399f485aa7 100644 --- a/drivers/scsi/cxgb3i/cxgb3i_iscsi.c +++ b/drivers/scsi/cxgb3i/cxgb3i_iscsi.c @@ -13,6 +13,7 @@ #include <linux/inet.h> #include <linux/crypto.h> +#include <linux/if_vlan.h> #include <net/dst.h> #include <net/tcp.h> #include <scsi/scsi_cmnd.h> @@ -184,6 +185,9 @@ static struct cxgb3i_hba *cxgb3i_hba_find_by_netdev(struct net_device *ndev) struct cxgb3i_adapter *snic; int i; + if (ndev->priv_flags & IFF_802_1Q_VLAN) + 
ndev = vlan_dev_real_dev(ndev); + read_lock(&cxgb3i_snic_rwlock); list_for_each_entry(snic, &cxgb3i_snic_list, list_head) { for (i = 0; i < snic->hba_cnt; i++) { diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index a84072865fc..2c266c01dc5 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -473,16 +473,16 @@ static int __devinit fnic_probe(struct pci_dev *pdev, * limitation for the device. Try 40-bit first, and * fail to 32-bit. */ - err = pci_set_dma_mask(pdev, DMA_40BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40)); if (err) { - err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { shost_printk(KERN_ERR, fnic->lport->host, "No usable DMA configuration " "aborting\n"); goto err_out_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { shost_printk(KERN_ERR, fnic->lport->host, "Unable to obtain 32-bit DMA " @@ -490,7 +490,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev, goto err_out_release_regions; } } else { - err = pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40)); if (err) { shost_printk(KERN_ERR, fnic->lport->host, "Unable to obtain 40-bit DMA " diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index eabf3650285..bfc996971b8 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -245,7 +245,7 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, struct vnic_wq_copy *wq, struct fnic_io_req *io_req, struct scsi_cmnd *sc, - u32 sg_count) + int sg_count) { struct scatterlist *sg; struct fc_rport *rport = starget_to_rport(scsi_target(sc->device)); @@ -260,9 +260,6 @@ static inline int fnic_queue_wq_copy_desc(struct fnic *fnic, char msg[2]; if (sg_count) { - BUG_ON(sg_count < 0); - BUG_ON(sg_count > FNIC_MAX_SG_DESC_CNT); - /* For each SGE, create a device desc entry */ desc = io_req->sgl_list; for_each_sg(scsi_sglist(sc), sg, sg_count, i) { @@ -344,7 +341,7 @@ int fnic_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) struct fnic *fnic; struct vnic_wq_copy *wq; int ret; - u32 sg_count; + int sg_count; unsigned long flags; unsigned long ptr; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 869a11bdccb..9928704e235 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1095,9 +1095,14 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct) MAX_INDIRECT_BUFS); hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; } + + if (hostdata->madapter_info.os_type == 3) { + enable_fast_fail(hostdata); + return; + } } - enable_fast_fail(hostdata); + send_srp_login(hostdata); } /** diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 2eee9e6e4fe..292c02f810d 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3670,13 +3670,14 @@ static void fc_bsg_goose_queue(struct fc_rport *rport) { int flagset; + unsigned long flags; if (!rport->rqst_q) return; get_device(&rport->dev); - spin_lock(rport->rqst_q->queue_lock); + spin_lock_irqsave(rport->rqst_q->queue_lock, flags); flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) && !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); if (flagset) @@ -3684,7 +3685,7 @@ fc_bsg_goose_queue(struct fc_rport *rport) 
__blk_run_queue(rport->rqst_q); if (flagset) queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); - spin_unlock(rport->rqst_q->queue_lock); + spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); put_device(&rport->dev); } diff --git a/drivers/scsi/zalon.c b/drivers/scsi/zalon.c index 97f3158fa7b..27e84e4b1fa 100644 --- a/drivers/scsi/zalon.c +++ b/drivers/scsi/zalon.c @@ -134,7 +134,7 @@ zalon_probe(struct parisc_device *dev) host = ncr_attach(&zalon7xx_template, unit, &device); if (!host) - goto fail; + return -ENODEV; if (request_irq(dev->irq, ncr53c8xx_intr, IRQF_SHARED, "zalon", host)) { dev_printk(KERN_ERR, &dev->dev, "irq problem with %d, detaching\n ", diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 66f52674ca0..12bd64684f1 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -707,12 +707,13 @@ static irqreturn_t sci_br_interrupt(int irq, void *ptr) static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) { - unsigned short ssr_status, scr_status; + unsigned short ssr_status, scr_status, err_enabled; struct uart_port *port = ptr; irqreturn_t ret = IRQ_NONE; ssr_status = sci_in(port, SCxSR); scr_status = sci_in(port, SCSCR); + err_enabled = scr_status & (SCI_CTRL_FLAGS_REIE | SCI_CTRL_FLAGS_RIE); /* Tx Interrupt */ if ((ssr_status & 0x0020) && (scr_status & SCI_CTRL_FLAGS_TIE)) @@ -721,10 +722,10 @@ static irqreturn_t sci_mpxed_interrupt(int irq, void *ptr) if ((ssr_status & 0x0002) && (scr_status & SCI_CTRL_FLAGS_RIE)) ret = sci_rx_interrupt(irq, ptr); /* Error Interrupt */ - if ((ssr_status & 0x0080) && (scr_status & SCI_CTRL_FLAGS_REIE)) + if ((ssr_status & 0x0080) && err_enabled) ret = sci_er_interrupt(irq, ptr); /* Break Interrupt */ - if ((ssr_status & 0x0010) && (scr_status & SCI_CTRL_FLAGS_REIE)) + if ((ssr_status & 0x0010) && err_enabled) ret = sci_br_interrupt(irq, ptr); return ret; diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 1576a0520ad..0c03471f0d4 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -337,10 +337,10 @@ config USB_R8A66597_HCD config SUPERH_ON_CHIP_R8A66597 boolean "Enable SuperH on-chip R8A66597 USB" - depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723) + depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723 || CPU_SUBTYPE_SH7724) help This driver enables support for the on-chip R8A66597 in the - SH7366 and SH7723 processors. + SH7366, SH7723 and SH7724 processors. 
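The fnic_probe() hunk above swaps the removed DMA_40BIT_MASK/DMA_32BIT_MASK constants for DMA_BIT_MASK(n), which computes the mask rather than hard-coding it. A minimal sketch of the same try-wide-then-fall-back ladder; example_set_dma_masks is a hypothetical helper, not part of the patch:

#include <linux/pci.h>
#include <linux/dma-mapping.h>

/* Prefer a 40-bit DMA mask and fall back to 32-bit, mirroring the
 * fnic probe logic.  DMA_BIT_MASK(40) expands to ((1ULL << 40) - 1),
 * so there is no need for a hand-rolled DMA_40BIT_MASK constant. */
static int example_set_dma_masks(struct pci_dev *pdev)
{
	int err;

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
	if (err) {
		/* 40-bit failed; 32-bit is the required minimum */
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err)
			return err;
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
	} else {
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
	}
	return err;
}

The streaming and consistent masks are set to match, so a failure at either step leaves the device with a usable, self-consistent configuration.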
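The scsi_transport_fc hunk above converts fc_bsg_goose_queue() from spin_lock() to spin_lock_irqsave(), presumably because the request queue's lock can also be taken from completion paths that run in interrupt context; holding it with interrupts enabled invites a deadlock if such an interrupt arrives on the same CPU. A minimal sketch of the pattern, with hypothetical names (dispatch_state, dispatch_kick) that are illustrative, not from the patch:

#include <linux/spinlock.h>

/* Hypothetical state guarded by a lock that is also taken from an
 * interrupt handler. */
struct dispatch_state {
	spinlock_t lock;
	unsigned int pending;
};

/* Called from process context.  A plain spin_lock() here could
 * deadlock: if the IRQ handler fired on this CPU while we held the
 * lock, it would spin on it forever.  spin_lock_irqsave() disables
 * local interrupts and remembers the previous state in 'flags'. */
static void dispatch_kick(struct dispatch_state *st)
{
	unsigned long flags;

	spin_lock_irqsave(&st->lock, flags);
	st->pending++;		/* stand-in for the real queue poke */
	spin_unlock_irqrestore(&st->lock, flags);
}

Because the prior interrupt state is restored on exit, the helper is safe to call whether or not the caller had already disabled interrupts.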
config USB_WHCI_HCD tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)" diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 7f88628a1a7..6e4f6c50a12 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -299,8 +299,8 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) "btrfs-%s-%d", workers->name, workers->num_workers + i); if (IS_ERR(worker->task)) { - kfree(worker); ret = PTR_ERR(worker->task); + kfree(worker); goto fail; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2779c2f5360..98a87383871 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2074,8 +2074,7 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root); +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index edc7d208c5c..a5aca3997d4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -990,15 +990,13 @@ static inline int extent_ref_type(u64 parent, u64 owner) return type; } -static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) +static int find_next_key(struct btrfs_path *path, int level, + struct btrfs_key *key) { - int level; - BUG_ON(!path->keep_locks); - for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + for (; level < BTRFS_MAX_LEVEL; level++) { if (!path->nodes[level]) break; - btrfs_assert_tree_locked(path->nodes[level]); if (path->slots[level] + 1 >= btrfs_header_nritems(path->nodes[level])) continue; @@ -1158,7 +1156,8 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, * For simplicity, we just do not add new inline back * ref if there is any kind of item for this block */ - if (find_next_key(path, &key) == 0 && key.objectid == bytenr && + if (find_next_key(path, 0, &key) == 0 && + key.objectid == bytenr && key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { err = -EAGAIN; goto out; @@ -2697,7 +2696,7 @@ again: printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" ", %llu bytes_used, %llu bytes_reserved, " - "%llu bytes_pinned, %llu bytes_readonly, %llu may use" + "%llu bytes_pinned, %llu bytes_readonly, %llu may use " "%llu total\n", (unsigned long long)bytes, (unsigned long long)data_sinfo->bytes_delalloc, (unsigned long long)data_sinfo->bytes_used, @@ -4128,6 +4127,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, return buf; } +#if 0 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *leaf) { @@ -4171,8 +4171,6 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, return 0; } -#if 0 - static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_leaf_ref *ref) @@ -4553,262 +4551,471 @@ out: } #endif +struct walk_control { + u64 refs[BTRFS_MAX_LEVEL]; + u64 flags[BTRFS_MAX_LEVEL]; + struct btrfs_key update_progress; + int stage; + int level; + int shared_level; + int update_ref; + int keep_locks; +}; + +#define DROP_REFERENCE 1 +#define UPDATE_BACKREF 2 + /* - * helper function for drop_subtree, this function is 
similar to - * walk_down_tree. The main difference is that it checks reference - * counts while tree blocks are locked. + * hepler to process tree block while walking down the tree. + * + * when wc->stage == DROP_REFERENCE, this function checks + * reference count of the block. if the block is shared and + * we need update back refs for the subtree rooted at the + * block, this function changes wc->stage to UPDATE_BACKREF + * + * when wc->stage == UPDATE_BACKREF, this function updates + * back refs for pointers in the block. + * + * NOTE: return value 1 means we should stop walking down. */ -static noinline int walk_down_tree(struct btrfs_trans_handle *trans, +static noinline int walk_down_proc(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct btrfs_path *path, int *level) + struct btrfs_path *path, + struct walk_control *wc) { - struct extent_buffer *next; - struct extent_buffer *cur; - struct extent_buffer *parent; - u64 bytenr; - u64 ptr_gen; - u64 refs; - u64 flags; - u32 blocksize; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + struct btrfs_key key; + u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; int ret; - cur = path->nodes[*level]; - ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, - &refs, &flags); - BUG_ON(ret); - if (refs > 1) - goto out; + if (wc->stage == UPDATE_BACKREF && + btrfs_header_owner(eb) != root->root_key.objectid) + return 1; - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* + * when reference count of tree block is 1, it won't increase + * again. once full backref flag is set, we never clear it. + */ + if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || + (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { + BUG_ON(!path->locks[level]); + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + } - while (*level >= 0) { - cur = path->nodes[*level]; - if (*level == 0) { - ret = btrfs_drop_leaf_ref(trans, root, cur); - BUG_ON(ret); - clean_tree_block(trans, root, cur); - break; - } - if (path->slots[*level] >= btrfs_header_nritems(cur)) { - clean_tree_block(trans, root, cur); - break; + if (wc->stage == DROP_REFERENCE && + wc->update_ref && wc->refs[level] > 1) { + BUG_ON(eb == root->node); + BUG_ON(path->slots[level] > 0); + if (level == 0) + btrfs_item_key_to_cpu(eb, &key, path->slots[level]); + else + btrfs_node_key_to_cpu(eb, &key, path->slots[level]); + if (btrfs_header_owner(eb) == root->root_key.objectid && + btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { + wc->stage = UPDATE_BACKREF; + wc->shared_level = level; } + } - bytenr = btrfs_node_blockptr(cur, path->slots[*level]); - blocksize = btrfs_level_size(root, *level - 1); - ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); + if (wc->stage == DROP_REFERENCE) { + if (wc->refs[level] > 1) + return 1; - next = read_tree_block(root, bytenr, blocksize, ptr_gen); - btrfs_tree_lock(next); - btrfs_set_lock_blocking(next); + if (path->locks[level] && !wc->keep_locks) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; + } - ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, - &refs, &flags); + /* wc->stage == UPDATE_BACKREF */ + if (!(wc->flags[level] & flag)) { + BUG_ON(!path->locks[level]); + ret = btrfs_inc_ref(trans, root, eb, 1); BUG_ON(ret); - if (refs > 1) { - parent = path->nodes[*level]; - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - 
btrfs_header_owner(parent), - *level - 1, 0); + ret = btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + ret = btrfs_set_disk_extent_flags(trans, root, eb->start, + eb->len, flag, 0); + BUG_ON(ret); + wc->flags[level] |= flag; + } + + /* + * the block is shared by multiple trees, so it's not good to + * keep the tree lock + */ + if (path->locks[level] && level > 0) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + } + return 0; +} + +/* + * helper to process a tree block while walking up the tree. + * + * when wc->stage == DROP_REFERENCE, this function drops + * the reference count on the block. + * + * when wc->stage == UPDATE_BACKREF, this function changes + * wc->stage back to DROP_REFERENCE if we changed wc->stage + * to UPDATE_BACKREF previously while processing the block. + * + * NOTE: return value 1 means we should stop walking up. + */ +static noinline int walk_up_proc(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + int ret = 0; + int level = wc->level; + struct extent_buffer *eb = path->nodes[level]; + u64 parent = 0; + + if (wc->stage == UPDATE_BACKREF) { + BUG_ON(wc->shared_level < level); + if (level < wc->shared_level) + goto out; + + BUG_ON(wc->refs[level] <= 1); + ret = find_next_key(path, level + 1, &wc->update_progress); + if (ret > 0) + wc->update_ref = 0; + + wc->stage = DROP_REFERENCE; + wc->shared_level = -1; + path->slots[level] = 0; + + /* + * check reference count again if the block isn't locked. + * we should start walking down the tree again if the reference + * count is one. + */ + if (!path->locks[level]) { + BUG_ON(level == 0); + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + + ret = btrfs_lookup_extent_info(trans, root, + eb->start, eb->len, + &wc->refs[level], + &wc->flags[level]); BUG_ON(ret); - path->slots[*level]++; - btrfs_tree_unlock(next); - free_extent_buffer(next); - continue; + BUG_ON(wc->refs[level] == 0); + if (wc->refs[level] == 1) { + btrfs_tree_unlock(eb); + path->locks[level] = 0; + return 1; + } + } else { + BUG_ON(level != 0); } + } - BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + /* wc->stage == DROP_REFERENCE */ + BUG_ON(wc->refs[level] > 1 && !path->locks[level]); - *level = btrfs_header_level(next); - path->nodes[*level] = next; - path->slots[*level] = 0; - path->locks[*level] = 1; - cond_resched(); + if (wc->refs[level] == 1) { + if (level == 0) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + ret = btrfs_dec_ref(trans, root, eb, 1); + else + ret = btrfs_dec_ref(trans, root, eb, 0); + BUG_ON(ret); + } + /* make block locked assertion in clean_tree_block happy */ + if (!path->locks[level] && + btrfs_header_generation(eb) == trans->transid) { + btrfs_tree_lock(eb); + btrfs_set_lock_blocking(eb); + path->locks[level] = 1; + } + clean_tree_block(trans, root, eb); + } + + if (eb == root->node) { + if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = eb->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(eb)); + } else { + if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) + parent = path->nodes[level + 1]->start; + else + BUG_ON(root->root_key.objectid != + btrfs_header_owner(path->nodes[level + 1])); } -out: - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; - bytenr = path->nodes[*level]->start; - blocksize = path->nodes[*level]->len; - ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, -
btrfs_header_owner(parent), *level, 0); + ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, + root->root_key.objectid, level, 0); BUG_ON(ret); +out: + wc->refs[level] = 0; + wc->flags[level] = 0; + return ret; +} + +static noinline int walk_down_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct walk_control *wc) +{ + struct extent_buffer *next; + struct extent_buffer *cur; + u64 bytenr; + u64 ptr_gen; + u32 blocksize; + int level = wc->level; + int ret; + + while (level >= 0) { + cur = path->nodes[level]; + BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[*level]); - path->locks[*level] = 0; + ret = walk_down_proc(trans, root, path, wc); + if (ret > 0) + break; + + if (level == 0) + break; + + bytenr = btrfs_node_blockptr(cur, path->slots[level]); + blocksize = btrfs_level_size(root, level - 1); + ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); + + next = read_tree_block(root, bytenr, blocksize, ptr_gen); + btrfs_tree_lock(next); + btrfs_set_lock_blocking(next); + + level--; + BUG_ON(level != btrfs_header_level(next)); + path->nodes[level] = next; + path->slots[level] = 0; + path->locks[level] = 1; + wc->level = level; } - free_extent_buffer(path->nodes[*level]); - path->nodes[*level] = NULL; - *level += 1; - cond_resched(); return 0; } -/* - * helper for dropping snapshots. This walks back up the tree in the path - * to find the first node higher up where we haven't yet gone through - * all the slots - */ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, - int *level, int max_level) + struct walk_control *wc, int max_level) { - struct btrfs_root_item *root_item = &root->root_item; - int i; - int slot; + int level = wc->level; int ret; - for (i = *level; i < max_level && path->nodes[i]; i++) { - slot = path->slots[i]; - if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { - /* - * there is more work to do in this level. 
- * Update the drop_progress marker to reflect - * the work we've done so far, and then bump - * the slot number */ - path->slots[i]++; - WARN_ON(*level == 0); - if (max_level == BTRFS_MAX_LEVEL) { - btrfs_node_key(path->nodes[i], - &root_item->drop_progress, - path->slots[i]); - root_item->drop_level = i; - } - *level = i; + path->slots[level] = btrfs_header_nritems(path->nodes[level]); + while (level < max_level && path->nodes[level]) { + wc->level = level; + if (path->slots[level] + 1 < + btrfs_header_nritems(path->nodes[level])) { + path->slots[level]++; return 0; } else { - struct extent_buffer *parent; - - /* - * this whole node is done, free our reference - * on it and go up one level - */ - if (path->nodes[*level] == root->node) - parent = path->nodes[*level]; - else - parent = path->nodes[*level + 1]; + ret = walk_up_proc(trans, root, path, wc); + if (ret > 0) + return 0; - clean_tree_block(trans, root, path->nodes[i]); - ret = btrfs_free_extent(trans, root, - path->nodes[i]->start, - path->nodes[i]->len, - parent->start, - btrfs_header_owner(parent), - *level, 0); - BUG_ON(ret); - if (path->locks[*level]) { - btrfs_tree_unlock(path->nodes[i]); - path->locks[i] = 0; + if (path->locks[level]) { + btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; } - free_extent_buffer(path->nodes[i]); - path->nodes[i] = NULL; - *level = i + 1; + free_extent_buffer(path->nodes[level]); + path->nodes[level] = NULL; + level++; } } return 1; } /* - * drop the reference count on the tree rooted at 'snap'. This traverses - * the tree freeing any blocks that have a ref count of zero after being - * decremented. + * drop a subvolume tree. + * + * this function traverses the tree freeing any blocks that are only + * referenced by the tree. + * + * when a shared tree block is found, this function decreases its + * reference count by one. if update_ref is true, this function + * also makes sure backrefs for the shared block and all lower level + * blocks are properly updated.
*/ -int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root - *root) +int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) { - int ret = 0; - int wret; - int level; struct btrfs_path *path; - int update_count; + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = root->fs_info->tree_root; struct btrfs_root_item *root_item = &root->root_item; + struct walk_control *wc; + struct btrfs_key key; + int err = 0; + int ret; + int level; path = btrfs_alloc_path(); BUG_ON(!path); - level = btrfs_header_level(root->node); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + + trans = btrfs_start_transaction(tree_root, 1); + if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { + level = btrfs_header_level(root->node); path->nodes[level] = btrfs_lock_root_node(root); btrfs_set_lock_blocking(path->nodes[level]); path->slots[level] = 0; path->locks[level] = 1; + memset(&wc->update_progress, 0, + sizeof(wc->update_progress)); } else { - struct btrfs_key key; - struct btrfs_disk_key found_key; - struct extent_buffer *node; - btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); + memcpy(&wc->update_progress, &key, + sizeof(wc->update_progress)); + level = root_item->drop_level; + BUG_ON(level == 0); path->lowest_level = level; - wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (wret < 0) { - ret = wret; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + path->lowest_level = 0; + if (ret < 0) { + err = ret; goto out; } - node = path->nodes[level]; - btrfs_node_key(node, &found_key, path->slots[level]); - WARN_ON(memcmp(&found_key, &root_item->drop_progress, - sizeof(found_key))); + btrfs_node_key_to_cpu(path->nodes[level], &key, + path->slots[level]); + WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); + /* * unlock our path, this is safe because only this * function is allowed to delete this snapshot */ btrfs_unlock_up_safe(path, 0); + + level = btrfs_header_level(root->node); + while (1) { + btrfs_tree_lock(path->nodes[level]); + btrfs_set_lock_blocking(path->nodes[level]); + + ret = btrfs_lookup_extent_info(trans, root, + path->nodes[level]->start, + path->nodes[level]->len, + &wc->refs[level], + &wc->flags[level]); + BUG_ON(ret); + BUG_ON(wc->refs[level] == 0); + + if (level == root_item->drop_level) + break; + + btrfs_tree_unlock(path->nodes[level]); + WARN_ON(wc->refs[level] != 1); + level--; + } } + + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref = update_ref; + wc->keep_locks = 0; + while (1) { - unsigned long update; - wret = walk_down_tree(trans, root, path, &level); - if (wret > 0) + ret = walk_down_tree(trans, root, path, wc); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; + } - wret = walk_up_tree(trans, root, path, &level, - BTRFS_MAX_LEVEL); - if (wret > 0) + ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); + if (ret < 0) { + err = ret; break; - if (wret < 0) - ret = wret; - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) { - ret = -EAGAIN; + } + + if (ret > 0) { + BUG_ON(wc->stage != DROP_REFERENCE); break; } - for (update_count = 0; update_count < 16; update_count++) { + + if (wc->stage == DROP_REFERENCE) { + level = wc->level; + btrfs_node_key(path->nodes[level], + &root_item->drop_progress, + path->slots[level]); + root_item->drop_level = level; + } + + BUG_ON(wc->level == 0); + if (trans->transaction->in_commit || + trans->transaction->delayed_refs.flushing) { + ret = 
btrfs_update_root(trans, tree_root, + &root->root_key, + root_item); + BUG_ON(ret); + + btrfs_end_transaction(trans, tree_root); + trans = btrfs_start_transaction(tree_root, 1); + } else { + unsigned long update; update = trans->delayed_ref_updates; trans->delayed_ref_updates = 0; if (update) - btrfs_run_delayed_refs(trans, root, update); - else - break; + btrfs_run_delayed_refs(trans, tree_root, + update); } } + btrfs_release_path(root, path); + BUG_ON(err); + + ret = btrfs_del_root(trans, tree_root, &root->root_key); + BUG_ON(ret); + + free_extent_buffer(root->node); + free_extent_buffer(root->commit_root); + kfree(root); out: + btrfs_end_transaction(trans, tree_root); + kfree(wc); btrfs_free_path(path); - return ret; + return err; } +/* + * drop subtree rooted at tree block 'node'. + * + * NOTE: this function will unlock and release tree block 'node' + */ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent) { struct btrfs_path *path; + struct walk_control *wc; int level; int parent_level; int ret = 0; int wret; + BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); + path = btrfs_alloc_path(); BUG_ON(!path); + wc = kzalloc(sizeof(*wc), GFP_NOFS); + BUG_ON(!wc); + btrfs_assert_tree_locked(parent); parent_level = btrfs_header_level(parent); extent_buffer_get(parent); @@ -4817,24 +5024,33 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, btrfs_assert_tree_locked(node); level = btrfs_header_level(node); - extent_buffer_get(node); path->nodes[level] = node; path->slots[level] = 0; + path->locks[level] = 1; + + wc->refs[parent_level] = 1; + wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; + wc->level = level; + wc->shared_level = -1; + wc->stage = DROP_REFERENCE; + wc->update_ref = 0; + wc->keep_locks = 1; while (1) { - wret = walk_down_tree(trans, root, path, &level); - if (wret < 0) + wret = walk_down_tree(trans, root, path, wc); + if (wret < 0) { ret = wret; - if (wret != 0) break; + } - wret = walk_up_tree(trans, root, path, &level, parent_level); + wret = walk_up_tree(trans, root, path, wc, parent_level); if (wret < 0) ret = wret; if (wret != 0) break; } + kfree(wc); btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 126477eaecf..7c3cd248d8d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -151,7 +151,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, } if (end_pos > isize) { i_size_write(inode, end_pos); - btrfs_update_inode(trans, root, inode); + /* we've only changed i_size in ram, and we haven't updated + * the disk i_size. There is no need to log the inode + * at this time. 
+ */ } err = btrfs_end_transaction(trans, root); out_unlock: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dbe1aabf96c..7ffa3d34ea1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3580,12 +3580,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, owner = 1; BTRFS_I(inode)->block_group = btrfs_find_block_group(root, 0, alloc_hint, owner); - if ((mode & S_IFREG)) { - if (btrfs_test_opt(root, NODATASUM)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; - if (btrfs_test_opt(root, NODATACOW)) - BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; - } key[0].objectid = objectid; btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY); @@ -3640,6 +3634,13 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_inherit_iflags(inode, dir); + if ((mode & S_IFREG)) { + if (btrfs_test_opt(root, NODATASUM)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; + if (btrfs_test_opt(root, NODATACOW)) + BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; + } + insert_inode_hash(inode); inode_tree_add(inode); return inode; @@ -5082,6 +5083,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; struct btrfs_trans_handle *trans; + struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5100,6 +5102,13 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } + root = BTRFS_I(inode)->root; + + ret = btrfs_check_data_free_space(root, inode, + alloc_end - alloc_start); + if (ret) + goto out; + locked_end = alloc_end - 1; while (1) { struct btrfs_ordered_extent *ordered; @@ -5107,7 +5116,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); if (!trans) { ret = -EIO; - goto out; + goto out_free; } /* the extent lock is ordered inside the running @@ -5168,6 +5177,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, GFP_NOFS); btrfs_end_transaction(trans, BTRFS_I(inode)->root); +out_free: + btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index eff18f5b536..9f4db848db1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1028,7 +1028,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, struct btrfs_file_extent_item); comp = btrfs_file_extent_compression(leaf, extent); type = btrfs_file_extent_type(leaf, extent); - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { disko = btrfs_file_extent_disk_bytenr(leaf, extent); diskl = btrfs_file_extent_disk_num_bytes(leaf, @@ -1051,7 +1052,8 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, new_key.objectid = inode->i_ino; new_key.offset = key.offset + destoff - off; - if (type == BTRFS_FILE_EXTENT_REG) { + if (type == BTRFS_FILE_EXTENT_REG || + type == BTRFS_FILE_EXTENT_PREALLOC) { ret = btrfs_insert_empty_item(trans, root, path, &new_key, size); if (ret) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b23dc209ae1..00839793477 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1788,7 +1788,7 @@ static void merge_func(struct btrfs_work *work) btrfs_end_transaction(trans, root); } - btrfs_drop_dead_root(reloc_root); + btrfs_drop_snapshot(reloc_root, 0); if (atomic_dec_and_test(async->num_pending)) complete(async->done); @@ -2075,9 +2075,6 @@ static int do_relocation(struct 
btrfs_trans_handle *trans, ret = btrfs_drop_subtree(trans, root, eb, upper->eb); BUG_ON(ret); - - btrfs_tree_unlock(eb); - free_extent_buffer(eb); } if (!lowest) { btrfs_tree_unlock(upper->eb); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4e83457ea25..2dbf1c1f56e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -593,6 +593,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) return 0; } +#if 0 /* * when dropping snapshots, we generate a ton of delayed refs, and it makes * sense not to join the transaction while it is trying to flush the current @@ -681,6 +682,7 @@ int btrfs_drop_dead_root(struct btrfs_root *root) btrfs_btree_balance_dirty(tree_root, nr); return ret; } +#endif /* * new snapshots need to be created at a very specific time in the @@ -1081,7 +1083,7 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) while (!list_empty(&list)) { root = list_entry(list.next, struct btrfs_root, root_list); list_del_init(&root->root_list); - btrfs_drop_dead_root(root); + btrfs_drop_snapshot(root, 0); } return 0; } diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index ff231ad2389..ff27a296584 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -296,12 +296,15 @@ static int inotify_fasync(int fd, struct file *file, int on) static int inotify_release(struct inode *ignored, struct file *file) { struct fsnotify_group *group = file->private_data; + struct user_struct *user = group->inotify_data.user; fsnotify_clear_marks_by_group(group); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ fsnotify_put_group(group); + atomic_dec(&user->inotify_devs); + return 0; }
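The extent-tree.c rework above replaces the old recursive-style per-level handling with a struct walk_control that keeps all traversal state in one place, so the walk itself reduces to a loop: descend to a leaf with walk_down_tree(), then climb with walk_up_tree() until some node still has unvisited slots. The following stand-alone sketch reproduces just that control flow in plain C; the node and walk types and the two-leaf tree are invented for illustration and carry none of btrfs's locking, reference counting, or backref stages:

#include <stdio.h>

#define MAX_LEVEL 8

struct node {
	int nritems;              /* children (or items, at a leaf) */
	struct node **children;   /* NULL at level 0 */
	int value;                /* leaf payload */
};

/* rough analogue of struct walk_control plus struct btrfs_path */
struct walk {
	struct node *nodes[MAX_LEVEL];
	int slots[MAX_LEVEL];
	int level;
};

/* descend from the current position to a leaf */
static void walk_down(struct walk *wc)
{
	while (wc->level > 0) {
		struct node *cur = wc->nodes[wc->level];
		wc->level--;
		wc->nodes[wc->level] = cur->children[wc->slots[wc->level + 1]];
		wc->slots[wc->level] = 0;
	}
}

/* advance to the next unvisited slot, dropping finished blocks on
 * the way up; returns 1 once the whole tree has been consumed */
static int walk_up(struct walk *wc, int max_level)
{
	/* the block we descended to is fully processed */
	wc->slots[wc->level] = wc->nodes[wc->level]->nritems;

	while (wc->level < max_level && wc->nodes[wc->level]) {
		if (wc->slots[wc->level] + 1 <
		    wc->nodes[wc->level]->nritems) {
			wc->slots[wc->level]++;
			return 0;
		}
		wc->nodes[wc->level] = NULL;	/* "free" the block */
		wc->level++;
	}
	return 1;
}

int main(void)
{
	struct node leaves[2] = { { 1, NULL, 100 }, { 1, NULL, 200 } };
	struct node *kids[2] = { &leaves[0], &leaves[1] };
	struct node root = { 2, kids, 0 };
	struct walk wc = { .nodes = { [1] = &root }, .level = 1 };

	for (;;) {
		walk_down(&wc);
		printf("leaf %d\n", wc.nodes[0]->value);  /* process leaf */
		if (walk_up(&wc, MAX_LEVEL))
			break;
	}
	return 0;
}

In the kernel code the same loop shape appears in btrfs_drop_snapshot(): walk_down_tree() descends while walk_down_proc() keeps returning 0, and walk_up_tree() either advances a slot, resuming the descent, or lets walk_up_proc() drop the finished block and pop a level.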