diff options
Diffstat (limited to 'arch')
158 files changed, 4730 insertions, 2255 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 99193b16023..beea3ccebb5 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,18 @@ config OPROFILE_IBS If unsure, say N. +config OPROFILE_EVENT_MULTIPLEX + bool "OProfile multiplexing support (EXPERIMENTAL)" + default n + depends on OPROFILE && X86 + help + The number of hardware counters is limited. The multiplexing + feature enables OProfile to gather more events than counters + are provided by the hardware. This is realized by switching + between events at an user specified time interval. + + If unsure, say N. + config HAVE_OPROFILE bool diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index 60c83abfde7..5076a8860b1 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -75,6 +75,7 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define TIF_UAC_SIGBUS 7 #define TIF_MEMDIE 8 #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal */ +#define TIF_NOTIFY_RESUME 10 /* callback before returning to user */ #define TIF_FREEZE 16 /* is freezing for suspend */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) @@ -82,10 +83,12 @@ register struct thread_info *__current_thread_info __asm__("$8"); #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_FREEZE (1<<TIF_FREEZE) /* Work to do on interrupt/exception return. */ -#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED) +#define _TIF_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ + _TIF_NOTIFY_RESUME) /* Work to do on any return to userspace. */ #define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \ diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index df65eaa84c4..0932dbb1ef8 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -20,6 +20,7 @@ #include <linux/binfmts.h> #include <linux/bitops.h> #include <linux/syscalls.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/sigcontext.h> @@ -683,4 +684,11 @@ do_notify_resume(struct pt_regs *regs, struct switch_stack *sw, { if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs, sw, r0, r19); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } } diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 73394e50cbc..d3a39b1e6c0 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -130,11 +130,13 @@ extern void vfp_sync_state(struct thread_info *thread); * TIF_SYSCALL_TRACE - syscall trace active * TIF_SIGPENDING - signal pending * TIF_NEED_RESCHED - rescheduling necessary + * TIF_NOTIFY_RESUME - callback before returning to user * TIF_USEDFPU - FPU was used by this task this quantum (SMP) * TIF_POLLING_NRFLAG - true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 +#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_SYSCALL_TRACE 8 #define TIF_POLLING_NRFLAG 16 #define TIF_USING_IWMMXT 17 @@ -143,6 +145,7 @@ extern void vfp_sync_state(struct thread_info *thread); #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8c3de1a350b..7813ab782fd 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -51,7 +51,7 @@ fast_work_pending: work_pending: tst r1, #_TIF_NEED_RESCHED bne work_resched - tst r1, #_TIF_SIGPENDING + tst r1, #_TIF_SIGPENDING|_TIF_NOTIFY_RESUME beq no_work_pending mov r0, sp @ 'regs' mov r2, why @ 'syscall' diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index f6bc5d44278..b76fe06d92e 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -12,6 +12,7 @@ #include <linux/personality.h> #include <linux/freezer.h> #include <linux/uaccess.h> +#include <linux/tracehook.h> #include <asm/elf.h> #include <asm/cacheflush.h> @@ -707,4 +708,11 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall) { if (thread_flags & _TIF_SIGPENDING) do_signal(¤t->blocked, regs, syscall); + + if (thread_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } } diff --git a/arch/arm/mach-omap2/mcbsp.c b/arch/arm/mach-omap2/mcbsp.c index 99b6e154631..0447d26d454 100644 --- a/arch/arm/mach-omap2/mcbsp.c +++ b/arch/arm/mach-omap2/mcbsp.c @@ -128,6 +128,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP1_IRQ_RX, .tx_irq = INT_24XX_MCBSP1_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x6F, }, { .phys_base = OMAP34XX_MCBSP2_BASE, @@ -136,6 +137,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP2_IRQ_RX, .tx_irq = INT_24XX_MCBSP2_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x3FF, }, { .phys_base = OMAP34XX_MCBSP3_BASE, @@ -144,6 +146,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP3_IRQ_RX, .tx_irq = INT_24XX_MCBSP3_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x6F, }, { .phys_base = OMAP34XX_MCBSP4_BASE, @@ -152,6 +155,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP4_IRQ_RX, .tx_irq = INT_24XX_MCBSP4_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x6F, }, { .phys_base = OMAP34XX_MCBSP5_BASE, @@ -160,6 +164,7 @@ static struct omap_mcbsp_platform_data omap34xx_mcbsp_pdata[] = { .rx_irq = INT_24XX_MCBSP5_IRQ_RX, .tx_irq = INT_24XX_MCBSP5_IRQ_TX, .ops = &omap2_mcbsp_ops, + .buffer_size = 0x6F, }, }; #define OMAP34XX_MCBSP_PDATA_SZ ARRAY_SIZE(omap34xx_mcbsp_pdata) diff --git a/arch/arm/mach-pxa/include/mach/audio.h b/arch/arm/mach-pxa/include/mach/audio.h index 16eb02552d5..a3449e35a6f 100644 --- a/arch/arm/mach-pxa/include/mach/audio.h +++ b/arch/arm/mach-pxa/include/mach/audio.h @@ -3,10 +3,12 @@ #include <sound/core.h> #include <sound/pcm.h> +#include <sound/ac97_codec.h> /* * @reset_gpio: AC97 reset gpio (normally gpio113 or gpio95) * a -1 value means no gpio will be used for reset + * @codec_pdata: AC97 codec platform_data * reset_gpio should only be specified for pxa27x CPUs where a silicon * bug prevents correct operation of the reset line. If not specified, @@ -20,6 +22,7 @@ typedef struct { void (*resume)(void *); void *priv; int reset_gpio; + void *codec_pdata[AC97_BUS_MAX_DEVICES]; } pxa2xx_audio_ops_t; extern void pxa_set_ac97_info(pxa2xx_audio_ops_t *ops); diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index e3ac94f0900..9b00f4cbc90 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -1127,6 +1127,11 @@ int omap_dma_running(void) void omap_dma_link_lch(int lch_head, int lch_queue) { if (omap_dma_in_1510_mode()) { + if (lch_head == lch_queue) { + dma_write(dma_read(CCR(lch_head)) | (3 << 8), + CCR(lch_head)); + return; + } printk(KERN_ERR "DMA linking is not supported in 1510 mode\n"); BUG(); return; @@ -1149,6 +1154,11 @@ EXPORT_SYMBOL(omap_dma_link_lch); void omap_dma_unlink_lch(int lch_head, int lch_queue) { if (omap_dma_in_1510_mode()) { + if (lch_head == lch_queue) { + dma_write(dma_read(CCR(lch_head)) & ~(3 << 8), + CCR(lch_head)); + return; + } printk(KERN_ERR "DMA linking is not supported in 1510 mode\n"); BUG(); return; diff --git a/arch/arm/plat-omap/include/mach/mcbsp.h b/arch/arm/plat-omap/include/mach/mcbsp.h index bb154ea7676..63a3f254af7 100644 --- a/arch/arm/plat-omap/include/mach/mcbsp.h +++ b/arch/arm/plat-omap/include/mach/mcbsp.h @@ -134,6 +134,11 @@ #define OMAP_MCBSP_REG_XCERG 0x74 #define OMAP_MCBSP_REG_XCERH 0x78 #define OMAP_MCBSP_REG_SYSCON 0x8C +#define OMAP_MCBSP_REG_THRSH2 0x90 +#define OMAP_MCBSP_REG_THRSH1 0x94 +#define OMAP_MCBSP_REG_IRQST 0xA0 +#define OMAP_MCBSP_REG_IRQEN 0xA4 +#define OMAP_MCBSP_REG_WAKEUPEN 0xA8 #define OMAP_MCBSP_REG_XCCR 0xAC #define OMAP_MCBSP_REG_RCCR 0xB0 @@ -249,8 +254,27 @@ #define RDISABLE 0x0001 /********************** McBSP SYSCONFIG bit definitions ********************/ +#define CLOCKACTIVITY(value) ((value)<<8) +#define SIDLEMODE(value) ((value)<<3) +#define ENAWAKEUP 0x0004 #define SOFTRST 0x0002 +/********************** McBSP DMA operating modes **************************/ +#define MCBSP_DMA_MODE_ELEMENT 0 +#define MCBSP_DMA_MODE_THRESHOLD 1 +#define MCBSP_DMA_MODE_FRAME 2 + +/********************** McBSP WAKEUPEN bit definitions *********************/ +#define XEMPTYEOFEN 0x4000 +#define XRDYEN 0x0400 +#define XEOFEN 0x0200 +#define XFSXEN 0x0100 +#define XSYNCERREN 0x0080 +#define RRDYEN 0x0008 +#define REOFEN 0x0004 +#define RFSREN 0x0002 +#define RSYNCERREN 0x0001 + /* we don't do multichannel for now */ struct omap_mcbsp_reg_cfg { u16 spcr2; @@ -344,6 +368,9 @@ struct omap_mcbsp_platform_data { u8 dma_rx_sync, dma_tx_sync; u16 rx_irq, tx_irq; struct omap_mcbsp_ops *ops; +#ifdef CONFIG_ARCH_OMAP34XX + u16 buffer_size; +#endif }; struct omap_mcbsp { @@ -377,6 +404,11 @@ struct omap_mcbsp { struct omap_mcbsp_platform_data *pdata; struct clk *iclk; struct clk *fclk; +#ifdef CONFIG_ARCH_OMAP34XX + int dma_op_mode; + u16 max_tx_thres; + u16 max_rx_thres; +#endif }; extern struct omap_mcbsp **mcbsp_ptr; extern int omap_mcbsp_count; @@ -385,10 +417,25 @@ int omap_mcbsp_init(void); void omap_mcbsp_register_board_cfg(struct omap_mcbsp_platform_data *config, int size); void omap_mcbsp_config(unsigned int id, const struct omap_mcbsp_reg_cfg * config); +#ifdef CONFIG_ARCH_OMAP34XX +void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold); +void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold); +u16 omap_mcbsp_get_max_tx_threshold(unsigned int id); +u16 omap_mcbsp_get_max_rx_threshold(unsigned int id); +int omap_mcbsp_get_dma_op_mode(unsigned int id); +#else +static inline void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold) +{ } +static inline void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold) +{ } +static inline u16 omap_mcbsp_get_max_tx_threshold(unsigned int id) { return 0; } +static inline u16 omap_mcbsp_get_max_rx_threshold(unsigned int id) { return 0; } +static inline int omap_mcbsp_get_dma_op_mode(unsigned int id) { return 0; } +#endif int omap_mcbsp_request(unsigned int id); void omap_mcbsp_free(unsigned int id); -void omap_mcbsp_start(unsigned int id); -void omap_mcbsp_stop(unsigned int id); +void omap_mcbsp_start(unsigned int id, int tx, int rx); +void omap_mcbsp_stop(unsigned int id, int tx, int rx); void omap_mcbsp_xmit_word(unsigned int id, u32 word); u32 omap_mcbsp_recv_word(unsigned int id); diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index efa0e0111f3..8dc7927906f 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -198,6 +198,170 @@ void omap_mcbsp_config(unsigned int id, const struct omap_mcbsp_reg_cfg *config) } EXPORT_SYMBOL(omap_mcbsp_config); +#ifdef CONFIG_ARCH_OMAP34XX +/* + * omap_mcbsp_set_tx_threshold configures how to deal + * with transmit threshold. the threshold value and handler can be + * configure in here. + */ +void omap_mcbsp_set_tx_threshold(unsigned int id, u16 threshold) +{ + struct omap_mcbsp *mcbsp; + void __iomem *io_base; + + if (!cpu_is_omap34xx()) + return; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return; + } + mcbsp = id_to_mcbsp_ptr(id); + io_base = mcbsp->io_base; + + OMAP_MCBSP_WRITE(io_base, THRSH2, threshold); +} +EXPORT_SYMBOL(omap_mcbsp_set_tx_threshold); + +/* + * omap_mcbsp_set_rx_threshold configures how to deal + * with receive threshold. the threshold value and handler can be + * configure in here. + */ +void omap_mcbsp_set_rx_threshold(unsigned int id, u16 threshold) +{ + struct omap_mcbsp *mcbsp; + void __iomem *io_base; + + if (!cpu_is_omap34xx()) + return; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return; + } + mcbsp = id_to_mcbsp_ptr(id); + io_base = mcbsp->io_base; + + OMAP_MCBSP_WRITE(io_base, THRSH1, threshold); +} +EXPORT_SYMBOL(omap_mcbsp_set_rx_threshold); + +/* + * omap_mcbsp_get_max_tx_thres just return the current configured + * maximum threshold for transmission + */ +u16 omap_mcbsp_get_max_tx_threshold(unsigned int id) +{ + struct omap_mcbsp *mcbsp; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return -ENODEV; + } + mcbsp = id_to_mcbsp_ptr(id); + + return mcbsp->max_tx_thres; +} +EXPORT_SYMBOL(omap_mcbsp_get_max_tx_threshold); + +/* + * omap_mcbsp_get_max_rx_thres just return the current configured + * maximum threshold for reception + */ +u16 omap_mcbsp_get_max_rx_threshold(unsigned int id) +{ + struct omap_mcbsp *mcbsp; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%d)\n", __func__, id + 1); + return -ENODEV; + } + mcbsp = id_to_mcbsp_ptr(id); + + return mcbsp->max_rx_thres; +} +EXPORT_SYMBOL(omap_mcbsp_get_max_rx_threshold); + +/* + * omap_mcbsp_get_dma_op_mode just return the current configured + * operating mode for the mcbsp channel + */ +int omap_mcbsp_get_dma_op_mode(unsigned int id) +{ + struct omap_mcbsp *mcbsp; + int dma_op_mode; + + if (!omap_mcbsp_check_valid_id(id)) { + printk(KERN_ERR "%s: Invalid id (%u)\n", __func__, id + 1); + return -ENODEV; + } + mcbsp = id_to_mcbsp_ptr(id); + + spin_lock_irq(&mcbsp->lock); + dma_op_mode = mcbsp->dma_op_mode; + spin_unlock_irq(&mcbsp->lock); + + return dma_op_mode; +} +EXPORT_SYMBOL(omap_mcbsp_get_dma_op_mode); + +static inline void omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp) +{ + /* + * Enable wakup behavior, smart idle and all wakeups + * REVISIT: some wakeups may be unnecessary + */ + if (cpu_is_omap34xx()) { + u16 syscon; + + syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON); + syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03) | CLOCKACTIVITY(0x03)); + + spin_lock_irq(&mcbsp->lock); + if (mcbsp->dma_op_mode == MCBSP_DMA_MODE_THRESHOLD) { + syscon |= (ENAWAKEUP | SIDLEMODE(0x02) | + CLOCKACTIVITY(0x02)); + OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, + XRDYEN | RRDYEN); + } else { + syscon |= SIDLEMODE(0x01); + } + spin_unlock_irq(&mcbsp->lock); + + OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); + } +} + +static inline void omap34xx_mcbsp_free(struct omap_mcbsp *mcbsp) +{ + /* + * Disable wakup behavior, smart idle and all wakeups + */ + if (cpu_is_omap34xx()) { + u16 syscon; + + syscon = OMAP_MCBSP_READ(mcbsp->io_base, SYSCON); + syscon &= ~(ENAWAKEUP | SIDLEMODE(0x03) | CLOCKACTIVITY(0x03)); + /* + * HW bug workaround - If no_idle mode is taken, we need to + * go to smart_idle before going to always_idle, or the + * device will not hit retention anymore. + */ + syscon |= SIDLEMODE(0x02); + OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); + + syscon &= ~(SIDLEMODE(0x03)); + OMAP_MCBSP_WRITE(mcbsp->io_base, SYSCON, syscon); + + OMAP_MCBSP_WRITE(mcbsp->io_base, WAKEUPEN, 0); + } +} +#else +static inline void omap34xx_mcbsp_request(struct omap_mcbsp *mcbsp) {} +static inline void omap34xx_mcbsp_free(struct omap_mcbsp *mcbsp) {} +#endif + /* * We can choose between IRQ based or polled IO. * This needs to be called before omap_mcbsp_request(). @@ -257,6 +421,9 @@ int omap_mcbsp_request(unsigned int id) clk_enable(mcbsp->iclk); clk_enable(mcbsp->fclk); + /* Do procedure specific to omap34xx arch, if applicable */ + omap34xx_mcbsp_request(mcbsp); + /* * Make sure that transmitter, receiver and sample-rate generator are * not running before activating IRQs. @@ -305,6 +472,9 @@ void omap_mcbsp_free(unsigned int id) if (mcbsp->pdata && mcbsp->pdata->ops && mcbsp->pdata->ops->free) mcbsp->pdata->ops->free(id); + /* Do procedure specific to omap34xx arch, if applicable */ + omap34xx_mcbsp_free(mcbsp); + clk_disable(mcbsp->fclk); clk_disable(mcbsp->iclk); @@ -328,14 +498,15 @@ void omap_mcbsp_free(unsigned int id) EXPORT_SYMBOL(omap_mcbsp_free); /* - * Here we start the McBSP, by enabling the sample - * generator, both transmitter and receivers, - * and the frame sync. + * Here we start the McBSP, by enabling transmitter, receiver or both. + * If no transmitter or receiver is active prior calling, then sample-rate + * generator and frame sync are started. */ -void omap_mcbsp_start(unsigned int id) +void omap_mcbsp_start(unsigned int id, int tx, int rx) { struct omap_mcbsp *mcbsp; void __iomem *io_base; + int idle; u16 w; if (!omap_mcbsp_check_valid_id(id)) { @@ -348,32 +519,58 @@ void omap_mcbsp_start(unsigned int id) mcbsp->rx_word_length = (OMAP_MCBSP_READ(io_base, RCR1) >> 5) & 0x7; mcbsp->tx_word_length = (OMAP_MCBSP_READ(io_base, XCR1) >> 5) & 0x7; - /* Start the sample generator */ - w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 6)); + idle = !((OMAP_MCBSP_READ(io_base, SPCR2) | + OMAP_MCBSP_READ(io_base, SPCR1)) & 1); + + if (idle) { + /* Start the sample generator */ + w = OMAP_MCBSP_READ(io_base, SPCR2); + OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 6)); + } /* Enable transmitter and receiver */ + tx &= 1; w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w | 1); + OMAP_MCBSP_WRITE(io_base, SPCR2, w | tx); + rx &= 1; w = OMAP_MCBSP_READ(io_base, SPCR1); - OMAP_MCBSP_WRITE(io_base, SPCR1, w | 1); + OMAP_MCBSP_WRITE(io_base, SPCR1, w | rx); - udelay(100); + /* + * Worst case: CLKSRG*2 = 8000khz: (1/8000) * 2 * 2 usec + * REVISIT: 100us may give enough time for two CLKSRG, however + * due to some unknown PM related, clock gating etc. reason it + * is now at 500us. + */ + udelay(500); - /* Start frame sync */ - w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 7)); + if (idle) { + /* Start frame sync */ + w = OMAP_MCBSP_READ(io_base, SPCR2); + OMAP_MCBSP_WRITE(io_base, SPCR2, w | (1 << 7)); + } + + if (cpu_is_omap2430() || cpu_is_omap34xx()) { + /* Release the transmitter and receiver */ + w = OMAP_MCBSP_READ(io_base, XCCR); + w &= ~(tx ? XDISABLE : 0); + OMAP_MCBSP_WRITE(io_base, XCCR, w); + w = OMAP_MCBSP_READ(io_base, RCCR); + w &= ~(rx ? RDISABLE : 0); + OMAP_MCBSP_WRITE(io_base, RCCR, w); + } /* Dump McBSP Regs */ omap_mcbsp_dump_reg(id); } EXPORT_SYMBOL(omap_mcbsp_start); -void omap_mcbsp_stop(unsigned int id) +void omap_mcbsp_stop(unsigned int id, int tx, int rx) { struct omap_mcbsp *mcbsp; void __iomem *io_base; + int idle; u16 w; if (!omap_mcbsp_check_valid_id(id)) { @@ -385,16 +582,33 @@ void omap_mcbsp_stop(unsigned int id) io_base = mcbsp->io_base; /* Reset transmitter */ + tx &= 1; + if (cpu_is_omap2430() || cpu_is_omap34xx()) { + w = OMAP_MCBSP_READ(io_base, XCCR); + w |= (tx ? XDISABLE : 0); + OMAP_MCBSP_WRITE(io_base, XCCR, w); + } w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(1)); + OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~tx); /* Reset receiver */ + rx &= 1; + if (cpu_is_omap2430() || cpu_is_omap34xx()) { + w = OMAP_MCBSP_READ(io_base, RCCR); + w |= (tx ? RDISABLE : 0); + OMAP_MCBSP_WRITE(io_base, RCCR, w); + } w = OMAP_MCBSP_READ(io_base, SPCR1); - OMAP_MCBSP_WRITE(io_base, SPCR1, w & ~(1)); + OMAP_MCBSP_WRITE(io_base, SPCR1, w & ~rx); - /* Reset the sample rate generator */ - w = OMAP_MCBSP_READ(io_base, SPCR2); - OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(1 << 6)); + idle = !((OMAP_MCBSP_READ(io_base, SPCR2) | + OMAP_MCBSP_READ(io_base, SPCR1)) & 1); + + if (idle) { + /* Reset the sample rate generator */ + w = OMAP_MCBSP_READ(io_base, SPCR2); + OMAP_MCBSP_WRITE(io_base, SPCR2, w & ~(1 << 6)); + } } EXPORT_SYMBOL(omap_mcbsp_stop); @@ -883,6 +1097,149 @@ void omap_mcbsp_set_spi_mode(unsigned int id, } EXPORT_SYMBOL(omap_mcbsp_set_spi_mode); +#ifdef CONFIG_ARCH_OMAP34XX +#define max_thres(m) (mcbsp->pdata->buffer_size) +#define valid_threshold(m, val) ((val) <= max_thres(m)) +#define THRESHOLD_PROP_BUILDER(prop) \ +static ssize_t prop##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct omap_mcbsp *mcbsp = dev_get_drvdata(dev); \ + \ + return sprintf(buf, "%u\n", mcbsp->prop); \ +} \ + \ +static ssize_t prop##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t size) \ +{ \ + struct omap_mcbsp *mcbsp = dev_get_drvdata(dev); \ + unsigned long val; \ + int status; \ + \ + status = strict_strtoul(buf, 0, &val); \ + if (status) \ + return status; \ + \ + if (!valid_threshold(mcbsp, val)) \ + return -EDOM; \ + \ + mcbsp->prop = val; \ + return size; \ +} \ + \ +static DEVICE_ATTR(prop, 0644, prop##_show, prop##_store); + +THRESHOLD_PROP_BUILDER(max_tx_thres); +THRESHOLD_PROP_BUILDER(max_rx_thres); + +static const char *dma_op_modes[] = { + "element", "threshold", "frame", +}; + +static ssize_t dma_op_mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct omap_mcbsp *mcbsp = dev_get_drvdata(dev); + int dma_op_mode, i = 0; + ssize_t len = 0; + const char * const *s; + + spin_lock_irq(&mcbsp->lock); + dma_op_mode = mcbsp->dma_op_mode; + spin_unlock_irq(&mcbsp->lock); + + for (s = &dma_op_modes[i]; i < ARRAY_SIZE(dma_op_modes); s++, i++) { + if (dma_op_mode == i) + len += sprintf(buf + len, "[%s] ", *s); + else + len += sprintf(buf + len, "%s ", *s); + } + len += sprintf(buf + len, "\n"); + + return len; +} + +static ssize_t dma_op_mode_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + struct omap_mcbsp *mcbsp = dev_get_drvdata(dev); + const char * const *s; + int i = 0; + + for (s = &dma_op_modes[i]; i < ARRAY_SIZE(dma_op_modes); s++, i++) + if (sysfs_streq(buf, *s)) + break; + + if (i == ARRAY_SIZE(dma_op_modes)) + return -EINVAL; + + spin_lock_irq(&mcbsp->lock); + if (!mcbsp->free) { + size = -EBUSY; + goto unlock; + } + mcbsp->dma_op_mode = i; + +unlock: + spin_unlock_irq(&mcbsp->lock); + + return size; +} + +static DEVICE_ATTR(dma_op_mode, 0644, dma_op_mode_show, dma_op_mode_store); + +static const struct attribute *additional_attrs[] = { + &dev_attr_max_tx_thres.attr, + &dev_attr_max_rx_thres.attr, + &dev_attr_dma_op_mode.attr, + NULL, +}; + +static const struct attribute_group additional_attr_group = { + .attrs = (struct attribute **)additional_attrs, +}; + +static inline int __devinit omap_additional_add(struct device *dev) +{ + return sysfs_create_group(&dev->kobj, &additional_attr_group); +} + +static inline void __devexit omap_additional_remove(struct device *dev) +{ + sysfs_remove_group(&dev->kobj, &additional_attr_group); +} + +static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) +{ + mcbsp->dma_op_mode = MCBSP_DMA_MODE_ELEMENT; + if (cpu_is_omap34xx()) { + mcbsp->max_tx_thres = max_thres(mcbsp); + mcbsp->max_rx_thres = max_thres(mcbsp); + /* + * REVISIT: Set dmap_op_mode to THRESHOLD as default + * for mcbsp2 instances. + */ + if (omap_additional_add(mcbsp->dev)) + dev_warn(mcbsp->dev, + "Unable to create additional controls\n"); + } else { + mcbsp->max_tx_thres = -EINVAL; + mcbsp->max_rx_thres = -EINVAL; + } +} + +static inline void __devexit omap34xx_device_exit(struct omap_mcbsp *mcbsp) +{ + if (cpu_is_omap34xx()) + omap_additional_remove(mcbsp->dev); +} +#else +static inline void __devinit omap34xx_device_init(struct omap_mcbsp *mcbsp) {} +static inline void __devexit omap34xx_device_exit(struct omap_mcbsp *mcbsp) {} +#endif /* CONFIG_ARCH_OMAP34XX */ + /* * McBSP1 and McBSP3 are directly mapped on 1610 and 1510. * 730 has only 2 McBSP, and both of them are MPU peripherals. @@ -953,6 +1310,10 @@ static int __devinit omap_mcbsp_probe(struct platform_device *pdev) mcbsp->dev = &pdev->dev; mcbsp_ptr[id] = mcbsp; platform_set_drvdata(pdev, mcbsp); + + /* Initialize mcbsp properties for OMAP34XX if needed / applicable */ + omap34xx_device_init(mcbsp); + return 0; err_fclk: @@ -976,6 +1337,8 @@ static int __devexit omap_mcbsp_remove(struct platform_device *pdev) mcbsp->pdata->ops->free) mcbsp->pdata->ops->free(mcbsp->id); + omap34xx_device_exit(mcbsp); + clk_disable(mcbsp->fclk); clk_disable(mcbsp->iclk); clk_put(mcbsp->fclk); diff --git a/arch/arm/plat-s3c/include/plat/audio-simtec.h b/arch/arm/plat-s3c/include/plat/audio-simtec.h new file mode 100644 index 00000000000..0f440b9168d --- /dev/null +++ b/arch/arm/plat-s3c/include/plat/audio-simtec.h @@ -0,0 +1,37 @@ +/* arch/arm/plat-s3c/include/plat/audio-simtec.h + * + * Copyright 2008 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks <ben@simtec.co.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Simtec Audio support. +*/ + +/** + * struct s3c24xx_audio_simtec_pdata - platform data for simtec audio + * @use_mpllin: Select codec clock from MPLLin + * @output_cdclk: Need to output CDCLK to the codec + * @have_mic: Set if we have a MIC socket + * @have_lout: Set if we have a LineOut socket + * @amp_gpio: GPIO pin to enable the AMP + * @amp_gain: Option GPIO to control AMP gain + */ +struct s3c24xx_audio_simtec_pdata { + unsigned int use_mpllin:1; + unsigned int output_cdclk:1; + + unsigned int have_mic:1; + unsigned int have_lout:1; + + int amp_gpio; + int amp_gain[2]; + + void (*startup)(void); +}; + +extern int simtec_audio_add(const char *codec_name, + struct s3c24xx_audio_simtec_pdata *pdata); diff --git a/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h b/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h index 0fad7571030..07659dad174 100644 --- a/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h +++ b/arch/arm/plat-s3c/include/plat/regs-s3c2412-iis.h @@ -33,6 +33,11 @@ #define S3C2412_IISCON_RXDMA_ACTIVE (1 << 1) #define S3C2412_IISCON_IIS_ACTIVE (1 << 0) +#define S3C64XX_IISMOD_BLC_16BIT (0 << 13) +#define S3C64XX_IISMOD_BLC_8BIT (1 << 13) +#define S3C64XX_IISMOD_BLC_24BIT (2 << 13) +#define S3C64XX_IISMOD_BLC_MASK (3 << 13) + #define S3C64XX_IISMOD_IMS_PCLK (0 << 10) #define S3C64XX_IISMOD_IMS_SYSMUX (1 << 10) diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h index fc42de5ca20..fd0c5d7e933 100644 --- a/arch/avr32/include/asm/thread_info.h +++ b/arch/avr32/include/asm/thread_info.h @@ -84,6 +84,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_MEMDIE 6 #define TIF_RESTORE_SIGMASK 7 /* restore signal mask in do_signal */ #define TIF_CPU_GOING_TO_SLEEP 8 /* CPU is entering sleep 0 mode */ +#define TIF_NOTIFY_RESUME 9 /* callback before returning to user */ #define TIF_FREEZE 29 #define TIF_DEBUG 30 /* debugging enabled */ #define TIF_USERSPACE 31 /* true if FS sets userspace */ @@ -96,6 +97,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_MEMDIE (1 << TIF_MEMDIE) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FREEZE (1 << TIF_FREEZE) /* Note: The masks below must never span more than 16 bits! */ @@ -103,13 +105,15 @@ static inline struct thread_info *current_thread_info(void) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ ((1 << TIF_SIGPENDING) \ + | _TIF_NOTIFY_RESUME \ | (1 << TIF_NEED_RESCHED) \ | (1 << TIF_POLLING_NRFLAG) \ | (1 << TIF_BREAKPOINT) \ | (1 << TIF_RESTORE_SIGMASK)) /* work to do on any return to userspace */ -#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE)) +#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \ + _TIF_NOTIFY_RESUME) /* work to do on return from debug mode */ #define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT)) diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S index 009a80155d6..169268c40ae 100644 --- a/arch/avr32/kernel/entry-avr32b.S +++ b/arch/avr32/kernel/entry-avr32b.S @@ -281,7 +281,7 @@ syscall_exit_work: ld.w r1, r0[TI_flags] rjmp 1b -2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK +2: mov r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NOTIFY_RESUME tst r1, r2 breq 3f unmask_interrupts diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index 27227561bad..64f886fac2e 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -16,6 +16,7 @@ #include <linux/ptrace.h> #include <linux/unistd.h> #include <linux/freezer.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/ucontext.h> @@ -322,4 +323,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs, ¤t->blocked, syscall); + + if (ti->flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } } diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c index b326023baab..48b0f391263 100644 --- a/arch/cris/kernel/ptrace.c +++ b/arch/cris/kernel/ptrace.c @@ -16,6 +16,7 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -36,4 +37,11 @@ void do_notify_resume(int canrestart, struct pt_regs *regs, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(canrestart,regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } } diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 4a7a62c6e78..6b0a2b6fed6 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -572,6 +572,8 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(__frame); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } /* end do_notify_resume() */ diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 8bbc8b0ee45..70e67e47d02 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -89,6 +89,7 @@ static inline struct thread_info *current_thread_info(void) TIF_NEED_RESCHED */ #define TIF_MEMDIE 4 #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ +#define TIF_NOTIFY_RESUME 6 /* callback before returning to user */ #define TIF_FREEZE 16 /* is freezing for suspend */ /* as above, but as bit values */ @@ -97,6 +98,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FREEZE (1<<TIF_FREEZE) #define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */ diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index cf3472f7389..af842c369d2 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -39,6 +39,7 @@ #include <linux/tty.h> #include <linux/binfmts.h> #include <linux/freezer.h> +#include <linux/tracehook.h> #include <asm/setup.h> #include <asm/uaccess.h> @@ -552,4 +553,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) { if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs, NULL); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } } diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 5a61b5c2e18..8d3c79cd81e 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -44,7 +44,6 @@ static inline void dma_free_coherent(struct device *dev, size_t size, #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define get_dma_ops(dev) platform_dma_get_ops(dev) -#define flush_write_buffers() #include <asm-generic/dma-mapping-common.h> @@ -69,6 +68,24 @@ dma_set_mask (struct device *dev, u64 mask) return 0; } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + extern int dma_get_cache_alignment(void); static inline void diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 39a3cd0a417..f2c1600da09 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -10,7 +10,9 @@ EXPORT_SYMBOL(dma_ops); static int __init dma_init(void) { - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + + return 0; } fs_initcall(dma_init); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 5d7c0e5b9e7..89969e95004 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -192,6 +192,8 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) if (test_thread_flag(TIF_NOTIFY_RESUME)) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(&scr->pt); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } /* copy user rbs to kernel rbs */ diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S index 1f86aeb2c94..620d9dc5220 100644 --- a/arch/ia64/lib/ip_fast_csum.S +++ b/arch/ia64/lib/ip_fast_csum.S @@ -96,20 +96,22 @@ END(ip_fast_csum) GLOBAL_ENTRY(csum_ipv6_magic) ld4 r20=[in0],4 ld4 r21=[in1],4 - dep r15=in3,in2,32,16 + zxt4 in2=in2 ;; ld4 r22=[in0],4 ld4 r23=[in1],4 - mux1 r15=r15,@rev + dep r15=in3,in2,32,16 ;; ld4 r24=[in0],4 ld4 r25=[in1],4 - shr.u r15=r15,16 + mux1 r15=r15,@rev add r16=r20,r21 add r17=r22,r23 + zxt4 in4=in4 ;; ld4 r26=[in0],4 ld4 r27=[in1],4 + shr.u r15=r15,16 add r18=r24,r25 add r8=r16,r17 ;; diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index fb833269017..dbeadb9c8e2 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -133,8 +133,7 @@ consider_steal_time(unsigned long new_itm) account_idle_ticks(blocked); run_local_timers(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_mode(get_irq_regs())); + rcu_check_callbacks(cpu, user_mode(get_irq_regs())); scheduler_tick(); run_posix_cpu_timers(p); diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index 07bb5bd00e2..71578151a40 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -149,6 +149,7 @@ static inline unsigned int get_thread_fault_code(void) #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ #define TIF_IRET 4 /* return with iret */ +#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ #define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -160,6 +161,7 @@ static inline unsigned int get_thread_fault_code(void) #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) #define _TIF_IRET (1<<TIF_IRET) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_USEDFPU (1<<TIF_USEDFPU) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 18124542a6e..144b0f124fc 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -21,6 +21,7 @@ #include <linux/stddef.h> #include <linux/personality.h> #include <linux/freezer.h> +#include <linux/tracehook.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/uaccess.h> @@ -408,5 +409,12 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs,oldset); + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } + clear_thread_flag(TIF_IRET); } diff --git a/arch/m68k/include/asm/entry_mm.h b/arch/m68k/include/asm/entry_mm.h index 5202f5a5b42..47412588621 100644 --- a/arch/m68k/include/asm/entry_mm.h +++ b/arch/m68k/include/asm/entry_mm.h @@ -46,7 +46,6 @@ #define curptr a2 LFLUSH_I_AND_D = 0x00000808 -LSIGTRAP = 5 /* process bits for task_struct.ptrace */ PT_TRACESYS_OFF = 3 @@ -118,9 +117,6 @@ PT_DTRACE_BIT = 2 #define STR(X) STR1(X) #define STR1(X) #X -#define PT_OFF_ORIG_D0 0x24 -#define PT_OFF_FORMATVEC 0x32 -#define PT_OFF_SR 0x2C #define SAVE_ALL_INT \ "clrl %%sp@-;" /* stk_adj */ \ "pea -1:w;" /* orig d0 = -1 */ \ diff --git a/arch/m68k/include/asm/entry_no.h b/arch/m68k/include/asm/entry_no.h index c2553d26273..907ed03d792 100644 --- a/arch/m68k/include/asm/entry_no.h +++ b/arch/m68k/include/asm/entry_no.h @@ -72,8 +72,8 @@ LENOSYS = 38 lea %sp@(-32),%sp /* space for 8 regs */ moveml %d1-%d5/%a0-%a2,%sp@ movel sw_usp,%a0 /* get usp */ - movel %a0@-,%sp@(PT_PC) /* copy exception program counter */ - movel %a0@-,%sp@(PT_FORMATVEC)/* copy exception format/vector/sr */ + movel %a0@-,%sp@(PT_OFF_PC) /* copy exception program counter */ + movel %a0@-,%sp@(PT_OFF_FORMATVEC)/*copy exception format/vector/sr */ bra 7f 6: clrl %sp@- /* stkadj */ @@ -89,8 +89,8 @@ LENOSYS = 38 bnes 8f /* no, skip */ move #0x2700,%sr /* disable intrs */ movel sw_usp,%a0 /* get usp */ - movel %sp@(PT_PC),%a0@- /* copy exception program counter */ - movel %sp@(PT_FORMATVEC),%a0@-/* copy exception format/vector/sr */ + movel %sp@(PT_OFF_PC),%a0@- /* copy exception program counter */ + movel %sp@(PT_OFF_FORMATVEC),%a0@-/*copy exception format/vector/sr */ moveml %sp@,%d1-%d5/%a0-%a2 lea %sp@(32),%sp /* space for 8 regs */ movel %sp@+,%d0 diff --git a/arch/m68k/include/asm/math-emu.h b/arch/m68k/include/asm/math-emu.h index ddfab96403c..5e9249b0014 100644 --- a/arch/m68k/include/asm/math-emu.h +++ b/arch/m68k/include/asm/math-emu.h @@ -145,16 +145,16 @@ extern unsigned int fp_debugprint; * these are only used during instruction decoding * where we always know how deep we're on the stack. */ -#define FPS_DO (PT_D0) -#define FPS_D1 (PT_D1) -#define FPS_D2 (PT_D2) -#define FPS_A0 (PT_A0) -#define FPS_A1 (PT_A1) -#define FPS_A2 (PT_A2) -#define FPS_SR (PT_SR) -#define FPS_PC (PT_PC) -#define FPS_EA (PT_PC+6) -#define FPS_PC2 (PT_PC+10) +#define FPS_DO (PT_OFF_D0) +#define FPS_D1 (PT_OFF_D1) +#define FPS_D2 (PT_OFF_D2) +#define FPS_A0 (PT_OFF_A0) +#define FPS_A1 (PT_OFF_A1) +#define FPS_A2 (PT_OFF_A2) +#define FPS_SR (PT_OFF_SR) +#define FPS_PC (PT_OFF_PC) +#define FPS_EA (PT_OFF_PC+6) +#define FPS_PC2 (PT_OFF_PC+10) .macro fp_get_fp_reg lea (FPD_FPREG,FPDATA,%d0.w*4),%a0 diff --git a/arch/m68k/include/asm/thread_info_mm.h b/arch/m68k/include/asm/thread_info_mm.h index 6ea5c33b3c5..b6da3882be9 100644 --- a/arch/m68k/include/asm/thread_info_mm.h +++ b/arch/m68k/include/asm/thread_info_mm.h @@ -1,6 +1,10 @@ #ifndef _ASM_M68K_THREAD_INFO_H #define _ASM_M68K_THREAD_INFO_H +#ifndef ASM_OFFSETS_C +#include <asm/asm-offsets.h> +#endif +#include <asm/current.h> #include <asm/types.h> #include <asm/page.h> @@ -31,7 +35,12 @@ struct thread_info { #define init_thread_info (init_task.thread.info) #define init_stack (init_thread_union.stack) -#define task_thread_info(tsk) (&(tsk)->thread.info) +#ifdef ASM_OFFSETS_C +#define task_thread_info(tsk) ((struct thread_info *) NULL) +#else +#define task_thread_info(tsk) ((struct thread_info *)((char *)tsk+TASK_TINFO)) +#endif + #define task_stack_page(tsk) ((tsk)->stack) #define current_thread_info() task_thread_info(current) diff --git a/arch/m68k/kernel/asm-offsets.c b/arch/m68k/kernel/asm-offsets.c index b1f012f6c49..73e5e581245 100644 --- a/arch/m68k/kernel/asm-offsets.c +++ b/arch/m68k/kernel/asm-offsets.c @@ -8,6 +8,8 @@ * #defines from the assembly-language output. */ +#define ASM_OFFSETS_C + #include <linux/stddef.h> #include <linux/sched.h> #include <linux/kernel_stat.h> @@ -27,6 +29,9 @@ int main(void) DEFINE(TASK_INFO, offsetof(struct task_struct, thread.info)); DEFINE(TASK_MM, offsetof(struct task_struct, mm)); DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm)); +#ifdef CONFIG_MMU + DEFINE(TASK_TINFO, offsetof(struct task_struct, thread.info)); +#endif /* offsets into the thread struct */ DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp)); @@ -44,20 +49,20 @@ int main(void) DEFINE(TINFO_FLAGS, offsetof(struct thread_info, flags)); /* offsets into the pt_regs */ - DEFINE(PT_D0, offsetof(struct pt_regs, d0)); - DEFINE(PT_ORIG_D0, offsetof(struct pt_regs, orig_d0)); - DEFINE(PT_D1, offsetof(struct pt_regs, d1)); - DEFINE(PT_D2, offsetof(struct pt_regs, d2)); - DEFINE(PT_D3, offsetof(struct pt_regs, d3)); - DEFINE(PT_D4, offsetof(struct pt_regs, d4)); - DEFINE(PT_D5, offsetof(struct pt_regs, d5)); - DEFINE(PT_A0, offsetof(struct pt_regs, a0)); - DEFINE(PT_A1, offsetof(struct pt_regs, a1)); - DEFINE(PT_A2, offsetof(struct pt_regs, a2)); - DEFINE(PT_PC, offsetof(struct pt_regs, pc)); - DEFINE(PT_SR, offsetof(struct pt_regs, sr)); + DEFINE(PT_OFF_D0, offsetof(struct pt_regs, d0)); + DEFINE(PT_OFF_ORIG_D0, offsetof(struct pt_regs, orig_d0)); + DEFINE(PT_OFF_D1, offsetof(struct pt_regs, d1)); + DEFINE(PT_OFF_D2, offsetof(struct pt_regs, d2)); + DEFINE(PT_OFF_D3, offsetof(struct pt_regs, d3)); + DEFINE(PT_OFF_D4, offsetof(struct pt_regs, d4)); + DEFINE(PT_OFF_D5, offsetof(struct pt_regs, d5)); + DEFINE(PT_OFF_A0, offsetof(struct pt_regs, a0)); + DEFINE(PT_OFF_A1, offsetof(struct pt_regs, a1)); + DEFINE(PT_OFF_A2, offsetof(struct pt_regs, a2)); + DEFINE(PT_OFF_PC, offsetof(struct pt_regs, pc)); + DEFINE(PT_OFF_SR, offsetof(struct pt_regs, sr)); /* bitfields are a bit difficult */ - DEFINE(PT_VECTOR, offsetof(struct pt_regs, pc) + 4); + DEFINE(PT_OFF_FORMATVEC, offsetof(struct pt_regs, pc) + 4); /* offsets into the irq_handler struct */ DEFINE(IRQ_HANDLER, offsetof(struct irq_node, handler)); @@ -84,10 +89,10 @@ int main(void) DEFINE(FONT_DESC_PREF, offsetof(struct font_desc, pref)); /* signal defines */ - DEFINE(SIGSEGV, SIGSEGV); - DEFINE(SEGV_MAPERR, SEGV_MAPERR); - DEFINE(SIGTRAP, SIGTRAP); - DEFINE(TRAP_TRACE, TRAP_TRACE); + DEFINE(LSIGSEGV, SIGSEGV); + DEFINE(LSEGV_MAPERR, SEGV_MAPERR); + DEFINE(LSIGTRAP, SIGTRAP); + DEFINE(LTRAP_TRACE, TRAP_TRACE); /* offsets into the custom struct */ DEFINE(CUSTOMBASE, &amiga_custom); diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index c3735cd6207..922f52e7ed1 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -77,17 +77,17 @@ ENTRY(ret_from_fork) jra .Lret_from_exception do_trace_entry: - movel #-ENOSYS,%sp@(PT_D0) | needed for strace + movel #-ENOSYS,%sp@(PT_OFF_D0)| needed for strace subql #4,%sp SAVE_SWITCH_STACK jbsr syscall_trace RESTORE_SWITCH_STACK addql #4,%sp - movel %sp@(PT_ORIG_D0),%d0 + movel %sp@(PT_OFF_ORIG_D0),%d0 cmpl #NR_syscalls,%d0 jcs syscall badsys: - movel #-ENOSYS,%sp@(PT_D0) + movel #-ENOSYS,%sp@(PT_OFF_D0) jra ret_from_syscall do_trace_exit: @@ -103,7 +103,7 @@ ENTRY(ret_from_signal) addql #4,%sp /* on 68040 complete pending writebacks if any */ #ifdef CONFIG_M68040 - bfextu %sp@(PT_VECTOR){#0,#4},%d0 + bfextu %sp@(PT_OFF_FORMATVEC){#0,#4},%d0 subql #7,%d0 | bus error frame ? jbne 1f movel %sp,%sp@- @@ -127,7 +127,7 @@ ENTRY(system_call) jcc badsys syscall: jbsr @(sys_call_table,%d0:l:4)@(0) - movel %d0,%sp@(PT_D0) | save the return value + movel %d0,%sp@(PT_OFF_D0) | save the return value ret_from_syscall: |oriw #0x0700,%sr movew %curptr@(TASK_INFO+TINFO_FLAGS+2),%d0 @@ -135,7 +135,7 @@ ret_from_syscall: 1: RESTORE_ALL syscall_exit_work: - btst #5,%sp@(PT_SR) | check if returning to kernel + btst #5,%sp@(PT_OFF_SR) | check if returning to kernel bnes 1b | if so, skip resched, signals lslw #1,%d0 jcs do_trace_exit @@ -148,7 +148,7 @@ syscall_exit_work: ENTRY(ret_from_exception) .Lret_from_exception: - btst #5,%sp@(PT_SR) | check if returning to kernel + btst #5,%sp@(PT_OFF_SR) | check if returning to kernel bnes 1f | if so, skip resched, signals | only allow interrupts when we are really the last one on the | kernel stack, otherwise stack overflow can occur during @@ -182,7 +182,7 @@ do_signal_return: jbra resume_userspace do_delayed_trace: - bclr #7,%sp@(PT_SR) | clear trace bit in SR + bclr #7,%sp@(PT_OFF_SR) | clear trace bit in SR pea 1 | send SIGTRAP movel %curptr,%sp@- pea LSIGTRAP @@ -199,7 +199,7 @@ ENTRY(auto_inthandler) GET_CURRENT(%d0) addqb #1,%curptr@(TASK_INFO+TINFO_PREEMPT+1) | put exception # in d0 - bfextu %sp@(PT_VECTOR){#4,#10},%d0 + bfextu %sp@(PT_OFF_FORMATVEC){#4,#10},%d0 subw #VEC_SPUR,%d0 movel %sp,%sp@- @@ -216,7 +216,7 @@ ret_from_interrupt: ALIGN ret_from_last_interrupt: moveq #(~ALLOWINT>>8)&0xff,%d0 - andb %sp@(PT_SR),%d0 + andb %sp@(PT_OFF_SR),%d0 jne 2b /* check if we need to do software interrupts */ @@ -232,7 +232,7 @@ ENTRY(user_inthandler) GET_CURRENT(%d0) addqb #1,%curptr@(TASK_INFO+TINFO_PREEMPT+1) | put exception # in d0 - bfextu %sp@(PT_VECTOR){#4,#10},%d0 + bfextu %sp@(PT_OFF_FORMATVEC){#4,#10},%d0 user_irqvec_fixup = . + 2 subw #VEC_USER,%d0 diff --git a/arch/m68k/math-emu/fp_entry.S b/arch/m68k/math-emu/fp_entry.S index 954b4f304a7..a3fe1f348df 100644 --- a/arch/m68k/math-emu/fp_entry.S +++ b/arch/m68k/math-emu/fp_entry.S @@ -85,8 +85,8 @@ fp_err_ua2: fp_err_ua1: addq.l #4,%sp move.l %a0,-(%sp) - pea SEGV_MAPERR - pea SIGSEGV + pea LSEGV_MAPERR + pea LSIGSEGV jsr fpemu_signal add.w #12,%sp jra ret_from_exception @@ -96,8 +96,8 @@ fp_err_ua1: | it does not really belong here, but... fp_sendtrace060: move.l (FPS_PC,%sp),-(%sp) - pea TRAP_TRACE - pea SIGTRAP + pea LTRAP_TRACE + pea LSIGTRAP jsr fpemu_signal add.w #12,%sp jra ret_from_exception @@ -122,17 +122,17 @@ fp_get_data_reg: .long fp_get_d6, fp_get_d7 fp_get_d0: - move.l (PT_D0+8,%sp),%d0 + move.l (PT_OFF_D0+8,%sp),%d0 printf PREGISTER,"{d0->%08x}",1,%d0 rts fp_get_d1: - move.l (PT_D1+8,%sp),%d0 + move.l (PT_OFF_D1+8,%sp),%d0 printf PREGISTER,"{d1->%08x}",1,%d0 rts fp_get_d2: - move.l (PT_D2+8,%sp),%d0 + move.l (PT_OFF_D2+8,%sp),%d0 printf PREGISTER,"{d2->%08x}",1,%d0 rts @@ -173,35 +173,35 @@ fp_put_data_reg: fp_put_d0: printf PREGISTER,"{d0<-%08x}",1,%d0 - move.l %d0,(PT_D0+8,%sp) + move.l %d0,(PT_OFF_D0+8,%sp) rts fp_put_d1: printf PREGISTER,"{d1<-%08x}",1,%d0 - move.l %d0,(PT_D1+8,%sp) + move.l %d0,(PT_OFF_D1+8,%sp) rts fp_put_d2: printf PREGISTER,"{d2<-%08x}",1,%d0 - move.l %d0,(PT_D2+8,%sp) + move.l %d0,(PT_OFF_D2+8,%sp) rts fp_put_d3: printf PREGISTER,"{d3<-%08x}",1,%d0 | move.l %d0,%d3 - move.l %d0,(PT_D3+8,%sp) + move.l %d0,(PT_OFF_D3+8,%sp) rts fp_put_d4: printf PREGISTER,"{d4<-%08x}",1,%d0 | move.l %d0,%d4 - move.l %d0,(PT_D4+8,%sp) + move.l %d0,(PT_OFF_D4+8,%sp) rts fp_put_d5: printf PREGISTER,"{d5<-%08x}",1,%d0 | move.l %d0,%d5 - move.l %d0,(PT_D5+8,%sp) + move.l %d0,(PT_OFF_D5+8,%sp) rts fp_put_d6: @@ -225,17 +225,17 @@ fp_get_addr_reg: .long fp_get_a6, fp_get_a7 fp_get_a0: - move.l (PT_A0+8,%sp),%a0 + move.l (PT_OFF_A0+8,%sp),%a0 printf PREGISTER,"{a0->%08x}",1,%a0 rts fp_get_a1: - move.l (PT_A1+8,%sp),%a0 + move.l (PT_OFF_A1+8,%sp),%a0 printf PREGISTER,"{a1->%08x}",1,%a0 rts fp_get_a2: - move.l (PT_A2+8,%sp),%a0 + move.l (PT_OFF_A2+8,%sp),%a0 printf PREGISTER,"{a2->%08x}",1,%a0 rts @@ -276,17 +276,17 @@ fp_put_addr_reg: fp_put_a0: printf PREGISTER,"{a0<-%08x}",1,%a0 - move.l %a0,(PT_A0+8,%sp) + move.l %a0,(PT_OFF_A0+8,%sp) rts fp_put_a1: printf PREGISTER,"{a1<-%08x}",1,%a0 - move.l %a0,(PT_A1+8,%sp) + move.l %a0,(PT_OFF_A1+8,%sp) rts fp_put_a2: printf PREGISTER,"{a2<-%08x}",1,%a0 - move.l %a0,(PT_A2+8,%sp) + move.l %a0,(PT_OFF_A2+8,%sp) rts fp_put_a3: diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index f9df720d2e4..01cc1630b66 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -115,6 +115,7 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define TIF_NEED_RESCHED 2 /* rescheduling necessary */ #define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ #define TIF_SECCOMP 4 /* secure computing */ +#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */ #define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */ #define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */ #define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -139,6 +140,7 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1<<TIF_SECCOMP) +#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME) #define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK) #define _TIF_USEDFPU (1<<TIF_USEDFPU) #define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG) diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 830c5ef9932..6254041b942 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -21,6 +21,7 @@ #include <linux/compiler.h> #include <linux/syscalls.h> #include <linux/uaccess.h> +#include <linux/tracehook.h> #include <asm/abi.h> #include <asm/asm.h> @@ -700,4 +701,11 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused, /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } } diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index feb2f2e810d..a21f43bc68e 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -568,5 +568,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(__frame); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 4ce0edfbe96..ac775a76bff 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -59,6 +59,7 @@ struct thread_info { #define TIF_MEMDIE 5 #define TIF_RESTORE_SIGMASK 6 /* restore saved signal mask */ #define TIF_FREEZE 7 /* is freezing for suspend */ +#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -67,8 +68,9 @@ struct thread_info { #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_FREEZE (1 << TIF_FREEZE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) -#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | \ +#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \ _TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK) #endif /* __KERNEL__ */ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index e552e547cb9..8c4712b74dc 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -948,7 +948,7 @@ intr_check_sig: /* As above */ mfctl %cr30,%r1 LDREG TI_FLAGS(%r1),%r19 - ldi (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK), %r20 + ldi (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK|_TIF_NOTIFY_RESUME), %r20 and,COND(<>) %r19, %r20, %r0 b,n intr_restore /* skip past if we've nothing to do */ diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index f82544225e8..8eb3c63c407 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -25,6 +25,7 @@ #include <linux/stddef.h> #include <linux/compat.h> #include <linux/elf.h> +#include <linux/tracehook.h> #include <asm/ucontext.h> #include <asm/rt_sigframe.h> #include <asm/uaccess.h> @@ -645,4 +646,11 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall) if (test_thread_flag(TIF_SIGPENDING) || test_thread_flag(TIF_RESTORE_SIGMASK)) do_signal(regs, in_syscall); + + if (test_thread_flag(TIF_NOTIFY_RESUME)) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); + } } diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 528f0ff9b27..8b58bf0b7d5 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -532,7 +532,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) /* Kill the user process later */ regs->iaoq[0] = 0 | 3; regs->iaoq[1] = regs->iaoq[0] + 4; - regs->iasq[0] = regs->iasq[0] = regs->sr[7]; + regs->iasq[0] = regs->iasq[1] = regs->sr[7]; regs->gr[0] &= ~PSW_B; return; } diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index b44aaabdd1a..0c34371ec49 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -424,6 +424,29 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #endif } +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + struct dma_mapping_ops *ops = get_dma_ops(dev); + + if (ops->addr_needs_map && ops->addr_needs_map(dev, addr, size)) + return 0; + + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr + get_dma_direct_offset(dev); +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr - get_dma_direct_offset(dev); +} + #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #ifdef CONFIG_NOT_COHERENT_CACHE diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index eb17da78112..2a5da069714 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -104,8 +104,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, else pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte)); -#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) - /* Second case is 32-bit with 64-bit PTE in SMP mode. In this case, we +#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) + /* Second case is 32-bit with 64-bit PTE. In this case, we * can just store as long as we do the two halves in the right order * with a barrier in between. This is possible because we take care, * in the hash code, to pre-invalidate if the PTE was already hashed, @@ -140,7 +140,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, #else /* Anything else just stores the PTE normally. That covers all 64-bit - * cases, and 32-bit non-hash with 64-bit PTEs in UP mode + * cases, and 32-bit non-hash with 32-bit PTEs. */ *ptep = pte; #endif diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index c3b193121f8..198266cf9e2 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -54,7 +54,7 @@ * This returns the old value in the lock, so we succeeded * in getting the lock if the return value is 0. */ -static inline unsigned long __spin_trylock(raw_spinlock_t *lock) +static inline unsigned long arch_spin_trylock(raw_spinlock_t *lock) { unsigned long tmp, token; @@ -76,7 +76,7 @@ static inline unsigned long __spin_trylock(raw_spinlock_t *lock) static inline int __raw_spin_trylock(raw_spinlock_t *lock) { CLEAR_IO_SYNC; - return __spin_trylock(lock) == 0; + return arch_spin_trylock(lock) == 0; } /* @@ -108,7 +108,7 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) { CLEAR_IO_SYNC; while (1) { - if (likely(__spin_trylock(lock) == 0)) + if (likely(arch_spin_trylock(lock) == 0)) break; do { HMT_low(); @@ -126,7 +126,7 @@ void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) CLEAR_IO_SYNC; while (1) { - if (likely(__spin_trylock(lock) == 0)) + if (likely(arch_spin_trylock(lock) == 0)) break; local_save_flags(flags_dis); local_irq_restore(flags); @@ -181,7 +181,7 @@ extern void __raw_spin_unlock_wait(raw_spinlock_t *lock); * This returns the old value in the lock + 1, * so we got a read lock if the return value is > 0. */ -static inline long __read_trylock(raw_rwlock_t *rw) +static inline long arch_read_trylock(raw_rwlock_t *rw) { long tmp; @@ -205,7 +205,7 @@ static inline long __read_trylock(raw_rwlock_t *rw) * This returns the old value in the lock, * so we got the write lock if the return value is 0. */ -static inline long __write_trylock(raw_rwlock_t *rw) +static inline long arch_write_trylock(raw_rwlock_t *rw) { long tmp, token; @@ -228,7 +228,7 @@ static inline long __write_trylock(raw_rwlock_t *rw) static inline void __raw_read_lock(raw_rwlock_t *rw) { while (1) { - if (likely(__read_trylock(rw) > 0)) + if (likely(arch_read_trylock(rw) > 0)) break; do { HMT_low(); @@ -242,7 +242,7 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) static inline void __raw_write_lock(raw_rwlock_t *rw) { while (1) { - if (likely(__write_trylock(rw) == 0)) + if (likely(arch_write_trylock(rw) == 0)) break; do { HMT_low(); @@ -255,12 +255,12 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) static inline int __raw_read_trylock(raw_rwlock_t *rw) { - return __read_trylock(rw) > 0; + return arch_read_trylock(rw) > 0; } static inline int __raw_write_trylock(raw_rwlock_t *rw) { - return __write_trylock(rw) == 0; + return arch_write_trylock(rw) == 0; } static inline void __raw_read_unlock(raw_rwlock_t *rw) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index b73396b9390..9619285f64e 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 561b6465231..197b15646ee 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -67,6 +67,8 @@ int main(void) DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); + DEFINE(SIGSEGV, SIGSEGV); + DEFINE(NMI_MASK, NMI_MASK); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 68ccf11e4f1..e8a57de85bc 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -24,50 +24,12 @@ int swiotlb __read_mostly; unsigned int ppc_swiotlb_enable; -void *swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t addr) -{ - unsigned long pfn = PFN_DOWN(swiotlb_bus_to_phys(hwdev, addr)); - void *pageaddr = page_address(pfn_to_page(pfn)); - - if (pageaddr != NULL) - return pageaddr + (addr % PAGE_SIZE); - return NULL; -} - -dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr + get_dma_direct_offset(hwdev); -} - -phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) - -{ - return baddr - get_dma_direct_offset(hwdev); -} - -/* - * Determine if an address needs bounce buffering via swiotlb. - * Going forward I expect the swiotlb code to generalize on using - * a dma_ops->addr_needs_map, and this function will move from here to the - * generic swiotlb code. - */ -int -swiotlb_arch_address_needs_mapping(struct device *hwdev, dma_addr_t addr, - size_t size) -{ - struct dma_mapping_ops *dma_ops = get_dma_ops(hwdev); - - BUG_ON(!dma_ops); - return dma_ops->addr_needs_map(hwdev, addr, size); -} - /* * Determine if an address is reachable by a pci device, or if we must bounce. */ static int swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) { - u64 mask = dma_get_mask(hwdev); dma_addr_t max; struct pci_controller *hose; struct pci_dev *pdev = to_pci_dev(hwdev); @@ -79,16 +41,9 @@ swiotlb_pci_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) if ((addr + size > max) | (addr < hose->dma_window_base_cur)) return 1; - return !is_buffer_dma_capable(mask, addr, size); -} - -static int -swiotlb_addr_needs_map(struct device *hwdev, dma_addr_t addr, size_t size) -{ - return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); + return 0; } - /* * At the moment, all platforms that use this code only require * swiotlb to be used if we're operating on HIGHMEM. Since @@ -104,7 +59,6 @@ struct dma_mapping_ops swiotlb_dma_ops = { .dma_supported = swiotlb_dma_supported, .map_page = swiotlb_map_page, .unmap_page = swiotlb_unmap_page, - .addr_needs_map = swiotlb_addr_needs_map, .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eb898112e57..8ac85e08ffa 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -729,6 +729,11 @@ BEGIN_FTR_SECTION bne- do_ste_alloc /* If so handle it */ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + clrrdi r11,r1,THREAD_SHIFT + lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ + andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ + bne 77f /* then don't call hash_page now */ + /* * On iSeries, we soft-disable interrupts here, then * hard-enable interrupts so that the hash_page code can spin on @@ -833,6 +838,20 @@ handle_page_fault: bl .low_hash_fault b .ret_from_except +/* + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ +77: bl .save_nvgprs + mr r4,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + li r5,SIGSEGV + bl .bad_page_fault + b .ret_from_except + /* here we have a segment miss */ do_ste_alloc: bl .ste_allocate /* try to insert stab entry */ diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c new file mode 100644 index 00000000000..f74b62c6751 --- /dev/null +++ b/arch/powerpc/kernel/perf_callchain.c @@ -0,0 +1,527 @@ +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_counter.h> +#include <linux/percpu.h> +#include <linux/uaccess.h> +#include <linux/mm.h> +#include <asm/ptrace.h> +#include <asm/pgtable.h> +#include <asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/vdso.h> +#ifdef CONFIG_PPC64 +#include "ppc32.h" +#endif + +/* + * Store another value in a callchain_entry. + */ +static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + unsigned int nr = entry->nr; + + if (nr < PERF_MAX_STACK_DEPTH) { + entry->ip[nr] = ip; + entry->nr = nr + 1; + } +} + +/* + * Is sp valid as the address of the next kernel stack frame after prev_sp? + * The next frame may be in a different stack area but should not go + * back down in the same stack area. + */ +static int valid_next_sp(unsigned long sp, unsigned long prev_sp) +{ + if (sp & 0xf) + return 0; /* must be 16-byte aligned */ + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return 0; + if (sp >= prev_sp + STACK_FRAME_OVERHEAD) + return 1; + /* + * sp could decrease when we jump off an interrupt stack + * back to the regular process stack. + */ + if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) + return 1; + return 0; +} + +static void perf_callchain_kernel(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + unsigned long *fp; + + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_KERNEL); + callchain_store(entry, regs->nip); + + if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) + return; + + for (;;) { + fp = (unsigned long *) sp; + next_sp = fp[0]; + + if (next_sp == sp + STACK_INT_FRAME_SIZE && + fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { + /* + * This looks like an interrupt frame for an + * interrupt that occurred in the kernel + */ + regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); + next_ip = regs->nip; + lr = regs->link; + level = 0; + callchain_store(entry, PERF_CONTEXT_KERNEL); + + } else { + if (level == 0) + next_ip = lr; + else + next_ip = fp[STACK_FRAME_LR_SAVE]; + + /* + * We can't tell which of the first two addresses + * we get are valid, but we can filter out the + * obviously bogus ones here. We replace them + * with 0 rather than removing them entirely so + * that userspace can tell which is which. + */ + if ((level == 1 && next_ip == lr) || + (level <= 1 && !kernel_text_address(next_ip))) + next_ip = 0; + + ++level; + } + + callchain_store(entry, next_ip); + if (!valid_next_sp(next_sp, sp)) + return; + sp = next_sp; + } +} + +#ifdef CONFIG_PPC64 + +#ifdef CONFIG_HUGETLB_PAGE +#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize]) +#else +#define is_huge_psize(pagesize) 0 +#endif + +/* + * On 64-bit we don't want to invoke hash_page on user addresses from + * interrupt context, so if the access faults, we read the page tables + * to find which page (if any) is mapped and access it directly. + */ +static int read_user_stack_slow(void __user *ptr, void *ret, int nb) +{ + pgd_t *pgdir; + pte_t *ptep, pte; + int pagesize; + unsigned long addr = (unsigned long) ptr; + unsigned long offset; + unsigned long pfn; + void *kaddr; + + pgdir = current->mm->pgd; + if (!pgdir) + return -EFAULT; + + pagesize = get_slice_psize(current->mm, addr); + + /* align address to page boundary */ + offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); + addr -= offset; + + if (is_huge_psize(pagesize)) + ptep = huge_pte_offset(current->mm, addr); + else + ptep = find_linux_pte(pgdir, addr); + + if (ptep == NULL) + return -EFAULT; + pte = *ptep; + if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) + return -EFAULT; + pfn = pte_pfn(pte); + if (!page_is_ram(pfn)) + return -EFAULT; + + /* no highmem to worry about here */ + kaddr = pfn_to_kaddr(pfn); + memcpy(ret, kaddr + offset, nb); + return 0; +} + +static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || + ((unsigned long)ptr & 7)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 8); +} + +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + if (!__get_user_inatomic(*ret, ptr)) + return 0; + + return read_user_stack_slow(ptr, ret, 4); +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) + return 0; + return 1; +} + +/* + * 64-bit user processes use the same stack frame for RT and non-RT signals. + */ +struct signal_frame_64 { + char dummy[__SIGNAL_FRAMESIZE]; + struct ucontext uc; + unsigned long unused[2]; + unsigned int tramp[6]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + char abigap[288]; +}; + +static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) +{ + if (nip == fp + offsetof(struct signal_frame_64, tramp)) + return 1; + if (vdso64_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + return 1; + return 0; +} + +/* + * Do some sanity checking on the signal frame pointed to by sp. + * We check the pinfo and puc pointers in the frame. + */ +static int sane_signal_64_frame(unsigned long sp) +{ + struct signal_frame_64 __user *sf; + unsigned long pinfo, puc; + + sf = (struct signal_frame_64 __user *) sp; + if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || + read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) + return 0; + return pinfo == (unsigned long) &sf->info && + puc == (unsigned long) &sf->uc; +} + +static void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + struct signal_frame_64 __user *sigframe; + unsigned long __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + for (;;) { + fp = (unsigned long __user *) sp; + if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) + return; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, which can happen when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_64) && + (is_sigreturn_64_address(next_ip, sp) || + (level <= 1 && is_sigreturn_64_address(lr, sp))) && + sane_signal_64_frame(sp)) { + /* + * This looks like an signal frame + */ + sigframe = (struct signal_frame_64 __user *) sp; + uregs = sigframe->uc.uc_mcontext.gp_regs; + if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || + read_user_stack_64(&uregs[PT_LNK], &lr) || + read_user_stack_64(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +static inline int current_is_64bit(void) +{ + /* + * We can't use test_thread_flag() here because we may be on an + * interrupt stack, and the thread flags don't get copied over + * from the thread_info on the main stack to the interrupt stack. + */ + return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); +} + +#else /* CONFIG_PPC64 */ +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + return __get_user_inatomic(*ret, ptr); +} + +static inline void perf_callchain_user_64(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ +} + +static inline int current_is_64bit(void) +{ + return 0; +} + +static inline int valid_user_sp(unsigned long sp, int is_64) +{ + if (!sp || (sp & 7) || sp > TASK_SIZE - 32) + return 0; + return 1; +} + +#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE +#define sigcontext32 sigcontext +#define mcontext32 mcontext +#define ucontext32 ucontext +#define compat_siginfo_t struct siginfo + +#endif /* CONFIG_PPC64 */ + +/* + * Layout for non-RT signal frames + */ +struct signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sctx; + struct mcontext32 mctx; + int abigap[56]; +}; + +/* + * Layout for RT signal frames + */ +struct rt_signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32 + 16]; + compat_siginfo_t info; + struct ucontext32 uc; + int abigap[56]; +}; + +static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) + return 1; + if (vdso32_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_sigtramp) + return 1; + return 0; +} + +static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct rt_signal_frame_32, + uc.uc_mcontext.mc_pad)) + return 1; + if (vdso32_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + return 1; + return 0; +} + +static int sane_signal_32_frame(unsigned int sp) +{ + struct signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) + return 0; + return regs == (unsigned long) &sf->mctx; +} + +static int sane_rt_signal_32_frame(unsigned int sp) +{ + struct rt_signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) + return 0; + return regs == (unsigned long) &sf->uc.uc_mcontext; +} + +static unsigned int __user *signal_frame_32_regs(unsigned int sp, + unsigned int next_sp, unsigned int next_ip) +{ + struct mcontext32 __user *mctx = NULL; + struct signal_frame_32 __user *sf; + struct rt_signal_frame_32 __user *rt_sf; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, for example, when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_32) && + is_sigreturn_32_address(next_ip, sp) && + sane_signal_32_frame(sp)) { + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + mctx = &sf->mctx; + } + + if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && + is_rt_sigreturn_32_address(next_ip, sp) && + sane_rt_signal_32_frame(sp)) { + rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + mctx = &rt_sf->uc.uc_mcontext; + } + + if (!mctx) + return NULL; + return mctx->mc_gregs; +} + +static void perf_callchain_user_32(struct pt_regs *regs, + struct perf_callchain_entry *entry) +{ + unsigned int sp, next_sp; + unsigned int next_ip; + unsigned int lr; + long level = 0; + unsigned int __user *fp, *uregs; + + next_ip = regs->nip; + lr = regs->link; + sp = regs->gpr[1]; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + + while (entry->nr < PERF_MAX_STACK_DEPTH) { + fp = (unsigned int __user *) (unsigned long) sp; + if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) + return; + + uregs = signal_frame_32_regs(sp, next_sp, next_ip); + if (!uregs && level <= 1) + uregs = signal_frame_32_regs(sp, next_sp, lr); + if (uregs) { + /* + * This looks like an signal frame, so restart + * the stack trace with the values in it. + */ + if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || + read_user_stack_32(&uregs[PT_LNK], &lr) || + read_user_stack_32(&uregs[PT_R1], &sp)) + return; + level = 0; + callchain_store(entry, PERF_CONTEXT_USER); + callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} + +/* + * Since we can't get PMU interrupts inside a PMU interrupt handler, + * we don't need separate irq and nmi entries here. + */ +static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); + +struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + struct perf_callchain_entry *entry = &__get_cpu_var(callchain); + + entry->nr = 0; + + if (current->pid == 0) /* idle task? */ + return entry; + + if (!user_mode(regs)) { + perf_callchain_kernel(regs, entry); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + if (current_is_64bit()) + perf_callchain_user_64(regs, entry); + else + perf_callchain_user_32(regs, entry); + } + + return entry; +} diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index 388cf57ad82..018d094d92f 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -317,7 +317,7 @@ static int power7_generic_events[] = { */ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x400f0, 0xc880 }, + [C(OP_READ)] = { 0xc880, 0x400f0 }, [C(OP_WRITE)] = { 0, 0x300f0 }, [C(OP_PREFETCH)] = { 0xd8b8, 0 }, }, @@ -327,8 +327,8 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(OP_PREFETCH)] = { 0x408a, 0 }, }, [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x6080, 0x6084 }, - [C(OP_WRITE)] = { 0x6082, 0x6086 }, + [C(OP_READ)] = { 0x16080, 0x26080 }, + [C(OP_WRITE)] = { 0x16082, 0x26082 }, [C(OP_PREFETCH)] = { 0, 0 }, }, [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 5b7038f248b..a685652effe 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -92,15 +92,13 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } -void slb_flush_and_rebolt(void) +static void __slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. */ unsigned long linear_llp, vmalloc_llp, lflags, vflags; unsigned long ksp_esid_data, ksp_vsid_data; - WARN_ON(!irqs_disabled()); - linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; lflags = SLB_VSID_KERNEL | linear_llp; @@ -117,12 +115,6 @@ void slb_flush_and_rebolt(void) ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; } - /* - * We can't take a PMU exception in the following code, so hard - * disable interrupts. - */ - hard_irq_disable(); - /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. */ asm volatile("isync\n" @@ -139,6 +131,21 @@ void slb_flush_and_rebolt(void) : "memory"); } +void slb_flush_and_rebolt(void) +{ + + WARN_ON(!irqs_disabled()); + + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); + + __slb_flush_and_rebolt(); + get_paca()->slb_cache_ptr = 0; +} + void slb_vmalloc_update(void) { unsigned long vflags; @@ -180,12 +187,20 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - unsigned long offset = get_paca()->slb_cache_ptr; + unsigned long offset; unsigned long slbie_data = 0; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an SLB miss + * which would update the slb_cache/slb_cache_ptr fields in the PACA. + */ + hard_irq_disable(); + offset = get_paca()->slb_cache_ptr; if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { int i; @@ -200,7 +215,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) } asm volatile("isync" : : : "memory"); } else { - slb_flush_and_rebolt(); + __slb_flush_and_rebolt(); } /* Workaround POWER5 < DD2.1 issue */ diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 98cd1dc2ae7..ab5fb48b3e9 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) { struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; struct stab_entry *ste; - unsigned long offset = __get_cpu_var(stab_cache_ptr); + unsigned long offset; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; @@ -172,6 +172,15 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); + /* + * We need interrupts hard-disabled here, not just soft-disabled, + * so that a PMU interrupt can't occur, which might try to access + * user memory (to get a stack trace) and possible cause an STAB miss + * which would update the stab_cache/stab_cache_ptr per-cpu variables. + */ + hard_irq_disable(); + + offset = __get_cpu_var(stab_cache_ptr); if (offset <= NR_STAB_CACHE_ENTRIES) { int i; diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c index 3ee1fd37bbf..40edad52077 100644 --- a/arch/powerpc/sysdev/xilinx_intc.c +++ b/arch/powerpc/sysdev/xilinx_intc.c @@ -234,7 +234,6 @@ static void xilinx_i8259_cascade(unsigned int irq, struct irq_desc *desc) generic_handle_irq(cascade_irq); /* Let xilinx_intc end the interrupt */ - desc->chip->ack(irq); desc->chip->unmask(irq); } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 2ae5d72f47e..e030e86ff6a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -95,7 +95,6 @@ config S390 select HAVE_ARCH_TRACEHOOK select INIT_ALL_POSSIBLE select HAVE_PERF_COUNTERS - select GENERIC_ATOMIC64 if !64BIT config SCHED_OMIT_FRAME_POINTER bool @@ -481,13 +480,6 @@ config CMM_IUCV Select this option to enable the special message interface to the cooperative memory management. -config PAGE_STATES - bool "Unused page notification" - help - This enables the notification of unused pages to the - hypervisor. The ESSA instruction is used to do the states - changes between a page that has content and the unused state. - config APPLDATA_BASE bool "Linux - VM Monitor Stream, base infrastructure" depends on PROC_FS diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 0ff387cebf8..fc8fb20e7fc 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -88,8 +88,7 @@ LDFLAGS_vmlinux := -e start head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ \ - arch/s390/power/ + arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 4aba83b3159..2bc479ab3a6 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -250,8 +250,9 @@ static int des3_128_setkey(struct crypto_tfm *tfm, const u8 *key, const u8 *temp_key = key; u32 *flags = &tfm->crt_flags; - if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE))) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; + if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE)) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } for (i = 0; i < 2; i++, temp_key += DES_KEY_SIZE) { @@ -411,9 +412,9 @@ static int des3_192_setkey(struct crypto_tfm *tfm, const u8 *key, if (!(memcmp(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) && memcmp(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2], - DES_KEY_SIZE))) { - - *flags |= CRYPTO_TFM_RES_BAD_KEY_SCHED; + DES_KEY_SIZE)) && + (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) { + *flags |= CRYPTO_TFM_RES_WEAK_KEY; return -EINVAL; } for (i = 0; i < 3; i++, temp_key += DES_KEY_SIZE) { diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index e85ba348722..f6de7826c97 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -46,12 +46,38 @@ static int sha1_init(struct shash_desc *desc) return 0; } +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha1_state *octx = out; + + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buffer, sctx->buf, sizeof(octx->buffer)); + return 0; +} + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha1_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buffer, sizeof(ictx->buffer)); + sctx->func = KIMD_SHA_1; + return 0; +} + static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha1_export, + .import = sha1_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-s390", diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index f9fefc56963..61a7db37212 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -42,12 +42,38 @@ static int sha256_init(struct shash_desc *desc) return 0; } +static int sha256_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *octx = out; + + octx->count = sctx->count; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; +} + +static int sha256_import(struct shash_desc *desc, const void *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha256_state *ictx = in; + + sctx->count = ictx->count; + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_256; + return 0; +} + static struct shash_alg alg = { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha256_export, + .import = sha256_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha256_state), .base = { .cra_name = "sha256", .cra_driver_name= "sha256-s390", diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 83192bfc804..4bf73d0dc52 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -13,7 +13,10 @@ * */ #include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <linux/errno.h> #include <linux/init.h> +#include <linux/kernel.h> #include <linux/module.h> #include "sha.h" @@ -37,12 +40,42 @@ static int sha512_init(struct shash_desc *desc) return 0; } +static int sha512_export(struct shash_desc *desc, void *out) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *octx = out; + + octx->count[0] = sctx->count; + octx->count[1] = 0; + memcpy(octx->state, sctx->state, sizeof(octx->state)); + memcpy(octx->buf, sctx->buf, sizeof(octx->buf)); + return 0; +} + +static int sha512_import(struct shash_desc *desc, const void *in) +{ + struct s390_sha_ctx *sctx = shash_desc_ctx(desc); + const struct sha512_state *ictx = in; + + if (unlikely(ictx->count[1])) + return -ERANGE; + sctx->count = ictx->count[0]; + + memcpy(sctx->state, ictx->state, sizeof(ictx->state)); + memcpy(sctx->buf, ictx->buf, sizeof(ictx->buf)); + sctx->func = KIMD_SHA_512; + return 0; +} + static struct shash_alg sha512_alg = { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), .base = { .cra_name = "sha512", .cra_driver_name= "sha512-s390", @@ -78,7 +111,10 @@ static struct shash_alg sha384_alg = { .init = sha384_init, .update = s390_sha_update, .final = s390_sha_final, + .export = sha512_export, + .import = sha512_import, .descsize = sizeof(struct s390_sha_ctx), + .statesize = sizeof(struct sha512_state), .base = { .cra_name = "sha384", .cra_driver_name= "sha384-s390", diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 5a805df216b..bd9914b8948 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -355,11 +355,7 @@ static struct dentry *hypfs_create_file(struct super_block *sb, { struct dentry *dentry; struct inode *inode; - struct qstr qname; - qname.name = name; - qname.len = strlen(name); - qname.hash = full_name_hash(name, qname.len); mutex_lock(&parent->d_inode->i_mutex); dentry = lookup_one_len(name, parent, strlen(name)); if (IS_ERR(dentry)) { @@ -426,7 +422,7 @@ struct dentry *hypfs_create_u64(struct super_block *sb, struct dentry *dir, char tmp[TMP_SIZE]; struct dentry *dentry; - snprintf(tmp, TMP_SIZE, "%lld\n", (unsigned long long int)value); + snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value); buffer = kstrdup(tmp, GFP_KERNEL); if (!buffer) return ERR_PTR(-ENOMEM); diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index c7d0abfb0f0..ae7c8f9f94a 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -1,33 +1,23 @@ #ifndef __ARCH_S390_ATOMIC__ #define __ARCH_S390_ATOMIC__ -#include <linux/compiler.h> -#include <linux/types.h> - /* - * include/asm-s390/atomic.h + * Copyright 1999,2009 IBM Corp. + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Denis Joseph Barrow, + * Arnd Bergmann <arndb@de.ibm.com>, * - * S390 version - * Copyright (C) 1999-2005 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * Denis Joseph Barrow, - * Arnd Bergmann (arndb@de.ibm.com) - * - * Derived from "include/asm-i386/bitops.h" - * Copyright (C) 1992, Linus Torvalds + * Atomic operations that C can't guarantee us. + * Useful for resource counting etc. + * s390 uses 'Compare And Swap' for atomicity in SMP enviroment. * */ -/* - * Atomic operations that C can't guarantee us. Useful for - * resource counting etc.. - * S390 uses 'Compare And Swap' for atomicity in SMP enviroment - */ +#include <linux/compiler.h> +#include <linux/types.h> #define ATOMIC_INIT(i) { (i) } -#ifdef __KERNEL__ - #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) #define __CS_LOOP(ptr, op_val, op_string) ({ \ @@ -77,7 +67,7 @@ static inline void atomic_set(atomic_t *v, int i) barrier(); } -static __inline__ int atomic_add_return(int i, atomic_t * v) +static inline int atomic_add_return(int i, atomic_t *v) { return __CS_LOOP(v, i, "ar"); } @@ -87,7 +77,7 @@ static __inline__ int atomic_add_return(int i, atomic_t * v) #define atomic_inc_return(_v) atomic_add_return(1, _v) #define atomic_inc_and_test(_v) (atomic_add_return(1, _v) == 0) -static __inline__ int atomic_sub_return(int i, atomic_t * v) +static inline int atomic_sub_return(int i, atomic_t *v) { return __CS_LOOP(v, i, "sr"); } @@ -97,19 +87,19 @@ static __inline__ int atomic_sub_return(int i, atomic_t * v) #define atomic_dec_return(_v) atomic_sub_return(1, _v) #define atomic_dec_and_test(_v) (atomic_sub_return(1, _v) == 0) -static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t * v) +static inline void atomic_clear_mask(unsigned long mask, atomic_t *v) { - __CS_LOOP(v, ~mask, "nr"); + __CS_LOOP(v, ~mask, "nr"); } -static __inline__ void atomic_set_mask(unsigned long mask, atomic_t * v) +static inline void atomic_set_mask(unsigned long mask, atomic_t *v) { - __CS_LOOP(v, mask, "or"); + __CS_LOOP(v, mask, "or"); } #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) -static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new) +static inline int atomic_cmpxchg(atomic_t *v, int old, int new) { #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) asm volatile( @@ -127,7 +117,7 @@ static __inline__ int atomic_cmpxchg(atomic_t *v, int old, int new) return old; } -static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) +static inline int atomic_add_unless(atomic_t *v, int a, int u) { int c, old; c = atomic_read(v); @@ -146,9 +136,10 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) #undef __CS_LOOP -#ifdef __s390x__ #define ATOMIC64_INIT(i) { (i) } +#ifdef CONFIG_64BIT + #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) #define __CSG_LOOP(ptr, op_val, op_string) ({ \ @@ -162,7 +153,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) : "=&d" (old_val), "=&d" (new_val), \ "=Q" (((atomic_t *)(ptr))->counter) \ : "d" (op_val), "Q" (((atomic_t *)(ptr))->counter) \ - : "cc", "memory" ); \ + : "cc", "memory"); \ new_val; \ }) @@ -180,7 +171,7 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) "=m" (((atomic_t *)(ptr))->counter) \ : "a" (ptr), "d" (op_val), \ "m" (((atomic_t *)(ptr))->counter) \ - : "cc", "memory" ); \ + : "cc", "memory"); \ new_val; \ }) @@ -198,39 +189,29 @@ static inline void atomic64_set(atomic64_t *v, long long i) barrier(); } -static __inline__ long long atomic64_add_return(long long i, atomic64_t * v) +static inline long long atomic64_add_return(long long i, atomic64_t *v) { return __CSG_LOOP(v, i, "agr"); } -#define atomic64_add(_i, _v) atomic64_add_return(_i, _v) -#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0) -#define atomic64_inc(_v) atomic64_add_return(1, _v) -#define atomic64_inc_return(_v) atomic64_add_return(1, _v) -#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) -static __inline__ long long atomic64_sub_return(long long i, atomic64_t * v) +static inline long long atomic64_sub_return(long long i, atomic64_t *v) { return __CSG_LOOP(v, i, "sgr"); } -#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v) -#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) -#define atomic64_dec(_v) atomic64_sub_return(1, _v) -#define atomic64_dec_return(_v) atomic64_sub_return(1, _v) -#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) -static __inline__ void atomic64_clear_mask(unsigned long mask, atomic64_t * v) +static inline void atomic64_clear_mask(unsigned long mask, atomic64_t *v) { - __CSG_LOOP(v, ~mask, "ngr"); + __CSG_LOOP(v, ~mask, "ngr"); } -static __inline__ void atomic64_set_mask(unsigned long mask, atomic64_t * v) +static inline void atomic64_set_mask(unsigned long mask, atomic64_t *v) { - __CSG_LOOP(v, mask, "ogr"); + __CSG_LOOP(v, mask, "ogr"); } #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) -static __inline__ long long atomic64_cmpxchg(atomic64_t *v, +static inline long long atomic64_cmpxchg(atomic64_t *v, long long old, long long new) { #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) @@ -249,8 +230,112 @@ static __inline__ long long atomic64_cmpxchg(atomic64_t *v, return old; } -static __inline__ int atomic64_add_unless(atomic64_t *v, - long long a, long long u) +#undef __CSG_LOOP + +#else /* CONFIG_64BIT */ + +typedef struct { + long long counter; +} atomic64_t; + +static inline long long atomic64_read(const atomic64_t *v) +{ + register_pair rp; + + asm volatile( + " lm %0,%N0,0(%1)" + : "=&d" (rp) + : "a" (&v->counter), "m" (v->counter) + ); + return rp.pair; +} + +static inline void atomic64_set(atomic64_t *v, long long i) +{ + register_pair rp = {.pair = i}; + + asm volatile( + " stm %1,%N1,0(%2)" + : "=m" (v->counter) + : "d" (rp), "a" (&v->counter) + ); +} + +static inline long long atomic64_xchg(atomic64_t *v, long long new) +{ + register_pair rp_new = {.pair = new}; + register_pair rp_old; + + asm volatile( + " lm %0,%N0,0(%2)\n" + "0: cds %0,%3,0(%2)\n" + " jl 0b\n" + : "=&d" (rp_old), "+m" (v->counter) + : "a" (&v->counter), "d" (rp_new) + : "cc"); + return rp_old.pair; +} + +static inline long long atomic64_cmpxchg(atomic64_t *v, + long long old, long long new) +{ + register_pair rp_old = {.pair = old}; + register_pair rp_new = {.pair = new}; + + asm volatile( + " cds %0,%3,0(%2)" + : "+&d" (rp_old), "+m" (v->counter) + : "a" (&v->counter), "d" (rp_new) + : "cc"); + return rp_old.pair; +} + + +static inline long long atomic64_add_return(long long i, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old + i; + } while (atomic64_cmpxchg(v, old, new) != old); + return new; +} + +static inline long long atomic64_sub_return(long long i, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old - i; + } while (atomic64_cmpxchg(v, old, new) != old); + return new; +} + +static inline void atomic64_set_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old | mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +static inline void atomic64_clear_mask(unsigned long long mask, atomic64_t *v) +{ + long long old, new; + + do { + old = atomic64_read(v); + new = old & mask; + } while (atomic64_cmpxchg(v, old, new) != old); +} + +#endif /* CONFIG_64BIT */ + +static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) { long long c, old; c = atomic64_read(v); @@ -265,15 +350,17 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, return c != u; } -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) - -#undef __CSG_LOOP - -#else /* __s390x__ */ - -#include <asm-generic/atomic64.h> - -#endif /* __s390x__ */ +#define atomic64_add(_i, _v) atomic64_add_return(_i, _v) +#define atomic64_add_negative(_i, _v) (atomic64_add_return(_i, _v) < 0) +#define atomic64_inc(_v) atomic64_add_return(1, _v) +#define atomic64_inc_return(_v) atomic64_add_return(1, _v) +#define atomic64_inc_and_test(_v) (atomic64_add_return(1, _v) == 0) +#define atomic64_sub(_i, _v) atomic64_sub_return(_i, _v) +#define atomic64_sub_and_test(_i, _v) (atomic64_sub_return(_i, _v) == 0) +#define atomic64_dec(_v) atomic64_sub_return(1, _v) +#define atomic64_dec_return(_v) atomic64_sub_return(1, _v) +#define atomic64_dec_and_test(_v) (atomic64_sub_return(1, _v) == 0) +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #define smp_mb__before_atomic_dec() smp_mb() #define smp_mb__after_atomic_dec() smp_mb() @@ -281,5 +368,5 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, #define smp_mb__after_atomic_inc() smp_mb() #include <asm-generic/atomic-long.h> -#endif /* __KERNEL__ */ + #endif /* __ARCH_S390_ATOMIC__ */ diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index d5a8e7c1477..6c00f6800a3 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -78,28 +78,11 @@ csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) */ static inline __sum16 csum_fold(__wsum sum) { -#ifndef __s390x__ - register_pair rp; + u32 csum = (__force u32) sum; - asm volatile( - " slr %N1,%N1\n" /* %0 = H L */ - " lr %1,%0\n" /* %0 = H L, %1 = H L 0 0 */ - " srdl %1,16\n" /* %0 = H L, %1 = 0 H L 0 */ - " alr %1,%N1\n" /* %0 = H L, %1 = L H L 0 */ - " alr %0,%1\n" /* %0 = H+L+C L+H */ - " srl %0,16\n" /* %0 = H+L+C */ - : "+&d" (sum), "=d" (rp) : : "cc"); -#else /* __s390x__ */ - asm volatile( - " sr 3,3\n" /* %0 = H*65536 + L */ - " lr 2,%0\n" /* %0 = H L, 2/3 = H L / 0 0 */ - " srdl 2,16\n" /* %0 = H L, 2/3 = 0 H / L 0 */ - " alr 2,3\n" /* %0 = H L, 2/3 = L H / L 0 */ - " alr %0,2\n" /* %0 = H+L+C L+H */ - " srl %0,16\n" /* %0 = H+L+C */ - : "+&d" (sum) : : "cc", "2", "3"); -#endif /* __s390x__ */ - return (__force __sum16) ~sum; + csum += (csum >> 16) + (csum << 16); + csum >>= 16; + return (__force __sum16) ~csum; } /* diff --git a/arch/s390/include/asm/chsc.h b/arch/s390/include/asm/chsc.h index 807997f7414..4943654ed7f 100644 --- a/arch/s390/include/asm/chsc.h +++ b/arch/s390/include/asm/chsc.h @@ -125,4 +125,32 @@ struct chsc_cpd_info { #define CHSC_INFO_CPD _IOWR(CHSC_IOCTL_MAGIC, 0x87, struct chsc_cpd_info) #define CHSC_INFO_DCAL _IOWR(CHSC_IOCTL_MAGIC, 0x88, struct chsc_dcal) +#ifdef __KERNEL__ + +struct css_general_char { + u64 : 12; + u32 dynio : 1; /* bit 12 */ + u32 : 28; + u32 aif : 1; /* bit 41 */ + u32 : 3; + u32 mcss : 1; /* bit 45 */ + u32 fcs : 1; /* bit 46 */ + u32 : 1; + u32 ext_mb : 1; /* bit 48 */ + u32 : 7; + u32 aif_tdd : 1; /* bit 56 */ + u32 : 1; + u32 qebsm : 1; /* bit 58 */ + u32 : 8; + u32 aif_osa : 1; /* bit 67 */ + u32 : 14; + u32 cib : 1; /* bit 82 */ + u32 : 5; + u32 fcx : 1; /* bit 88 */ + u32 : 7; +}__attribute__((packed)); + +extern struct css_general_char css_general_characteristics; + +#endif /* __KERNEL__ */ #endif diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 619bf94b11f..e85679af54d 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -15,228 +15,7 @@ #define LPM_ANYPATH 0xff #define __MAX_CSSID 0 -/** - * struct cmd_scsw - command-mode subchannel status word - * @key: subchannel key - * @sctl: suspend control - * @eswf: esw format - * @cc: deferred condition code - * @fmt: format - * @pfch: prefetch - * @isic: initial-status interruption control - * @alcc: address-limit checking control - * @ssi: suppress-suspended interruption - * @zcc: zero condition code - * @ectl: extended control - * @pno: path not operational - * @res: reserved - * @fctl: function control - * @actl: activity control - * @stctl: status control - * @cpa: channel program address - * @dstat: device status - * @cstat: subchannel status - * @count: residual count - */ -struct cmd_scsw { - __u32 key : 4; - __u32 sctl : 1; - __u32 eswf : 1; - __u32 cc : 2; - __u32 fmt : 1; - __u32 pfch : 1; - __u32 isic : 1; - __u32 alcc : 1; - __u32 ssi : 1; - __u32 zcc : 1; - __u32 ectl : 1; - __u32 pno : 1; - __u32 res : 1; - __u32 fctl : 3; - __u32 actl : 7; - __u32 stctl : 5; - __u32 cpa; - __u32 dstat : 8; - __u32 cstat : 8; - __u32 count : 16; -} __attribute__ ((packed)); - -/** - * struct tm_scsw - transport-mode subchannel status word - * @key: subchannel key - * @eswf: esw format - * @cc: deferred condition code - * @fmt: format - * @x: IRB-format control - * @q: interrogate-complete - * @ectl: extended control - * @pno: path not operational - * @fctl: function control - * @actl: activity control - * @stctl: status control - * @tcw: TCW address - * @dstat: device status - * @cstat: subchannel status - * @fcxs: FCX status - * @schxs: subchannel-extended status - */ -struct tm_scsw { - u32 key:4; - u32 :1; - u32 eswf:1; - u32 cc:2; - u32 fmt:3; - u32 x:1; - u32 q:1; - u32 :1; - u32 ectl:1; - u32 pno:1; - u32 :1; - u32 fctl:3; - u32 actl:7; - u32 stctl:5; - u32 tcw; - u32 dstat:8; - u32 cstat:8; - u32 fcxs:8; - u32 schxs:8; -} __attribute__ ((packed)); - -/** - * union scsw - subchannel status word - * @cmd: command-mode SCSW - * @tm: transport-mode SCSW - */ -union scsw { - struct cmd_scsw cmd; - struct tm_scsw tm; -} __attribute__ ((packed)); - -int scsw_is_tm(union scsw *scsw); -u32 scsw_key(union scsw *scsw); -u32 scsw_eswf(union scsw *scsw); -u32 scsw_cc(union scsw *scsw); -u32 scsw_ectl(union scsw *scsw); -u32 scsw_pno(union scsw *scsw); -u32 scsw_fctl(union scsw *scsw); -u32 scsw_actl(union scsw *scsw); -u32 scsw_stctl(union scsw *scsw); -u32 scsw_dstat(union scsw *scsw); -u32 scsw_cstat(union scsw *scsw); -int scsw_is_solicited(union scsw *scsw); -int scsw_is_valid_key(union scsw *scsw); -int scsw_is_valid_eswf(union scsw *scsw); -int scsw_is_valid_cc(union scsw *scsw); -int scsw_is_valid_ectl(union scsw *scsw); -int scsw_is_valid_pno(union scsw *scsw); -int scsw_is_valid_fctl(union scsw *scsw); -int scsw_is_valid_actl(union scsw *scsw); -int scsw_is_valid_stctl(union scsw *scsw); -int scsw_is_valid_dstat(union scsw *scsw); -int scsw_is_valid_cstat(union scsw *scsw); -int scsw_cmd_is_valid_key(union scsw *scsw); -int scsw_cmd_is_valid_sctl(union scsw *scsw); -int scsw_cmd_is_valid_eswf(union scsw *scsw); -int scsw_cmd_is_valid_cc(union scsw *scsw); -int scsw_cmd_is_valid_fmt(union scsw *scsw); -int scsw_cmd_is_valid_pfch(union scsw *scsw); -int scsw_cmd_is_valid_isic(union scsw *scsw); -int scsw_cmd_is_valid_alcc(union scsw *scsw); -int scsw_cmd_is_valid_ssi(union scsw *scsw); -int scsw_cmd_is_valid_zcc(union scsw *scsw); -int scsw_cmd_is_valid_ectl(union scsw *scsw); -int scsw_cmd_is_valid_pno(union scsw *scsw); -int scsw_cmd_is_valid_fctl(union scsw *scsw); -int scsw_cmd_is_valid_actl(union scsw *scsw); -int scsw_cmd_is_valid_stctl(union scsw *scsw); -int scsw_cmd_is_valid_dstat(union scsw *scsw); -int scsw_cmd_is_valid_cstat(union scsw *scsw); -int scsw_cmd_is_solicited(union scsw *scsw); -int scsw_tm_is_valid_key(union scsw *scsw); -int scsw_tm_is_valid_eswf(union scsw *scsw); -int scsw_tm_is_valid_cc(union scsw *scsw); -int scsw_tm_is_valid_fmt(union scsw *scsw); -int scsw_tm_is_valid_x(union scsw *scsw); -int scsw_tm_is_valid_q(union scsw *scsw); -int scsw_tm_is_valid_ectl(union scsw *scsw); -int scsw_tm_is_valid_pno(union scsw *scsw); -int scsw_tm_is_valid_fctl(union scsw *scsw); -int scsw_tm_is_valid_actl(union scsw *scsw); -int scsw_tm_is_valid_stctl(union scsw *scsw); -int scsw_tm_is_valid_dstat(union scsw *scsw); -int scsw_tm_is_valid_cstat(union scsw *scsw); -int scsw_tm_is_valid_fcxs(union scsw *scsw); -int scsw_tm_is_valid_schxs(union scsw *scsw); -int scsw_tm_is_solicited(union scsw *scsw); - -#define SCSW_FCTL_CLEAR_FUNC 0x1 -#define SCSW_FCTL_HALT_FUNC 0x2 -#define SCSW_FCTL_START_FUNC 0x4 - -#define SCSW_ACTL_SUSPENDED 0x1 -#define SCSW_ACTL_DEVACT 0x2 -#define SCSW_ACTL_SCHACT 0x4 -#define SCSW_ACTL_CLEAR_PEND 0x8 -#define SCSW_ACTL_HALT_PEND 0x10 -#define SCSW_ACTL_START_PEND 0x20 -#define SCSW_ACTL_RESUME_PEND 0x40 - -#define SCSW_STCTL_STATUS_PEND 0x1 -#define SCSW_STCTL_SEC_STATUS 0x2 -#define SCSW_STCTL_PRIM_STATUS 0x4 -#define SCSW_STCTL_INTER_STATUS 0x8 -#define SCSW_STCTL_ALERT_STATUS 0x10 - -#define DEV_STAT_ATTENTION 0x80 -#define DEV_STAT_STAT_MOD 0x40 -#define DEV_STAT_CU_END 0x20 -#define DEV_STAT_BUSY 0x10 -#define DEV_STAT_CHN_END 0x08 -#define DEV_STAT_DEV_END 0x04 -#define DEV_STAT_UNIT_CHECK 0x02 -#define DEV_STAT_UNIT_EXCEP 0x01 - -#define SCHN_STAT_PCI 0x80 -#define SCHN_STAT_INCORR_LEN 0x40 -#define SCHN_STAT_PROG_CHECK 0x20 -#define SCHN_STAT_PROT_CHECK 0x10 -#define SCHN_STAT_CHN_DATA_CHK 0x08 -#define SCHN_STAT_CHN_CTRL_CHK 0x04 -#define SCHN_STAT_INTF_CTRL_CHK 0x02 -#define SCHN_STAT_CHAIN_CHECK 0x01 - -/* - * architectured values for first sense byte - */ -#define SNS0_CMD_REJECT 0x80 -#define SNS_CMD_REJECT SNS0_CMD_REJEC -#define SNS0_INTERVENTION_REQ 0x40 -#define SNS0_BUS_OUT_CHECK 0x20 -#define SNS0_EQUIPMENT_CHECK 0x10 -#define SNS0_DATA_CHECK 0x08 -#define SNS0_OVERRUN 0x04 -#define SNS0_INCOMPL_DOMAIN 0x01 - -/* - * architectured values for second sense byte - */ -#define SNS1_PERM_ERR 0x80 -#define SNS1_INV_TRACK_FORMAT 0x40 -#define SNS1_EOC 0x20 -#define SNS1_MESSAGE_TO_OPER 0x10 -#define SNS1_NO_REC_FOUND 0x08 -#define SNS1_FILE_PROTECTED 0x04 -#define SNS1_WRITE_INHIBITED 0x02 -#define SNS1_INPRECISE_END 0x01 - -/* - * architectured values for third sense byte - */ -#define SNS2_REQ_INH_WRITE 0x80 -#define SNS2_CORRECTABLE 0x40 -#define SNS2_FIRST_LOG_ERR 0x20 -#define SNS2_ENV_DATA_PRESENT 0x10 -#define SNS2_INPRECISE_END 0x04 +#include <asm/scsw.h> /** * struct ccw1 - channel command word diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h new file mode 100644 index 00000000000..471234b9057 --- /dev/null +++ b/arch/s390/include/asm/cpu.h @@ -0,0 +1,26 @@ +/* + * Copyright IBM Corp. 2000,2009 + * Author(s): Hartmut Penner <hp@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Christian Ehrhardt <ehrhardt@de.ibm.com>, + */ + +#ifndef _ASM_S390_CPU_H +#define _ASM_S390_CPU_H + +#define MAX_CPU_ADDRESS 255 + +#ifndef __ASSEMBLY__ + +#include <linux/types.h> + +struct cpuid +{ + unsigned int version : 8; + unsigned int ident : 24; + unsigned int machine : 16; + unsigned int unused : 16; +} __packed; + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_S390_CPU_H */ diff --git a/arch/s390/include/asm/cpuid.h b/arch/s390/include/asm/cpuid.h deleted file mode 100644 index 07836a2e522..00000000000 --- a/arch/s390/include/asm/cpuid.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright IBM Corp. 2000,2009 - * Author(s): Hartmut Penner <hp@de.ibm.com>, - * Martin Schwidefsky <schwidefsky@de.ibm.com> - * Christian Ehrhardt <ehrhardt@de.ibm.com> - */ - -#ifndef _ASM_S390_CPUID_H_ -#define _ASM_S390_CPUID_H_ - -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. [mj] - */ - -typedef struct -{ - unsigned int version : 8; - unsigned int ident : 24; - unsigned int machine : 16; - unsigned int unused : 16; -} __attribute__ ((packed)) cpuid_t; - -#endif /* _ASM_S390_CPUID_H_ */ diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 31ed5686a96..18124b75a7a 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -167,6 +167,10 @@ debug_text_event(debug_info_t* id, int level, const char* txt) return debug_event_common(id,level,txt,strlen(txt)); } +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ extern debug_entry_t * debug_sprintf_event(debug_info_t* id,int level,char *string,...) __attribute__ ((format(printf, 3, 4))); @@ -206,7 +210,10 @@ debug_text_exception(debug_info_t* id, int level, const char* txt) return debug_exception_common(id,level,txt,strlen(txt)); } - +/* + * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are + * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! + */ extern debug_entry_t * debug_sprintf_exception(debug_info_t* id,int level,char *string,...) __attribute__ ((format(printf, 3, 4))); diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index 89ec7056da2..498bc389238 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -18,13 +18,6 @@ #include <linux/interrupt.h> #include <asm/lowcore.h> -/* irq_cpustat_t is unused currently, but could be converted - * into a percpu variable instead of storing softirq_pending - * on the lowcore */ -typedef struct { - unsigned int __softirq_pending; -} irq_cpustat_t; - #define local_softirq_pending() (S390_lowcore.softirq_pending) #define __ARCH_IRQ_STAT diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index 1171e6d144a..5e95d95450b 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -57,6 +57,8 @@ struct ipl_block_fcp { } __attribute__((packed)); #define DIAG308_VMPARM_SIZE 64 +#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \ + offsetof(struct ipl_block_fcp, scp_data))) struct ipl_block_ccw { u8 load_parm[8]; @@ -91,7 +93,8 @@ extern void do_halt(void); extern void do_poff(void); extern void ipl_save_parameters(void); extern void ipl_update_parameters(void); -extern void get_ipl_vmparm(char *); +extern size_t append_ipl_vmparm(char *, size_t); +extern size_t append_ipl_scpdata(char *, size_t); enum { IPL_DEVNO_VALID = 1, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 1cd02f6073a..698988f6940 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -17,7 +17,7 @@ #include <linux/interrupt.h> #include <linux/kvm_host.h> #include <asm/debug.h> -#include <asm/cpuid.h> +#include <asm/cpu.h> #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 @@ -217,8 +217,8 @@ struct kvm_vcpu_arch { struct hrtimer ckc_timer; struct tasklet_struct tasklet; union { - cpuid_t cpu_id; - u64 stidp_data; + struct cpuid cpu_id; + u64 stidp_data; }; }; diff --git a/arch/s390/include/asm/kvm_virtio.h b/arch/s390/include/asm/kvm_virtio.h index 0503936f101..acdfdff2661 100644 --- a/arch/s390/include/asm/kvm_virtio.h +++ b/arch/s390/include/asm/kvm_virtio.h @@ -54,14 +54,4 @@ struct kvm_vqconfig { * This is pagesize for historical reasons. */ #define KVM_S390_VIRTIO_RING_ALIGN 4096 -#ifdef __KERNEL__ -/* early virtio console setup */ -#ifdef CONFIG_S390_GUEST -extern void s390_virtio_console_init(void); -#else -static inline void s390_virtio_console_init(void) -{ -} -#endif /* CONFIG_VIRTIO_CONSOLE */ -#endif /* __KERNEL__ */ #endif diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 5046ad6b7a6..6bc9426a6fb 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -132,7 +132,7 @@ #ifndef __ASSEMBLY__ -#include <asm/cpuid.h> +#include <asm/cpu.h> #include <asm/ptrace.h> #include <linux/types.h> @@ -275,7 +275,7 @@ struct _lowcore __u32 user_exec_asce; /* 0x02ac */ /* SMP info area */ - cpuid_t cpu_id; /* 0x02b0 */ + struct cpuid cpu_id; /* 0x02b0 */ __u32 cpu_nr; /* 0x02b8 */ __u32 softirq_pending; /* 0x02bc */ __u32 percpu_offset; /* 0x02c0 */ @@ -380,7 +380,7 @@ struct _lowcore __u64 user_exec_asce; /* 0x0318 */ /* SMP info area */ - cpuid_t cpu_id; /* 0x0320 */ + struct cpuid cpu_id; /* 0x0320 */ __u32 cpu_nr; /* 0x0328 */ __u32 softirq_pending; /* 0x032c */ __u64 percpu_offset; /* 0x0330 */ diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 3b59216e628..03be99919d6 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -2,6 +2,7 @@ #define __MMU_H typedef struct { + spinlock_t list_lock; struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 3e3594d01f8..5e9daf5d7f2 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -125,8 +125,6 @@ page_get_storage_key(unsigned long addr) return skey; } -#ifdef CONFIG_PAGE_STATES - struct page; void arch_free_page(struct page *page, int order); void arch_alloc_page(struct page *page, int order); @@ -134,8 +132,6 @@ void arch_alloc_page(struct page *page, int order); #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -#endif - #endif /* !__ASSEMBLY__ */ #define __PAGE_OFFSET 0x0UL diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index b2658b9220f..ddad5903341 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -140,6 +140,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { + spin_lock_init(&mm->context.list_lock); INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); return (pgd_t *) crst_table_alloc(mm, s390_noexec); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c139fa7b8e8..cf8eed3fa77 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,7 +14,7 @@ #define __ASM_S390_PROCESSOR_H #include <linux/linkage.h> -#include <asm/cpuid.h> +#include <asm/cpu.h> #include <asm/page.h> #include <asm/ptrace.h> #include <asm/setup.h> @@ -26,7 +26,7 @@ */ #define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; }) -static inline void get_cpu_id(cpuid_t *ptr) +static inline void get_cpu_id(struct cpuid *ptr) { asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr)); } diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h index 29ec8e28c8d..35d786fe93a 100644 --- a/arch/s390/include/asm/scatterlist.h +++ b/arch/s390/include/asm/scatterlist.h @@ -1,19 +1 @@ -#ifndef _ASMS390_SCATTERLIST_H -#define _ASMS390_SCATTERLIST_H - -struct scatterlist { -#ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; -#endif - unsigned long page_link; - unsigned int offset; - unsigned int length; -}; - -#ifdef __s390x__ -#define ISA_DMA_THRESHOLD (0xffffffffffffffffUL) -#else -#define ISA_DMA_THRESHOLD (0xffffffffUL) -#endif - -#endif /* _ASMS390X_SCATTERLIST_H */ +#include <asm-generic/scatterlist.h> diff --git a/arch/s390/include/asm/scsw.h b/arch/s390/include/asm/scsw.h new file mode 100644 index 00000000000..de389cb54d2 --- /dev/null +++ b/arch/s390/include/asm/scsw.h @@ -0,0 +1,956 @@ +/* + * Helper functions for scsw access. + * + * Copyright IBM Corp. 2008,2009 + * Author(s): Peter Oberparleiter <peter.oberparleiter@de.ibm.com> + */ + +#ifndef _ASM_S390_SCSW_H_ +#define _ASM_S390_SCSW_H_ + +#include <linux/types.h> +#include <asm/chsc.h> +#include <asm/cio.h> + +/** + * struct cmd_scsw - command-mode subchannel status word + * @key: subchannel key + * @sctl: suspend control + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @pfch: prefetch + * @isic: initial-status interruption control + * @alcc: address-limit checking control + * @ssi: suppress-suspended interruption + * @zcc: zero condition code + * @ectl: extended control + * @pno: path not operational + * @res: reserved + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @cpa: channel program address + * @dstat: device status + * @cstat: subchannel status + * @count: residual count + */ +struct cmd_scsw { + __u32 key : 4; + __u32 sctl : 1; + __u32 eswf : 1; + __u32 cc : 2; + __u32 fmt : 1; + __u32 pfch : 1; + __u32 isic : 1; + __u32 alcc : 1; + __u32 ssi : 1; + __u32 zcc : 1; + __u32 ectl : 1; + __u32 pno : 1; + __u32 res : 1; + __u32 fctl : 3; + __u32 actl : 7; + __u32 stctl : 5; + __u32 cpa; + __u32 dstat : 8; + __u32 cstat : 8; + __u32 count : 16; +} __attribute__ ((packed)); + +/** + * struct tm_scsw - transport-mode subchannel status word + * @key: subchannel key + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @x: IRB-format control + * @q: interrogate-complete + * @ectl: extended control + * @pno: path not operational + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @tcw: TCW address + * @dstat: device status + * @cstat: subchannel status + * @fcxs: FCX status + * @schxs: subchannel-extended status + */ +struct tm_scsw { + u32 key:4; + u32 :1; + u32 eswf:1; + u32 cc:2; + u32 fmt:3; + u32 x:1; + u32 q:1; + u32 :1; + u32 ectl:1; + u32 pno:1; + u32 :1; + u32 fctl:3; + u32 actl:7; + u32 stctl:5; + u32 tcw; + u32 dstat:8; + u32 cstat:8; + u32 fcxs:8; + u32 schxs:8; +} __attribute__ ((packed)); + +/** + * union scsw - subchannel status word + * @cmd: command-mode SCSW + * @tm: transport-mode SCSW + */ +union scsw { + struct cmd_scsw cmd; + struct tm_scsw tm; +} __attribute__ ((packed)); + +#define SCSW_FCTL_CLEAR_FUNC 0x1 +#define SCSW_FCTL_HALT_FUNC 0x2 +#define SCSW_FCTL_START_FUNC 0x4 + +#define SCSW_ACTL_SUSPENDED 0x1 +#define SCSW_ACTL_DEVACT 0x2 +#define SCSW_ACTL_SCHACT 0x4 +#define SCSW_ACTL_CLEAR_PEND 0x8 +#define SCSW_ACTL_HALT_PEND 0x10 +#define SCSW_ACTL_START_PEND 0x20 +#define SCSW_ACTL_RESUME_PEND 0x40 + +#define SCSW_STCTL_STATUS_PEND 0x1 +#define SCSW_STCTL_SEC_STATUS 0x2 +#define SCSW_STCTL_PRIM_STATUS 0x4 +#define SCSW_STCTL_INTER_STATUS 0x8 +#define SCSW_STCTL_ALERT_STATUS 0x10 + +#define DEV_STAT_ATTENTION 0x80 +#define DEV_STAT_STAT_MOD 0x40 +#define DEV_STAT_CU_END 0x20 +#define DEV_STAT_BUSY 0x10 +#define DEV_STAT_CHN_END 0x08 +#define DEV_STAT_DEV_END 0x04 +#define DEV_STAT_UNIT_CHECK 0x02 +#define DEV_STAT_UNIT_EXCEP 0x01 + +#define SCHN_STAT_PCI 0x80 +#define SCHN_STAT_INCORR_LEN 0x40 +#define SCHN_STAT_PROG_CHECK 0x20 +#define SCHN_STAT_PROT_CHECK 0x10 +#define SCHN_STAT_CHN_DATA_CHK 0x08 +#define SCHN_STAT_CHN_CTRL_CHK 0x04 +#define SCHN_STAT_INTF_CTRL_CHK 0x02 +#define SCHN_STAT_CHAIN_CHECK 0x01 + +/* + * architectured values for first sense byte + */ +#define SNS0_CMD_REJECT 0x80 +#define SNS_CMD_REJECT SNS0_CMD_REJEC +#define SNS0_INTERVENTION_REQ 0x40 +#define SNS0_BUS_OUT_CHECK 0x20 +#define SNS0_EQUIPMENT_CHECK 0x10 +#define SNS0_DATA_CHECK 0x08 +#define SNS0_OVERRUN 0x04 +#define SNS0_INCOMPL_DOMAIN 0x01 + +/* + * architectured values for second sense byte + */ +#define SNS1_PERM_ERR 0x80 +#define SNS1_INV_TRACK_FORMAT 0x40 +#define SNS1_EOC 0x20 +#define SNS1_MESSAGE_TO_OPER 0x10 +#define SNS1_NO_REC_FOUND 0x08 +#define SNS1_FILE_PROTECTED 0x04 +#define SNS1_WRITE_INHIBITED 0x02 +#define SNS1_INPRECISE_END 0x01 + +/* + * architectured values for third sense byte + */ +#define SNS2_REQ_INH_WRITE 0x80 +#define SNS2_CORRECTABLE 0x40 +#define SNS2_FIRST_LOG_ERR 0x20 +#define SNS2_ENV_DATA_PRESENT 0x10 +#define SNS2_INPRECISE_END 0x04 + +/** + * scsw_is_tm - check for transport mode scsw + * @scsw: pointer to scsw + * + * Return non-zero if the specified scsw is a transport mode scsw, zero + * otherwise. + */ +static inline int scsw_is_tm(union scsw *scsw) +{ + return css_general_characteristics.fcx && (scsw->tm.x == 1); +} + +/** + * scsw_key - return scsw key field + * @scsw: pointer to scsw + * + * Return the value of the key field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.key; + else + return scsw->cmd.key; +} + +/** + * scsw_eswf - return scsw eswf field + * @scsw: pointer to scsw + * + * Return the value of the eswf field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.eswf; + else + return scsw->cmd.eswf; +} + +/** + * scsw_cc - return scsw cc field + * @scsw: pointer to scsw + * + * Return the value of the cc field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cc; + else + return scsw->cmd.cc; +} + +/** + * scsw_ectl - return scsw ectl field + * @scsw: pointer to scsw + * + * Return the value of the ectl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.ectl; + else + return scsw->cmd.ectl; +} + +/** + * scsw_pno - return scsw pno field + * @scsw: pointer to scsw + * + * Return the value of the pno field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.pno; + else + return scsw->cmd.pno; +} + +/** + * scsw_fctl - return scsw fctl field + * @scsw: pointer to scsw + * + * Return the value of the fctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.fctl; + else + return scsw->cmd.fctl; +} + +/** + * scsw_actl - return scsw actl field + * @scsw: pointer to scsw + * + * Return the value of the actl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.actl; + else + return scsw->cmd.actl; +} + +/** + * scsw_stctl - return scsw stctl field + * @scsw: pointer to scsw + * + * Return the value of the stctl field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.stctl; + else + return scsw->cmd.stctl; +} + +/** + * scsw_dstat - return scsw dstat field + * @scsw: pointer to scsw + * + * Return the value of the dstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.dstat; + else + return scsw->cmd.dstat; +} + +/** + * scsw_cstat - return scsw cstat field + * @scsw: pointer to scsw + * + * Return the value of the cstat field of the specified scsw, regardless of + * whether it is a transport mode or command mode scsw. + */ +static inline u32 scsw_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw->tm.cstat; + else + return scsw->cmd.cstat; +} + +/** + * scsw_cmd_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_key(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_sctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_sctl(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_eswf(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_cmd_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_fmt(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_pfch - check pfch field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pfch field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pfch(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_isic - check isic field validity + * @scsw: pointer to scsw + * + * Return non-zero if the isic field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_isic(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_alcc - check alcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the alcc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_alcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_ssi - check ssi field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ssi field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_ssi(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_cmd_is_valid_zcc - check zcc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the zcc field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_zcc(union scsw *scsw) +{ + return (scsw->cmd.fctl & SCSW_FCTL_START_FUNC) && + (scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS); +} + +/** + * scsw_cmd_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_ectl(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_cmd_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_pno(union scsw *scsw) +{ + return (scsw->cmd.fctl != 0) && + (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->cmd.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->cmd.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_cmd_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_cmd_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_dstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_cmd_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified command mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_cmd_is_valid_cstat(union scsw *scsw) +{ + return (scsw->cmd.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->cmd.cc != 3); +} + +/** + * scsw_tm_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_key(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC); +} + +/** + * scsw_tm_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_eswf(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cc(union scsw *scsw) +{ + return (scsw->tm.fctl & SCSW_FCTL_START_FUNC) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND); +} + +/** + * scsw_tm_is_valid_fmt - check fmt field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fmt field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fmt(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_x - check x field validity + * @scsw: pointer to scsw + * + * Return non-zero if the x field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_x(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_q - check q field validity + * @scsw: pointer to scsw + * + * Return non-zero if the q field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_q(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_ectl(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + !(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.stctl & SCSW_STCTL_ALERT_STATUS); +} + +/** + * scsw_tm_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_pno(union scsw *scsw) +{ + return (scsw->tm.fctl != 0) && + (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (!(scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) || + ((scsw->tm.stctl & SCSW_STCTL_INTER_STATUS) && + (scsw->tm.actl & SCSW_ACTL_SUSPENDED))); +} + +/** + * scsw_tm_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_actl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_stctl(union scsw *scsw) +{ + /* Only valid if pmcw.dnv == 1*/ + return 1; +} + +/** + * scsw_tm_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_dstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_cstat(union scsw *scsw) +{ + return (scsw->tm.stctl & SCSW_STCTL_STATUS_PEND) && + (scsw->tm.cc != 3); +} + +/** + * scsw_tm_is_valid_fcxs - check fcxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fcxs field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_fcxs(union scsw *scsw) +{ + return 1; +} + +/** + * scsw_tm_is_valid_schxs - check schxs field validity + * @scsw: pointer to scsw + * + * Return non-zero if the schxs field of the specified transport mode scsw is + * valid, zero otherwise. + */ +static inline int scsw_tm_is_valid_schxs(union scsw *scsw) +{ + return (scsw->tm.cstat & (SCHN_STAT_PROG_CHECK | + SCHN_STAT_INTF_CTRL_CHK | + SCHN_STAT_PROT_CHECK | + SCHN_STAT_CHN_DATA_CHK)); +} + +/** + * scsw_is_valid_actl - check actl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the actl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_actl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_actl(scsw); + else + return scsw_cmd_is_valid_actl(scsw); +} + +/** + * scsw_is_valid_cc - check cc field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cc field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cc(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cc(scsw); + else + return scsw_cmd_is_valid_cc(scsw); +} + +/** + * scsw_is_valid_cstat - check cstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the cstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_cstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_cstat(scsw); + else + return scsw_cmd_is_valid_cstat(scsw); +} + +/** + * scsw_is_valid_dstat - check dstat field validity + * @scsw: pointer to scsw + * + * Return non-zero if the dstat field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_dstat(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_dstat(scsw); + else + return scsw_cmd_is_valid_dstat(scsw); +} + +/** + * scsw_is_valid_ectl - check ectl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the ectl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_ectl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_ectl(scsw); + else + return scsw_cmd_is_valid_ectl(scsw); +} + +/** + * scsw_is_valid_eswf - check eswf field validity + * @scsw: pointer to scsw + * + * Return non-zero if the eswf field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_eswf(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_eswf(scsw); + else + return scsw_cmd_is_valid_eswf(scsw); +} + +/** + * scsw_is_valid_fctl - check fctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the fctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_fctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_fctl(scsw); + else + return scsw_cmd_is_valid_fctl(scsw); +} + +/** + * scsw_is_valid_key - check key field validity + * @scsw: pointer to scsw + * + * Return non-zero if the key field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_key(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_key(scsw); + else + return scsw_cmd_is_valid_key(scsw); +} + +/** + * scsw_is_valid_pno - check pno field validity + * @scsw: pointer to scsw + * + * Return non-zero if the pno field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_pno(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_pno(scsw); + else + return scsw_cmd_is_valid_pno(scsw); +} + +/** + * scsw_is_valid_stctl - check stctl field validity + * @scsw: pointer to scsw + * + * Return non-zero if the stctl field of the specified scsw is valid, + * regardless of whether it is a transport mode or command mode scsw. + * Return zero if the field does not contain a valid value. + */ +static inline int scsw_is_valid_stctl(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_valid_stctl(scsw); + else + return scsw_cmd_is_valid_stctl(scsw); +} + +/** + * scsw_cmd_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the command mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_cmd_is_solicited(union scsw *scsw) +{ + return (scsw->cmd.cc != 0) || (scsw->cmd.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_tm_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport mode scsw indicates that the associated + * status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_tm_is_solicited(union scsw *scsw) +{ + return (scsw->tm.cc != 0) || (scsw->tm.stctl != + (SCSW_STCTL_STATUS_PEND | SCSW_STCTL_ALERT_STATUS)); +} + +/** + * scsw_is_solicited - check for solicited scsw + * @scsw: pointer to scsw + * + * Return non-zero if the transport or command mode scsw indicates that the + * associated status condition is solicited, zero if it is unsolicited. + */ +static inline int scsw_is_solicited(union scsw *scsw) +{ + if (scsw_is_tm(scsw)) + return scsw_tm_is_solicited(scsw); + else + return scsw_cmd_is_solicited(scsw); +} + +#endif /* _ASM_S390_SCSW_H_ */ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 38b0fc221ed..e37478e8728 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -8,7 +8,7 @@ #ifndef _ASM_S390_SETUP_H #define _ASM_S390_SETUP_H -#define COMMAND_LINE_SIZE 1024 +#define COMMAND_LINE_SIZE 4096 #define ARCH_COMMAND_LINE_SIZE 896 diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index 72137bc907a..c991fe6473c 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -51,32 +51,7 @@ extern void machine_power_off_smp(void); #define PROC_CHANGE_PENALTY 20 /* Schedule penalty */ #define raw_smp_processor_id() (S390_lowcore.cpu_nr) - -/* - * returns 1 if cpu is in stopped/check stopped state or not operational - * returns 0 otherwise - */ -static inline int -smp_cpu_not_running(int cpu) -{ - __u32 status; - - switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) { - case sigp_order_code_accepted: - case sigp_status_stored: - /* Check for stopped and check stop state */ - if (status & 0x50) - return 1; - break; - case sigp_not_operational: - return 1; - default: - break; - } - return 0; -} - -#define cpu_logical_map(cpu) (cpu) +#define cpu_logical_map(cpu) (cpu) extern int __cpu_disable (void); extern void __cpu_die (unsigned int cpu); @@ -91,11 +66,6 @@ extern void arch_send_call_function_ipi(cpumask_t mask); #endif -#ifndef CONFIG_SMP -#define hard_smp_processor_id() 0 -#define smp_cpu_not_running(cpu) 1 -#endif - #ifdef CONFIG_HOTPLUG_CPU extern int smp_rescan_cpus(void); #else diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index c9af0d19c7a..41ce6861174 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -191,4 +191,33 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() +#define __always_inline__spin_lock +#define __always_inline__read_lock +#define __always_inline__write_lock +#define __always_inline__spin_lock_bh +#define __always_inline__read_lock_bh +#define __always_inline__write_lock_bh +#define __always_inline__spin_lock_irq +#define __always_inline__read_lock_irq +#define __always_inline__write_lock_irq +#define __always_inline__spin_lock_irqsave +#define __always_inline__read_lock_irqsave +#define __always_inline__write_lock_irqsave +#define __always_inline__spin_trylock +#define __always_inline__read_trylock +#define __always_inline__write_trylock +#define __always_inline__spin_trylock_bh +#define __always_inline__spin_unlock +#define __always_inline__read_unlock +#define __always_inline__write_unlock +#define __always_inline__spin_unlock_bh +#define __always_inline__read_unlock_bh +#define __always_inline__write_unlock_bh +#define __always_inline__spin_unlock_irq +#define __always_inline__read_unlock_irq +#define __always_inline__write_unlock_irq +#define __always_inline__spin_unlock_irqrestore +#define __always_inline__read_unlock_irqrestore +#define __always_inline__write_unlock_irqrestore + #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h index 4fb83c1cdb7..379661d2f81 100644 --- a/arch/s390/include/asm/system.h +++ b/arch/s390/include/asm/system.h @@ -109,11 +109,7 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ -#ifdef CONFIG_PAGE_STATES extern void cmma_init(void); -#else -static inline void cmma_init(void) { } -#endif #define finish_arch_switch(prev) do { \ set_fs(current->thread.mm_segment); \ diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index cc21e3e20fd..24aa1cda20a 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -90,4 +90,18 @@ unsigned long long monotonic_clock(void); extern u64 sched_clock_base_cc; +/** + * get_clock_monotonic - returns current time in clock rate units + * + * The caller must ensure that preemption is disabled. + * The clock and sched_clock_base get changed via stop_machine. + * Therefore preemption must be disabled when calling this + * function, otherwise the returned value is not guaranteed to + * be monotonic. + */ +static inline unsigned long long get_clock_monotonic(void) +{ + return get_clock_xt() - sched_clock_base_cc; +} + #endif diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index c75ed43b1a1..c7be8e10b87 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -32,7 +32,7 @@ extra-y += head.o init_task.o vmlinux.lds obj-$(CONFIG_MODULES) += s390_ksyms.o module.o obj-$(CONFIG_SMP) += smp.o topology.o - +obj-$(CONFIG_HIBERNATION) += suspend.o swsusp_asm64.o obj-$(CONFIG_AUDIT) += audit.o compat-obj-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ @@ -41,7 +41,7 @@ obj-$(CONFIG_COMPAT) += compat_linux.o compat_signal.o \ obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_FUNCTION_TRACER) += mcount.o +obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o) obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cae14c49951..bf8b4ae7ff2 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -6,6 +6,9 @@ * Heiko Carstens <heiko.carstens@de.ibm.com> */ +#define KMSG_COMPONENT "setup" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include <linux/compiler.h> #include <linux/init.h> #include <linux/errno.h> @@ -16,6 +19,7 @@ #include <linux/module.h> #include <linux/pfn.h> #include <linux/uaccess.h> +#include <linux/kernel.h> #include <asm/ebcdic.h> #include <asm/ipl.h> #include <asm/lowcore.h> @@ -35,8 +39,6 @@ char kernel_nss_name[NSS_NAME_SIZE + 1]; -static unsigned long machine_flags; - static void __init setup_boot_command_line(void); /* @@ -81,6 +83,8 @@ asm( " br 14\n" " .size savesys_ipl_nss, .-savesys_ipl_nss\n"); +static __initdata char upper_command_line[COMMAND_LINE_SIZE]; + static noinline __init void create_kernel_nss(void) { unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size; @@ -90,7 +94,6 @@ static noinline __init void create_kernel_nss(void) int response; size_t len; char *savesys_ptr; - char upper_command_line[COMMAND_LINE_SIZE]; char defsys_cmd[DEFSYS_CMD_SIZE]; char savesys_cmd[SAVESYS_CMD_SIZE]; @@ -141,6 +144,8 @@ static noinline __init void create_kernel_nss(void) __cpcmd(defsys_cmd, NULL, 0, &response); if (response != 0) { + pr_err("Defining the Linux kernel NSS failed with rc=%d\n", + response); kernel_nss_name[0] = '\0'; return; } @@ -153,8 +158,11 @@ static noinline __init void create_kernel_nss(void) * max SAVESYS_CMD_SIZE * On error: response contains the numeric portion of cp error message. * for SAVESYS it will be >= 263 + * for missing privilege class, it will be 1 */ - if (response > SAVESYS_CMD_SIZE) { + if (response > SAVESYS_CMD_SIZE || response == 1) { + pr_err("Saving the Linux kernel NSS failed with rc=%d\n", + response); kernel_nss_name[0] = '\0'; return; } @@ -205,12 +213,9 @@ static noinline __init void detect_machine_type(void) /* Running under KVM? If not we assume z/VM */ if (!memcmp(vmms.vm[0].cpi, "\xd2\xe5\xd4", 3)) - machine_flags |= MACHINE_FLAG_KVM; + S390_lowcore.machine_flags |= MACHINE_FLAG_KVM; else - machine_flags |= MACHINE_FLAG_VM; - - /* Store machine flags for setting up lowcore early */ - S390_lowcore.machine_flags = machine_flags; + S390_lowcore.machine_flags |= MACHINE_FLAG_VM; } static __init void early_pgm_check_handler(void) @@ -245,7 +250,7 @@ static noinline __init void setup_hpage(void) facilities = stfl(); if (!(facilities & (1UL << 23)) || !(facilities & (1UL << 29))) return; - machine_flags |= MACHINE_FLAG_HPAGE; + S390_lowcore.machine_flags |= MACHINE_FLAG_HPAGE; __ctl_set_bit(0, 23); #endif } @@ -263,7 +268,7 @@ static __init void detect_mvpg(void) EX_TABLE(0b,1b) : "=d" (rc) : "0" (-EOPNOTSUPP), "a" (0) : "memory", "cc", "0"); if (!rc) - machine_flags |= MACHINE_FLAG_MVPG; + S390_lowcore.machine_flags |= MACHINE_FLAG_MVPG; #endif } @@ -279,7 +284,7 @@ static __init void detect_ieee(void) EX_TABLE(0b,1b) : "=d" (rc), "=d" (tmp): "0" (-EOPNOTSUPP) : "cc"); if (!rc) - machine_flags |= MACHINE_FLAG_IEEE; + S390_lowcore.machine_flags |= MACHINE_FLAG_IEEE; #endif } @@ -298,7 +303,7 @@ static __init void detect_csp(void) EX_TABLE(0b,1b) : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc", "0", "1", "2"); if (!rc) - machine_flags |= MACHINE_FLAG_CSP; + S390_lowcore.machine_flags |= MACHINE_FLAG_CSP; #endif } @@ -315,7 +320,7 @@ static __init void detect_diag9c(void) EX_TABLE(0b,1b) : "=d" (rc) : "0" (-EOPNOTSUPP), "d" (cpu_address) : "cc"); if (!rc) - machine_flags |= MACHINE_FLAG_DIAG9C; + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C; } static __init void detect_diag44(void) @@ -330,7 +335,7 @@ static __init void detect_diag44(void) EX_TABLE(0b,1b) : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc"); if (!rc) - machine_flags |= MACHINE_FLAG_DIAG44; + S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44; #endif } @@ -341,11 +346,11 @@ static __init void detect_machine_facilities(void) facilities = stfl(); if (facilities & (1 << 28)) - machine_flags |= MACHINE_FLAG_IDTE; + S390_lowcore.machine_flags |= MACHINE_FLAG_IDTE; if (facilities & (1 << 23)) - machine_flags |= MACHINE_FLAG_PFMF; + S390_lowcore.machine_flags |= MACHINE_FLAG_PFMF; if (facilities & (1 << 4)) - machine_flags |= MACHINE_FLAG_MVCOS; + S390_lowcore.machine_flags |= MACHINE_FLAG_MVCOS; #endif } @@ -367,21 +372,35 @@ static __init void rescue_initrd(void) } /* Set up boot command line */ -static void __init setup_boot_command_line(void) +static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) { - char *parm = NULL; + char *parm, *delim; + size_t rc, len; + + len = strlen(boot_command_line); + + delim = boot_command_line + len; /* '\0' character position */ + parm = boot_command_line + len + 1; /* append right after '\0' */ + rc = ipl_data(parm, COMMAND_LINE_SIZE - len - 1); + if (rc) { + if (*parm == '=') + memmove(boot_command_line, parm + 1, rc); + else + *delim = ' '; /* replace '\0' with space */ + } +} + +static void __init setup_boot_command_line(void) +{ /* copy arch command line */ strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE); /* append IPL PARM data to the boot command line */ - if (MACHINE_IS_VM) { - parm = boot_command_line + strlen(boot_command_line); - *parm++ = ' '; - get_ipl_vmparm(parm); - if (parm[0] == '=') - memmove(boot_command_line, parm + 1, strlen(parm)); - } + if (MACHINE_IS_VM) + append_to_cmdline(append_ipl_vmparm); + + append_to_cmdline(append_ipl_scpdata); } @@ -413,7 +432,6 @@ void __init startup_init(void) setup_hpage(); sclp_facilities_detect(); detect_memory_layout(memory_chunk); - S390_lowcore.machine_flags = machine_flags; #ifdef CONFIG_DYNAMIC_FTRACE S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; #endif diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index c4c80a22bc1..f78580a7403 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -278,7 +278,8 @@ sysc_return: bnz BASED(sysc_work) # there is work to do (signals etc.) sysc_restore: #ifdef CONFIG_TRACE_IRQFLAGS - la %r1,BASED(sysc_restore_trace_psw) + la %r1,BASED(sysc_restore_trace_psw_addr) + l %r1,0(%r1) lpsw 0(%r1) sysc_restore_trace: TRACE_IRQS_CHECK @@ -289,10 +290,15 @@ sysc_leave: sysc_done: #ifdef CONFIG_TRACE_IRQFLAGS +sysc_restore_trace_psw_addr: + .long sysc_restore_trace_psw + + .section .data,"aw",@progbits .align 8 .globl sysc_restore_trace_psw sysc_restore_trace_psw: .long 0, sysc_restore_trace + 0x80000000 + .previous #endif # @@ -606,7 +612,8 @@ io_return: bnz BASED(io_work) # there is work to do (signals etc.) io_restore: #ifdef CONFIG_TRACE_IRQFLAGS - la %r1,BASED(io_restore_trace_psw) + la %r1,BASED(io_restore_trace_psw_addr) + l %r1,0(%r1) lpsw 0(%r1) io_restore_trace: TRACE_IRQS_CHECK @@ -617,10 +624,15 @@ io_leave: io_done: #ifdef CONFIG_TRACE_IRQFLAGS +io_restore_trace_psw_addr: + .long io_restore_trace_psw + + .section .data,"aw",@progbits .align 8 .globl io_restore_trace_psw io_restore_trace_psw: .long 0, io_restore_trace + 0x80000000 + .previous #endif # diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index f6618e9e15e..009ca6175db 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -284,10 +284,12 @@ sysc_leave: sysc_done: #ifdef CONFIG_TRACE_IRQFLAGS + .section .data,"aw",@progbits .align 8 .globl sysc_restore_trace_psw sysc_restore_trace_psw: .quad 0, sysc_restore_trace + .previous #endif # @@ -595,10 +597,12 @@ io_leave: io_done: #ifdef CONFIG_TRACE_IRQFLAGS + .section .data,"aw",@progbits .align 8 .globl io_restore_trace_psw io_restore_trace_psw: .quad 0, io_restore_trace + .previous #endif # diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index ec688234852..c52b4f7742f 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -27,6 +27,7 @@ #include <asm/asm-offsets.h> #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/cpu.h> #ifdef CONFIG_64BIT #define ARCH_OFFSET 4 diff --git a/arch/s390/kernel/head31.S b/arch/s390/kernel/head31.S index 2ced846065b..602b508cd4c 100644 --- a/arch/s390/kernel/head31.S +++ b/arch/s390/kernel/head31.S @@ -24,6 +24,7 @@ startup_continue: # Setup stack # l %r15,.Linittu-.LPG1(%r13) + st %r15,__LC_THREAD_INFO # cache thread info in lowcore mvc __LC_CURRENT(4),__TI_task(%r15) ahi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union+THREAD_SIZE st %r15,__LC_KERNEL_STACK # set end of kernel stack diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 65667b2e65c..6a250808092 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -62,9 +62,9 @@ startup_continue: clr %r11,%r12 je 5f # no more space in prefix array 4: - ahi %r8,1 # next cpu (r8 += 1) - cl %r8,.Llast_cpu-.LPG1(%r13) # is last possible cpu ? - jl 1b # jump if not last cpu + ahi %r8,1 # next cpu (r8 += 1) + chi %r8,MAX_CPU_ADDRESS # is last possible cpu ? + jle 1b # jump if not last cpu 5: lhi %r1,2 # mode 2 = esame (dump) j 6f @@ -92,6 +92,7 @@ startup_continue: # Setup stack # larl %r15,init_thread_union + stg %r15,__LC_THREAD_INFO # cache thread info in lowcore lg %r14,__TI_task(%r15) # cache current in lowcore stg %r14,__LC_CURRENT aghi %r15,1<<(PAGE_SHIFT+THREAD_ORDER) # init_task_union + THREAD_SIZE @@ -129,8 +130,6 @@ startup_continue: #ifdef CONFIG_ZFCPDUMP .Lcurrent_cpu: .long 0x0 -.Llast_cpu: - .long 0x0000ffff .Lpref_arr_ptr: .long zfcpdump_prefix_array #endif /* CONFIG_ZFCPDUMP */ diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 371a2d88f4a..ee57a42e6e9 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -272,17 +272,18 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr, static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type); /* VM IPL PARM routines */ -static void reipl_get_ascii_vmparm(char *dest, +size_t reipl_get_ascii_vmparm(char *dest, size_t size, const struct ipl_parameter_block *ipb) { int i; - int len = 0; + size_t len; char has_lowercase = 0; + len = 0; if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) && (ipb->ipl_info.ccw.vm_parm_len > 0)) { - len = ipb->ipl_info.ccw.vm_parm_len; + len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len); memcpy(dest, ipb->ipl_info.ccw.vm_parm, len); /* If at least one character is lowercase, we assume mixed * case; otherwise we convert everything to lowercase. @@ -299,14 +300,20 @@ static void reipl_get_ascii_vmparm(char *dest, EBCASC(dest, len); } dest[len] = 0; + + return len; } -void get_ipl_vmparm(char *dest) +size_t append_ipl_vmparm(char *dest, size_t size) { + size_t rc; + + rc = 0; if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW)) - reipl_get_ascii_vmparm(dest, &ipl_block); + rc = reipl_get_ascii_vmparm(dest, size, &ipl_block); else dest[0] = 0; + return rc; } static ssize_t ipl_vm_parm_show(struct kobject *kobj, @@ -314,10 +321,65 @@ static ssize_t ipl_vm_parm_show(struct kobject *kobj, { char parm[DIAG308_VMPARM_SIZE + 1] = {}; - get_ipl_vmparm(parm); + append_ipl_vmparm(parm, sizeof(parm)); return sprintf(page, "%s\n", parm); } +static size_t scpdata_length(const char* buf, size_t count) +{ + while (count) { + if (buf[count - 1] != '\0' && buf[count - 1] != ' ') + break; + count--; + } + return count; +} + +size_t reipl_append_ascii_scpdata(char *dest, size_t size, + const struct ipl_parameter_block *ipb) +{ + size_t count; + size_t i; + int has_lowercase; + + count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data, + ipb->ipl_info.fcp.scp_data_len)); + if (!count) + goto out; + + has_lowercase = 0; + for (i = 0; i < count; i++) { + if (!isascii(ipb->ipl_info.fcp.scp_data[i])) { + count = 0; + goto out; + } + if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i])) + has_lowercase = 1; + } + + if (has_lowercase) + memcpy(dest, ipb->ipl_info.fcp.scp_data, count); + else + for (i = 0; i < count; i++) + dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]); +out: + dest[count] = '\0'; + return count; +} + +size_t append_ipl_scpdata(char *dest, size_t len) +{ + size_t rc; + + rc = 0; + if (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP) + rc = reipl_append_ascii_scpdata(dest, len, &ipl_block); + else + dest[0] = 0; + return rc; +} + + static struct kobj_attribute sys_ipl_vm_parm_attr = __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL); @@ -553,7 +615,7 @@ static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb, { char vmparm[DIAG308_VMPARM_SIZE + 1] = {}; - reipl_get_ascii_vmparm(vmparm, ipb); + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); return sprintf(page, "%s\n", vmparm); } @@ -626,6 +688,59 @@ static struct kobj_attribute sys_reipl_ccw_vmparm_attr = /* FCP reipl device attributes */ +static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len; + void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data; + + return memory_read_from_buffer(buf, count, &off, scp_data, size); +} + +static ssize_t reipl_fcp_scpdata_write(struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, size_t count) +{ + size_t padding; + size_t scpdata_len; + + if (off < 0) + return -EINVAL; + + if (off >= DIAG308_SCPDATA_SIZE) + return -ENOSPC; + + if (count > DIAG308_SCPDATA_SIZE - off) + count = DIAG308_SCPDATA_SIZE - off; + + memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf + off, count); + scpdata_len = off + count; + + if (scpdata_len % 8) { + padding = 8 - (scpdata_len % 8); + memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len, + 0, padding); + scpdata_len += padding; + } + + reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len; + reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len; + reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len; + + return count; +} + +static struct bin_attribute sys_reipl_fcp_scp_data_attr = { + .attr = { + .name = "scp_data", + .mode = S_IRUGO | S_IWUSR, + }, + .size = PAGE_SIZE, + .read = reipl_fcp_scpdata_read, + .write = reipl_fcp_scpdata_write, +}; + DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n", reipl_block_fcp->ipl_info.fcp.wwpn); DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%016llx\n", @@ -647,7 +762,6 @@ static struct attribute *reipl_fcp_attrs[] = { }; static struct attribute_group reipl_fcp_attr_group = { - .name = IPL_FCP_STR, .attrs = reipl_fcp_attrs, }; @@ -895,6 +1009,7 @@ static struct kobj_attribute reipl_type_attr = __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store); static struct kset *reipl_kset; +static struct kset *reipl_fcp_kset; static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb, const enum ipl_method m) @@ -906,7 +1021,7 @@ static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb, reipl_get_ascii_loadparm(loadparm, ipb); reipl_get_ascii_nss_name(nss_name, ipb); - reipl_get_ascii_vmparm(vmparm, ipb); + reipl_get_ascii_vmparm(vmparm, sizeof(vmparm), ipb); switch (m) { case REIPL_METHOD_CCW_VM: @@ -1076,23 +1191,44 @@ static int __init reipl_fcp_init(void) int rc; if (!diag308_set_works) { - if (ipl_info.type == IPL_TYPE_FCP) + if (ipl_info.type == IPL_TYPE_FCP) { make_attrs_ro(reipl_fcp_attrs); - else + sys_reipl_fcp_scp_data_attr.attr.mode = S_IRUGO; + } else return 0; } reipl_block_fcp = (void *) get_zeroed_page(GFP_KERNEL); if (!reipl_block_fcp) return -ENOMEM; - rc = sysfs_create_group(&reipl_kset->kobj, &reipl_fcp_attr_group); + + /* sysfs: create fcp kset for mixing attr group and bin attrs */ + reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, + &reipl_kset->kobj); + if (!reipl_kset) { + free_page((unsigned long) reipl_block_fcp); + return -ENOMEM; + } + + rc = sysfs_create_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + if (rc) { + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); + return rc; + } + + rc = sysfs_create_bin_file(&reipl_fcp_kset->kobj, + &sys_reipl_fcp_scp_data_attr); if (rc) { - free_page((unsigned long)reipl_block_fcp); + sysfs_remove_group(&reipl_fcp_kset->kobj, &reipl_fcp_attr_group); + kset_unregister(reipl_fcp_kset); + free_page((unsigned long) reipl_block_fcp); return rc; } - if (ipl_info.type == IPL_TYPE_FCP) { + + if (ipl_info.type == IPL_TYPE_FCP) memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE); - } else { + else { reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN; reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION; reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 2a0a5e97ba8..dfe015d7398 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -11,111 +11,27 @@ ftrace_stub: br %r14 -#ifdef CONFIG_64BIT - -#ifdef CONFIG_DYNAMIC_FTRACE - .globl _mcount _mcount: - br %r14 - - .globl ftrace_caller -ftrace_caller: - larl %r1,function_trace_stop - icm %r1,0xf,0(%r1) - bnzr %r14 - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - lgr %r2,%r14 - lg %r3,168(%r15) - larl %r14,ftrace_dyn_func - lg %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl ftrace_graph_caller -ftrace_graph_caller: - # This unconditional branch gets runtime patched. Change only if - # you know what you are doing. See ftrace_enable_graph_caller(). - j 0f - lg %r2,272(%r15) - lg %r3,168(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,168(%r15) -0: -#endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) +#ifdef CONFIG_DYNAMIC_FTRACE br %r14 .data .globl ftrace_dyn_func ftrace_dyn_func: - .quad ftrace_stub + .long ftrace_stub .previous -#else /* CONFIG_DYNAMIC_FTRACE */ - - .globl _mcount -_mcount: - larl %r1,function_trace_stop - icm %r1,0xf,0(%r1) - bnzr %r14 - stmg %r2,%r5,32(%r15) - stg %r14,112(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - lgr %r2,%r14 - lg %r3,168(%r15) - larl %r14,ftrace_trace_function - lg %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - lg %r2,272(%r15) - lg %r3,168(%r15) - brasl %r14,prepare_ftrace_return - stg %r2,168(%r15) -#endif - aghi %r15,160 - lmg %r2,%r5,32(%r15) - lg %r14,112(%r15) - br %r14 - -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - - .globl return_to_handler -return_to_handler: - stmg %r2,%r5,32(%r15) - lgr %r1,%r15 - aghi %r15,-160 - stg %r1,__SF_BACKCHAIN(%r15) - brasl %r14,ftrace_return_to_handler - aghi %r15,160 - lgr %r14,%r2 - lmg %r2,%r5,32(%r15) - br %r14 - -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#else /* CONFIG_64BIT */ - -#ifdef CONFIG_DYNAMIC_FTRACE - - .globl _mcount -_mcount: - br %r14 - .globl ftrace_caller ftrace_caller: +#endif stm %r2,%r5,16(%r15) bras %r1,2f +#ifdef CONFIG_DYNAMIC_FTRACE +0: .long ftrace_dyn_func +#else 0: .long ftrace_trace_function +#endif 1: .long function_trace_stop 2: l %r2,1b-0b(%r1) icm %r2,0xf,0(%r2) @@ -131,53 +47,13 @@ ftrace_caller: l %r14,0(%r14) basr %r14,%r14 #ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE .globl ftrace_graph_caller ftrace_graph_caller: # This unconditional branch gets runtime patched. Change only if # you know what you are doing. See ftrace_enable_graph_caller(). j 1f - bras %r1,0f - .long prepare_ftrace_return -0: l %r2,152(%r15) - l %r4,0(%r1) - l %r3,100(%r15) - basr %r14,%r4 - st %r2,100(%r15) -1: #endif - ahi %r15,96 - l %r14,56(%r15) -3: lm %r2,%r5,16(%r15) - br %r14 - - .data - .globl ftrace_dyn_func -ftrace_dyn_func: - .long ftrace_stub - .previous - -#else /* CONFIG_DYNAMIC_FTRACE */ - - .globl _mcount -_mcount: - stm %r2,%r5,16(%r15) - bras %r1,2f -0: .long ftrace_trace_function -1: .long function_trace_stop -2: l %r2,1b-0b(%r1) - icm %r2,0xf,0(%r2) - jnz 3f - st %r14,56(%r15) - lr %r0,%r15 - ahi %r15,-96 - l %r3,100(%r15) - la %r2,0(%r14) - st %r0,__SF_BACKCHAIN(%r15) - la %r3,0(%r3) - l %r14,0b-0b(%r1) - l %r14,0(%r14) - basr %r14,%r14 -#ifdef CONFIG_FUNCTION_GRAPH_TRACER bras %r1,0f .long prepare_ftrace_return 0: l %r2,152(%r15) @@ -185,14 +61,13 @@ _mcount: l %r3,100(%r15) basr %r14,%r4 st %r2,100(%r15) +1: #endif ahi %r15,96 l %r14,56(%r15) 3: lm %r2,%r5,16(%r15) br %r14 -#endif /* CONFIG_DYNAMIC_FTRACE */ - #ifdef CONFIG_FUNCTION_GRAPH_TRACER .globl return_to_handler @@ -211,6 +86,4 @@ return_to_handler: lm %r2,%r5,16(%r15) br %r14 -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ - -#endif /* CONFIG_64BIT */ +#endif diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S new file mode 100644 index 00000000000..c37211c6092 --- /dev/null +++ b/arch/s390/kernel/mcount64.S @@ -0,0 +1,78 @@ +/* + * Copyright IBM Corp. 2008,2009 + * + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + */ + +#include <asm/asm-offsets.h> + + .globl ftrace_stub +ftrace_stub: + br %r14 + + .globl _mcount +_mcount: +#ifdef CONFIG_DYNAMIC_FTRACE + br %r14 + + .data + .globl ftrace_dyn_func +ftrace_dyn_func: + .quad ftrace_stub + .previous + + .globl ftrace_caller +ftrace_caller: +#endif + larl %r1,function_trace_stop + icm %r1,0xf,0(%r1) + bnzr %r14 + stmg %r2,%r5,32(%r15) + stg %r14,112(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + lgr %r2,%r14 + lg %r3,168(%r15) +#ifdef CONFIG_DYNAMIC_FTRACE + larl %r14,ftrace_dyn_func +#else + larl %r14,ftrace_trace_function +#endif + lg %r14,0(%r14) + basr %r14,%r14 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE + .globl ftrace_graph_caller +ftrace_graph_caller: + # This unconditional branch gets runtime patched. Change only if + # you know what you are doing. See ftrace_enable_graph_caller(). + j 0f +#endif + lg %r2,272(%r15) + lg %r3,168(%r15) + brasl %r14,prepare_ftrace_return + stg %r2,168(%r15) +0: +#endif + aghi %r15,160 + lmg %r2,%r5,32(%r15) + lg %r14,112(%r15) + br %r14 + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + + .globl return_to_handler +return_to_handler: + stmg %r2,%r5,32(%r15) + lgr %r1,%r15 + aghi %r15,-160 + stg %r1,__SF_BACKCHAIN(%r15) + brasl %r14,ftrace_return_to_handler + aghi %r15,160 + lgr %r14,%r2 + lmg %r2,%r5,32(%r15) + br %r14 + +#endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index cbb897bc50b..9ed13a1ed37 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -156,15 +156,11 @@ __setup("condev=", condev_setup); static void __init set_preferred_console(void) { - if (MACHINE_IS_KVM) { + if (MACHINE_IS_KVM) add_preferred_console("hvc", 0, NULL); - s390_virtio_console_init(); - return; - } - - if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) + else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP) add_preferred_console("ttyS", 0, NULL); - if (CONSOLE_IS_3270) + else if (CONSOLE_IS_3270) add_preferred_console("tty3270", 0, NULL); } diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 062bd64e65f..6b4fef877f9 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -536,4 +536,6 @@ void do_notify_resume(struct pt_regs *regs) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index be2cae08340..56c16876b91 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -49,6 +49,7 @@ #include <asm/sclp.h> #include <asm/cputime.h> #include <asm/vdso.h> +#include <asm/cpu.h> #include "entry.h" static struct task_struct *current_set[NR_CPUS]; @@ -70,6 +71,23 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); static void smp_ext_bitcall(int, ec_bit_sig); +static int cpu_stopped(int cpu) +{ + __u32 status; + + switch (signal_processor_ps(&status, 0, cpu, sigp_sense)) { + case sigp_order_code_accepted: + case sigp_status_stored: + /* Check for stopped and check stop state */ + if (status & 0x50) + return 1; + break; + default: + break; + } + return 0; +} + void smp_send_stop(void) { int cpu, rc; @@ -86,7 +104,7 @@ void smp_send_stop(void) rc = signal_processor(cpu, sigp_stop); } while (rc == sigp_busy); - while (!smp_cpu_not_running(cpu)) + while (!cpu_stopped(cpu)) cpu_relax(); } } @@ -269,19 +287,6 @@ static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { } #endif /* CONFIG_ZFCPDUMP */ -static int cpu_stopped(int cpu) -{ - __u32 status; - - /* Check for stopped state */ - if (signal_processor_ps(&status, 0, cpu, sigp_sense) == - sigp_status_stored) { - if (status & 0x40) - return 1; - } - return 0; -} - static int cpu_known(int cpu_id) { int cpu; @@ -300,7 +305,7 @@ static int smp_rescan_cpus_sigp(cpumask_t avail) logical_cpu = cpumask_first(&avail); if (logical_cpu >= nr_cpu_ids) return 0; - for (cpu_id = 0; cpu_id <= 65535; cpu_id++) { + for (cpu_id = 0; cpu_id <= MAX_CPU_ADDRESS; cpu_id++) { if (cpu_known(cpu_id)) continue; __cpu_logical_map[logical_cpu] = cpu_id; @@ -379,7 +384,7 @@ static void __init smp_detect_cpus(void) /* Use sigp detection algorithm if sclp doesn't work. */ if (sclp_get_cpu_info(info)) { smp_use_sigp_detection = 1; - for (cpu = 0; cpu <= 65535; cpu++) { + for (cpu = 0; cpu <= MAX_CPU_ADDRESS; cpu++) { if (cpu == boot_cpu_addr) continue; __cpu_logical_map[CPU_INIT_NO] = cpu; @@ -635,7 +640,7 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { /* Wait until target cpu is down */ - while (!smp_cpu_not_running(cpu)) + while (!cpu_stopped(cpu)) cpu_relax(); smp_free_lowcore(cpu); pr_info("Processor %d stopped\n", cpu); diff --git a/arch/s390/power/swsusp.c b/arch/s390/kernel/suspend.c index bd1f5c6b0b8..086bee970ca 100644 --- a/arch/s390/power/swsusp.c +++ b/arch/s390/kernel/suspend.c @@ -1,13 +1,44 @@ /* - * Support for suspend and resume on s390 + * Suspend support specific for s390. * * Copyright IBM Corp. 2009 * * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> - * */ +#include <linux/suspend.h> +#include <linux/reboot.h> +#include <linux/pfn.h> +#include <linux/mm.h> +#include <asm/sections.h> #include <asm/system.h> +#include <asm/ipl.h> + +/* + * References to section boundaries + */ +extern const void __nosave_begin, __nosave_end; + +/* + * check if given pfn is in the 'nosave' or in the read only NSS section + */ +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; + unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) + >> PAGE_SHIFT; + unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; + unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); + + if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) + return 1; + if (pfn >= stext_pfn && pfn <= eshared_pfn) { + if (ipl_info.type == IPL_TYPE_NSS) + return 1; + } else if ((tprot(pfn * PAGE_SIZE) && pfn > 0)) + return 1; + return 0; +} void save_processor_state(void) { diff --git a/arch/s390/power/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index b26df5c5933..7cd6b096f0d 100644 --- a/arch/s390/power/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -21,7 +21,7 @@ * This function runs with disabled interrupts. */ .section .text - .align 2 + .align 4 .globl swsusp_arch_suspend swsusp_arch_suspend: stmg %r6,%r15,__SF_GPRS(%r15) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c47c8..54e327e9af0 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -60,6 +60,7 @@ #define TICK_SIZE tick u64 sched_clock_base_cc = -1; /* Force to data section. */ +EXPORT_SYMBOL_GPL(sched_clock_base_cc); static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -68,7 +69,7 @@ static DEFINE_PER_CPU(struct clock_event_device, comparators); */ unsigned long long notrace sched_clock(void) { - return ((get_clock_xt() - sched_clock_base_cc) * 125) >> 9; + return (get_clock_monotonic() * 125) >> 9; } /* diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a53db23ee09..7315f9e67e1 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -52,55 +52,18 @@ SECTIONS . = ALIGN(PAGE_SIZE); _eshared = .; /* End of shareable data */ - . = ALIGN(16); /* Exception table */ - __ex_table : { - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - } :data - - .data : { /* Data */ - DATA_DATA - CONSTRUCTORS - } - - . = ALIGN(PAGE_SIZE); - .data_nosave : { - __nosave_begin = .; - *(.data.nosave) - } - . = ALIGN(PAGE_SIZE); - __nosave_end = .; - - . = ALIGN(PAGE_SIZE); - .data.page_aligned : { - *(.data.idt) - } + EXCEPTION_TABLE(16) :data - . = ALIGN(0x100); - .data.cacheline_aligned : { - *(.data.cacheline_aligned) - } + RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE) - . = ALIGN(0x100); - .data.read_mostly : { - *(.data.read_mostly) - } _edata = .; /* End of data section */ - . = ALIGN(THREAD_SIZE); /* init_task */ - .data.init_task : { - *(.data.init_task) - } - /* will be freed after init */ . = ALIGN(PAGE_SIZE); /* Init code and data */ __init_begin = .; - .init.text : { - _sinittext = .; - INIT_TEXT - _einittext = .; - } + + INIT_TEXT_SECTION(PAGE_SIZE) + /* * .exit.text is discarded at runtime, not link time, * to deal with references from __bug_table @@ -111,49 +74,13 @@ SECTIONS /* early.c uses stsi, which requires page aligned data. */ . = ALIGN(PAGE_SIZE); - .init.data : { - INIT_DATA - } - . = ALIGN(0x100); - .init.setup : { - __setup_start = .; - *(.init.setup) - __setup_end = .; - } - .initcall.init : { - __initcall_start = .; - INITCALLS - __initcall_end = .; - } - - .con_initcall.init : { - __con_initcall_start = .; - *(.con_initcall.init) - __con_initcall_end = .; - } - SECURITY_INIT - -#ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(0x100); - .init.ramfs : { - __initramfs_start = .; - *(.init.ramfs) - . = ALIGN(2); - __initramfs_end = .; - } -#endif + INIT_DATA_SECTION(0x100) PERCPU(PAGE_SIZE) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ - /* BSS */ - .bss : { - __bss_start = .; - *(.bss) - . = ALIGN(2); - __bss_stop = .; - } + BSS_SECTION(0, 2, 0) _end = . ; diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index db05661ac89..eec05448441 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \ + page-states.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o -obj-$(CONFIG_PAGE_STATES) += page-states.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index e5e119fe03b..1abbadd497e 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -10,6 +10,7 @@ * Copyright (C) 1995 Linus Torvalds */ +#include <linux/perf_counter.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -305,7 +306,7 @@ do_exception(struct pt_regs *regs, unsigned long error_code, int write) * interrupts again and then search the VMAs */ local_irq_enable(); - + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); down_read(&mm->mmap_sem); si_code = SEGV_MAPERR; @@ -363,11 +364,15 @@ good_area: } BUG(); } - if (fault & VM_FAULT_MAJOR) + if (fault & VM_FAULT_MAJOR) { tsk->maj_flt++; - else + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, + regs, address); + } else { tsk->min_flt++; - + perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, + regs, address); + } up_read(&mm->mmap_sem); /* * The instruction that caused the program check will diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c index fc0ad73ffd9..f92ec203ad9 100644 --- a/arch/s390/mm/page-states.c +++ b/arch/s390/mm/page-states.c @@ -1,6 +1,4 @@ /* - * arch/s390/mm/page-states.c - * * Copyright IBM Corp. 2008 * * Guest page hinting for unused pages. @@ -17,11 +15,12 @@ #define ESSA_SET_STABLE 1 #define ESSA_SET_UNUSED 2 -static int cmma_flag; +static int cmma_flag = 1; static int __init cmma(char *str) { char *parm; + parm = strstrip(str); if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) { cmma_flag = 1; @@ -32,7 +31,6 @@ static int __init cmma(char *str) return 1; return 0; } - __setup("cmma=", cmma); void __init cmma_init(void) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 56566720798..c7021524707 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -78,9 +78,9 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) } page->index = page_to_phys(shadow); } - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_add(&page->lru, &mm->context.crst_list); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } @@ -89,9 +89,9 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) unsigned long *shadow = get_shadow_table(table); struct page *page = virt_to_page(table); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_del(&page->lru); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); if (shadow) free_pages((unsigned long) shadow, ALLOC_ORDER); free_pages((unsigned long) table, ALLOC_ORDER); @@ -182,7 +182,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long bits; bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, @@ -191,7 +191,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page = NULL; } if (!page) { - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -202,7 +202,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } table = (unsigned long *) page_to_phys(page); @@ -213,7 +213,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page->flags |= bits; if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) list_move_tail(&page->lru, &mm->context.pgtable_list); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); return table; } @@ -225,7 +225,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); page->flags ^= bits; if (page->flags & FRAG_MASK) { /* Page now has some free pgtable fragments. */ @@ -234,7 +234,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } else /* All fragments of the 4K page have been freed. */ list_del(&page->lru); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); if (page) { pgtable_page_dtor(page); __free_page(page); @@ -245,7 +245,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) { struct page *page; - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); /* Free shadow region and segment tables. */ list_for_each_entry(page, &mm->context.crst_list, lru) if (page->index) { @@ -255,7 +255,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) /* "Free" second halves of page tables. */ list_for_each_entry(page, &mm->context.pgtable_list, lru) page->flags &= ~SECOND_HALVES; - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); mm->context.noexec = 0; update_mm(mm, tsk); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index e4868bfc672..5f91a38d759 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -331,6 +331,7 @@ void __init vmem_map_init(void) unsigned long start, end; int i; + spin_lock_init(&init_mm.context.list_lock); INIT_LIST_HEAD(&init_mm.context.crst_list); INIT_LIST_HEAD(&init_mm.context.pgtable_list); init_mm.context.noexec = 0; diff --git a/arch/s390/power/Makefile b/arch/s390/power/Makefile deleted file mode 100644 index 973bb45a8fe..00000000000 --- a/arch/s390/power/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for s390 PM support -# - -obj-$(CONFIG_HIBERNATION) += suspend.o -obj-$(CONFIG_HIBERNATION) += swsusp.o -obj-$(CONFIG_HIBERNATION) += swsusp_64.o -obj-$(CONFIG_HIBERNATION) += swsusp_asm64.o diff --git a/arch/s390/power/suspend.c b/arch/s390/power/suspend.c deleted file mode 100644 index b3351eceebb..00000000000 --- a/arch/s390/power/suspend.c +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Suspend support specific for s390. - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> - */ - -#include <linux/mm.h> -#include <linux/suspend.h> -#include <linux/reboot.h> -#include <linux/pfn.h> -#include <asm/sections.h> -#include <asm/ipl.h> - -/* - * References to section boundaries - */ -extern const void __nosave_begin, __nosave_end; - -/* - * check if given pfn is in the 'nosave' or in the read only NSS section - */ -int pfn_is_nosave(unsigned long pfn) -{ - unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; - unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) - >> PAGE_SHIFT; - unsigned long eshared_pfn = PFN_DOWN(__pa(&_eshared)) - 1; - unsigned long stext_pfn = PFN_DOWN(__pa(&_stext)); - - if (pfn >= nosave_begin_pfn && pfn < nosave_end_pfn) - return 1; - if (pfn >= stext_pfn && pfn <= eshared_pfn) { - if (ipl_info.type == IPL_TYPE_NSS) - return 1; - } else if ((tprot(pfn * PAGE_SIZE) && pfn > 0)) - return 1; - return 0; -} diff --git a/arch/s390/power/swsusp_64.c b/arch/s390/power/swsusp_64.c deleted file mode 100644 index 9516a517d72..00000000000 --- a/arch/s390/power/swsusp_64.c +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Support for suspend and resume on s390 - * - * Copyright IBM Corp. 2009 - * - * Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com> - * - */ - -#include <asm/system.h> -#include <linux/interrupt.h> - -void do_after_copyback(void) -{ - mb(); -} - diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index b5afbec1db5..04a21883f32 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -640,5 +640,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c index 0663a0ee602..9e5c9b1d7e9 100644 --- a/arch/sh/kernel/signal_64.c +++ b/arch/sh/kernel/signal_64.c @@ -772,5 +772,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_info if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eab..233cff53a62 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,6 +25,8 @@ config SPARC select ARCH_WANT_OPTIONAL_GPIOLIB select RTC_CLASS select RTC_DRV_M48T59 + select HAVE_DMA_ATTRS + select HAVE_DMA_API_DEBUG config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 204e4bf6443..5a8c308e2b5 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -3,6 +3,7 @@ #include <linux/scatterlist.h> #include <linux/mm.h> +#include <linux/dma-debug.h> #define DMA_ERROR_CODE (~(dma_addr_t)0x0) @@ -13,142 +14,40 @@ extern int dma_set_mask(struct device *dev, u64 dma_mask); #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) #define dma_is_consistent(d, h) (1) -struct dma_ops { - void *(*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - void (*free_coherent)(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle); - dma_addr_t (*map_page)(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction); - void (*unmap_page)(struct device *dev, dma_addr_t dma_addr, - size_t size, - enum dma_data_direction direction); - int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction direction); - void (*unmap_sg)(struct device *dev, struct scatterlist *sg, - int nhwentries, - enum dma_data_direction direction); - void (*sync_single_for_cpu)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - void (*sync_single_for_device)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction); - void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, - int nelems, - enum dma_data_direction direction); - void (*sync_sg_for_device)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); -}; -extern const struct dma_ops *dma_ops; +extern struct dma_map_ops *dma_ops, pci32_dma_ops; +extern struct bus_type pci_bus_type; -static inline void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - return dma_ops->alloc_coherent(dev, size, dma_handle, flag); -} - -static inline void dma_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - dma_ops->free_coherent(dev, size, cpu_addr, dma_handle); -} - -static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, - size_t size, - enum dma_data_direction direction) -{ - return dma_ops->map_page(dev, virt_to_page(cpu_addr), - (unsigned long)cpu_addr & ~PAGE_MASK, size, - direction); -} - -static inline void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, - size_t size, - enum dma_data_direction direction) -{ - dma_ops->unmap_page(dev, dma_addr, size, direction); -} - -static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - return dma_ops->map_page(dev, page, offset, size, direction); -} - -static inline void dma_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, - enum dma_data_direction direction) -{ - dma_ops->unmap_page(dev, dma_address, size, direction); -} - -static inline int dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ - return dma_ops->map_sg(dev, sg, nents, direction); -} - -static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) +static inline struct dma_map_ops *get_dma_ops(struct device *dev) { - dma_ops->unmap_sg(dev, sg, nents, direction); -} - -static inline void dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - dma_ops->sync_single_for_cpu(dev, dma_handle, size, direction); +#if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) + if (dev->bus == &pci_bus_type) + return &pci32_dma_ops; +#endif + return dma_ops; } -static inline void dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, - size_t size, - enum dma_data_direction direction) -{ - if (dma_ops->sync_single_for_device) - dma_ops->sync_single_for_device(dev, dma_handle, size, - direction); -} +#include <asm-generic/dma-mapping-common.h> -static inline void dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction direction) +static inline void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag) { - dma_ops->sync_sg_for_cpu(dev, sg, nelems, direction); -} + struct dma_map_ops *ops = get_dma_ops(dev); + void *cpu_addr; -static inline void dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - if (dma_ops->sync_sg_for_device) - dma_ops->sync_sg_for_device(dev, sg, nelems, direction); + cpu_addr = ops->alloc_coherent(dev, size, dma_handle, flag); + debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr); + return cpu_addr; } -static inline void dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - enum dma_data_direction dir) +static inline void dma_free_coherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_handle) { - dma_sync_single_for_cpu(dev, dma_handle+offset, size, dir); -} + struct dma_map_ops *ops = get_dma_ops(dev); -static inline void dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, - enum dma_data_direction dir) -{ - dma_sync_single_for_device(dev, dma_handle+offset, size, dir); + debug_dma_free_coherent(dev, size, cpu_addr, dma_handle); + ops->free_coherent(dev, size, cpu_addr, dma_handle); } - static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return (dma_addr == DMA_ERROR_CODE); diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 1934f2cbf51..a0b443cb3c1 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -89,8 +89,8 @@ static inline unsigned long get_softint(void) return retval; } -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; diff --git a/arch/sparc/include/asm/pci.h b/arch/sparc/include/asm/pci.h index 6e14fd17933..d9c031f9910 100644 --- a/arch/sparc/include/asm/pci.h +++ b/arch/sparc/include/asm/pci.h @@ -5,4 +5,7 @@ #else #include <asm/pci_32.h> #endif + +#include <asm-generic/pci-dma-compat.h> + #endif diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h index b41c4c19815..ac0e8369fd9 100644 --- a/arch/sparc/include/asm/pci_32.h +++ b/arch/sparc/include/asm/pci_32.h @@ -31,42 +31,8 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) */ #define PCI_DMA_BUS_IS_PHYS (0) -#include <asm/scatterlist.h> - struct pci_dev; -/* Allocate and map kernel buffer using consistent mode DMA for a device. - * hwdev should be valid struct pci_dev pointer for PCI devices. - */ -extern void *pci_alloc_consistent(struct pci_dev *hwdev, size_t size, dma_addr_t *dma_handle); - -/* Free and unmap a consistent DMA buffer. - * cpu_addr is what was returned from pci_alloc_consistent, - * size must be the same as what as passed into pci_alloc_consistent, - * and likewise dma_addr must be the same as what *dma_addrp was set to. - * - * References to the memory and mappings assosciated with cpu_addr/dma_addr - * past this call are illegal. - */ -extern void pci_free_consistent(struct pci_dev *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); - -/* Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single_for_cpu is performed. - */ -extern dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, int direction); - -/* Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t size, int direction); - /* pci_unmap_{single,page} is not a nop, thus... */ #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; @@ -81,69 +47,6 @@ extern void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t dma_addr, size_t #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -/* - * Same as above, only with pages instead of mapped addresses. - */ -extern dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction); -extern void pci_unmap_page(struct pci_dev *hwdev, - dma_addr_t dma_address, size_t size, int direction); - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scather-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -extern int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nents, int direction); - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -extern void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sg, int nhwents, int direction); - -/* Make physical memory consistent for a single - * streaming mode DMA translation after a transfer. - * - * If you perform a pci_map_single() but wish to interrogate the - * buffer using the cpu, yet do not wish to teardown the PCI dma - * mapping, you must call this function before doing so. At the - * next point you give the PCI dma address back to the card, you - * must first perform a pci_dma_sync_for_device, and then the device - * again owns the buffer. - */ -extern void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); -extern void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction); - -/* Make physical memory consistent for a set of streaming - * mode DMA translations after a transfer. - * - * The same as pci_dma_sync_single_* but for a scatter-gather list, - * same rules and usage. - */ -extern void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); -extern void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sg, int nelems, int direction); - -/* Return whether the given PCI device DMA address mask can - * be supported properly. For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. - */ -static inline int pci_dma_supported(struct pci_dev *hwdev, u64 mask) -{ - return 1; -} - #ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, @@ -154,14 +57,6 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, } #endif -#define PCI_DMA_ERROR_CODE (~(dma_addr_t)0x0) - -static inline int pci_dma_mapping_error(struct pci_dev *pdev, - dma_addr_t dma_addr) -{ - return (dma_addr == PCI_DMA_ERROR_CODE); -} - struct device_node; extern struct device_node *pci_device_to_OF_node(struct pci_dev *pdev); diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index 7a1e3566e59..5cc9f6aa549 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -35,37 +35,6 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) */ #define PCI_DMA_BUS_IS_PHYS (0) -static inline void *pci_alloc_consistent(struct pci_dev *pdev, size_t size, - dma_addr_t *dma_handle) -{ - return dma_alloc_coherent(&pdev->dev, size, dma_handle, GFP_ATOMIC); -} - -static inline void pci_free_consistent(struct pci_dev *pdev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - return dma_free_coherent(&pdev->dev, size, vaddr, dma_handle); -} - -static inline dma_addr_t pci_map_single(struct pci_dev *pdev, void *ptr, - size_t size, int direction) -{ - return dma_map_single(&pdev->dev, ptr, size, - (enum dma_data_direction) direction); -} - -static inline void pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, - size_t size, int direction) -{ - dma_unmap_single(&pdev->dev, dma_addr, size, - (enum dma_data_direction) direction); -} - -#define pci_map_page(dev, page, off, size, dir) \ - pci_map_single(dev, (page_address(page) + (off)), size, dir) -#define pci_unmap_page(dev,addr,sz,dir) \ - pci_unmap_single(dev,addr,sz,dir) - /* pci_unmap_{single,page} is not a nop, thus... */ #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; @@ -80,57 +49,6 @@ static inline void pci_unmap_single(struct pci_dev *pdev, dma_addr_t dma_addr, #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -static inline int pci_map_sg(struct pci_dev *pdev, struct scatterlist *sg, - int nents, int direction) -{ - return dma_map_sg(&pdev->dev, sg, nents, - (enum dma_data_direction) direction); -} - -static inline void pci_unmap_sg(struct pci_dev *pdev, struct scatterlist *sg, - int nents, int direction) -{ - dma_unmap_sg(&pdev->dev, sg, nents, - (enum dma_data_direction) direction); -} - -static inline void pci_dma_sync_single_for_cpu(struct pci_dev *pdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - dma_sync_single_for_cpu(&pdev->dev, dma_handle, size, - (enum dma_data_direction) direction); -} - -static inline void pci_dma_sync_single_for_device(struct pci_dev *pdev, - dma_addr_t dma_handle, - size_t size, int direction) -{ - /* No flushing needed to sync cpu writes to the device. */ -} - -static inline void pci_dma_sync_sg_for_cpu(struct pci_dev *pdev, - struct scatterlist *sg, - int nents, int direction) -{ - dma_sync_sg_for_cpu(&pdev->dev, sg, nents, - (enum dma_data_direction) direction); -} - -static inline void pci_dma_sync_sg_for_device(struct pci_dev *pdev, - struct scatterlist *sg, - int nelems, int direction) -{ - /* No flushing needed to sync cpu writes to the device. */ -} - -/* Return whether the given PCI device DMA address mask can - * be supported properly. For example, if your device can - * only drive the low 24-bits during PCI bus mastering, then - * you would pass 0x00ffffff as the mask to this function. - */ -extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); - /* PCI IOMMU mapping bypass support. */ /* PCI 64-bit addressing works for all slots on all controller @@ -140,12 +58,6 @@ extern int pci_dma_supported(struct pci_dev *hwdev, u64 mask); #define PCI64_REQUIRED_MASK (~(dma64_addr_t)0) #define PCI64_ADDR_BASE 0xfffc000000000000UL -static inline int pci_dma_mapping_error(struct pci_dev *pdev, - dma_addr_t dma_addr) -{ - return dma_mapping_error(&pdev->dev, dma_addr); -} - #ifdef CONFIG_PCI static inline void pci_dma_burst_advice(struct pci_dev *pdev, enum pci_dma_burst_strategy *strat, diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 46f91ab66a5..857630cff63 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -76,7 +76,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) * * Unfortunately this scheme limits us to ~16,000,000 cpus. */ -static inline void __read_lock(raw_rwlock_t *rw) +static inline void arch_read_lock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); lp = rw; @@ -92,11 +92,11 @@ static inline void __read_lock(raw_rwlock_t *rw) #define __raw_read_lock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - __read_lock(lock); \ + arch_read_lock(lock); \ local_irq_restore(flags); \ } while(0) -static inline void __read_unlock(raw_rwlock_t *rw) +static inline void arch_read_unlock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); lp = rw; @@ -112,7 +112,7 @@ static inline void __read_unlock(raw_rwlock_t *rw) #define __raw_read_unlock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - __read_unlock(lock); \ + arch_read_unlock(lock); \ local_irq_restore(flags); \ } while(0) @@ -150,7 +150,7 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) return (val == 0); } -static inline int __read_trylock(raw_rwlock_t *rw) +static inline int arch_read_trylock(raw_rwlock_t *rw) { register raw_rwlock_t *lp asm("g1"); register int res asm("o0"); @@ -169,7 +169,7 @@ static inline int __read_trylock(raw_rwlock_t *rw) ({ unsigned long flags; \ int res; \ local_irq_save(flags); \ - res = __read_trylock(lock); \ + res = arch_read_trylock(lock); \ local_irq_restore(flags); \ res; \ }) diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index f6b2b92ad8d..43e51478358 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -92,7 +92,7 @@ static inline void __raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ -static void inline __read_lock(raw_rwlock_t *lock) +static void inline arch_read_lock(raw_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -115,7 +115,7 @@ static void inline __read_lock(raw_rwlock_t *lock) : "memory"); } -static int inline __read_trylock(raw_rwlock_t *lock) +static int inline arch_read_trylock(raw_rwlock_t *lock) { int tmp1, tmp2; @@ -136,7 +136,7 @@ static int inline __read_trylock(raw_rwlock_t *lock) return tmp1; } -static void inline __read_unlock(raw_rwlock_t *lock) +static void inline arch_read_unlock(raw_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -152,7 +152,7 @@ static void inline __read_unlock(raw_rwlock_t *lock) : "memory"); } -static void inline __write_lock(raw_rwlock_t *lock) +static void inline arch_write_lock(raw_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -177,7 +177,7 @@ static void inline __write_lock(raw_rwlock_t *lock) : "memory"); } -static void inline __write_unlock(raw_rwlock_t *lock) +static void inline arch_write_unlock(raw_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -186,7 +186,7 @@ static void inline __write_unlock(raw_rwlock_t *lock) : "memory"); } -static int inline __write_trylock(raw_rwlock_t *lock) +static int inline arch_write_trylock(raw_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; @@ -210,14 +210,14 @@ static int inline __write_trylock(raw_rwlock_t *lock) return result; } -#define __raw_read_lock(p) __read_lock(p) -#define __raw_read_lock_flags(p, f) __read_lock(p) -#define __raw_read_trylock(p) __read_trylock(p) -#define __raw_read_unlock(p) __read_unlock(p) -#define __raw_write_lock(p) __write_lock(p) -#define __raw_write_lock_flags(p, f) __write_lock(p) -#define __raw_write_unlock(p) __write_unlock(p) -#define __raw_write_trylock(p) __write_trylock(p) +#define __raw_read_lock(p) arch_read_lock(p) +#define __raw_read_lock_flags(p, f) arch_read_lock(p) +#define __raw_read_trylock(p) arch_read_trylock(p) +#define __raw_read_unlock(p) arch_read_unlock(p) +#define __raw_write_lock(p) arch_write_lock(p) +#define __raw_write_lock_flags(p, f) arch_write_lock(p) +#define __raw_write_unlock(p) arch_write_unlock(p) +#define __raw_write_trylock(p) arch_write_trylock(p) #define __raw_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) #define __raw_write_can_lock(rw) (!(rw)->lock) diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 475ce4696ac..29b88a58066 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -61,7 +61,7 @@ obj-$(CONFIG_SPARC64_SMP) += cpumap.o obj-$(CONFIG_SPARC32) += devres.o devres-y := ../../../kernel/irq/devres.o -obj-$(CONFIG_SPARC32) += dma.o +obj-y += dma.o obj-$(CONFIG_SPARC32_PCI) += pcic.o diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c index 524c32f97c5..e1ba8ee21b9 100644 --- a/arch/sparc/kernel/dma.c +++ b/arch/sparc/kernel/dma.c @@ -1,178 +1,13 @@ -/* dma.c: PCI and SBUS DMA accessors for 32-bit sparc. - * - * Copyright (C) 2008 David S. Miller <davem@davemloft.net> - */ - #include <linux/kernel.h> #include <linux/module.h> #include <linux/dma-mapping.h> -#include <linux/scatterlist.h> -#include <linux/mm.h> - -#ifdef CONFIG_PCI -#include <linux/pci.h> -#endif +#include <linux/dma-debug.h> -#include "dma.h" +#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 15) -int dma_supported(struct device *dev, u64 mask) +static int __init dma_init(void) { -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_dma_supported(to_pci_dev(dev), mask); -#endif + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); return 0; } -EXPORT_SYMBOL(dma_supported); - -int dma_set_mask(struct device *dev, u64 dma_mask) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_set_dma_mask(to_pci_dev(dev), dma_mask); -#endif - return -EOPNOTSUPP; -} -EXPORT_SYMBOL(dma_set_mask); - -static void *dma32_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_alloc_consistent(to_pci_dev(dev), size, dma_handle); -#endif - return sbus_alloc_consistent(dev, size, dma_handle); -} - -static void dma32_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_free_consistent(to_pci_dev(dev), size, - cpu_addr, dma_handle); - return; - } -#endif - sbus_free_consistent(dev, size, cpu_addr, dma_handle); -} - -static dma_addr_t dma32_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_map_page(to_pci_dev(dev), page, offset, - size, (int)direction); -#endif - return sbus_map_single(dev, page_address(page) + offset, - size, (int)direction); -} - -static void dma32_unmap_page(struct device *dev, dma_addr_t dma_address, - size_t size, enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_unmap_page(to_pci_dev(dev), dma_address, - size, (int)direction); - return; - } -#endif - sbus_unmap_single(dev, dma_address, size, (int)direction); -} - -static int dma32_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) - return pci_map_sg(to_pci_dev(dev), sg, nents, (int)direction); -#endif - return sbus_map_sg(dev, sg, nents, direction); -} - -void dma32_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_unmap_sg(to_pci_dev(dev), sg, nents, (int)direction); - return; - } -#endif - sbus_unmap_sg(dev, sg, nents, (int)direction); -} - -static void dma32_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_single_for_cpu(to_pci_dev(dev), dma_handle, - size, (int)direction); - return; - } -#endif - sbus_dma_sync_single_for_cpu(dev, dma_handle, size, (int) direction); -} - -static void dma32_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_single_for_device(to_pci_dev(dev), dma_handle, - size, (int)direction); - return; - } -#endif - sbus_dma_sync_single_for_device(dev, dma_handle, size, (int) direction); -} - -static void dma32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_sg_for_cpu(to_pci_dev(dev), sg, - nelems, (int)direction); - return; - } -#endif - BUG(); -} - -static void dma32_sync_sg_for_device(struct device *dev, - struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ -#ifdef CONFIG_PCI - if (dev->bus == &pci_bus_type) { - pci_dma_sync_sg_for_device(to_pci_dev(dev), sg, - nelems, (int)direction); - return; - } -#endif - BUG(); -} - -static const struct dma_ops dma32_dma_ops = { - .alloc_coherent = dma32_alloc_coherent, - .free_coherent = dma32_free_coherent, - .map_page = dma32_map_page, - .unmap_page = dma32_unmap_page, - .map_sg = dma32_map_sg, - .unmap_sg = dma32_unmap_sg, - .sync_single_for_cpu = dma32_sync_single_for_cpu, - .sync_single_for_device = dma32_sync_single_for_device, - .sync_sg_for_cpu = dma32_sync_sg_for_cpu, - .sync_sg_for_device = dma32_sync_sg_for_device, -}; - -const struct dma_ops *dma_ops = &dma32_dma_ops; -EXPORT_SYMBOL(dma_ops); +fs_initcall(dma_init); diff --git a/arch/sparc/kernel/dma.h b/arch/sparc/kernel/dma.h deleted file mode 100644 index f8d8951adb5..00000000000 --- a/arch/sparc/kernel/dma.h +++ /dev/null @@ -1,14 +0,0 @@ -void *sbus_alloc_consistent(struct device *dev, long len, u32 *dma_addrp); -void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba); -dma_addr_t sbus_map_single(struct device *dev, void *va, - size_t len, int direction); -void sbus_unmap_single(struct device *dev, dma_addr_t ba, - size_t n, int direction); -int sbus_map_sg(struct device *dev, struct scatterlist *sg, - int n, int direction); -void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, - int n, int direction); -void sbus_dma_sync_single_for_cpu(struct device *dev, dma_addr_t ba, - size_t size, int direction); -void sbus_dma_sync_single_for_device(struct device *dev, dma_addr_t ba, - size_t size, int direction); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 0aeaefe696b..7690cc219ec 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -353,7 +353,8 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, static dma_addr_t dma_4u_map_page(struct device *dev, struct page *page, unsigned long offset, size_t sz, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { struct iommu *iommu; struct strbuf *strbuf; @@ -474,7 +475,8 @@ do_flush_sync: } static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, - size_t sz, enum dma_data_direction direction) + size_t sz, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct iommu *iommu; struct strbuf *strbuf; @@ -520,7 +522,8 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, } static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *s, *outs, *segstart; unsigned long flags, handle, prot, ctx; @@ -691,7 +694,8 @@ static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) } static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { unsigned long flags, ctx; struct scatterlist *sg; @@ -822,7 +826,7 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, spin_unlock_irqrestore(&iommu->lock, flags); } -static const struct dma_ops sun4u_dma_ops = { +static struct dma_map_ops sun4u_dma_ops = { .alloc_coherent = dma_4u_alloc_coherent, .free_coherent = dma_4u_free_coherent, .map_page = dma_4u_map_page, @@ -833,9 +837,11 @@ static const struct dma_ops sun4u_dma_ops = { .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, }; -const struct dma_ops *dma_ops = &sun4u_dma_ops; +struct dma_map_ops *dma_ops = &sun4u_dma_ops; EXPORT_SYMBOL(dma_ops); +extern int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask); + int dma_supported(struct device *dev, u64 device_mask) { struct iommu *iommu = dev->archdata.iommu; @@ -849,7 +855,7 @@ int dma_supported(struct device *dev, u64 device_mask) #ifdef CONFIG_PCI if (dev->bus == &pci_bus_type) - return pci_dma_supported(to_pci_dev(dev), device_mask); + return pci64_dma_supported(to_pci_dev(dev), device_mask); #endif return 0; diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 87ea0d03d97..edbea232c61 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -48,8 +48,6 @@ #include <asm/iommu.h> #include <asm/io-unit.h> -#include "dma.h" - #define mmu_inval_dma_area(p, l) /* Anton pulled it out for 2.4.0-xx */ static struct resource *_sparc_find_resource(struct resource *r, @@ -246,7 +244,8 @@ EXPORT_SYMBOL(sbus_set_sbus64); * Typically devices use them for control blocks. * CPU may access them without any explicit flushing. */ -void *sbus_alloc_consistent(struct device *dev, long len, u32 *dma_addrp) +static void *sbus_alloc_coherent(struct device *dev, size_t len, + dma_addr_t *dma_addrp, gfp_t gfp) { struct of_device *op = to_of_device(dev); unsigned long len_total = (len + PAGE_SIZE-1) & PAGE_MASK; @@ -299,7 +298,8 @@ err_nopages: return NULL; } -void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba) +static void sbus_free_coherent(struct device *dev, size_t n, void *p, + dma_addr_t ba) { struct resource *res; struct page *pgv; @@ -317,7 +317,7 @@ void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba) n = (n + PAGE_SIZE-1) & PAGE_MASK; if ((res->end-res->start)+1 != n) { - printk("sbus_free_consistent: region 0x%lx asked 0x%lx\n", + printk("sbus_free_consistent: region 0x%lx asked 0x%zx\n", (long)((res->end-res->start)+1), n); return; } @@ -337,8 +337,13 @@ void sbus_free_consistent(struct device *dev, long n, void *p, u32 ba) * CPU view of this memory may be inconsistent with * a device view and explicit flushing is necessary. */ -dma_addr_t sbus_map_single(struct device *dev, void *va, size_t len, int direction) +static dma_addr_t sbus_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t len, + enum dma_data_direction dir, + struct dma_attrs *attrs) { + void *va = page_address(page) + offset; + /* XXX why are some lengths signed, others unsigned? */ if (len <= 0) { return 0; @@ -350,12 +355,14 @@ dma_addr_t sbus_map_single(struct device *dev, void *va, size_t len, int directi return mmu_get_scsi_one(dev, va, len); } -void sbus_unmap_single(struct device *dev, dma_addr_t ba, size_t n, int direction) +static void sbus_unmap_page(struct device *dev, dma_addr_t ba, size_t n, + enum dma_data_direction dir, struct dma_attrs *attrs) { mmu_release_scsi_one(dev, ba, n); } -int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, int direction) +static int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, + enum dma_data_direction dir, struct dma_attrs *attrs) { mmu_get_scsi_sgl(dev, sg, n); @@ -366,19 +373,38 @@ int sbus_map_sg(struct device *dev, struct scatterlist *sg, int n, int direction return n; } -void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n, int direction) +static void sbus_unmap_sg(struct device *dev, struct scatterlist *sg, int n, + enum dma_data_direction dir, struct dma_attrs *attrs) { mmu_release_scsi_sgl(dev, sg, n); } -void sbus_dma_sync_single_for_cpu(struct device *dev, dma_addr_t ba, size_t size, int direction) +static void sbus_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, + int n, enum dma_data_direction dir) { + BUG(); } -void sbus_dma_sync_single_for_device(struct device *dev, dma_addr_t ba, size_t size, int direction) +static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg, + int n, enum dma_data_direction dir) { + BUG(); } +struct dma_map_ops sbus_dma_ops = { + .alloc_coherent = sbus_alloc_coherent, + .free_coherent = sbus_free_coherent, + .map_page = sbus_map_page, + .unmap_page = sbus_unmap_page, + .map_sg = sbus_map_sg, + .unmap_sg = sbus_unmap_sg, + .sync_sg_for_cpu = sbus_sync_sg_for_cpu, + .sync_sg_for_device = sbus_sync_sg_for_device, +}; + +struct dma_map_ops *dma_ops = &sbus_dma_ops; +EXPORT_SYMBOL(dma_ops); + static int __init sparc_register_ioport(void) { register_proc_sparc_ioport(); @@ -395,7 +421,8 @@ arch_initcall(sparc_register_ioport); /* Allocate and map kernel buffer using consistent mode DMA for a device. * hwdev should be valid struct pci_dev pointer for PCI devices. */ -void *pci_alloc_consistent(struct pci_dev *pdev, size_t len, dma_addr_t *pba) +static void *pci32_alloc_coherent(struct device *dev, size_t len, + dma_addr_t *pba, gfp_t gfp) { unsigned long len_total = (len + PAGE_SIZE-1) & PAGE_MASK; unsigned long va; @@ -439,7 +466,6 @@ void *pci_alloc_consistent(struct pci_dev *pdev, size_t len, dma_addr_t *pba) *pba = virt_to_phys(va); /* equals virt_to_bus (R.I.P.) for us. */ return (void *) res->start; } -EXPORT_SYMBOL(pci_alloc_consistent); /* Free and unmap a consistent DMA buffer. * cpu_addr is what was returned from pci_alloc_consistent, @@ -449,7 +475,8 @@ EXPORT_SYMBOL(pci_alloc_consistent); * References to the memory and mappings associated with cpu_addr/dma_addr * past this call are illegal. */ -void pci_free_consistent(struct pci_dev *pdev, size_t n, void *p, dma_addr_t ba) +static void pci32_free_coherent(struct device *dev, size_t n, void *p, + dma_addr_t ba) { struct resource *res; unsigned long pgp; @@ -481,60 +508,18 @@ void pci_free_consistent(struct pci_dev *pdev, size_t n, void *p, dma_addr_t ba) free_pages(pgp, get_order(n)); } -EXPORT_SYMBOL(pci_free_consistent); - -/* Map a single buffer of the indicated size for DMA in streaming mode. - * The 32-bit bus address to use is returned. - * - * Once the device is given the dma address, the device owns this memory - * until either pci_unmap_single or pci_dma_sync_single_* is performed. - */ -dma_addr_t pci_map_single(struct pci_dev *hwdev, void *ptr, size_t size, - int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - /* IIep is write-through, not flushing. */ - return virt_to_phys(ptr); -} -EXPORT_SYMBOL(pci_map_single); - -/* Unmap a single streaming mode DMA translation. The dma_addr and size - * must match what was provided for in a previous pci_map_single call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -void pci_unmap_single(struct pci_dev *hwdev, dma_addr_t ba, size_t size, - int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { - mmu_inval_dma_area((unsigned long)phys_to_virt(ba), - (size + PAGE_SIZE-1) & PAGE_MASK); - } -} -EXPORT_SYMBOL(pci_unmap_single); /* * Same as pci_map_single, but with pages. */ -dma_addr_t pci_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) +static dma_addr_t pci32_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, + struct dma_attrs *attrs) { - BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ return page_to_phys(page) + offset; } -EXPORT_SYMBOL(pci_map_page); - -void pci_unmap_page(struct pci_dev *hwdev, - dma_addr_t dma_address, size_t size, int direction) -{ - BUG_ON(direction == PCI_DMA_NONE); - /* mmu_inval_dma_area XXX */ -} -EXPORT_SYMBOL(pci_unmap_page); /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scather-gather version of the @@ -551,13 +536,13 @@ EXPORT_SYMBOL(pci_unmap_page); * Device ownership issues as mentioned above for pci_map_single are * the same here. */ -int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, - int direction) +static int pci32_map_sg(struct device *device, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); /* IIep is write-through, not flushing. */ for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); @@ -566,20 +551,19 @@ int pci_map_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, } return nents; } -EXPORT_SYMBOL(pci_map_sg); /* Unmap a set of streaming mode DMA translations. * Again, cpu read rules concerning calls here are the same as for * pci_unmap_single() above. */ -void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, - int direction) +static void pci32_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, + struct dma_attrs *attrs) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( @@ -588,7 +572,6 @@ void pci_unmap_sg(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, } } } -EXPORT_SYMBOL(pci_unmap_sg); /* Make physical memory consistent for a single * streaming mode DMA translation before or after a transfer. @@ -600,25 +583,23 @@ EXPORT_SYMBOL(pci_unmap_sg); * must first perform a pci_dma_sync_for_device, and then the * device again owns the buffer. */ -void pci_dma_sync_single_for_cpu(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) +static void pci32_sync_single_for_cpu(struct device *dev, dma_addr_t ba, + size_t size, enum dma_data_direction dir) { - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); } } -EXPORT_SYMBOL(pci_dma_sync_single_for_cpu); -void pci_dma_sync_single_for_device(struct pci_dev *hwdev, dma_addr_t ba, size_t size, int direction) +static void pci32_sync_single_for_device(struct device *dev, dma_addr_t ba, + size_t size, enum dma_data_direction dir) { - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { mmu_inval_dma_area((unsigned long)phys_to_virt(ba), (size + PAGE_SIZE-1) & PAGE_MASK); } } -EXPORT_SYMBOL(pci_dma_sync_single_for_device); /* Make physical memory consistent for a set of streaming * mode DMA translations after a transfer. @@ -626,13 +607,13 @@ EXPORT_SYMBOL(pci_dma_sync_single_for_device); * The same as pci_dma_sync_single_* but for a scatter-gather list, * same rules and usage. */ -void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) +static void pci32_sync_sg_for_cpu(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( @@ -641,15 +622,14 @@ void pci_dma_sync_sg_for_cpu(struct pci_dev *hwdev, struct scatterlist *sgl, int } } } -EXPORT_SYMBOL(pci_dma_sync_sg_for_cpu); -void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl, int nents, int direction) +static void pci32_sync_sg_for_device(struct device *device, struct scatterlist *sgl, + int nents, enum dma_data_direction dir) { struct scatterlist *sg; int n; - BUG_ON(direction == PCI_DMA_NONE); - if (direction != PCI_DMA_TODEVICE) { + if (dir != PCI_DMA_TODEVICE) { for_each_sg(sgl, sg, nents, n) { BUG_ON(page_address(sg_page(sg)) == NULL); mmu_inval_dma_area( @@ -658,9 +638,49 @@ void pci_dma_sync_sg_for_device(struct pci_dev *hwdev, struct scatterlist *sgl, } } } -EXPORT_SYMBOL(pci_dma_sync_sg_for_device); + +struct dma_map_ops pci32_dma_ops = { + .alloc_coherent = pci32_alloc_coherent, + .free_coherent = pci32_free_coherent, + .map_page = pci32_map_page, + .map_sg = pci32_map_sg, + .unmap_sg = pci32_unmap_sg, + .sync_single_for_cpu = pci32_sync_single_for_cpu, + .sync_single_for_device = pci32_sync_single_for_device, + .sync_sg_for_cpu = pci32_sync_sg_for_cpu, + .sync_sg_for_device = pci32_sync_sg_for_device, +}; +EXPORT_SYMBOL(pci32_dma_ops); + #endif /* CONFIG_PCI */ +/* + * Return whether the given PCI device DMA address mask can be + * supported properly. For example, if your device can only drive the + * low 24-bits during PCI bus mastering, then you would pass + * 0x00ffffff as the mask to this function. + */ +int dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return 1; +#endif + return 0; +} +EXPORT_SYMBOL(dma_supported); + +int dma_set_mask(struct device *dev, u64 dma_mask) +{ +#ifdef CONFIG_PCI + if (dev->bus == &pci_bus_type) + return pci_set_dma_mask(to_pci_dev(dev), dma_mask); +#endif + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(dma_set_mask); + + #ifdef CONFIG_PROC_FS static int diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index f0ee7905540..8daab33fc17 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -886,7 +886,7 @@ void notrace init_irqwork_curcpu(void) * Therefore you cannot make any OBP calls, not even prom_printf, * from these two routines. */ -static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask) +static void __cpuinit notrace register_one_mondo(unsigned long paddr, unsigned long type, unsigned long qmask) { unsigned long num_entries = (qmask + 1) / 64; unsigned long status; diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index 2c0cc72d295..b75bf502cd4 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -103,7 +103,7 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) } if (!touched && __get_cpu_var(last_irq_sum) == sum) { local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + if (local_read(&__get_cpu_var(alert_counter)) == 30 * nmi_hz) die_nmi("BUG: NMI Watchdog detected LOCKUP", regs, panic_on_timeout); } else { diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c index 57859ad2354..c6864866280 100644 --- a/arch/sparc/kernel/pci.c +++ b/arch/sparc/kernel/pci.c @@ -1039,7 +1039,7 @@ static void ali_sound_dma_hack(struct pci_dev *pdev, int set_bit) pci_dev_put(ali_isa_bridge); } -int pci_dma_supported(struct pci_dev *pdev, u64 device_mask) +int pci64_dma_supported(struct pci_dev *pdev, u64 device_mask) { u64 dma_addr_mask; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 2485eaa2310..23c33ff9c31 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -232,7 +232,8 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, unsigned long offset, size_t sz, - enum dma_data_direction direction) + enum dma_data_direction direction, + struct dma_attrs *attrs) { struct iommu *iommu; unsigned long flags, npages, oaddr; @@ -296,7 +297,8 @@ iommu_map_fail: } static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, - size_t sz, enum dma_data_direction direction) + size_t sz, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct pci_pbm_info *pbm; struct iommu *iommu; @@ -336,7 +338,8 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct scatterlist *s, *outs, *segstart; unsigned long flags, handle, prot; @@ -478,7 +481,8 @@ iommu_map_failed: } static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction) + int nelems, enum dma_data_direction direction, + struct dma_attrs *attrs) { struct pci_pbm_info *pbm; struct scatterlist *sg; @@ -521,29 +525,13 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, spin_unlock_irqrestore(&iommu->lock, flags); } -static void dma_4v_sync_single_for_cpu(struct device *dev, - dma_addr_t bus_addr, size_t sz, - enum dma_data_direction direction) -{ - /* Nothing to do... */ -} - -static void dma_4v_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sglist, int nelems, - enum dma_data_direction direction) -{ - /* Nothing to do... */ -} - -static const struct dma_ops sun4v_dma_ops = { +static struct dma_map_ops sun4v_dma_ops = { .alloc_coherent = dma_4v_alloc_coherent, .free_coherent = dma_4v_free_coherent, .map_page = dma_4v_map_page, .unmap_page = dma_4v_unmap_page, .map_sg = dma_4v_map_sg, .unmap_sg = dma_4v_unmap_sg, - .sync_single_for_cpu = dma_4v_sync_single_for_cpu, - .sync_sg_for_cpu = dma_4v_sync_sg_for_cpu, }; static void __devinit pci_sun4v_scan_bus(struct pci_pbm_info *pbm, diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4041f94e772..18d67854a1b 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -251,7 +251,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp) } } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { struct thread_info *tp = current_thread_info(); struct pt_regs *regs = get_irq_regs(); @@ -304,7 +304,7 @@ void __trigger_all_cpu_backtrace(void) static void sysrq_handle_globreg(int key, struct tty_struct *tty) { - __trigger_all_cpu_backtrace(); + arch_trigger_all_cpu_backtrace(); } static struct sysrq_key_op sparc_globalreg_op = { diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 181d069a2d4..7ce1a1005b1 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -590,6 +590,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index ec82d76dc6f..647afbda7ae 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -613,5 +613,8 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } } + diff --git a/arch/sparc/prom/misc_64.c b/arch/sparc/prom/misc_64.c index eedffb4fec2..39fc6af21b7 100644 --- a/arch/sparc/prom/misc_64.c +++ b/arch/sparc/prom/misc_64.c @@ -88,7 +88,7 @@ void prom_cmdline(void) /* Drop into the prom, but completely terminate the program. * No chance of continuing. */ -void prom_halt(void) +void notrace prom_halt(void) { #ifdef CONFIG_SUN_LDOMS if (ldom_domaining_enabled) diff --git a/arch/sparc/prom/printf.c b/arch/sparc/prom/printf.c index 660943ee4c2..ca869266b9f 100644 --- a/arch/sparc/prom/printf.c +++ b/arch/sparc/prom/printf.c @@ -14,14 +14,14 @@ */ #include <linux/kernel.h> +#include <linux/compiler.h> #include <asm/openprom.h> #include <asm/oplib.h> static char ppbuf[1024]; -void -prom_write(const char *buf, unsigned int n) +void notrace prom_write(const char *buf, unsigned int n) { char ch; @@ -33,8 +33,7 @@ prom_write(const char *buf, unsigned int n) } } -void -prom_printf(const char *fmt, ...) +void notrace prom_printf(const char *fmt, ...) { va_list args; int i; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 13ffa5df37d..1d9c18aa17e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -586,7 +586,6 @@ config GART_IOMMU bool "GART IOMMU support" if EMBEDDED default y select SWIOTLB - select AGP depends on X86_64 && PCI ---help--- Support for full DMA access of devices with 32bit memory access only diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index c580c5ec1ca..d3ec8d588d4 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -636,7 +636,7 @@ static int __init aesni_init(void) int err; if (!cpu_has_aes) { - printk(KERN_ERR "Intel AES-NI instructions are not detected.\n"); + printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); return -ENODEV; } if ((err = crypto_register_alg(&aesni_alg))) diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f0..ac95995b7ba 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -25,6 +25,7 @@ #ifdef CONFIG_AMD_IOMMU extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a..2a2cc7a78a8 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -143,22 +143,29 @@ #define EVT_BUFFER_SIZE 8192 /* 512 entries */ #define EVT_LEN_MASK (0x9ULL << 56) +#define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 #define PAGE_MODE_3_LEVEL 0x03 - -#define IOMMU_PDE_NL_0 0x000ULL -#define IOMMU_PDE_NL_1 0x200ULL -#define IOMMU_PDE_NL_2 0x400ULL -#define IOMMU_PDE_NL_3 0x600ULL - -#define IOMMU_PTE_L2_INDEX(address) (((address) >> 30) & 0x1ffULL) -#define IOMMU_PTE_L1_INDEX(address) (((address) >> 21) & 0x1ffULL) -#define IOMMU_PTE_L0_INDEX(address) (((address) >> 12) & 0x1ffULL) - -#define IOMMU_MAP_SIZE_L1 (1ULL << 21) -#define IOMMU_MAP_SIZE_L2 (1ULL << 30) -#define IOMMU_MAP_SIZE_L3 (1ULL << 39) +#define PAGE_MODE_4_LEVEL 0x04 +#define PAGE_MODE_5_LEVEL 0x05 +#define PAGE_MODE_6_LEVEL 0x06 + +#define PM_LEVEL_SHIFT(x) (12 + ((x) * 9)) +#define PM_LEVEL_SIZE(x) (((x) < 6) ? \ + ((1ULL << PM_LEVEL_SHIFT((x))) - 1): \ + (0xffffffffffffffffULL)) +#define PM_LEVEL_INDEX(x, a) (((a) >> PM_LEVEL_SHIFT((x))) & 0x1ffULL) +#define PM_LEVEL_ENC(x) (((x) << 9) & 0xe00ULL) +#define PM_LEVEL_PDE(x, a) ((a) | PM_LEVEL_ENC((x)) | \ + IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) +#define PM_PTE_LEVEL(pte) (((pte) >> 9) & 0x7ULL) + +#define PM_MAP_4k 0 +#define PM_ADDR_MASK 0x000ffffffffff000ULL +#define PM_MAP_MASK(lvl) (PM_ADDR_MASK & \ + (~((1ULL << (12 + ((lvl) * 9))) - 1))) +#define PM_ALIGNED(lvl, addr) ((PM_MAP_MASK(lvl) & (addr)) == (addr)) #define IOMMU_PTE_P (1ULL << 0) #define IOMMU_PTE_TV (1ULL << 1) @@ -167,11 +174,6 @@ #define IOMMU_PTE_IR (1ULL << 61) #define IOMMU_PTE_IW (1ULL << 62) -#define IOMMU_L1_PDE(address) \ - ((address) | IOMMU_PDE_NL_1 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) -#define IOMMU_L2_PDE(address) \ - ((address) | IOMMU_PDE_NL_2 | IOMMU_PTE_P | IOMMU_PTE_IR | IOMMU_PTE_IW) - #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL) #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_P) #define IOMMU_PTE_PAGE(pte) (phys_to_virt((pte) & IOMMU_PAGE_MASK)) @@ -194,11 +196,14 @@ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page + translation */ + extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ do { \ if (amd_iommu_dump) \ - printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ + printk(KERN_INFO "AMD-Vi: " format, ## arg); \ } while(0); /* @@ -226,6 +231,7 @@ struct protection_domain { int mode; /* paging mode (0-6 levels) */ u64 *pt_root; /* page table root pointer */ unsigned long flags; /* flags to find out type of domain */ + bool updated; /* complete domain flush required */ unsigned dev_cnt; /* devices assigned to this domain */ void *priv; /* private data */ }; @@ -337,6 +343,9 @@ struct amd_iommu { /* if one, we need to send a completion wait command */ bool need_sync; + /* becomes true if a command buffer reset is running */ + bool reset_in_progress; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; @@ -457,4 +466,7 @@ static inline void amd_iommu_stats_init(void) { } #endif /* CONFIG_AMD_IOMMU_STATS */ +/* some function prototypes */ +extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); + #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 1c3f9435f1c..0ee770d23d0 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -55,6 +55,24 @@ extern int dma_set_mask(struct device *dev, u64 mask); extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t flag); +static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) +{ + if (!dev->dma_mask) + return 0; + + return addr + size <= *dev->dma_mask; +} + +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return paddr; +} + +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) +{ + return daddr; +} + static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction dir) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c86e5ed4af5..e63cf7d441e 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace static inline void localise_nmi_watchdog(void) { diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index fa64e401589..e7b7c938ae2 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h @@ -84,6 +84,16 @@ union cpuid10_edx { #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) +/* + * We model BTS tracing as another fixed-mode PMC. + * + * We choose a value in the middle of the fixed counter range, since lower + * values are used by actual fixed counters and higher values are used + * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr. + */ +#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) + + #ifdef CONFIG_PERF_COUNTERS extern void init_hw_perf_counters(void); extern void perf_counters_lapic_init(void); diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f503780..98f230f6a28 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); static LIST_HEAD(iommu_pd_list); static DEFINE_SPINLOCK(iommu_pd_list_lock); -#ifdef CONFIG_IOMMU_API +/* + * Domain for untranslated devices - only allocated + * if iommu=pt passed on kernel cmd line. + */ +static struct protection_domain *pt_domain; + static struct iommu_ops amd_iommu_ops; -#endif /* * general struct to manage commands send to an IOMMU @@ -55,16 +59,16 @@ struct iommu_cmd { static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, struct unity_map_entry *e); static struct dma_ops_domain *find_protection_domain(u16 devid); -static u64* alloc_pte(struct protection_domain *dom, - unsigned long address, u64 - **pte_page, gfp_t gfp); +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, int end_lvl, + u64 **pte_page, gfp_t gfp); static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, unsigned long start_page, unsigned int pages); - -#ifndef BUS_NOTIFY_UNBOUND_DRIVER -#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 -#endif +static void reset_iommu_command_buffer(struct amd_iommu *iommu); +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, int map_size); +static void update_domain(struct protection_domain *domain); #ifdef CONFIG_AMD_IOMMU_STATS @@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu) * ****************************************************************************/ -static void iommu_print_event(void *__evt) +static void dump_dte_entry(u16 devid) +{ + int i; + + for (i = 0; i < 8; ++i) + pr_err("AMD-Vi: DTE[%d]: %08x\n", i, + amd_iommu_dev_table[devid].data[i]); +} + +static void dump_command(unsigned long phys_addr) +{ + struct iommu_cmd *cmd = phys_to_virt(phys_addr); + int i; + + for (i = 0; i < 4; ++i) + pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); +} + +static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { u32 *event = __evt; int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; @@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt) int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; u64 address = (u64)(((u64)event[3]) << 32) | event[2]; - printk(KERN_ERR "AMD IOMMU: Event logged ["); + printk(KERN_ERR "AMD-Vi: Event logged ["); switch (type) { case EVENT_TYPE_ILL_DEV: @@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt) "address=0x%016llx flags=0x%04x]\n", PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), address, flags); + dump_dte_entry(devid); break; case EVENT_TYPE_IO_FAULT: printk("IO_PAGE_FAULT device=%02x:%02x.%x " @@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt) break; case EVENT_TYPE_ILL_CMD: printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); + reset_iommu_command_buffer(iommu); + dump_command(address); break; case EVENT_TYPE_CMD_HARD_ERR: printk("COMMAND_HARDWARE_ERROR address=0x%016llx " @@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); while (head != tail) { - iommu_print_event(iommu->evt_buf + head); + iommu_print_event(iommu, iommu->evt_buf + head); head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; } @@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (unlikely(i == EXIT_LOOP_COUNT)) - panic("AMD IOMMU: Completion wait loop failed\n"); + if (unlikely(i == EXIT_LOOP_COUNT)) { + spin_unlock(&iommu->lock); + reset_iommu_command_buffer(iommu); + spin_lock(&iommu->lock); + } } /* @@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) } /* + * This function flushes one domain on one IOMMU + */ +static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) +{ + struct iommu_cmd cmd; + unsigned long flags; + + __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, + domid, 1, 1); + + spin_lock_irqsave(&iommu->lock, flags); + __iommu_queue_command(iommu, &cmd); + __iommu_completion_wait(iommu); + __iommu_wait_for_completion(iommu); + spin_unlock_irqrestore(&iommu->lock, flags); +} + +static void flush_all_domains_on_iommu(struct amd_iommu *iommu) +{ + int i; + + for (i = 1; i < MAX_DOMAIN_ID; ++i) { + if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) + continue; + flush_domain_on_iommu(iommu, i); + } + +} + +/* * This function is used to flush the IO/TLB for a given protection domain * on every IOMMU in the system */ static void iommu_flush_domain(u16 domid) { - unsigned long flags; struct amd_iommu *iommu; - struct iommu_cmd cmd; INC_STATS_COUNTER(domain_flush_all); - __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, - domid, 1, 1); - - for_each_iommu(iommu) { - spin_lock_irqsave(&iommu->lock, flags); - __iommu_queue_command(iommu, &cmd); - __iommu_completion_wait(iommu); - __iommu_wait_for_completion(iommu); - spin_unlock_irqrestore(&iommu->lock, flags); - } + for_each_iommu(iommu) + flush_domain_on_iommu(iommu, domid); } void amd_iommu_flush_all_domains(void) { + struct amd_iommu *iommu; + + for_each_iommu(iommu) + flush_all_domains_on_iommu(iommu); +} + +static void flush_all_devices_for_iommu(struct amd_iommu *iommu) +{ int i; - for (i = 1; i < MAX_DOMAIN_ID; ++i) { - if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) + for (i = 0; i <= amd_iommu_last_bdf; ++i) { + if (iommu != amd_iommu_rlookup_table[i]) continue; - iommu_flush_domain(i); + + iommu_queue_inv_dev_entry(iommu, i); + iommu_completion_wait(iommu); } } -void amd_iommu_flush_all_devices(void) +static void flush_devices_by_domain(struct protection_domain *domain) { struct amd_iommu *iommu; int i; for (i = 0; i <= amd_iommu_last_bdf; ++i) { - if (amd_iommu_pd_table[i] == NULL) + if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || + (amd_iommu_pd_table[i] != domain)) continue; iommu = amd_iommu_rlookup_table[i]; @@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void) } } +static void reset_iommu_command_buffer(struct amd_iommu *iommu) +{ + pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); + + if (iommu->reset_in_progress) + panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); + + iommu->reset_in_progress = true; + + amd_iommu_reset_cmd_buffer(iommu); + flush_all_devices_for_iommu(iommu); + flush_all_domains_on_iommu(iommu); + + iommu->reset_in_progress = false; +} + +void amd_iommu_flush_all_devices(void) +{ + flush_devices_by_domain(NULL); +} + /**************************************************************************** * * The functions below are used the create the page table mappings for @@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void) static int iommu_map_page(struct protection_domain *dom, unsigned long bus_addr, unsigned long phys_addr, - int prot) + int prot, + int map_size) { u64 __pte, *pte; bus_addr = PAGE_ALIGN(bus_addr); phys_addr = PAGE_ALIGN(phys_addr); - /* only support 512GB address spaces for now */ - if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) + BUG_ON(!PM_ALIGNED(map_size, bus_addr)); + BUG_ON(!PM_ALIGNED(map_size, phys_addr)); + + if (!(prot & IOMMU_PROT_MASK)) return -EINVAL; - pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); + pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); if (IOMMU_PTE_PRESENT(*pte)) return -EBUSY; @@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom, *pte = __pte; + update_domain(dom); + return 0; } static void iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr) + unsigned long bus_addr, int map_size) { - u64 *pte; - - pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; - - if (!IOMMU_PTE_PRESENT(*pte)) - return; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; + u64 *pte = fetch_pte(dom, bus_addr, map_size); - if (!IOMMU_PTE_PRESENT(*pte)) - return; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; - - *pte = 0; + if (pte) + *pte = 0; } /* @@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, for (addr = e->address_start; addr < e->address_end; addr += PAGE_SIZE) { - ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); + ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, + PM_MAP_4k); if (ret) return ret; /* @@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, * This function checks if there is a PTE for a given dma address. If * there is one, it returns the pointer to it. */ -static u64* fetch_pte(struct protection_domain *domain, - unsigned long address) +static u64 *fetch_pte(struct protection_domain *domain, + unsigned long address, int map_size) { + int level; u64 *pte; - pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; + while (level > map_size) { + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + level -= 1; - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { + pte = NULL; + break; + } + } return pte; } @@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu, u64 *pte, *pte_page; for (i = 0; i < num_ptes; ++i) { - pte = alloc_pte(&dma_dom->domain, address, + pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, &pte_page, gfp); if (!pte) goto out_free; @@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu, for (i = dma_dom->aperture[index]->offset; i < dma_dom->aperture_size; i += PAGE_SIZE) { - u64 *pte = fetch_pte(&dma_dom->domain, i); + u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); if (!pte || !IOMMU_PTE_PRESENT(*pte)) continue; dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); } + update_domain(&dma_dom->domain); + return 0; out_free: + update_domain(&dma_dom->domain); + free_page((unsigned long)dma_dom->aperture[index]->bitmap); kfree(dma_dom->aperture[index]); @@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) dma_dom->domain.id = domain_id_alloc(); if (dma_dom->domain.id == 0) goto free_dma_dom; - dma_dom->domain.mode = PAGE_MODE_3_LEVEL; + dma_dom->domain.mode = PAGE_MODE_2_LEVEL; dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); dma_dom->domain.flags = PD_DMA_OPS_MASK; dma_dom->domain.priv = dma_dom; @@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid) return dom; } +static void set_dte_entry(u16 devid, struct protection_domain *domain) +{ + u64 pte_root = virt_to_phys(domain->pt_root); + + pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + << DEV_ENTRY_MODE_SHIFT; + pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; + + amd_iommu_dev_table[devid].data[2] = domain->id; + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); + + amd_iommu_pd_table[devid] = domain; +} + +/* + * If a device is not yet associated with a domain, this function does + * assigns it visible for the hardware + */ +static void __attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) +{ + /* lock domain */ + spin_lock(&domain->lock); + + /* update DTE entry */ + set_dte_entry(devid, domain); + + domain->dev_cnt += 1; + + /* ready */ + spin_unlock(&domain->lock); +} + /* * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware @@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu, u16 devid) { unsigned long flags; - u64 pte_root = virt_to_phys(domain->pt_root); - - domain->dev_cnt += 1; - - pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) - << DEV_ENTRY_MODE_SHIFT; - pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); - amd_iommu_dev_table[devid].data[2] = domain->id; - - amd_iommu_pd_table[devid] = domain; + __attach_device(iommu, domain, devid); write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); - /* - * We might boot into a crash-kernel here. The crashed kernel - * left the caches in the IOMMU dirty. So we have to flush - * here to evict all dirty stuff. - */ + /* + * We might boot into a crash-kernel here. The crashed kernel + * left the caches in the IOMMU dirty. So we have to flush + * here to evict all dirty stuff. + */ iommu_queue_inv_dev_entry(iommu, devid); iommu_flush_tlb_pde(iommu, domain->id); } @@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) /* ready */ spin_unlock(&domain->lock); + + /* + * If we run in passthrough mode the device must be assigned to the + * passthrough domain if it is detached from any other domain + */ + if (iommu_pass_through) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + __attach_device(iommu, pt_domain, devid); + } } /* @@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb, case BUS_NOTIFY_UNBOUND_DRIVER: if (!domain) goto out; + if (iommu_pass_through) + break; detach_device(domain, devid); break; case BUS_NOTIFY_ADD_DEVICE: @@ -1292,39 +1409,91 @@ static int get_device_resources(struct device *dev, return 1; } +static void update_device_table(struct protection_domain *domain) +{ + unsigned long flags; + int i; + + for (i = 0; i <= amd_iommu_last_bdf; ++i) { + if (amd_iommu_pd_table[i] != domain) + continue; + write_lock_irqsave(&amd_iommu_devtable_lock, flags); + set_dte_entry(i, domain); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + } +} + +static void update_domain(struct protection_domain *domain) +{ + if (!domain->updated) + return; + + update_device_table(domain); + flush_devices_by_domain(domain); + iommu_flush_domain(domain->id); + + domain->updated = false; +} + /* - * If the pte_page is not yet allocated this function is called + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. */ -static u64* alloc_pte(struct protection_domain *dom, - unsigned long address, u64 **pte_page, gfp_t gfp) +static bool increase_address_space(struct protection_domain *domain, + gfp_t gfp) +{ + u64 *pte; + + if (domain->mode == PAGE_MODE_6_LEVEL) + /* address space already 64 bit large */ + return false; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + *pte = PM_LEVEL_PDE(domain->mode, + virt_to_phys(domain->pt_root)); + domain->pt_root = pte; + domain->mode += 1; + domain->updated = true; + + return true; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + int end_lvl, + u64 **pte_page, + gfp_t gfp) { u64 *pte, *page; + int level; - pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; + while (address > PM_LEVEL_SIZE(domain->mode)) + increase_address_space(domain, gfp); - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = IOMMU_L2_PDE(virt_to_phys(page)); - } + level = domain->mode - 1; + pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(address)]; + while (level > end_lvl) { + if (!IOMMU_PTE_PRESENT(*pte)) { + page = (u64 *)get_zeroed_page(gfp); + if (!page) + return NULL; + *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); + } - if (!IOMMU_PTE_PRESENT(*pte)) { - page = (u64 *)get_zeroed_page(gfp); - if (!page) - return NULL; - *pte = IOMMU_L1_PDE(virt_to_phys(page)); - } + level -= 1; - pte = IOMMU_PTE_PAGE(*pte); + pte = IOMMU_PTE_PAGE(*pte); - if (pte_page) - *pte_page = pte; + if (pte_page && level == end_lvl) + *pte_page = pte; - pte = &pte[IOMMU_PTE_L0_INDEX(address)]; + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } return pte; } @@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; if (!pte) { - pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); + pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, + GFP_ATOMIC); aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; } else - pte += IOMMU_PTE_L0_INDEX(address); + pte += PM_LEVEL_INDEX(0, address); + + update_domain(&dom->domain); return pte; } @@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (!pte) return; - pte += IOMMU_PTE_L0_INDEX(address); + pte += PM_LEVEL_INDEX(0, address); WARN_ON(!*pte); @@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -static int amd_iommu_domain_init(struct iommu_domain *dom) +static void protection_domain_free(struct protection_domain *domain) +{ + if (!domain) + return; + + if (domain->id) + domain_id_free(domain->id); + + kfree(domain); +} + +static struct protection_domain *protection_domain_alloc(void) { struct protection_domain *domain; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) - return -ENOMEM; + return NULL; spin_lock_init(&domain->lock); - domain->mode = PAGE_MODE_3_LEVEL; domain->id = domain_id_alloc(); if (!domain->id) + goto out_err; + + return domain; + +out_err: + kfree(domain); + + return NULL; +} + +static int amd_iommu_domain_init(struct iommu_domain *dom) +{ + struct protection_domain *domain; + + domain = protection_domain_alloc(); + if (!domain) goto out_free; + + domain->mode = PAGE_MODE_3_LEVEL; domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); if (!domain->pt_root) goto out_free; @@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) return 0; out_free: - kfree(domain); + protection_domain_free(domain); return -ENOMEM; } @@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom, paddr &= PAGE_MASK; for (i = 0; i < npages; ++i) { - ret = iommu_map_page(domain, iova, paddr, prot); + ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); if (ret) return ret; @@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova &= PAGE_MASK; for (i = 0; i < npages; ++i) { - iommu_unmap_page(domain, iova); + iommu_unmap_page(domain, iova, PM_MAP_4k); iova += PAGE_SIZE; } @@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, phys_addr_t paddr; u64 *pte; - pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; - - if (!IOMMU_PTE_PRESENT(*pte)) - return 0; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; - - if (!IOMMU_PTE_PRESENT(*pte)) - return 0; - - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; + pte = fetch_pte(domain, iova, PM_MAP_4k); - if (!IOMMU_PTE_PRESENT(*pte)) + if (!pte || !IOMMU_PTE_PRESENT(*pte)) return 0; paddr = *pte & IOMMU_PAGE_MASK; @@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = { .domain_has_cap = amd_iommu_domain_has_cap, }; +/***************************************************************************** + * + * The next functions do a basic initialization of IOMMU for pass through + * mode + * + * In passthrough mode the IOMMU is initialized and enabled but not used for + * DMA-API translation. + * + *****************************************************************************/ + +int __init amd_iommu_init_passthrough(void) +{ + struct pci_dev *dev = NULL; + u16 devid, devid2; + + /* allocate passthroug domain */ + pt_domain = protection_domain_alloc(); + if (!pt_domain) + return -ENOMEM; + + pt_domain->mode |= PAGE_MODE_NONE; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + struct amd_iommu *iommu; + + devid = calc_devid(dev->bus->number, dev->devfn); + if (devid > amd_iommu_last_bdf) + continue; + + devid2 = amd_iommu_alias_table[devid]; + + iommu = amd_iommu_rlookup_table[devid2]; + if (!iommu) + continue; + + __attach_device(iommu, pt_domain, devid); + __attach_device(iommu, pt_domain, devid2); + } + + pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); + + return 0; +} diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252..b4b61d462dc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) /* Function to enable the hardware */ static void iommu_enable(struct amd_iommu *iommu) { - printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", + printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", dev_name(&iommu->dev->dev), iommu->cap_ptr); iommu_feature_enable(iommu, CONTROL_IOMMU_EN); @@ -435,6 +435,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) } /* + * This function resets the command buffer if the IOMMU stopped fetching + * commands from it. + */ +void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); + + writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); +} + +/* * This function writes the command buffer address to the hardware and * enables it. */ @@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, &entry, sizeof(entry)); - /* set head and tail to zero manually */ - writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - - iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); + amd_iommu_reset_cmd_buffer(iommu); } static void __init free_command_buffer(struct amd_iommu *iommu) @@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) switch (*p) { case ACPI_IVHD_TYPE: - DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " + DUMP_printk("device: %02x:%02x.%01x cap: %04x " "seg: %d flags: %01x info %04x\n", PCI_BUS(h->devid), PCI_SLOT(h->devid), PCI_FUNC(h->devid), h->cap_ptr, @@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) r = request_irq(iommu->dev->irq, amd_iommu_int_handler, IRQF_SAMPLE_RANDOM, - "AMD IOMMU", + "AMD-Vi", NULL); if (r) { @@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void) if (no_iommu) { - printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); + printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); return 0; } @@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void) if (ret) goto free; - ret = amd_iommu_init_dma_ops(); + if (iommu_pass_through) + ret = amd_iommu_init_passthrough(); + else + ret = amd_iommu_init_dma_ops(); if (ret) goto free; enable_iommus(); - printk(KERN_INFO "AMD IOMMU: device isolation "); + if (iommu_pass_through) + goto out; + + printk(KERN_INFO "AMD-Vi: device isolation "); if (amd_iommu_isolate) printk("enabled\n"); else printk("disabled\n"); if (amd_iommu_unmap_flush) - printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); + printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); else - printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); + printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); out: return ret; diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 676debfc170..128111d8ffe 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,6 +20,7 @@ #include <linux/bitops.h> #include <linux/ioport.h> #include <linux/suspend.h> +#include <linux/kmemleak.h> #include <asm/e820.h> #include <asm/io.h> #include <asm/iommu.h> @@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void) * code for safe */ p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); + /* + * Kmemleak should not scan this block as it may not be mapped via the + * kernel direct mapping. + */ + kmemleak_ignore(p); if (!p || __pa(p)+aper_size > 0xffffffff) { printk(KERN_ERR "Cannot allocate aperture memory hole (%p,%uK)\n", diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b43b63..db7220220d0 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -39,7 +39,7 @@ int unknown_nmi_panic; int nmi_watchdog_enabled; -static cpumask_var_t backtrace_mask; +static cpumask_t backtrace_mask __read_mostly; /* nmi_active: * >0: the lapic NMI watchdog is active, but can be disabled @@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) if (!prev_nmi_count) goto error; - alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); printk(KERN_INFO "Testing NMI watchdog ... "); #ifdef CONFIG_SMP @@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) } /* We can be called before check_nmi_watchdog, hence NULL check. */ - if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { + if (cpumask_test_cpu(cpu, &backtrace_mask)) { static DEFINE_SPINLOCK(lock); /* Serialise the printks */ spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); + show_regs(regs); dump_stack(); spin_unlock(&lock); - cpumask_clear_cpu(cpu, backtrace_mask); + cpumask_clear_cpu(cpu, &backtrace_mask); + + rc = 1; } /* Could check oops_in_progress here too, but it's safer not to */ @@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) return 0; } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { int i; - cpumask_copy(backtrace_mask, cpu_online_mask); + cpumask_copy(&backtrace_mask, cpu_online_mask); + + printk(KERN_INFO "sending NMI to all CPUs:\n"); + apic->send_IPI_all(NMI_VECTOR); + /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { - if (cpumask_empty(backtrace_mask)) + if (cpumask_empty(&backtrace_mask)) break; mdelay(1); } diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index bc3e880f9b8..fcec2f1d34a 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -44,6 +44,11 @@ static struct apic *apic_probe[] __initdata = { NULL, }; +static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) +{ + return hard_smp_processor_id() >> index_msb; +} + /* * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. */ @@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void) printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); } + if (is_vsmp_box()) { + /* need to update phys_pkg_id */ + apic->phys_pkg_id = apicid_phys_pkg_id; + } + /* * Now that apic routing model is selected, configure the * fault handling for intr remapping. diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 900332b800f..f9cd0849bd4 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c @@ -6,6 +6,7 @@ * Copyright (C) 2009 Jaswinder Singh Rajput * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> + * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> * * For licencing details see kernel-base/COPYING */ @@ -20,6 +21,7 @@ #include <linux/sched.h> #include <linux/uaccess.h> #include <linux/highmem.h> +#include <linux/cpu.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -27,12 +29,52 @@ static u64 perf_counter_mask __read_mostly; +/* The maximal number of PEBS counters: */ +#define MAX_PEBS_COUNTERS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +/* The size of a per-cpu BTS buffer in bytes: */ +#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) + +/* The BTS overflow threshold in bytes from the end of the buffer: */ +#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) + + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; +}; + struct cpu_hw_counters { struct perf_counter *counters[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; + struct debug_store *ds; }; /* @@ -58,6 +100,8 @@ struct x86_pmu { int apic; u64 max_period; u64 intel_ctrl; + void (*enable_bts)(u64 config); + void (*disable_bts)(void); }; static struct x86_pmu x86_pmu __read_mostly; @@ -577,6 +621,9 @@ x86_perf_counter_update(struct perf_counter *counter, u64 prev_raw_count, new_raw_count; s64 delta; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * Careful: an NMI might modify the previous counter value. * @@ -666,10 +713,110 @@ static void release_pmc_hardware(void) #endif } +static inline bool bts_available(void) +{ + return x86_pmu.enable_bts != NULL; +} + +static inline void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); +} + +static inline void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_counters, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_bts_hardware(void) +{ + int cpu; + + if (!bts_available()) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_counters, cpu).ds = NULL; + + kfree((void *)(unsigned long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_bts_hardware(void) +{ + int cpu, err = 0; + + if (!bts_available()) + return 0; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + + err = -ENOMEM; + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + + ds->bts_buffer_base = (u64)(unsigned long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = + ds->bts_buffer_base + BTS_BUFFER_SIZE; + ds->bts_interrupt_threshold = + ds->bts_absolute_maximum - BTS_OVFL_TH; + + per_cpu(cpu_hw_counters, cpu).ds = ds; + err = 0; + } + + if (err) + release_bts_hardware(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + static void hw_perf_counter_destroy(struct perf_counter *counter) { if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { release_pmc_hardware(); + release_bts_hardware(); mutex_unlock(&pmc_reserve_mutex); } } @@ -712,6 +859,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) return 0; } +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + /* * Setup the hardware configuration for a given attr_type */ @@ -728,9 +911,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) err = 0; if (!atomic_inc_not_zero(&active_counters)) { mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) - err = -EBUSY; - else + if (atomic_read(&active_counters) == 0) { + if (!reserve_pmc_hardware()) + err = -EBUSY; + else + err = reserve_bts_hardware(); + } + if (!err) atomic_inc(&active_counters); mutex_unlock(&pmc_reserve_mutex); } @@ -793,6 +980,20 @@ static int __hw_perf_counter_init(struct perf_counter *counter) if (config == -1LL) return -EINVAL; + /* + * Branch tracing: + */ + if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && + (hwc->sample_period == 1)) { + /* BTS is not supported by this architecture. */ + if (!bts_available()) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + return -EOPNOTSUPP; + } + hwc->config |= config; return 0; @@ -817,7 +1018,18 @@ static void p6_pmu_disable_all(void) static void intel_pmu_disable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (!cpuc->enabled) + return; + + cpuc->enabled = 0; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); } static void amd_pmu_disable_all(void) @@ -875,7 +1087,25 @@ static void p6_pmu_enable_all(void) static void intel_pmu_enable_all(void) { + struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); + + if (cpuc->enabled) + return; + + cpuc->enabled = 1; + barrier(); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_counter *counter = + cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!counter)) + return; + + intel_pmu_enable_bts(counter->hw.config); + } } static void amd_pmu_enable_all(void) @@ -962,6 +1192,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) static inline void intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_disable_fixed(hwc, idx); return; @@ -990,6 +1225,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, s64 period = hwc->sample_period; int err, ret = 0; + if (idx == X86_PMC_IDX_FIXED_BTS) + return 0; + /* * If we are way outside a reasoable range then just skip forward: */ @@ -1072,6 +1310,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) { + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_counters).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { intel_pmu_enable_fixed(hwc, idx); return; @@ -1093,11 +1339,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) { unsigned int event; + event = hwc->config & ARCH_PERFMON_EVENT_MASK; + + if (unlikely((event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (hwc->sample_period == 1))) + return X86_PMC_IDX_FIXED_BTS; + if (!x86_pmu.num_counters_fixed) return -1; - event = hwc->config & ARCH_PERFMON_EVENT_MASK; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) return X86_PMC_IDX_FIXED_INSTRUCTIONS; if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) @@ -1118,7 +1369,15 @@ static int x86_pmu_enable(struct perf_counter *counter) int idx; idx = fixed_mode_idx(counter, hwc); - if (idx >= 0) { + if (idx == X86_PMC_IDX_FIXED_BTS) { + /* BTS is already occupied. */ + if (test_and_set_bit(idx, cpuc->used_mask)) + return -EAGAIN; + + hwc->config_base = 0; + hwc->counter_base = 0; + hwc->idx = idx; + } else if (idx >= 0) { /* * Try to get the fixed counter, if that is already taken * then try to get a generic counter: @@ -1229,6 +1488,44 @@ void perf_counter_print_debug(void) local_irq_restore(flags); } +static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, + struct perf_sample_data *data) +{ + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; + unsigned long orig_ip = data->regs->ip; + struct bts_record *at, *top; + + if (!counter) + return; + + if (!ds) + return; + + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + ds->bts_index = ds->bts_buffer_base; + + for (; at < top; at++) { + data->regs->ip = at->from; + data->addr = at->to; + + perf_counter_output(counter, 1, data); + } + + data->regs->ip = orig_ip; + data->addr = 0; + + /* There's new data available. */ + counter->pending_kill = POLL_IN; +} + static void x86_pmu_disable(struct perf_counter *counter) { struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); @@ -1253,6 +1550,15 @@ static void x86_pmu_disable(struct perf_counter *counter) * that we are disabling: */ x86_perf_counter_update(counter, hwc, idx); + + /* Drain the remaining BTS records. */ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + struct perf_sample_data data; + struct pt_regs regs; + + data.regs = ®s; + intel_pmu_drain_bts_buffer(cpuc, &data); + } cpuc->counters[idx] = NULL; clear_bit(idx, cpuc->used_mask); @@ -1280,6 +1586,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) static void intel_pmu_reset(void) { + struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; unsigned long flags; int idx; @@ -1297,6 +1604,8 @@ static void intel_pmu_reset(void) for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); } + if (ds) + ds->bts_index = ds->bts_buffer_base; local_irq_restore(flags); } @@ -1362,6 +1671,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_counters); perf_disable(); + intel_pmu_drain_bts_buffer(cpuc, &data); status = intel_pmu_get_status(); if (!status) { perf_enable(); @@ -1571,6 +1881,8 @@ static struct x86_pmu intel_pmu = { * the generic counter period: */ .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, }; static struct x86_pmu amd_pmu = { @@ -1962,3 +2274,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } + +void hw_perf_counter_setup_online(int cpu) +{ + init_debug_store_on_cpu(cpu); +} diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506..d71c8655905 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -3,6 +3,7 @@ #include <linux/dmar.h> #include <linux/bootmem.h> #include <linux/pci.h> +#include <linux/kmemleak.h> #include <asm/proto.h> #include <asm/dma.h> @@ -32,7 +33,14 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; -int iommu_pass_through; +/* + * This variable becomes 1 if iommu=pt is passed on the kernel command line. + * If this variable is 1, IOMMU implementations do no DMA ranslation for + * devices and allow every device to access to whole physical memory. This is + * useful if a user want to use an IOMMU only for KVM device assignment to + * guests and not for driver dma translation. + */ +int iommu_pass_through __read_mostly; dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); @@ -88,6 +96,11 @@ void __init dma32_reserve_bootmem(void) size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); + /* + * Kmemleak should not scan this block as it may not be mapped via the + * kernel direct mapping. + */ + kmemleak_ignore(dma32_bootmem_ptr); if (dma32_bootmem_ptr) dma32_bootmem_size = size; else @@ -147,7 +160,7 @@ again: return NULL; addr = page_to_phys(page); - if (!is_buffer_dma_capable(dma_mask, addr, size)) { + if (addr + size > dma_mask) { __free_pages(page, get_order(size)); if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d2e56b8f48e..98a827ee9ed 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir) static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) { - return force_iommu || - !is_buffer_dma_capable(*dev->dma_mask, addr, size); + return force_iommu || !dma_capable(dev, addr, size); } static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) { - return !is_buffer_dma_capable(*dev->dma_mask, addr, size); + return !dma_capable(dev, addr, size); } /* Map a single continuous physical area into the IOMMU. diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 71d412a09f3..a3933d4330c 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { + if (hwdev && !dma_capable(hwdev, bus, size)) { if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", @@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static void nommu_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + + +static void nommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + struct dma_map_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, - .map_sg = nommu_map_sg, - .map_page = nommu_map_page, - .is_phys = 1, + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = nommu_free_coherent, + .map_sg = nommu_map_sg, + .map_page = nommu_map_page, + .sync_single_for_device = nommu_sync_single_for_device, + .sync_sg_for_device = nommu_sync_sg_for_device, + .is_phys = 1, }; void __init no_iommu_init(void) diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee4420..e8a35016115 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,31 +13,6 @@ int swiotlb __read_mostly; -void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - -void *swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - -dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) -{ - return baddr; -} - -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94..81e58238c4c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); + if (current->replacement_session_keyring) + key_replace_session_keyring(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index 2c55ed09865..528bf954eb7 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -331,6 +331,20 @@ static void kmemcheck_read_strict(struct pt_regs *regs, kmemcheck_shadow_set(shadow, size); } +bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) +{ + enum kmemcheck_shadow status; + void *shadow; + + shadow = kmemcheck_shadow_lookup(addr); + if (!shadow) + return true; + + status = kmemcheck_shadow_test(shadow, size); + + return status == KMEMCHECK_SHADOW_INITIALIZED; +} + /* Access may cross page boundary */ static void kmemcheck_read(struct pt_regs *regs, unsigned long addr, unsigned int size) diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 89b9a5cd63d..cb88b1a0bd5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -1,11 +1,14 @@ /** * @file nmi_int.c * - * @remark Copyright 2002-2008 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon <levon@movementarian.org> * @author Robert Richter <robert.richter@amd.com> + * @author Barry Kasindorf <barry.kasindorf@amd.com> + * @author Jason Yeh <jason.yeh@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> */ #include <linux/init.h> @@ -24,13 +27,35 @@ #include "op_counter.h" #include "op_x86_model.h" -static struct op_x86_model_spec const *model; +static struct op_x86_model_spec *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; +struct op_counter_config counter_config[OP_MAX_COUNTER]; + +/* common functions */ + +u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, + struct op_counter_config *counter_config) +{ + u64 val = 0; + u16 event = (u16)counter_config->event; + + val |= ARCH_PERFMON_EVENTSEL_INT; + val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; + val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; + val |= (counter_config->unit_mask & 0xFF) << 8; + event &= model->event_mask ? model->event_mask : 0xFF; + val |= event & 0xFF; + val |= (event & 0x0F00) << 24; + + return val; +} + + static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { @@ -52,36 +77,214 @@ static int profile_exceptions_notify(struct notifier_block *self, static void nmi_cpu_save_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - rdmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + for (i = 0; i < model->num_counters; ++i) { + if (counters[i].addr) + rdmsrl(counters[i].addr, counters[i].saved); + } + + for (i = 0; i < model->num_controls; ++i) { + if (controls[i].addr) + rdmsrl(controls[i].addr, controls[i].saved); + } +} + +static void nmi_cpu_start(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + model->start(msrs); +} + +static int nmi_start(void) +{ + on_each_cpu(nmi_cpu_start, NULL, 1); + return 0; +} + +static void nmi_cpu_stop(void *dummy) +{ + struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); + model->stop(msrs); +} + +static void nmi_stop(void) +{ + on_each_cpu(nmi_cpu_stop, NULL, 1); +} + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static DEFINE_PER_CPU(int, switch_index); + +static inline int has_mux(void) +{ + return !!model->switch_ctrl; +} + +inline int op_x86_phys_to_virt(int phys) +{ + return __get_cpu_var(switch_index) + phys; +} + +inline int op_x86_virt_to_phys(int virt) +{ + return virt % model->num_counters; +} + +static void nmi_shutdown_mux(void) +{ + int i; + + if (!has_mux()) + return; + + for_each_possible_cpu(i) { + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; + per_cpu(switch_index, i) = 0; } +} + +static int nmi_setup_mux(void) +{ + size_t multiplex_size = + sizeof(struct op_msr) * model->num_virt_counters; + int i; + + if (!has_mux()) + return 1; + + for_each_possible_cpu(i) { + per_cpu(cpu_msrs, i).multiplex = + kmalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) + return 0; + } + + return 1; +} + +static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + if (!has_mux()) + return; - for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - rdmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].addr = 0; + multiplex[i].saved = 0; } } + + per_cpu(switch_index, cpu) = 0; +} + +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + rdmsrl(multiplex[virt].addr, multiplex[virt].saved); + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + struct op_msr *multiplex = msrs->multiplex; + int i; + + for (i = 0; i < model->num_counters; ++i) { + int virt = op_x86_phys_to_virt(i); + if (multiplex[virt].addr) + wrmsrl(multiplex[virt].addr, multiplex[virt].saved); + } } -static void nmi_save_registers(void *dummy) +static void nmi_cpu_switch(void *dummy) { int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); - nmi_cpu_save_registers(msrs); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ? 0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!has_mux()) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + on_each_cpu(nmi_cpu_switch, NULL, 1); + + return 0; +} + +static inline void mux_init(struct oprofile_operations *ops) +{ + if (has_mux()) + ops->switch_events = nmi_switch_event; +} + +static void mux_clone(int cpu) +{ + if (!has_mux()) + return; + + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); } +#else + +inline int op_x86_phys_to_virt(int phys) { return phys; } +inline int op_x86_virt_to_phys(int virt) { return virt; } +static inline void nmi_shutdown_mux(void) { } +static inline int nmi_setup_mux(void) { return 1; } +static inline void +nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { } +static inline void mux_init(struct oprofile_operations *ops) { } +static void mux_clone(int cpu) { } + +#endif + static void free_msrs(void) { int i; @@ -95,38 +298,32 @@ static void free_msrs(void) static int allocate_msrs(void) { - int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; int i; for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).counters) { - success = 0; - break; - } + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).counters) + return 0; per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, - GFP_KERNEL); - if (!per_cpu(cpu_msrs, i).controls) { - success = 0; - break; - } + GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).controls) + return 0; } - if (!success) - free_msrs(); - - return success; + return 1; } static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); - model->setup_ctrs(msrs); + model->setup_ctrs(model, msrs); + nmi_cpu_setup_mux(cpu, msrs); spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); @@ -144,11 +341,15 @@ static int nmi_setup(void) int cpu; if (!allocate_msrs()) - return -ENOMEM; + err = -ENOMEM; + else if (!nmi_setup_mux()) + err = -ENOMEM; + else + err = register_die_notifier(&profile_exceptions_nb); - err = register_die_notifier(&profile_exceptions_nb); if (err) { free_msrs(); + nmi_shutdown_mux(); return err; } @@ -159,45 +360,38 @@ static int nmi_setup(void) /* Assume saved/restored counters are the same on all CPUs */ model->fill_in_addresses(&per_cpu(cpu_msrs, 0)); for_each_possible_cpu(cpu) { - if (cpu != 0) { - memcpy(per_cpu(cpu_msrs, cpu).counters, - per_cpu(cpu_msrs, 0).counters, - sizeof(struct op_msr) * model->num_counters); - - memcpy(per_cpu(cpu_msrs, cpu).controls, - per_cpu(cpu_msrs, 0).controls, - sizeof(struct op_msr) * model->num_controls); - } + if (!cpu) + continue; + + memcpy(per_cpu(cpu_msrs, cpu).counters, + per_cpu(cpu_msrs, 0).counters, + sizeof(struct op_msr) * model->num_counters); + + memcpy(per_cpu(cpu_msrs, cpu).controls, + per_cpu(cpu_msrs, 0).controls, + sizeof(struct op_msr) * model->num_controls); + mux_clone(cpu); } - on_each_cpu(nmi_save_registers, NULL, 1); on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; return 0; } -static void nmi_restore_registers(struct op_msrs *msrs) +static void nmi_cpu_restore_registers(struct op_msrs *msrs) { - unsigned int const nr_ctrs = model->num_counters; - unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; - for (i = 0; i < nr_ctrls; ++i) { - if (controls[i].addr) { - wrmsr(controls[i].addr, - controls[i].saved.low, - controls[i].saved.high); - } + for (i = 0; i < model->num_controls; ++i) { + if (controls[i].addr) + wrmsrl(controls[i].addr, controls[i].saved); } - for (i = 0; i < nr_ctrs; ++i) { - if (counters[i].addr) { - wrmsr(counters[i].addr, - counters[i].saved.low, - counters[i].saved.high); - } + for (i = 0; i < model->num_counters; ++i) { + if (counters[i].addr) + wrmsrl(counters[i].addr, counters[i].saved); } } @@ -205,7 +399,7 @@ static void nmi_cpu_shutdown(void *dummy) { unsigned int v; int cpu = smp_processor_id(); - struct op_msrs *msrs = &__get_cpu_var(cpu_msrs); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); /* restoring APIC_LVTPC can trigger an apic error because the delivery * mode and vector nr combination can be illegal. That's by design: on @@ -216,7 +410,7 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); - nmi_restore_registers(msrs); + nmi_cpu_restore_registers(msrs); } static void nmi_shutdown(void) @@ -226,42 +420,18 @@ static void nmi_shutdown(void) nmi_enabled = 0; on_each_cpu(nmi_cpu_shutdown, NULL, 1); unregister_die_notifier(&profile_exceptions_nb); + nmi_shutdown_mux(); msrs = &get_cpu_var(cpu_msrs); model->shutdown(msrs); free_msrs(); put_cpu_var(cpu_msrs); } -static void nmi_cpu_start(void *dummy) -{ - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->start(msrs); -} - -static int nmi_start(void) -{ - on_each_cpu(nmi_cpu_start, NULL, 1); - return 0; -} - -static void nmi_cpu_stop(void *dummy) -{ - struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs); - model->stop(msrs); -} - -static void nmi_stop(void) -{ - on_each_cpu(nmi_cpu_stop, NULL, 1); -} - -struct op_counter_config counter_config[OP_MAX_COUNTER]; - static int nmi_create_files(struct super_block *sb, struct dentry *root) { unsigned int i; - for (i = 0; i < model->num_counters; ++i) { + for (i = 0; i < model->num_virt_counters; ++i) { struct dentry *dir; char buf[4]; @@ -270,7 +440,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) * NOTE: assumes 1:1 mapping here (that counters are organized * sequentially in their struct assignment). */ - if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) + if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i))) continue; snprintf(buf, sizeof(buf), "%d", i); @@ -402,6 +572,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; + struct op_x86_model_spec *spec = &op_ppro_spec; /* default */ if (force_arch_perfmon && cpu_has_arch_perfmon) return 0; @@ -428,7 +599,7 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/core_2"; break; case 26: - arch_perfmon_setup_counters(); + spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; break; case 28: @@ -439,17 +610,7 @@ static int __init ppro_init(char **cpu_type) return 0; } - model = &op_ppro_spec; - return 1; -} - -static int __init arch_perfmon_init(char **cpu_type) -{ - if (!cpu_has_arch_perfmon) - return 0; - *cpu_type = "i386/arch_perfmon"; - model = &op_arch_perfmon_spec; - arch_perfmon_setup_counters(); + model = spec; return 1; } @@ -471,27 +632,26 @@ int __init op_nmi_init(struct oprofile_operations *ops) /* Needs to be at least an Athlon (or hammer in 32bit mode) */ switch (family) { - default: - return -ENODEV; case 6: - model = &op_amd_spec; cpu_type = "i386/athlon"; break; case 0xf: - model = &op_amd_spec; - /* Actually it could be i386/hammer too, but give - user space an consistent name. */ + /* + * Actually it could be i386/hammer too, but + * give user space an consistent name. + */ cpu_type = "x86-64/hammer"; break; case 0x10: - model = &op_amd_spec; cpu_type = "x86-64/family10"; break; case 0x11: - model = &op_amd_spec; cpu_type = "x86-64/family11h"; break; + default: + return -ENODEV; } + model = &op_amd_spec; break; case X86_VENDOR_INTEL: @@ -510,8 +670,15 @@ int __init op_nmi_init(struct oprofile_operations *ops) break; } - if (!cpu_type && !arch_perfmon_init(&cpu_type)) + if (cpu_type) + break; + + if (!cpu_has_arch_perfmon) return -ENODEV; + + /* use arch perfmon as fallback */ + cpu_type = "i386/arch_perfmon"; + model = &op_arch_perfmon_spec; break; default: @@ -522,18 +689,23 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ - ops->create_files = nmi_create_files; - ops->setup = nmi_setup; - ops->shutdown = nmi_shutdown; - ops->start = nmi_start; - ops->stop = nmi_stop; - ops->cpu_type = cpu_type; + ops->create_files = nmi_create_files; + ops->setup = nmi_setup; + ops->shutdown = nmi_shutdown; + ops->start = nmi_start; + ops->stop = nmi_stop; + ops->cpu_type = cpu_type; if (model->init) ret = model->init(ops); if (ret) return ret; + if (!model->num_virt_counters) + model->num_virt_counters = model->num_counters; + + mux_init(ops); + init_sysfs(); using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a116165..e28398df0df 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,7 +10,7 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 8fdf06e4edf..39686c29f03 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -9,12 +9,15 @@ * @author Philippe Elie * @author Graydon Hoare * @author Robert Richter <robert.richter@amd.com> - * @author Barry Kasindorf + * @author Barry Kasindorf <barry.kasindorf@amd.com> + * @author Jason Yeh <jason.yeh@amd.com> + * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> */ #include <linux/oprofile.h> #include <linux/device.h> #include <linux/pci.h> +#include <linux/percpu.h> #include <asm/ptrace.h> #include <asm/msr.h> @@ -25,43 +28,36 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +#define NUM_VIRT_COUNTERS 32 +#define NUM_VIRT_CONTROLS 32 +#else +#define NUM_VIRT_COUNTERS NUM_COUNTERS +#define NUM_VIRT_CONTROLS NUM_CONTROLS +#endif + +#define OP_EVENT_MASK 0x0FFF +#define OP_CTR_OVERFLOW (1ULL<<31) -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) - -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_CLEAR_LO(x) (x &= (1<<21)) -#define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) -#define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) -#define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) -#define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) - -static unsigned long reset_value[NUM_COUNTERS]; +#define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) + +static unsigned long reset_value[NUM_VIRT_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS /* IbsFetchCtl bits/masks */ -#define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ -#define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ -#define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ +#define IBS_FETCH_RAND_EN (1ULL<<57) +#define IBS_FETCH_VAL (1ULL<<49) +#define IBS_FETCH_ENABLE (1ULL<<48) +#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL /*IbsOpCtl bits */ -#define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ -#define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ +#define IBS_OP_CNT_CTL (1ULL<<19) +#define IBS_OP_VAL (1ULL<<18) +#define IBS_OP_ENABLE (1ULL<<17) -#define IBS_FETCH_SIZE 6 -#define IBS_OP_SIZE 12 +#define IBS_FETCH_SIZE 6 +#define IBS_OP_SIZE 12 static int has_ibs; /* AMD Family10h and later */ @@ -78,6 +74,45 @@ static struct op_ibs_config ibs_config; #endif +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_mux_fill_in_addresses(struct op_msrs * const msrs) +{ + int i; + + for (i = 0; i < NUM_VIRT_COUNTERS; i++) { + int hw_counter = op_x86_virt_to_phys(i); + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; + else + msrs->multiplex[i].addr = 0; + } +} + +static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) +{ + u64 val; + int i; + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); + } +} + +#else + +static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } + +#endif + /* functions for op_amd_spec */ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) @@ -97,150 +132,174 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) else msrs->controls[i].addr = 0; } -} + op_mux_fill_in_addresses(msrs); +} -static void op_amd_setup_ctrs(struct op_msrs const * const msrs) +static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; + /* setup reset_value */ + for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { + if (counter_config[i].enabled) + reset_value[i] = counter_config[i].count; + else + reset_value[i] = 0; + } + /* clear all counters */ - for (i = 0 ; i < NUM_CONTROLS; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + for (i = 0; i < NUM_CONTROLS; ++i) { + if (unlikely(!msrs->controls[i].addr)) continue; - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + wrmsrl(msrs->controls[i].addr, val); } /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->counters[i].addr)) continue; - CTR_WRITE(1, msrs, i); + wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { - reset_value[i] = counter_config[i].count; + int virt = op_x86_phys_to_virt(i); + if (!counter_config[virt].enabled) + continue; + if (!msrs->counters[i].addr) + continue; - CTR_WRITE(counter_config[i].count, msrs, i); - - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR_LO(low); - CTRL_CLEAR_HI(high); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT_LOW(low, counter_config[i].event); - CTRL_SET_EVENT_HIGH(high, counter_config[i].event); - CTRL_SET_HOST_ONLY(high, 0); - CTRL_SET_GUEST_ONLY(high, 0); - - CTRL_WRITE(low, high, msrs, i); - } else { - reset_value[i] = 0; - } + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); + + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[virt]); + wrmsrl(msrs->controls[i].addr, val); } } #ifdef CONFIG_OPROFILE_IBS -static inline int +static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - u32 low, high; - u64 msr; + u64 val, ctl; struct op_entry entry; if (!has_ibs) - return 1; + return; if (ibs_config.fetch_enabled) { - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); - if (high & IBS_FETCH_HIGH_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); - oprofile_write_reserve(&entry, regs, msr, + rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); + if (ctl & IBS_FETCH_VAL) { + rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); + oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - oprofile_add_data(&entry, low); - oprofile_add_data(&entry, high); - rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data64(&entry, val); + oprofile_add_data64(&entry, ctl); + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ - high &= ~IBS_FETCH_HIGH_VALID_BIT; - high |= IBS_FETCH_HIGH_ENABLE; - low &= IBS_FETCH_LOW_MAX_CNT_MASK; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); + ctl |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); } } if (ibs_config.op_enabled) { - rdmsr(MSR_AMD64_IBSOPCTL, low, high); - if (low & IBS_OP_LOW_VALID_BIT) { - rdmsrl(MSR_AMD64_IBSOPRIP, msr); - oprofile_write_reserve(&entry, regs, msr, + rdmsrl(MSR_AMD64_IBSOPCTL, ctl); + if (ctl & IBS_OP_VAL) { + rdmsrl(MSR_AMD64_IBSOPRIP, val); + oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA2, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSOPDATA3, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCLINAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); - rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); - oprofile_add_data(&entry, (u32)msr); - oprofile_add_data(&entry, (u32)(msr >> 32)); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSOPDATA, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSOPDATA2, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSOPDATA3, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSDCLINAD, val); + oprofile_add_data64(&entry, val); + rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ - high = 0; - low &= ~IBS_OP_LOW_VALID_BIT; - low |= IBS_OP_LOW_ENABLE; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); + ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; + ctl |= IBS_OP_ENABLE; + wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } +} - return 1; +static inline void op_amd_start_ibs(void) +{ + u64 val; + if (has_ibs && ibs_config.fetch_enabled) { + val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; + val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; + val |= IBS_FETCH_ENABLE; + wrmsrl(MSR_AMD64_IBSFETCHCTL, val); + } + + if (has_ibs && ibs_config.op_enabled) { + val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; + val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; + val |= IBS_OP_ENABLE; + wrmsrl(MSR_AMD64_IBSOPCTL, val); + } +} + +static void op_amd_stop_ibs(void) +{ + if (has_ibs && ibs_config.fetch_enabled) + /* clear max count and enable */ + wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); + + if (has_ibs && ibs_config.op_enabled) + /* clear max count and enable */ + wrmsrl(MSR_AMD64_IBSOPCTL, 0); } +#else + +static inline void op_amd_handle_ibs(struct pt_regs * const regs, + struct op_msrs const * const msrs) { } +static inline void op_amd_start_ibs(void) { } +static inline void op_amd_stop_ibs(void) { } + #endif static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) + for (i = 0; i < NUM_COUNTERS; ++i) { + int virt = op_x86_phys_to_virt(i); + if (!reset_value[virt]) continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { - oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], msrs, i); - } + rdmsrl(msrs->counters[i].addr, val); + /* bit is clear if overflowed: */ + if (val & OP_CTR_OVERFLOW) + continue; + oprofile_add_sample(regs, virt); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]); } -#ifdef CONFIG_OPROFILE_IBS op_amd_handle_ibs(regs, msrs); -#endif /* See op_model_ppro.c */ return 1; @@ -248,79 +307,50 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, static void op_amd_start(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } - } -#ifdef CONFIG_OPROFILE_IBS - if (has_ibs && ibs_config.fetch_enabled) { - low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; - high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ - + IBS_FETCH_HIGH_ENABLE; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + for (i = 0; i < NUM_COUNTERS; ++i) { + if (!reset_value[op_x86_phys_to_virt(i)]) + continue; + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } - if (has_ibs && ibs_config.op_enabled) { - low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) - + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ - + IBS_OP_LOW_ENABLE; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } -#endif + op_amd_start_ibs(); } - static void op_amd_stop(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; /* * Subtle: stop on all counters to avoid race with setting our * pm callback */ - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (!reset_value[i]) + for (i = 0; i < NUM_COUNTERS; ++i) { + if (!reset_value[op_x86_phys_to_virt(i)]) continue; - CTRL_READ(low, high, msrs, i); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); - } - -#ifdef CONFIG_OPROFILE_IBS - if (has_ibs && ibs_config.fetch_enabled) { - /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); + rdmsrl(msrs->controls[i].addr, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } - if (has_ibs && ibs_config.op_enabled) { - /* clear max count and enable */ - low = 0; - high = 0; - wrmsr(MSR_AMD64_IBSOPCTL, low, high); - } -#endif + op_amd_stop_ibs(); } static void op_amd_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < NUM_COUNTERS ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) + for (i = 0; i < NUM_COUNTERS; ++i) { + if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0 ; i < NUM_CONTROLS ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) + for (i = 0; i < NUM_CONTROLS; ++i) { + if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } } @@ -490,15 +520,21 @@ static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ -struct op_x86_model_spec const op_amd_spec = { - .init = op_amd_init, - .exit = op_amd_exit, +struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_virt_counters = NUM_VIRT_COUNTERS, + .reserved = MSR_AMD_EVENTSEL_RESERVED, + .event_mask = OP_EVENT_MASK, + .init = op_amd_init, + .exit = op_amd_exit, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, - .shutdown = &op_amd_shutdown + .shutdown = &op_amd_shutdown, +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + .switch_ctrl = &op_mux_switch_ctrl, +#endif }; diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 819b131fd75..ac6b354becd 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -32,6 +32,8 @@ #define NUM_CCCRS_HT2 9 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) +#define OP_CTR_OVERFLOW (1ULL<<31) + static unsigned int num_counters = NUM_COUNTERS_NON_HT; static unsigned int num_controls = NUM_CONTROLS_NON_HT; @@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) -#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) -#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define CCCR_RESERVED_BITS 0x38030FFF #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) @@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) -#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) -#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) -#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) -#define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) - /* this assigns a "stagger" to the current CPU, which is used throughout the code in this module as an extra array offset, to select the "even" @@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) if (ev->bindings[i].virt_counter & counter_bit) { /* modify ESCR */ - ESCR_READ(escr, high, ev, i); + rdmsr(ev->bindings[i].escr_address, escr, high); ESCR_CLEAR(escr); if (stag == 0) { ESCR_SET_USR_0(escr, counter_config[ctr].user); @@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } ESCR_SET_EVENT_SELECT(escr, ev->event_select); ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); - ESCR_WRITE(escr, high, ev, i); + wrmsr(ev->bindings[i].escr_address, escr, high); /* modify CCCR */ - CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); + rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, + cccr, high); CCCR_CLEAR(cccr); CCCR_SET_REQUIRED_BITS(cccr); CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); @@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) CCCR_SET_PMI_OVF_0(cccr); else CCCR_SET_PMI_OVF_1(cccr); - CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); + wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, + cccr, high); return; } } @@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } -static void p4_setup_ctrs(struct op_msrs const * const msrs) +static void p4_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { unsigned int i; unsigned int low, high; @@ -563,8 +558,8 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) } /* clear the cccrs we will use */ - for (i = 0 ; i < num_counters ; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + for (i = 0; i < num_counters; i++) { + if (unlikely(!msrs->controls[i].addr)) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); @@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear all escrs (including those outside our concern) */ for (i = num_counters; i < num_controls; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->controls[i].addr)) continue; wrmsr(msrs->controls[i].addr, 0, 0); } /* setup all counters */ - for (i = 0 ; i < num_counters ; ++i) { - if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { + for (i = 0; i < num_counters; ++i) { + if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); - CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); + wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, + -(u64)counter_config[i].count); } else { reset_value[i] = 0; } @@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs, real = VIRT_CTR(stag, i); - CCCR_READ(low, high, real); - CTR_READ(ctr, high, real); - if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { + rdmsr(p4_counters[real].cccr_address, low, high); + rdmsr(p4_counters[real].counter_address, ctr, high); + if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], real); + wrmsrl(p4_counters[real].counter_address, + -(u64)reset_value[i]); CCCR_CLEAR_OVF(low); - CCCR_WRITE(low, high, real); - CTR_WRITE(reset_value[i], real); + wrmsr(p4_counters[real].cccr_address, low, high); + wrmsrl(p4_counters[real].counter_address, + -(u64)reset_value[i]); } } @@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_ENABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } @@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CCCR_READ(low, high, VIRT_CTR(stag, i)); + rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_DISABLE(low); - CCCR_WRITE(low, high, VIRT_CTR(stag, i)); + wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } @@ -680,8 +678,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) + for (i = 0; i < num_counters; ++i) { + if (msrs->counters[i].addr) release_perfctr_nmi(msrs->counters[i].addr); } /* @@ -689,15 +687,15 @@ static void p4_shutdown(struct op_msrs const * const msrs) * conjunction with the counter registers (hence the starting offset). * This saves a few bits. */ - for (i = num_counters ; i < num_controls ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) + for (i = num_counters; i < num_controls; ++i) { + if (msrs->controls[i].addr) release_evntsel_nmi(msrs->controls[i].addr); } } #ifdef CONFIG_SMP -struct op_x86_model_spec const op_p4_ht2_spec = { +struct op_x86_model_spec op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, @@ -709,7 +707,7 @@ struct op_x86_model_spec const op_p4_ht2_spec = { }; #endif -struct op_x86_model_spec const op_p4_spec = { +struct op_x86_model_spec op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 4da7230b3d1..4899215999d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -10,6 +10,7 @@ * @author Philippe Elie * @author Graydon Hoare * @author Andi Kleen + * @author Robert Richter <robert.richter@amd.com> */ #include <linux/oprofile.h> @@ -18,7 +19,6 @@ #include <asm/msr.h> #include <asm/apic.h> #include <asm/nmi.h> -#include <asm/perf_counter.h> #include "op_x86_model.h" #include "op_counter.h" @@ -26,20 +26,7 @@ static int num_counters = 2; static int counter_width = 32; -#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) - -#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) -#define CTRL_WRITE(l, h, msrs, c) do {wrmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) -#define CTRL_SET_ACTIVE(n) (n |= (1<<22)) -#define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) -#define CTRL_CLEAR(x) (x &= (1<<21)) -#define CTRL_SET_ENABLE(val) (val |= 1<<20) -#define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) -#define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) -#define CTRL_SET_UM(val, m) (val |= (m << 8)) -#define CTRL_SET_EVENT(val, e) (val |= e) +#define MSR_PPRO_EVENTSEL_RESERVED ((0xFFFFFFFFULL<<32)|(1ULL<<21)) static u64 *reset_value; @@ -63,9 +50,10 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) } -static void ppro_setup_ctrs(struct op_msrs const * const msrs) +static void ppro_setup_ctrs(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; if (!reset_value) { @@ -93,36 +81,30 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) } /* clear all counters */ - for (i = 0 ; i < num_counters; ++i) { - if (unlikely(!CTRL_IS_RESERVED(msrs, i))) + for (i = 0; i < num_counters; ++i) { + if (unlikely(!msrs->controls[i].addr)) continue; - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + wrmsrl(msrs->controls[i].addr, val); } /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < num_counters; ++i) { - if (unlikely(!CTR_IS_RESERVED(msrs, i))) + if (unlikely(!msrs->counters[i].addr)) continue; wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < num_counters; ++i) { - if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { + if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - - CTRL_READ(low, high, msrs, i); - CTRL_CLEAR(low); - CTRL_SET_ENABLE(low); - CTRL_SET_USR(low, counter_config[i].user); - CTRL_SET_KERN(low, counter_config[i].kernel); - CTRL_SET_UM(low, counter_config[i].unit_mask); - CTRL_SET_EVENT(low, counter_config[i].event); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[i]); + wrmsrl(msrs->controls[i].addr, val); } else { reset_value[i] = 0; } @@ -143,14 +125,14 @@ static int ppro_check_ctrs(struct pt_regs * const regs, if (unlikely(!reset_value)) goto out; - for (i = 0 ; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; rdmsrl(msrs->counters[i].addr, val); - if (CTR_OVERFLOWED(val)) { - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -reset_value[i]); - } + if (val & (1ULL << (counter_width - 1))) + continue; + oprofile_add_sample(regs, i); + wrmsrl(msrs->counters[i].addr, -reset_value[i]); } out: @@ -171,16 +153,16 @@ out: static void ppro_start(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; if (!reset_value) return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { - CTRL_READ(low, high, msrs, i); - CTRL_SET_ACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } } } @@ -188,7 +170,7 @@ static void ppro_start(struct op_msrs const * const msrs) static void ppro_stop(struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; if (!reset_value) @@ -196,9 +178,9 @@ static void ppro_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; - CTRL_READ(low, high, msrs, i); - CTRL_SET_INACTIVE(low); - CTRL_WRITE(low, high, msrs, i); + rdmsrl(msrs->controls[i].addr, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(msrs->controls[i].addr, val); } } @@ -206,12 +188,12 @@ static void ppro_shutdown(struct op_msrs const * const msrs) { int i; - for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs, i)) + for (i = 0; i < num_counters; ++i) { + if (msrs->counters[i].addr) release_perfctr_nmi(MSR_P6_PERFCTR0 + i); } - for (i = 0 ; i < num_counters ; ++i) { - if (CTRL_IS_RESERVED(msrs, i)) + for (i = 0; i < num_counters; ++i) { + if (msrs->controls[i].addr) release_evntsel_nmi(MSR_P6_EVNTSEL0 + i); } if (reset_value) { @@ -222,8 +204,9 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec op_ppro_spec = { - .num_counters = 2, /* can be overriden */ - .num_controls = 2, /* dito */ + .num_counters = 2, + .num_controls = 2, + .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, .check_ctrs = &ppro_check_ctrs, @@ -241,7 +224,7 @@ struct op_x86_model_spec op_ppro_spec = { * the specific CPU. */ -void arch_perfmon_setup_counters(void) +static void arch_perfmon_setup_counters(void) { union cpuid10_eax eax; @@ -259,11 +242,17 @@ void arch_perfmon_setup_counters(void) op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; - op_ppro_spec.num_counters = num_counters; - op_ppro_spec.num_controls = num_counters; +} + +static int arch_perfmon_init(struct oprofile_operations *ignore) +{ + arch_perfmon_setup_counters(); + return 0; } struct op_x86_model_spec op_arch_perfmon_spec = { + .reserved = MSR_PPRO_EVENTSEL_RESERVED, + .init = &arch_perfmon_init, /* num_counters/num_controls filled in at runtime */ .fill_in_addresses = &ppro_fill_in_addresses, /* user space does the cpuid check for available events */ diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 825e79064d6..b83776180c7 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -6,51 +6,66 @@ * @remark Read the file COPYING * * @author Graydon Hoare + * @author Robert Richter <robert.richter@amd.com> */ #ifndef OP_X86_MODEL_H #define OP_X86_MODEL_H -struct op_saved_msr { - unsigned int high; - unsigned int low; -}; +#include <asm/types.h> +#include <asm/perf_counter.h> struct op_msr { - unsigned long addr; - struct op_saved_msr saved; + unsigned long addr; + u64 saved; }; struct op_msrs { struct op_msr *counters; struct op_msr *controls; + struct op_msr *multiplex; }; struct pt_regs; +struct oprofile_operations; + /* The model vtable abstracts the differences between * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { - int (*init)(struct oprofile_operations *ops); - void (*exit)(void); - unsigned int num_counters; - unsigned int num_controls; - void (*fill_in_addresses)(struct op_msrs * const msrs); - void (*setup_ctrs)(struct op_msrs const * const msrs); - int (*check_ctrs)(struct pt_regs * const regs, - struct op_msrs const * const msrs); - void (*start)(struct op_msrs const * const msrs); - void (*stop)(struct op_msrs const * const msrs); - void (*shutdown)(struct op_msrs const * const msrs); + unsigned int num_counters; + unsigned int num_controls; + unsigned int num_virt_counters; + u64 reserved; + u16 event_mask; + int (*init)(struct oprofile_operations *ops); + void (*exit)(void); + void (*fill_in_addresses)(struct op_msrs * const msrs); + void (*setup_ctrs)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); + int (*check_ctrs)(struct pt_regs * const regs, + struct op_msrs const * const msrs); + void (*start)(struct op_msrs const * const msrs); + void (*stop)(struct op_msrs const * const msrs); + void (*shutdown)(struct op_msrs const * const msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + void (*switch_ctrl)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); +#endif }; +struct op_counter_config; + +extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, + struct op_counter_config *counter_config); +extern int op_x86_phys_to_virt(int phys); +extern int op_x86_virt_to_phys(int virt); + extern struct op_x86_model_spec op_ppro_spec; -extern struct op_x86_model_spec const op_p4_spec; -extern struct op_x86_model_spec const op_p4_ht2_spec; -extern struct op_x86_model_spec const op_amd_spec; +extern struct op_x86_model_spec op_p4_spec; +extern struct op_x86_model_spec op_p4_ht2_spec; +extern struct op_x86_model_spec op_amd_spec; extern struct op_x86_model_spec op_arch_perfmon_spec; -extern void arch_perfmon_setup_counters(void); - #endif /* OP_X86_MODEL_H */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index e90540a46a0..eb33aaa8415 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -215,6 +215,7 @@ static __init void xen_init_cpuid_mask(void) (1 << X86_FEATURE_ACPI)); /* disable ACPI */ ax = 1; + cx = 0; xen_cpuid(&ax, &bx, &cx, &dx); /* cpuid claims we support xsave; try enabling it to see what happens */ @@ -1059,6 +1060,7 @@ asmlinkage void __init xen_start_kernel(void) /* set up basic CPUID stuff */ cpu_detect(&new_cpu_data); new_cpu_data.hard_math = 1; + new_cpu_data.wp_works_ok = 1; new_cpu_data.x86_capability[0] = cpuid_edx(1); #endif |