diff options
280 files changed, 11642 insertions, 7876 deletions
diff --git a/Documentation/Smack.txt b/Documentation/Smack.txt index 629c92e9978..34614b4c708 100644 --- a/Documentation/Smack.txt +++ b/Documentation/Smack.txt @@ -184,8 +184,9 @@ length. Single character labels using special characters, that being anything other than a letter or digit, are reserved for use by the Smack development team. Smack labels are unstructured, case sensitive, and the only operation ever performed on them is comparison for equality. Smack labels cannot -contain unprintable characters or the "/" (slash) character. Smack labels -cannot begin with a '-', which is reserved for special options. +contain unprintable characters, the "/" (slash), the "\" (backslash), the "'" +(quote) and '"' (double-quote) characters. +Smack labels cannot begin with a '-', which is reserved for special options. There are some predefined labels: @@ -523,3 +524,18 @@ Smack supports some mount options: These mount options apply to all file system types. +Smack auditing + +If you want Smack auditing of security events, you need to set CONFIG_AUDIT +in your kernel configuration. +By default, all denied events will be audited. You can change this behavior by +writing a single character to the /smack/logging file : +0 : no logging +1 : log denied (default) +2 : log accepted +3 : log denied & accepted + +Events are logged as 'key=value' pairs, for each event you at least will get +the subjet, the object, the rights requested, the action, the kernel function +that triggered the event, plus other pairs depending on the type of event +audited. diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4a3c2209a12..72d3bf08d79 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -928,6 +928,12 @@ and is between 256 and 4096 characters. It is defined in the file Formt: { "sha1" | "md5" } default: "sha1" + ima_tcb [IMA] + Load a policy which meets the needs of the Trusted + Computing Base. This means IMA will measure all + programs exec'd, files mmap'd for exec, and all files + opened for read by uid=0. + in2000= [HW,SCSI] See header of drivers/scsi/in2000.c. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index f11ca7979fa..322a00bb99d 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -32,6 +32,7 @@ show up in /proc/sys/kernel: - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] - modprobe ==> Documentation/debugging-modules.txt +- modules_disabled - msgmax - msgmnb - msgmni @@ -184,6 +185,16 @@ kernel stack. ============================================================== +modules_disabled: + +A toggle value indicating if modules are allowed to be loaded +in an otherwise modular kernel. This toggle defaults to off +(0), but can be set true (1). Once true, modules can be +neither loaded nor unloaded, and the toggle cannot be set back +to false. + +============================================================== + osrelease, ostype & version: # cat osrelease diff --git a/MAINTAINERS b/MAINTAINERS index cf4abddfc8a..84285b5ba35 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -71,7 +71,7 @@ P: Person M: Mail patches to L: Mailing list that is relevant to this area W: Web-page with status/info -T: SCM tree type and location. Type is one of: git, hg, quilt. +T: SCM tree type and location. Type is one of: git, hg, quilt, stgit. S: Status, one of the following: Supported: Someone is actually paid to look after this. @@ -159,7 +159,8 @@ F: drivers/net/r8169.c 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER L: linux-serial@vger.kernel.org W: http://serial.sourceforge.net -S: Orphan +M: alan@lxorguk.ukuu.org.uk +S: Odd Fixes F: drivers/serial/8250* F: include/linux/serial_8250.h @@ -5629,6 +5630,7 @@ P: Alan Cox M: alan@lxorguk.ukuu.org.uk L: linux-kernel@vger.kernel.org S: Maintained +T: stgit http://zeniv.linux.org.uk/~alan/ttydev/ TULIP NETWORK DRIVERS P: Grant Grundler diff --git a/arch/arm/plat-mxc/include/mach/imx-uart.h b/arch/arm/plat-mxc/include/mach/imx-uart.h index 599217b2e13..f9bd17dd8dd 100644 --- a/arch/arm/plat-mxc/include/mach/imx-uart.h +++ b/arch/arm/plat-mxc/include/mach/imx-uart.h @@ -20,11 +20,16 @@ #define ASMARM_ARCH_UART_H #define IMXUART_HAVE_RTSCTS (1<<0) +#define IMXUART_IRDA (1<<1) struct imxuart_platform_data { int (*init)(struct platform_device *pdev); int (*exit)(struct platform_device *pdev); unsigned int flags; + void (*irda_enable)(int enable); + unsigned int irda_inv_rx:1; + unsigned int irda_inv_tx:1; + unsigned short transceiver_delay; }; #endif diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index 9d1552a9ee2..8a5bd7a9c6f 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -6,6 +6,7 @@ config FRV bool default y select HAVE_IDE + select HAVE_ARCH_TRACEHOOK config ZONE_DMA bool diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h index 287f6f697ce..50ae91b2967 100644 --- a/arch/frv/include/asm/bitops.h +++ b/arch/frv/include/asm/bitops.h @@ -112,7 +112,7 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig #define atomic_clear_mask(mask, v) atomic_test_and_ANDNOT_mask((mask), (v)) #define atomic_set_mask(mask, v) atomic_test_and_OR_mask((mask), (v)) -static inline int test_and_clear_bit(int nr, volatile void *addr) +static inline int test_and_clear_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *ptr = addr; unsigned long mask = 1UL << (nr & 31); @@ -120,7 +120,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0; } -static inline int test_and_set_bit(int nr, volatile void *addr) +static inline int test_and_set_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *ptr = addr; unsigned long mask = 1UL << (nr & 31); @@ -128,7 +128,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0; } -static inline int test_and_change_bit(int nr, volatile void *addr) +static inline int test_and_change_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *ptr = addr; unsigned long mask = 1UL << (nr & 31); @@ -136,22 +136,22 @@ static inline int test_and_change_bit(int nr, volatile void *addr) return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0; } -static inline void clear_bit(int nr, volatile void *addr) +static inline void clear_bit(unsigned long nr, volatile void *addr) { test_and_clear_bit(nr, addr); } -static inline void set_bit(int nr, volatile void *addr) +static inline void set_bit(unsigned long nr, volatile void *addr) { test_and_set_bit(nr, addr); } -static inline void change_bit(int nr, volatile void * addr) +static inline void change_bit(unsigned long nr, volatile void *addr) { test_and_change_bit(nr, addr); } -static inline void __clear_bit(int nr, volatile void * addr) +static inline void __clear_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask; @@ -161,7 +161,7 @@ static inline void __clear_bit(int nr, volatile void * addr) *a &= ~mask; } -static inline void __set_bit(int nr, volatile void * addr) +static inline void __set_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask; @@ -171,7 +171,7 @@ static inline void __set_bit(int nr, volatile void * addr) *a |= mask; } -static inline void __change_bit(int nr, volatile void *addr) +static inline void __change_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask; @@ -181,7 +181,7 @@ static inline void __change_bit(int nr, volatile void *addr) *a ^= mask; } -static inline int __test_and_clear_bit(int nr, volatile void * addr) +static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask, retval; @@ -193,7 +193,7 @@ static inline int __test_and_clear_bit(int nr, volatile void * addr) return retval; } -static inline int __test_and_set_bit(int nr, volatile void * addr) +static inline int __test_and_set_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask, retval; @@ -205,7 +205,7 @@ static inline int __test_and_set_bit(int nr, volatile void * addr) return retval; } -static inline int __test_and_change_bit(int nr, volatile void * addr) +static inline int __test_and_change_bit(unsigned long nr, volatile void *addr) { volatile unsigned long *a = addr; int mask, retval; @@ -220,12 +220,13 @@ static inline int __test_and_change_bit(int nr, volatile void * addr) /* * This routine doesn't need to be atomic. */ -static inline int __constant_test_bit(int nr, const volatile void * addr) +static inline int +__constant_test_bit(unsigned long nr, const volatile void *addr) { return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; } -static inline int __test_bit(int nr, const volatile void * addr) +static inline int __test_bit(unsigned long nr, const volatile void *addr) { int * a = (int *) addr; int mask; diff --git a/arch/frv/include/asm/elf.h b/arch/frv/include/asm/elf.h index 7279ec07d62..7bbf6e47f8c 100644 --- a/arch/frv/include/asm/elf.h +++ b/arch/frv/include/asm/elf.h @@ -116,6 +116,7 @@ do { \ } while(0) #define USE_ELF_CORE_DUMP +#define CORE_DUMP_USE_REGSET #define ELF_FDPIC_CORE_EFLAGS EF_FRV_FDPIC #define ELF_EXEC_PAGESIZE 16384 diff --git a/arch/frv/include/asm/pci.h b/arch/frv/include/asm/pci.h index 585d9b49949..cc685e60b0f 100644 --- a/arch/frv/include/asm/pci.h +++ b/arch/frv/include/asm/pci.h @@ -87,8 +87,7 @@ static inline void pci_dma_sync_single(struct pci_dev *hwdev, dma_addr_t dma_handle, size_t size, int direction) { - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle), (unsigned long)bus_to_virt(dma_handle) + size); @@ -105,9 +104,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev, int nelems, int direction) { int i; - - if (direction == PCI_DMA_NONE) - BUG(); + BUG_ON(direction == PCI_DMA_NONE); for (i = 0; i < nelems; i++) frv_cache_wback_inv(sg_dma_address(&sg[i]), diff --git a/arch/frv/include/asm/ptrace.h b/arch/frv/include/asm/ptrace.h index cf6934012b6..a54b535c9e4 100644 --- a/arch/frv/include/asm/ptrace.h +++ b/arch/frv/include/asm/ptrace.h @@ -65,6 +65,8 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ +struct task_struct; + /* * we dedicate GR28 to keeping a pointer to the current exception frame * - gr28 is destroyed on entry to the kernel from userspace @@ -73,11 +75,18 @@ register struct pt_regs *__frame asm("gr28"); #define user_mode(regs) (!((regs)->psr & PSR_S)) #define instruction_pointer(regs) ((regs)->pc) +#define user_stack_pointer(regs) ((regs)->sp) extern unsigned long user_stack(const struct pt_regs *); extern void show_regs(struct pt_regs *); #define profile_pc(regs) ((regs)->pc) -#endif + +#define task_pt_regs(task) ((task)->thread.frame0) + +#define arch_has_single_step() (1) +extern void user_enable_single_step(struct task_struct *); +extern void user_disable_single_step(struct task_struct *); #endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ #endif /* _ASM_PTRACE_H */ diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h new file mode 100644 index 00000000000..70689eb29b9 --- /dev/null +++ b/arch/frv/include/asm/syscall.h @@ -0,0 +1,123 @@ +/* syscall parameter access functions + * + * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H + +#include <linux/err.h> +#include <asm/ptrace.h> + +/* + * Get the system call number or -1 + */ +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->syscallno; +} + +/* + * Restore the clobbered GR8 register + * (1st syscall arg was overwritten with syscall return or error) + */ +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->gr8 = regs->orig_gr8; +} + +/* + * See if the syscall return value is an error, returning it if it is and 0 if + * not + */ +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0; +} + +/* + * Get the syscall return value + */ +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->gr8; +} + +/* + * Set the syscall return value + */ +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + if (error) + regs->gr8 = -error; + else + regs->gr8 = val; +} + +/* + * Retrieve the system call arguments + */ +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + /* + * Do this simply for now. If we need to start supporting + * fetching arguments from arbitrary indices, this will need some + * extra logic. Presently there are no in-tree users that depend + * on this behaviour. + */ + BUG_ON(i); + + /* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */ + switch (n) { + case 6: args[5] = regs->gr13; + case 5: args[4] = regs->gr12; + case 4: args[3] = regs->gr11; + case 3: args[2] = regs->gr10; + case 2: args[1] = regs->gr9; + case 1: args[0] = regs->gr8; + break; + default: + BUG(); + } +} + +/* + * Alter the system call arguments + */ +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + /* Same note as above applies */ + BUG_ON(i); + + switch (n) { + case 6: regs->gr13 = args[5]; + case 5: regs->gr12 = args[4]; + case 4: regs->gr11 = args[3]; + case 3: regs->gr10 = args[2]; + case 2: regs->gr9 = args[1]; + case 1: regs->gr8 = args[0]; + break; + default: + BUG(); + } +} + +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index bb53ab753ff..e8a5ed7be02 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -109,20 +109,20 @@ register struct thread_info *__current_thread_info asm("gr15"); * - other flags in MSW */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ -#define TIF_SIGPENDING 1 /* signal pending */ -#define TIF_NEED_RESCHED 2 /* rescheduling necessary */ -#define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */ -#define TIF_IRET 4 /* return with iret */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ +#define TIF_SIGPENDING 2 /* signal pending */ +#define TIF_NEED_RESCHED 3 /* rescheduling necessary */ +#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ #define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_MEMDIE 17 /* OOM killer killed process */ #define TIF_FREEZE 18 /* freezing for suspend */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) -#define _TIF_IRET (1 << TIF_IRET) #define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_FREEZE (1 << TIF_FREEZE) diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 1da523b3298..356e0e327a8 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -886,7 +886,6 @@ system_call: bnc icc0,#0,__syscall_badsys ldi @(gr15,#TI_FLAGS),gr4 - ori gr4,#_TIF_SYSCALL_TRACE,gr4 andicc gr4,#_TIF_SYSCALL_TRACE,gr0,icc0 bne icc0,#0,__syscall_trace_entry @@ -1150,11 +1149,10 @@ __entry_work_notifysig: # perform syscall entry tracing __syscall_trace_entry: LEDS 0x6320 - setlos.p #0,gr8 - call do_syscall_trace + call syscall_trace_entry - ldi @(gr28,#REG_SYSCALLNO),gr7 - lddi @(gr28,#REG_GR(8)) ,gr8 + lddi.p @(gr28,#REG_GR(8)) ,gr8 + ori gr8,#0,gr7 ; syscall_trace_entry() returned new syscallno lddi @(gr28,#REG_GR(10)),gr10 lddi.p @(gr28,#REG_GR(12)),gr12 @@ -1169,11 +1167,10 @@ __syscall_exit_work: beq icc0,#1,__entry_work_pending movsg psr,gr23 - andi gr23,#~PSR_PIL,gr23 ; could let do_syscall_trace() call schedule() + andi gr23,#~PSR_PIL,gr23 ; could let syscall_trace_exit() call schedule() movgs gr23,psr - setlos.p #1,gr8 - call do_syscall_trace + call syscall_trace_exit bra __entry_resume_userspace __syscall_badsys: diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c index 5e7d401d21e..60eeed3694c 100644 --- a/arch/frv/kernel/ptrace.c +++ b/arch/frv/kernel/ptrace.c @@ -19,6 +19,9 @@ #include <linux/user.h> #include <linux/security.h> #include <linux/signal.h> +#include <linux/regset.h> +#include <linux/elf.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -33,6 +36,169 @@ */ /* + * retrieve the contents of FRV userspace general registers + */ +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct user_int_regs *iregs = &target->thread.user->i; + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + iregs, 0, sizeof(*iregs)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(*iregs), -1); +} + +/* + * update the contents of the FRV userspace general registers + */ +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_int_regs *iregs = &target->thread.user->i; + unsigned int offs_gr0, offs_gr1; + int ret; + + /* not allowed to set PSR or __status */ + if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) && + pos + count > offsetof(struct user_int_regs, psr)) + return -EIO; + + if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) && + pos + count > offsetof(struct user_int_regs, __status)) + return -EIO; + + /* set the control regs */ + offs_gr0 = offsetof(struct user_int_regs, gr[0]); + offs_gr1 = offsetof(struct user_int_regs, gr[1]); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + iregs, 0, offs_gr0); + if (ret < 0) + return ret; + + /* skip GR0/TBR */ + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + offs_gr0, offs_gr1); + if (ret < 0) + return ret; + + /* set the general regs */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &iregs->gr[1], offs_gr1, sizeof(*iregs)); + if (ret < 0) + return ret; + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + sizeof(*iregs), -1); +} + +/* + * retrieve the contents of FRV userspace FP/Media registers + */ +static int fpmregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + const struct user_fpmedia_regs *fpregs = &target->thread.user->f; + int ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + fpregs, 0, sizeof(*fpregs)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(*fpregs), -1); +} + +/* + * update the contents of the FRV userspace FP/Media registers + */ +static int fpmregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_fpmedia_regs *fpregs = &target->thread.user->f; + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + fpregs, 0, sizeof(*fpregs)); + if (ret < 0) + return ret; + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + sizeof(*fpregs), -1); +} + +/* + * determine if the FP/Media registers have actually been used + */ +static int fpmregs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return tsk_used_math(target) ? regset->n : 0; +} + +/* + * Define the register sets available on the FRV under Linux + */ +enum frv_regset { + REGSET_GENERAL, + REGSET_FPMEDIA, +}; + +static const struct user_regset frv_regsets[] = { + /* + * General register format is: + * PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_G8 + * GNER0-1, IACC0, TBR, GR1-63 + */ + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(long), + .align = sizeof(long), + .get = genregs_get, + .set = genregs_set, + }, + /* + * FPU/Media register format is: + * FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR + */ + [REGSET_FPMEDIA] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fpmedia_regs) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = fpmregs_get, + .set = fpmregs_set, + .active = fpmregs_active, + }, +}; + +static const struct user_regset_view user_frv_native_view = { + .name = "frv", + .e_machine = EM_FRV, + .regsets = frv_regsets, + .n = ARRAY_SIZE(frv_regsets), +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_frv_native_view; +} + +/* * Get contents of register REGNO in task TASK. */ static inline long get_reg(struct task_struct *task, int regno) @@ -69,40 +235,23 @@ static inline int put_reg(struct task_struct *task, int regno, } /* - * check that an address falls within the bounds of the target process's memory - * mappings - */ -static inline int is_user_addr_valid(struct task_struct *child, - unsigned long start, unsigned long len) -{ -#ifdef CONFIG_MMU - if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start) - return -EIO; - return 0; -#else - struct vm_area_struct *vma; - - vma = find_vma(child->mm, start); - if (vma && start >= vma->vm_start && start + len <= vma->vm_end) - return 0; - - return -EIO; -#endif -} - -/* * Called by kernel/ptrace.c when detaching.. * * Control h/w single stepping */ -void ptrace_disable(struct task_struct *child) +void user_enable_single_step(struct task_struct *child) +{ + child->thread.frame0->__status |= REG__STATUS_STEP; +} + +void user_disable_single_step(struct task_struct *child) { child->thread.frame0->__status &= ~REG__STATUS_STEP; } -void ptrace_enable(struct task_struct *child) +void ptrace_disable(struct task_struct *child) { - child->thread.frame0->__status |= REG__STATUS_STEP; + user_disable_single_step(child); } long arch_ptrace(struct task_struct *child, long request, long addr, long data) @@ -111,15 +260,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) int ret; switch (request) { - /* when I and D space are separate, these will need to be fixed. */ - case PTRACE_PEEKTEXT: /* read word at location addr. */ - case PTRACE_PEEKDATA: - ret = -EIO; - if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0) - break; - ret = generic_ptrace_peekdata(child, addr, data); - break; - /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { tmp = 0; @@ -163,15 +303,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; } - /* when I and D space are separate, this will have to be fixed. */ - case PTRACE_POKETEXT: /* write the word at location addr. */ - case PTRACE_POKEDATA: - ret = -EIO; - if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0) - break; - ret = generic_ptrace_pokedata(child, addr, data); - break; - case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; if ((addr & 3) || addr < 0) @@ -179,7 +310,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ret = 0; switch (addr >> 2) { - case 0 ... PT__END-1: + case 0 ... PT__END - 1: ret = put_reg(child, addr >> 2, data); break; @@ -189,95 +320,29 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } break; - case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - ptrace_disable(child); - wake_up_process(child); - ret = 0; - break; - - /* make the child exit. Best I can do is send it a sigkill. - * perhaps it should be put in the status that it wants to - * exit. - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - ptrace_disable(child); - wake_up_process(child); - break; - - case PTRACE_SINGLESTEP: /* set the trap flag. */ - ret = -EIO; - if (!valid_signal(data)) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - ptrace_enable(child); - child->exit_code = data; - wake_up_process(child); - ret = 0; - break; - - case PTRACE_DETACH: /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - - case PTRACE_GETREGS: { /* Get all integer regs from the child. */ - int i; - for (i = 0; i < PT__GPEND; i++) { - tmp = get_reg(child, i); - if (put_user(tmp, (unsigned long *) data)) { - ret = -EFAULT; - break; - } - data += sizeof(long); - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all integer regs in the child. */ - int i; - for (i = 0; i < PT__GPEND; i++) { - if (get_user(tmp, (unsigned long *) data)) { - ret = -EFAULT; - break; - } - put_reg(child, i, tmp); - data += sizeof(long); - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */ - ret = 0; - if (copy_to_user((void *) data, - &child->thread.user->f, - sizeof(child->thread.user->f))) - ret = -EFAULT; - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */ - ret = 0; - if (copy_from_user(&child->thread.user->f, - (void *) data, - sizeof(child->thread.user->f))) - ret = -EFAULT; - break; - } + case PTRACE_GETREGS: /* Get all integer regs from the child. */ + return copy_regset_to_user(child, &user_frv_native_view, + REGSET_GENERAL, + 0, sizeof(child->thread.user->i), + (void __user *)data); + + case PTRACE_SETREGS: /* Set all integer regs in the child. */ + return copy_regset_from_user(child, &user_frv_native_view, + REGSET_GENERAL, + 0, sizeof(child->thread.user->i), + (const void __user *)data); + + case PTRACE_GETFPREGS: /* Get the child FP/Media state. */ + return copy_regset_to_user(child, &user_frv_native_view, + REGSET_FPMEDIA, + 0, sizeof(child->thread.user->f), + (void __user *)data); + + case PTRACE_SETFPREGS: /* Set the child FP/Media state. */ + return copy_regset_from_user(child, &user_frv_native_view, + REGSET_FPMEDIA, + 0, sizeof(child->thread.user->f), + (const void __user *)data); case PTRACE_GETFDPIC: tmp = 0; @@ -300,414 +365,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; default: - ret = -EIO; + ret = ptrace_request(child, request, addr, data); break; } return ret; } -int __nongprelbss kstrace; - -static const struct { - const char *name; - unsigned argmask; -} __syscall_name_table[NR_syscalls] = { - [0] = { "restart_syscall" }, - [1] = { "exit", 0x000001 }, - [2] = { "fork", 0xffffff }, - [3] = { "read", 0x000141 }, - [4] = { "write", 0x000141 }, - [5] = { "open", 0x000235 }, - [6] = { "close", 0x000001 }, - [7] = { "waitpid", 0x000141 }, - [8] = { "creat", 0x000025 }, - [9] = { "link", 0x000055 }, - [10] = { "unlink", 0x000005 }, - [11] = { "execve", 0x000445 }, - [12] = { "chdir", 0x000005 }, - [13] = { "time", 0x000004 }, - [14] = { "mknod", 0x000325 }, - [15] = { "chmod", 0x000025 }, - [16] = { "lchown", 0x000025 }, - [17] = { "break" }, - [18] = { "oldstat", 0x000045 }, - [19] = { "lseek", 0x000131 }, - [20] = { "getpid", 0xffffff }, - [21] = { "mount", 0x043555 }, - [22] = { "umount", 0x000005 }, - [23] = { "setuid", 0x000001 }, - [24] = { "getuid", 0xffffff }, - [25] = { "stime", 0x000004 }, - [26] = { "ptrace", 0x004413 }, - [27] = { "alarm", 0x000001 }, - [28] = { "oldfstat", 0x000041 }, - [29] = { "pause", 0xffffff }, - [30] = { "utime", 0x000045 }, - [31] = { "stty" }, - [32] = { "gtty" }, - [33] = { "access", 0x000025 }, - [34] = { "nice", 0x000001 }, - [35] = { "ftime" }, - [36] = { "sync", 0xffffff }, - [37] = { "kill", 0x000011 }, - [38] = { "rename", 0x000055 }, - [39] = { "mkdir", 0x000025 }, - [40] = { "rmdir", 0x000005 }, - [41] = { "dup", 0x000001 }, - [42] = { "pipe", 0x000004 }, - [43] = { "times", 0x000004 }, - [44] = { "prof" }, - [45] = { "brk", 0x000004 }, - [46] = { "setgid", 0x000001 }, - [47] = { "getgid", 0xffffff }, - [48] = { "signal", 0x000041 }, - [49] = { "geteuid", 0xffffff }, - [50] = { "getegid", 0xffffff }, - [51] = { "acct", 0x000005 }, - [52] = { "umount2", 0x000035 }, - [53] = { "lock" }, - [54] = { "ioctl", 0x000331 }, - [55] = { "fcntl", 0x000331 }, - [56] = { "mpx" }, - [57] = { "setpgid", 0x000011 }, - [58] = { "ulimit" }, - [60] = { "umask", 0x000002 }, - [61] = { "chroot", 0x000005 }, - [62] = { "ustat", 0x000043 }, - [63] = { "dup2", 0x000011 }, - [64] = { "getppid", 0xffffff }, - [65] = { "getpgrp", 0xffffff }, - [66] = { "setsid", 0xffffff }, - [67] = { "sigaction" }, - [68] = { "sgetmask" }, - [69] = { "ssetmask" }, - [70] = { "setreuid" }, - [71] = { "setregid" }, - [72] = { "sigsuspend" }, - [73] = { "sigpending" }, - [74] = { "sethostname" }, - [75] = { "setrlimit" }, - [76] = { "getrlimit" }, - [77] = { "getrusage" }, - [78] = { "gettimeofday" }, - [79] = { "settimeofday" }, - [80] = { "getgroups" }, - [81] = { "setgroups" }, - [82] = { "select" }, - [83] = { "symlink" }, - [84] = { "oldlstat" }, - [85] = { "readlink" }, - [86] = { "uselib" }, - [87] = { "swapon" }, - [88] = { "reboot" }, - [89] = { "readdir" }, - [91] = { "munmap", 0x000034 }, - [92] = { "truncate" }, - [93] = { "ftruncate" }, - [94] = { "fchmod" }, - [95] = { "fchown" }, - [96] = { "getpriority" }, - [97] = { "setpriority" }, - [99] = { "statfs" }, - [100] = { "fstatfs" }, - [102] = { "socketcall" }, - [103] = { "syslog" }, - [104] = { "setitimer" }, - [105] = { "getitimer" }, - [106] = { "stat" }, - [107] = { "lstat" }, - [108] = { "fstat" }, - [111] = { "vhangup" }, - [114] = { "wait4" }, - [115] = { "swapoff" }, - [116] = { "sysinfo" }, - [117] = { "ipc" }, - [118] = { "fsync" }, - [119] = { "sigreturn" }, - [120] = { "clone" }, - [121] = { "setdomainname" }, - [122] = { "uname" }, - [123] = { "modify_ldt" }, - [123] = { "cacheflush" }, - [124] = { "adjtimex" }, - [125] = { "mprotect" }, - [126] = { "sigprocmask" }, - [127] = { "create_module" }, - [128] = { "init_module" }, - [129] = { "delete_module" }, - [130] = { "get_kernel_syms" }, - [131] = { "quotactl" }, - [132] = { "getpgid" }, - [133] = { "fchdir" }, - [134] = { "bdflush" }, - [135] = { "sysfs" }, - [136] = { "personality" }, - [137] = { "afs_syscall" }, - [138] = { "setfsuid" }, - [139] = { "setfsgid" }, - [140] = { "_llseek", 0x014331 }, - [141] = { "getdents" }, - [142] = { "_newselect", 0x000141 }, - [143] = { "flock" }, - [144] = { "msync" }, - [145] = { "readv" }, - [146] = { "writev" }, - [147] = { "getsid", 0x000001 }, - [148] = { "fdatasync", 0x000001 }, - [149] = { "_sysctl", 0x000004 }, - [150] = { "mlock" }, - [151] = { "munlock" }, - [152] = { "mlockall" }, - [153] = { "munlockall" }, - [154] = { "sched_setparam" }, - [155] = { "sched_getparam" }, - [156] = { "sched_setscheduler" }, - [157] = { "sched_getscheduler" }, - [158] = { "sched_yield" }, - [159] = { "sched_get_priority_max" }, - [160] = { "sched_get_priority_min" }, - [161] = { "sched_rr_get_interval" }, - [162] = { "nanosleep", 0x000044 }, - [163] = { "mremap" }, - [164] = { "setresuid" }, - [165] = { "getresuid" }, - [166] = { "vm86" }, - [167] = { "query_module" }, - [168] = { "poll" }, - [169] = { "nfsservctl" }, - [170] = { "setresgid" }, - [171] = { "getresgid" }, - [172] = { "prctl", 0x333331 }, - [173] = { "rt_sigreturn", 0xffffff }, - [174] = { "rt_sigaction", 0x001441 }, - [175] = { "rt_sigprocmask", 0x001441 }, - [176] = { "rt_sigpending", 0x000014 }, - [177] = { "rt_sigtimedwait", 0x001444 }, - [178] = { "rt_sigqueueinfo", 0x000411 }, - [179] = { "rt_sigsuspend", 0x000014 }, - [180] = { "pread", 0x003341 }, - [181] = { "pwrite", 0x003341 }, - [182] = { "chown", 0x000115 }, - [183] = { "getcwd" }, - [184] = { "capget" }, - [185] = { "capset" }, - [186] = { "sigaltstack" }, - [187] = { "sendfile" }, - [188] = { "getpmsg" }, - [189] = { "putpmsg" }, - [190] = { "vfork", 0xffffff }, - [191] = { "ugetrlimit" }, - [192] = { "mmap2", 0x313314 }, - [193] = { "truncate64" }, - [194] = { "ftruncate64" }, - [195] = { "stat64", 0x000045 }, - [196] = { "lstat64", 0x000045 }, - [197] = { "fstat64", 0x000041 }, - [198] = { "lchown32" }, - [199] = { "getuid32", 0xffffff }, - [200] = { "getgid32", 0xffffff }, - [201] = { "geteuid32", 0xffffff }, - [202] = { "getegid32", 0xffffff }, - [203] = { "setreuid32" }, - [204] = { "setregid32" }, - [205] = { "getgroups32" }, - [206] = { "setgroups32" }, - [207] = { "fchown32" }, - [208] = { "setresuid32" }, - [209] = { "getresuid32" }, - [210] = { "setresgid32" }, - [211] = { "getresgid32" }, - [212] = { "chown32" }, - [213] = { "setuid32" }, - [214] = { "setgid32" }, - [215] = { "setfsuid32" }, - [216] = { "setfsgid32" }, - [217] = { "pivot_root" }, - [218] = { "mincore" }, - [219] = { "madvise" }, - [220] = { "getdents64" }, - [221] = { "fcntl64" }, - [223] = { "security" }, - [224] = { "gettid" }, - [225] = { "readahead" }, - [226] = { "setxattr" }, - [227] = { "lsetxattr" }, - [228] = { "fsetxattr" }, - [229] = { "getxattr" }, - [230] = { "lgetxattr" }, - [231] = { "fgetxattr" }, - [232] = { "listxattr" }, - [233] = { "llistxattr" }, - [234] = { "flistxattr" }, - [235] = { "removexattr" }, - [236] = { "lremovexattr" }, - [237] = { "fremovexattr" }, - [238] = { "tkill" }, - [239] = { "sendfile64" }, - [240] = { "futex" }, - [241] = { "sched_setaffinity" }, - [242] = { "sched_getaffinity" }, - [243] = { "set_thread_area" }, - [244] = { "get_thread_area" }, - [245] = { "io_setup" }, - [246] = { "io_destroy" }, - [247] = { "io_getevents" }, - [248] = { "io_submit" }, - [249] = { "io_cancel" }, - [250] = { "fadvise64" }, - [252] = { "exit_group", 0x000001 }, - [253] = { "lookup_dcookie" }, - [254] = { "epoll_create" }, - [255] = { "epoll_ctl" }, - [256] = { "epoll_wait" }, - [257] = { "remap_file_pages" }, - [258] = { "set_tid_address" }, - [259] = { "timer_create" }, - [260] = { "timer_settime" }, - [261] = { "timer_gettime" }, - [262] = { "timer_getoverrun" }, - [263] = { "timer_delete" }, - [264] = { "clock_settime" }, - [265] = { "clock_gettime" }, - [266] = { "clock_getres" }, - [267] = { "clock_nanosleep" }, - [268] = { "statfs64" }, - [269] = { "fstatfs64" }, - [270] = { "tgkill" }, - [271] = { "utimes" }, - [272] = { "fadvise64_64" }, - [273] = { "vserver" }, - [274] = { "mbind" }, - [275] = { "get_mempolicy" }, - [276] = { "set_mempolicy" }, - [277] = { "mq_open" }, - [278] = { "mq_unlink" }, - [279] = { "mq_timedsend" }, - [280] = { "mq_timedreceive" }, - [281] = { "mq_notify" }, - [282] = { "mq_getsetattr" }, - [283] = { "sys_kexec_load" }, -}; - -asmlinkage void do_syscall_trace(int leaving) +/* + * handle tracing of system call entry + * - return the revised system call number or ULONG_MAX to cause ENOSYS + */ +asmlinkage unsigned long syscall_trace_entry(void) { -#if 0 - unsigned long *argp; - const char *name; - unsigned argmask; - char buffer[16]; - - if (!kstrace) - return; - - if (!current->mm) - return; - - if (__frame->gr7 == __NR_close) - return; - -#if 0 - if (__frame->gr7 != __NR_mmap2 && - __frame->gr7 != __NR_vfork && - __frame->gr7 != __NR_execve && - __frame->gr7 != __NR_exit) - return; -#endif - - argmask = 0; - name = NULL; - if (__frame->gr7 < NR_syscalls) { - name = __syscall_name_table[__frame->gr7].name; - argmask = __syscall_name_table[__frame->gr7].argmask; - } - if (!name) { - sprintf(buffer, "sys_%lx", __frame->gr7); - name = buffer; - } - - if (!leaving) { - if (!argmask) { - printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n", - current->pid, - name, - __frame->gr8, - __frame->gr9, - __frame->gr10, - __frame->gr11, - __frame->gr12, - __frame->gr13); - } - else if (argmask == 0xffffff) { - printk(KERN_CRIT "[%d] %s()\n", - current->pid, - name); - } - else { - printk(KERN_CRIT "[%d] %s(", - current->pid, - name); - - argp = &__frame->gr8; - - do { - switch (argmask & 0xf) { - case 1: - printk("%ld", (long) *argp); - break; - case 2: - printk("%lo", *argp); - break; - case 3: - printk("%lx", *argp); - break; - case 4: - printk("%p", (void *) *argp); - break; - case 5: - printk("\"%s\"", (char *) *argp); - break; - } - - argp++; - argmask >>= 4; - if (argmask) - printk(","); - - } while (argmask); - - printk(")\n"); - } - } - else { - if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096) - printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8); - else - printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8); + __frame->__status |= REG__STATUS_SYSC_ENTRY; + if (tracehook_report_syscall_entry(__frame)) { + /* tracing decided this syscall should not happen, so + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in + * __frame->syscallno + */ + return ULONG_MAX; } - return; -#endif - - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return; - - if (!(current->ptrace & PT_PTRACED)) - return; - /* we need to indicate entry or exit to strace */ - if (leaving) - __frame->__status |= REG__STATUS_SYSC_EXIT; - else - __frame->__status |= REG__STATUS_SYSC_ENTRY; - - ptrace_notify(SIGTRAP); + return __frame->syscallno; +} - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } +/* + * handle tracing of system call exit + */ +asmlinkage void syscall_trace_exit(void) +{ + __frame->__status |= REG__STATUS_SYSC_EXIT; + tracehook_report_syscall_exit(__frame, 0); } diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c index 3bdb368292a..4a7a62c6e78 100644 --- a/arch/frv/kernel/signal.c +++ b/arch/frv/kernel/signal.c @@ -21,6 +21,7 @@ #include <linux/unistd.h> #include <linux/personality.h> #include <linux/freezer.h> +#include <linux/tracehook.h> #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/cacheflush.h> @@ -516,6 +517,9 @@ static void do_signal(void) * clear the TIF_RESTORE_SIGMASK flag */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); + + tracehook_signal_handler(signr, &info, &ka, __frame, + test_thread_flag(TIF_SINGLESTEP)); } return; @@ -564,4 +568,10 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags) if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(); + /* deal with notification on about to resume userspace execution */ + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(__frame); + } + } /* end do_notify_resume() */ diff --git a/arch/frv/kernel/uaccess.c b/arch/frv/kernel/uaccess.c index 9fb771a20df..374f88d6cc0 100644 --- a/arch/frv/kernel/uaccess.c +++ b/arch/frv/kernel/uaccess.c @@ -23,8 +23,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count) char *p, ch; long err = -EFAULT; - if (count < 0) - BUG(); + BUG_ON(count < 0); p = dst; @@ -76,8 +75,7 @@ long strnlen_user(const char __user *src, long count) long err = 0; char ch; - if (count < 0) - BUG(); + BUG_ON(count < 0); #ifndef CONFIG_MMU if ((unsigned long) src < memory_start) diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c index 52ff9aec799..4e1ba0b1544 100644 --- a/arch/frv/mb93090-mb00/pci-dma-nommu.c +++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c @@ -116,8 +116,7 @@ EXPORT_SYMBOL(dma_free_coherent); dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size); @@ -151,8 +150,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, frv_cache_wback_inv(sg_dma_address(&sg[i]), sg_dma_address(&sg[i]) + sg_dma_len(&sg[i])); - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); return nents; } diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c index 3ddedebc4eb..45954f0813d 100644 --- a/arch/frv/mb93090-mb00/pci-dma.c +++ b/arch/frv/mb93090-mb00/pci-dma.c @@ -48,8 +48,7 @@ EXPORT_SYMBOL(dma_free_coherent); dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction direction) { - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size); @@ -81,8 +80,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, void *vaddr; int i; - if (direction == DMA_NONE) - BUG(); + BUG_ON(direction == DMA_NONE); dampr2 = __get_DAMPR(2); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 4542651e6ac..5f43697aed3 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -371,6 +371,7 @@ struct kvm_vcpu_arch { int last_run_cpu; int vmm_tr_slot; int vm_tr_slot; + int sn_rtc_tr_slot; #define KVM_MP_STATE_RUNNABLE 0 #define KVM_MP_STATE_UNINITIALIZED 1 @@ -465,6 +466,7 @@ struct kvm_arch { unsigned long vmm_init_rr; int online_vcpus; + int is_sn2; struct kvm_ioapic *vioapic; struct kvm_vm_stat stat; @@ -472,6 +474,7 @@ struct kvm_arch { struct list_head assigned_dev_head; struct iommu_domain *iommu_domain; + int iommu_flags; struct hlist_head irq_ack_notifier_list; unsigned long irq_sources_bitmap; @@ -578,6 +581,8 @@ struct kvm_vmm_info{ kvm_vmm_entry *vmm_entry; kvm_tramp_entry *tramp_entry; unsigned long vmm_ivt; + unsigned long patch_mov_ar; + unsigned long patch_mov_ar_sn2; }; int kvm_highest_pending_irq(struct kvm_vcpu *vcpu); @@ -585,7 +590,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run); void kvm_sal_emul(struct kvm_vcpu *vcpu); -static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {} #endif /* __ASSEMBLY__*/ #endif diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 7a9bff47564..0a9cc73d35c 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -146,6 +146,8 @@ #define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) #define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) #define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) +#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \ + _PAGE_MA_UC) # ifndef __ASSEMBLY__ diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index acc4d19ae62..b448197728b 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = { .name = "IPI" }; +/* + * KVM uses this interrupt to force a cpu out of guest mode + */ static struct irqaction resched_irqaction = { .handler = dummy_handler, .flags = IRQF_DISABLED, diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index 0a2d6b86075..64d52093787 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -23,7 +23,7 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" - depends on HAVE_KVM && EXPERIMENTAL + depends on HAVE_KVM && MODULES && EXPERIMENTAL # for device assignment: depends on PCI select PREEMPT_NOTIFIERS diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d20a5db4c4d..80c57b0a21c 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -41,6 +41,9 @@ #include <asm/div64.h> #include <asm/tlb.h> #include <asm/elf.h> +#include <asm/sn/addrs.h> +#include <asm/sn/clksupport.h> +#include <asm/sn/shub_mmr.h> #include "misc.h" #include "vti.h" @@ -65,6 +68,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu) +{ +#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) + if (vcpu->kvm->arch.is_sn2) + return rtc_time(); + else +#endif + return ia64_getreg(_IA64_REG_AR_ITC); +} + static void kvm_flush_icache(unsigned long start, unsigned long len) { int l; @@ -119,8 +132,7 @@ void kvm_arch_hardware_enable(void *garbage) unsigned long saved_psr; int slot; - pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), - PAGE_KERNEL)); + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); local_irq_save(saved_psr); slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); local_irq_restore(saved_psr); @@ -283,6 +295,18 @@ static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } +static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector) +{ + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (!test_and_set_bit(vector, &vpd->irr[0])) { + vcpu->arch.irq_new_pending = 1; + kvm_vcpu_kick(vcpu); + return 1; + } + return 0; +} + /* * offset: address offset to IPI space. * value: deliver value. @@ -292,20 +316,20 @@ static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, { switch (dm) { case SAPIC_FIXED: - kvm_apic_set_irq(vcpu, vector, 0); break; case SAPIC_NMI: - kvm_apic_set_irq(vcpu, 2, 0); + vector = 2; break; case SAPIC_EXTINT: - kvm_apic_set_irq(vcpu, 0, 0); + vector = 0; break; case SAPIC_INIT: case SAPIC_PMI: default: printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); - break; + return; } + __apic_accept_irq(vcpu, vector); } static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, @@ -413,6 +437,23 @@ static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 1; } +static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu) +{ + unsigned long pte, rtc_phys_addr, map_addr; + int slot; + + map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT); + rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC; + pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC)); + slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT); + vcpu->arch.sn_rtc_tr_slot = slot; + if (slot < 0) { + printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n"); + slot = 0; + } + return slot; +} + int kvm_emulate_halt(struct kvm_vcpu *vcpu) { @@ -426,7 +467,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) if (irqchip_in_kernel(vcpu->kvm)) { - vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; + vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset; if (time_after(vcpu_now_itc, vpd->itm)) { vcpu->arch.timer_check = 1; @@ -447,10 +488,10 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) hrtimer_cancel(p_ht); vcpu->arch.ht_active = 0; - if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) + if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) || + kvm_cpu_has_pending_timer(vcpu)) if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) - vcpu->arch.mp_state = - KVM_MP_STATE_RUNNABLE; + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) return -EINTR; @@ -551,22 +592,35 @@ static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) if (r < 0) goto out; vcpu->arch.vm_tr_slot = r; + +#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) + if (kvm->arch.is_sn2) { + r = kvm_sn2_setup_mappings(vcpu); + if (r < 0) + goto out; + } +#endif + r = 0; out: return r; - } static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) { - + struct kvm *kvm = vcpu->kvm; ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); - +#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) + if (kvm->arch.is_sn2) + ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot); +#endif } static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) { + unsigned long psr; + int r; int cpu = smp_processor_id(); if (vcpu->arch.last_run_cpu != cpu || @@ -578,36 +632,27 @@ static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) vcpu->arch.host_rr6 = ia64_get_rr(RR6); vti_set_rr6(vcpu->arch.vmm_rr); - return kvm_insert_vmm_mapping(vcpu); + local_irq_save(psr); + r = kvm_insert_vmm_mapping(vcpu); + local_irq_restore(psr); + return r; } + static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) { kvm_purge_vmm_mapping(vcpu); vti_set_rr6(vcpu->arch.host_rr6); } -static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { union context *host_ctx, *guest_ctx; int r; - /*Get host and guest context with guest address space.*/ - host_ctx = kvm_get_host_context(vcpu); - guest_ctx = kvm_get_guest_context(vcpu); - - r = kvm_vcpu_pre_transition(vcpu); - if (r < 0) - goto out; - kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); - kvm_vcpu_post_transition(vcpu); - r = 0; -out: - return r; -} - -static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) -{ - int r; + /* + * down_read() may sleep and return with interrupts enabled + */ + down_read(&vcpu->kvm->slots_lock); again: if (signal_pending(current)) { @@ -616,26 +661,31 @@ again: goto out; } - /* - * down_read() may sleep and return with interrupts enabled - */ - down_read(&vcpu->kvm->slots_lock); - preempt_disable(); local_irq_disable(); - vcpu->guest_mode = 1; + /*Get host and guest context with guest address space.*/ + host_ctx = kvm_get_host_context(vcpu); + guest_ctx = kvm_get_guest_context(vcpu); + + clear_bit(KVM_REQ_KICK, &vcpu->requests); + + r = kvm_vcpu_pre_transition(vcpu); + if (r < 0) + goto vcpu_run_fail; + + up_read(&vcpu->kvm->slots_lock); kvm_guest_enter(); - r = vti_vcpu_run(vcpu, kvm_run); - if (r < 0) { - local_irq_enable(); - preempt_enable(); - kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; - goto out; - } + + /* + * Transition to the guest + */ + kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); + + kvm_vcpu_post_transition(vcpu); vcpu->arch.launched = 1; - vcpu->guest_mode = 0; + set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); /* @@ -646,9 +696,10 @@ again: */ barrier(); kvm_guest_exit(); - up_read(&vcpu->kvm->slots_lock); preempt_enable(); + down_read(&vcpu->kvm->slots_lock); + r = kvm_handle_exit(kvm_run, vcpu); if (r > 0) { @@ -657,12 +708,20 @@ again: } out: + up_read(&vcpu->kvm->slots_lock); if (r > 0) { kvm_resched(vcpu); + down_read(&vcpu->kvm->slots_lock); goto again; } return r; + +vcpu_run_fail: + local_irq_enable(); + preempt_enable(); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + goto out; } static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) @@ -788,6 +847,9 @@ struct kvm *kvm_arch_create_vm(void) if (IS_ERR(kvm)) return ERR_PTR(-ENOMEM); + + kvm->arch.is_sn2 = ia64_platform_is("sn2"); + kvm_init_vm(kvm); kvm->arch.online_vcpus = 0; @@ -884,7 +946,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) RESTORE_REGS(saved_gp); vcpu->arch.irq_new_pending = 1; - vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); + vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu); set_bit(KVM_REQ_RESUME, &vcpu->requests); vcpu_put(vcpu); @@ -1043,10 +1105,6 @@ static void kvm_free_vmm_area(void) } } -static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -{ -} - static int vti_init_vpd(struct kvm_vcpu *vcpu) { int i; @@ -1165,7 +1223,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) regs->cr_iip = PALE_RESET_ENTRY; /*Initialize itc offset for vcpus*/ - itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); + itc_offset = 0UL - kvm_get_itc(vcpu); for (i = 0; i < kvm->arch.online_vcpus; i++) { v = (struct kvm_vcpu *)((char *)vcpu + sizeof(struct kvm_vcpu_data) * i); @@ -1237,6 +1295,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) local_irq_save(psr); r = kvm_insert_vmm_mapping(vcpu); + local_irq_restore(psr); if (r) goto fail; r = kvm_vcpu_init(vcpu, vcpu->kvm, id); @@ -1254,13 +1313,11 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) goto uninit; kvm_purge_vmm_mapping(vcpu); - local_irq_restore(psr); return 0; uninit: kvm_vcpu_uninit(vcpu); fail: - local_irq_restore(psr); return r; } @@ -1291,7 +1348,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->kvm = kvm; cpu = get_cpu(); - vti_vcpu_load(vcpu, cpu); r = vti_vcpu_setup(vcpu, id); put_cpu(); @@ -1427,7 +1483,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) } for (i = 0; i < 4; i++) regs->insvc[i] = vcpu->arch.insvc[i]; - regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); + regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu); SAVE_REGS(xtp); SAVE_REGS(metaphysical_rr0); SAVE_REGS(metaphysical_rr4); @@ -1574,6 +1630,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, void kvm_arch_flush_shadow(struct kvm *kvm) { + kvm_flush_remote_tlbs(kvm); } long kvm_arch_dev_ioctl(struct file *filp, @@ -1616,8 +1673,37 @@ out: return 0; } + +/* + * On SN2, the ITC isn't stable, so copy in fast path code to use the + * SN2 RTC, replacing the ITC based default verion. + */ +static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info, + struct module *module) +{ + unsigned long new_ar, new_ar_sn2; + unsigned long module_base; + + if (!ia64_platform_is("sn2")) + return; + + module_base = (unsigned long)module->module_core; + + new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base; + new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base; + + printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC " + "as source\n"); + + /* + * Copy the SN2 version of mov_ar into place. They are both + * the same size, so 6 bundles is sufficient (6 * 0x10). + */ + memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60); +} + static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, - struct module *module) + struct module *module) { unsigned long module_base; unsigned long vmm_size; @@ -1639,6 +1725,7 @@ static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, return -EFAULT; memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); + kvm_patch_vmm(vmm_info, module); kvm_flush_icache(kvm_vmm_base, vmm_size); /*Recalculate kvm_vmm_info based on new VMM*/ @@ -1792,38 +1879,24 @@ void kvm_arch_hardware_unsetup(void) { } -static void vcpu_kick_intr(void *info) -{ -#ifdef DEBUG - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; - printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); -#endif -} - void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { - int ipi_pcpu = vcpu->cpu; - int cpu = get_cpu(); + int me; + int cpu = vcpu->cpu; if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); - if (vcpu->guest_mode && cpu != ipi_pcpu) - smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); + me = get_cpu(); + if (cpu != me && (unsigned) cpu < nr_cpu_ids && cpu_online(cpu)) + if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) + smp_send_reschedule(cpu); put_cpu(); } -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { - - struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); - - if (!test_and_set_bit(vec, &vpd->irr[0])) { - vcpu->arch.irq_new_pending = 1; - kvm_vcpu_kick(vcpu); - return 1; - } - return 0; + return __apic_accept_irq(vcpu, irq->vector); } int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) @@ -1836,20 +1909,18 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) return 0; } -struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, - unsigned long bitmap) +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) { - struct kvm_vcpu *lvcpu = kvm->vcpus[0]; - int i; - - for (i = 1; i < kvm->arch.online_vcpus; i++) { - if (!kvm->vcpus[i]) - continue; - if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) - lvcpu = kvm->vcpus[i]; - } + return vcpu1->arch.xtp - vcpu2->arch.xtp; +} - return lvcpu; +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode) +{ + struct kvm_lapic *target = vcpu->arch.apic; + return (dest_mode == 0) ? + kvm_apic_match_physical_addr(target, dest) : + kvm_apic_match_logical_addr(target, dest); } static int find_highest_bits(int *dat) @@ -1888,6 +1959,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) return 0; } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { return vcpu->arch.timer_fired; @@ -1918,6 +1995,7 @@ static int vcpu_reset(struct kvm_vcpu *vcpu) long psr; local_irq_save(psr); r = kvm_insert_vmm_mapping(vcpu); + local_irq_restore(psr); if (r) goto fail; @@ -1930,7 +2008,6 @@ static int vcpu_reset(struct kvm_vcpu *vcpu) kvm_purge_vmm_mapping(vcpu); r = 0; fail: - local_irq_restore(psr); return r; } diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c index a8ae52ed563..e4b82319881 100644 --- a/arch/ia64/kvm/kvm_fw.c +++ b/arch/ia64/kvm/kvm_fw.c @@ -21,6 +21,9 @@ #include <linux/kvm_host.h> #include <linux/smp.h> +#include <asm/sn/addrs.h> +#include <asm/sn/clksupport.h> +#include <asm/sn/shub_mmr.h> #include "vti.h" #include "misc.h" @@ -188,12 +191,35 @@ static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) return result; } -static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) +/* + * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2 + * RTC is used instead. This function patches the ratios from SAL + * to match the RTC before providing them to the guest. + */ +static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result) { + struct pal_freq_ratio *ratio; + unsigned long sal_freq, sal_drift, factor; + + result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &sal_freq, &sal_drift); + ratio = (struct pal_freq_ratio *)&result->v2; + factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) / + sn_rtc_cycles_per_second; + + ratio->num = 3; + ratio->den = factor; +} +static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) +{ struct ia64_pal_retval result; PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); + + if (vcpu->kvm->arch.is_sn2) + sn2_patch_itc_freq_ratios(&result); + return result; } diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h index 6d6cbcb1489..ee541cebcd7 100644 --- a/arch/ia64/kvm/lapic.h +++ b/arch/ia64/kvm/lapic.h @@ -20,6 +20,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode); +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); +#define kvm_apic_present(x) (true) #endif diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S index 32254ce9a1b..f793be3efff 100644 --- a/arch/ia64/kvm/optvfault.S +++ b/arch/ia64/kvm/optvfault.S @@ -11,6 +11,7 @@ #include <asm/asmmacro.h> #include <asm/processor.h> +#include <asm/kvm_host.h> #include "vti.h" #include "asm-offsets.h" @@ -140,6 +141,35 @@ GLOBAL_ENTRY(kvm_asm_mov_from_ar) ;; END(kvm_asm_mov_from_ar) +/* + * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC + * clock as it's source for emulating the ITC. This version will be + * copied on top of the original version if the host is determined to + * be an SN2. + */ +GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2) + add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 + movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT)) + + add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 + extr.u r17=r25,6,7 + mov r24=b0 + ;; + ld8 r18=[r18] + ld8 r19=[r19] + addl r20=@gprel(asm_mov_to_reg),gp + ;; + add r19=r19,r18 + shladd r17=r17,4,r20 + ;; + adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 + st8 [r16] = r19 + mov b0=r17 + br.sptk.few b0 + ;; +END(kvm_asm_mov_from_ar_sn2) + + // mov r1=rr[r3] GLOBAL_ENTRY(kvm_asm_mov_from_rr) diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c index b1dc80952d9..a8f84da04b4 100644 --- a/arch/ia64/kvm/process.c +++ b/arch/ia64/kvm/process.c @@ -652,20 +652,25 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, unsigned long isr, unsigned long iim) { struct kvm_vcpu *v = current_vcpu; + long psr; if (ia64_psr(regs)->cpl == 0) { /* Allow hypercalls only when cpl = 0. */ if (iim == DOMN_PAL_REQUEST) { + local_irq_save(psr); set_pal_call_data(v); vmm_transition(v); get_pal_call_result(v); vcpu_increment_iip(v); + local_irq_restore(psr); return; } else if (iim == DOMN_SAL_REQUEST) { + local_irq_save(psr); set_sal_call_data(v); vmm_transition(v); get_sal_call_result(v); vcpu_increment_iip(v); + local_irq_restore(psr); return; } } diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c index a18ee17b919..a2c6c15e476 100644 --- a/arch/ia64/kvm/vcpu.c +++ b/arch/ia64/kvm/vcpu.c @@ -788,13 +788,29 @@ void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ } +/* + * The Altix RTC is mapped specially here for the vmm module + */ +#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT)) +static long kvm_get_itc(struct kvm_vcpu *vcpu) +{ +#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC) + struct kvm *kvm = (struct kvm *)KVM_VM_BASE; + + if (kvm->arch.is_sn2) + return (*SN_RTC_BASE); + else +#endif + return ia64_getreg(_IA64_REG_AR_ITC); +} + /************************************************************************ * lsapic timer ***********************************************************************/ u64 vcpu_get_itc(struct kvm_vcpu *vcpu) { unsigned long guest_itc; - guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); + guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu); if (guest_itc >= VMX(vcpu, last_itc)) { VMX(vcpu, last_itc) = guest_itc; @@ -809,7 +825,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) struct kvm_vcpu *v; struct kvm *kvm; int i; - long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); + long itc_offset = val - kvm_get_itc(vcpu); unsigned long vitv = VCPU(vcpu, itv); kvm = (struct kvm *)KVM_VM_BASE; diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c index 9eee5c04bac..f4b4c899bb6 100644 --- a/arch/ia64/kvm/vmm.c +++ b/arch/ia64/kvm/vmm.c @@ -30,15 +30,19 @@ MODULE_AUTHOR("Intel"); MODULE_LICENSE("GPL"); extern char kvm_ia64_ivt; +extern char kvm_asm_mov_from_ar; +extern char kvm_asm_mov_from_ar_sn2; extern fpswa_interface_t *vmm_fpswa_interface; long vmm_sanity = 1; struct kvm_vmm_info vmm_info = { - .module = THIS_MODULE, - .vmm_entry = vmm_entry, - .tramp_entry = vmm_trampoline, - .vmm_ivt = (unsigned long)&kvm_ia64_ivt, + .module = THIS_MODULE, + .vmm_entry = vmm_entry, + .tramp_entry = vmm_trampoline, + .vmm_ivt = (unsigned long)&kvm_ia64_ivt, + .patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar, + .patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2, }; static int __init kvm_vmm_init(void) diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S index 3ef1a017a31..40920c63064 100644 --- a/arch/ia64/kvm/vmm_ivt.S +++ b/arch/ia64/kvm/vmm_ivt.S @@ -95,7 +95,7 @@ GLOBAL_ENTRY(kvm_vmm_panic) ;; srlz.i // guarantee that interruption collection is on ;; - //(p15) ssm psr.i // restore psr.i + (p15) ssm psr.i // restore psr. addl r14=@gprel(ia64_leave_hypervisor),gp ;; KVM_SAVE_REST @@ -249,7 +249,7 @@ ENTRY(kvm_break_fault) ;; srlz.i // guarantee that interruption collection is on ;; - //(p15)ssm psr.i // restore psr.i + (p15)ssm psr.i // restore psr.i addl r14=@gprel(ia64_leave_hypervisor),gp ;; KVM_SAVE_REST @@ -439,7 +439,7 @@ kvm_dispatch_vexirq: ;; srlz.i // guarantee that interruption collection is on ;; - //(p15) ssm psr.i // restore psr.i + (p15) ssm psr.i // restore psr.i adds r3=8,r2 // set up second base pointer ;; KVM_SAVE_REST @@ -819,7 +819,7 @@ ENTRY(kvm_dtlb_miss_dispatch) ;; srlz.i // guarantee that interruption collection is on ;; - //(p15) ssm psr.i // restore psr.i + (p15) ssm psr.i // restore psr.i addl r14=@gprel(ia64_leave_hypervisor_prepare),gp ;; KVM_SAVE_REST @@ -842,7 +842,7 @@ ENTRY(kvm_itlb_miss_dispatch) ;; srlz.i // guarantee that interruption collection is on ;; - //(p15) ssm psr.i // restore psr.i + (p15) ssm psr.i // restore psr.i addl r14=@gprel(ia64_leave_hypervisor),gp ;; KVM_SAVE_REST @@ -871,7 +871,7 @@ ENTRY(kvm_dispatch_reflection) ;; srlz.i // guarantee that interruption collection is on ;; - //(p15) ssm psr.i // restore psr.i + (p15) ssm psr.i // restore psr.i addl r14=@gprel(ia64_leave_hypervisor),gp ;; KVM_SAVE_REST @@ -898,7 +898,7 @@ ENTRY(kvm_dispatch_virtualization_fault) ;; srlz.i // guarantee that interruption collection is on ;; - //(p15) ssm psr.i // restore psr.i + (p15) ssm psr.i // restore psr.i addl r14=@gprel(ia64_leave_hypervisor_prepare),gp ;; KVM_SAVE_REST @@ -920,7 +920,7 @@ ENTRY(kvm_dispatch_interrupt) ;; srlz.i ;; - //(p15) ssm psr.i + (p15) ssm psr.i addl r14=@gprel(ia64_leave_hypervisor),gp ;; KVM_SAVE_REST @@ -1333,7 +1333,7 @@ hostret = r24 ;; (p7) srlz.i ;; -//(p6) ssm psr.i +(p6) ssm psr.i ;; mov rp=rpsave mov ar.pfs=pfssave diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c index 2c2501f1315..4290a429bf7 100644 --- a/arch/ia64/kvm/vtlb.c +++ b/arch/ia64/kvm/vtlb.c @@ -254,7 +254,8 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte) "(p7) st8 [%2]=r9;;" "ssm psr.ic;;" "srlz.d;;" - /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ + "ssm psr.i;;" + "srlz.d;;" : "=r"(ret) : "r"(iha), "r"(pte):"memory"); return ret; diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 355926730e8..89faacad5d1 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration" config MN10300 def_bool y select HAVE_OPROFILE + select HAVE_ARCH_TRACEHOOK config AM33 def_bool y diff --git a/arch/mn10300/include/asm/elf.h b/arch/mn10300/include/asm/elf.h index bf09f8bb392..49105462e6f 100644 --- a/arch/mn10300/include/asm/elf.h +++ b/arch/mn10300/include/asm/elf.h @@ -34,7 +34,7 @@ */ typedef unsigned long elf_greg_t; -#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t)) +#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1) typedef elf_greg_t elf_gregset_t[ELF_NGREG]; #define ELF_NFPREG 32 @@ -76,6 +76,7 @@ do { \ } while (0) #define USE_ELF_CORE_DUMP +#define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE 4096 /* diff --git a/arch/mn10300/include/asm/processor.h b/arch/mn10300/include/asm/processor.h index 73239271873..f7d4b0d285e 100644 --- a/arch/mn10300/include/asm/processor.h +++ b/arch/mn10300/include/asm/processor.h @@ -143,13 +143,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); unsigned long get_wchan(struct task_struct *p); -#define task_pt_regs(task) \ -({ \ - struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \ - __regs__ - 1; \ -}) - +#define task_pt_regs(task) ((task)->thread.uregs) #define KSTK_EIP(task) (task_pt_regs(task)->pc) #define KSTK_ESP(task) (task_pt_regs(task)->sp) diff --git a/arch/mn10300/include/asm/ptrace.h b/arch/mn10300/include/asm/ptrace.h index 7b06cc623d8..921942ed1b0 100644 --- a/arch/mn10300/include/asm/ptrace.h +++ b/arch/mn10300/include/asm/ptrace.h @@ -91,9 +91,17 @@ extern struct pt_regs *__frame; /* current frame pointer */ #if defined(__KERNEL__) #if !defined(__ASSEMBLY__) +struct task_struct; + #define user_mode(regs) (((regs)->epsw & EPSW_nSL) == EPSW_nSL) #define instruction_pointer(regs) ((regs)->pc) +#define user_stack_pointer(regs) ((regs)->sp) extern void show_regs(struct pt_regs *); + +#define arch_has_single_step() (1) +extern void user_enable_single_step(struct task_struct *); +extern void user_disable_single_step(struct task_struct *); + #endif /* !__ASSEMBLY */ #define profile_pc(regs) ((regs)->pc) diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index 3dc3e462f92..7408a27199f 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -76,7 +76,7 @@ ENTRY(system_call) cmp nr_syscalls,d0 bcc syscall_badsys btst _TIF_SYSCALL_TRACE,(TI_flags,a2) - bne syscall_trace_entry + bne syscall_entry_trace syscall_call: add d0,d0,a1 add a1,a1 @@ -104,11 +104,10 @@ restore_all: syscall_exit_work: btst _TIF_SYSCALL_TRACE,d2 beq work_pending - __sti # could let do_syscall_trace() call + __sti # could let syscall_trace_exit() call # schedule() instead mov fp,d0 - mov 1,d1 - call do_syscall_trace[],0 # do_syscall_trace(regs,entryexit) + call syscall_trace_exit[],0 # do_syscall_trace(regs) jmp resume_userspace ALIGN @@ -138,13 +137,11 @@ work_notifysig: jmp resume_userspace # perform syscall entry tracing -syscall_trace_entry: +syscall_entry_trace: mov -ENOSYS,d0 mov d0,(REG_D0,fp) mov fp,d0 - clr d1 - call do_syscall_trace[],0 - mov (REG_ORIG_D0,fp),d0 + call syscall_trace_entry[],0 # returns the syscall number to actually use mov (REG_D1,fp),d1 cmp nr_syscalls,d0 bcs syscall_call diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index d6d6cdc75c5..e143339ad28 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -17,6 +17,9 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/user.h> +#include <linux/regset.h> +#include <linux/elf.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -64,12 +67,6 @@ static inline int get_stack_long(struct task_struct *task, int offset) ((unsigned long) task->thread.uregs + offset); } -/* - * this routine will put a word on the processes privileged stack. - * the offset is how far from the base addr as stored in the TSS. - * this routine assumes that all the privileged stacks are in our - * data space. - */ static inline int put_stack_long(struct task_struct *task, int offset, unsigned long data) { @@ -80,94 +77,233 @@ int put_stack_long(struct task_struct *task, int offset, unsigned long data) return 0; } -static inline unsigned long get_fpregs(struct fpu_state_struct *buf, - struct task_struct *tsk) +/* + * retrieve the contents of MN10300 userspace general registers + */ +static int genregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) { - return __copy_to_user(buf, &tsk->thread.fpu_state, - sizeof(struct fpu_state_struct)); + const struct pt_regs *regs = task_pt_regs(target); + int ret; + + /* we need to skip regs->next */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs, 0, PT_ORIG_D0 * sizeof(long)); + if (ret < 0) + return ret; + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + ®s->orig_d0, PT_ORIG_D0 * sizeof(long), + NR_PTREGS * sizeof(long)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + NR_PTREGS * sizeof(long), -1); } -static inline unsigned long set_fpregs(struct task_struct *tsk, - struct fpu_state_struct *buf) +/* + * update the contents of the MN10300 userspace general registers + */ +static int genregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) { - return __copy_from_user(&tsk->thread.fpu_state, buf, - sizeof(struct fpu_state_struct)); + struct pt_regs *regs = task_pt_regs(target); + unsigned long tmp; + int ret; + + /* we need to skip regs->next */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + regs, 0, PT_ORIG_D0 * sizeof(long)); + if (ret < 0) + return ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->orig_d0, PT_ORIG_D0 * sizeof(long), + PT_EPSW * sizeof(long)); + if (ret < 0) + return ret; + + /* we need to mask off changes to EPSW */ + tmp = regs->epsw; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &tmp, PT_EPSW * sizeof(long), + PT_PC * sizeof(long)); + tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z; + tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | + EPSW_FLAG_Z); + regs->epsw = tmp; + + if (ret < 0) + return ret; + + /* and finally load the PC */ + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + ®s->pc, PT_PC * sizeof(long), + NR_PTREGS * sizeof(long)); + + if (ret < 0) + return ret; + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + NR_PTREGS * sizeof(long), -1); } -static inline void fpsave_init(struct task_struct *task) +/* + * retrieve the contents of MN10300 userspace FPU registers + */ +static int fpuregs_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) { - memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct)); + const struct fpu_state_struct *fpregs = &target->thread.fpu_state; + int ret; + + unlazy_fpu(target); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + fpregs, 0, sizeof(*fpregs)); + if (ret < 0) + return ret; + + return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + sizeof(*fpregs), -1); } /* - * make sure the single step bit is not set + * update the contents of the MN10300 userspace FPU registers */ -void ptrace_disable(struct task_struct *child) +static int fpuregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu_state_struct fpu_state = target->thread.fpu_state; + int ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu_state, 0, sizeof(fpu_state)); + if (ret < 0) + return ret; + + fpu_kill_state(target); + target->thread.fpu_state = fpu_state; + set_using_fpu(target); + + return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + sizeof(fpu_state), -1); +} + +/* + * determine if the FPU registers have actually been used + */ +static int fpuregs_active(struct task_struct *target, + const struct user_regset *regset) +{ + return is_using_fpu(target) ? regset->n : 0; +} + +/* + * Define the register sets available on the MN10300 under Linux + */ +enum mn10300_regset { + REGSET_GENERAL, + REGSET_FPU, +}; + +static const struct user_regset mn10300_regsets[] = { + /* + * General register format is: + * A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ + * E1, E0, E7...E2, SP, LAR, LIR, MDR + * A1, A0, D1, D0, ORIG_D0, EPSW, PC + */ + [REGSET_GENERAL] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(long), + .align = sizeof(long), + .get = genregs_get, + .set = genregs_set, + }, + /* + * FPU register format is: + * FS0-31, FPCR + */ + [REGSET_FPU] = { + .core_note_type = NT_PRFPREG, + .n = sizeof(struct fpu_state_struct) / sizeof(long), + .size = sizeof(long), + .align = sizeof(long), + .get = fpuregs_get, + .set = fpuregs_set, + .active = fpuregs_active, + }, +}; + +static const struct user_regset_view user_mn10300_native_view = { + .name = "mn10300", + .e_machine = EM_MN10300, + .regsets = mn10300_regsets, + .n = ARRAY_SIZE(mn10300_regsets), +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_mn10300_native_view; +} + +/* + * set the single-step bit + */ +void user_enable_single_step(struct task_struct *child) { #ifndef CONFIG_MN10300_USING_JTAG struct user *dummy = NULL; long tmp; tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw); - tmp &= ~EPSW_T; + tmp |= EPSW_T; put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp); #endif } /* - * set the single step bit + * make sure the single-step bit is not set */ -void ptrace_enable(struct task_struct *child) +void user_disable_single_step(struct task_struct *child) { #ifndef CONFIG_MN10300_USING_JTAG struct user *dummy = NULL; long tmp; tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw); - tmp |= EPSW_T; + tmp &= ~EPSW_T; put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp); #endif } +void ptrace_disable(struct task_struct *child) +{ + user_disable_single_step(child); +} + /* * handle the arch-specific side of process tracing */ long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct fpu_state_struct fpu_state; - int i, ret; + unsigned long tmp; + int ret; switch (request) { - /* read the word at location addr. */ - case PTRACE_PEEKTEXT: { - unsigned long tmp; - int copied; - - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - break; - ret = put_user(tmp, (unsigned long *) data); - break; - } - - /* read the word at location addr. */ - case PTRACE_PEEKDATA: { - unsigned long tmp; - int copied; - - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); - ret = -EIO; - if (copied != sizeof(tmp)) - break; - ret = put_user(tmp, (unsigned long *) data); - break; - } - /* read the word at location addr in the USER area. */ - case PTRACE_PEEKUSR: { - unsigned long tmp; - + case PTRACE_PEEKUSR: ret = -EIO; if ((addr & 3) || addr < 0 || addr > sizeof(struct user) - 3) @@ -179,17 +315,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) ptrace_regid_to_frame[addr]); ret = put_user(tmp, (unsigned long *) data); break; - } - - /* write the word at location addr. */ - case PTRACE_POKETEXT: - case PTRACE_POKEDATA: - if (access_process_vm(child, addr, &data, sizeof(data), 1) == - sizeof(data)) - ret = 0; - else - ret = -EIO; - break; /* write the word at location addr in the USER area */ case PTRACE_POKEUSR: @@ -204,132 +329,32 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) data); break; - /* continue and stop at next (return from) syscall */ - case PTRACE_SYSCALL: - /* restart after signal. */ - case PTRACE_CONT: - ret = -EIO; - if ((unsigned long) data > _NSIG) - break; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - ptrace_disable(child); - wake_up_process(child); - ret = 0; - break; - - /* - * make the child exit - * - the best I can do is send it a sigkill - * - perhaps it should be put in the status that it wants to - * exit - */ - case PTRACE_KILL: - ret = 0; - if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - break; - child->exit_code = SIGKILL; - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - ptrace_disable(child); - wake_up_process(child); - break; - - case PTRACE_SINGLESTEP: /* set the trap flag. */ -#ifndef CONFIG_MN10300_USING_JTAG - ret = -EIO; - if ((unsigned long) data > _NSIG) - break; - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - ptrace_enable(child); - child->exit_code = data; - wake_up_process(child); - ret = 0; -#else - ret = -EINVAL; -#endif - break; - - case PTRACE_DETACH: /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - break; - - /* Get all gp regs from the child. */ - case PTRACE_GETREGS: { - unsigned long tmp; - - if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) { - ret = -EIO; - break; - } - - for (i = 0; i < NR_PTREGS << 2; i += 4) { - tmp = get_stack_long(child, ptrace_regid_to_frame[i]); - __put_user(tmp, (unsigned long *) data); - data += sizeof(tmp); - } - ret = 0; - break; - } - - case PTRACE_SETREGS: { /* Set all gp regs in the child. */ - unsigned long tmp; - - if (!access_ok(VERIFY_READ, (unsigned long *)data, - sizeof(struct pt_regs))) { - ret = -EIO; - break; - } - - for (i = 0; i < NR_PTREGS << 2; i += 4) { - __get_user(tmp, (unsigned long *) data); - put_stack_long(child, ptrace_regid_to_frame[i], tmp); - data += sizeof(tmp); - } - ret = 0; - break; - } - - case PTRACE_GETFPREGS: { /* Get the child FPU state. */ - if (is_using_fpu(child)) { - unlazy_fpu(child); - fpu_state = child->thread.fpu_state; - } else { - memset(&fpu_state, 0, sizeof(fpu_state)); - } - - ret = -EIO; - if (copy_to_user((void *) data, &fpu_state, - sizeof(fpu_state)) == 0) - ret = 0; - break; - } - - case PTRACE_SETFPREGS: { /* Set the child FPU state. */ - ret = -EFAULT; - if (copy_from_user(&fpu_state, (const void *) data, - sizeof(fpu_state)) == 0) { - fpu_kill_state(child); - child->thread.fpu_state = fpu_state; - set_using_fpu(child); - ret = 0; - } - break; - } - - case PTRACE_SETOPTIONS: { - if (data & PTRACE_O_TRACESYSGOOD) - child->ptrace |= PT_TRACESYSGOOD; - else - child->ptrace &= ~PT_TRACESYSGOOD; - ret = 0; - break; - } + case PTRACE_GETREGS: /* Get all integer regs from the child. */ + return copy_regset_to_user(child, &user_mn10300_native_view, + REGSET_GENERAL, + 0, NR_PTREGS * sizeof(long), + (void __user *)data); + + case PTRACE_SETREGS: /* Set all integer regs in the child. */ + return copy_regset_from_user(child, &user_mn10300_native_view, + REGSET_GENERAL, + 0, NR_PTREGS * sizeof(long), + (const void __user *)data); + + case PTRACE_GETFPREGS: /* Get the child FPU state. */ + return copy_regset_to_user(child, &user_mn10300_native_view, + REGSET_FPU, + 0, sizeof(struct fpu_state_struct), + (void __user *)data); + + case PTRACE_SETFPREGS: /* Set the child FPU state. */ + return copy_regset_from_user(child, &user_mn10300_native_view, + REGSET_FPU, + 0, sizeof(struct fpu_state_struct), + (const void __user *)data); default: - ret = -EIO; + ret = ptrace_request(child, request, addr, data); break; } @@ -337,43 +362,26 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) } /* - * notification of system call entry/exit - * - triggered by current->work.syscall_trace + * handle tracing of system call entry + * - return the revised system call number or ULONG_MAX to cause ENOSYS */ -asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit) +asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs) { -#if 0 - /* just in case... */ - printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n", - current->pid, - regs->orig_d0, - regs->a0, - regs->d1, - regs->a3, - regs->a2, - regs->d0); - return; -#endif - - if (!test_thread_flag(TIF_SYSCALL_TRACE) && - !test_thread_flag(TIF_SINGLESTEP)) - return; - if (!(current->ptrace & PT_PTRACED)) - return; + if (tracehook_report_syscall_entry(regs)) + /* tracing decided this syscall should not happen, so + * We'll return a bogus call number to get an ENOSYS + * error, but leave the original number in + * regs->orig_d0 + */ + return ULONG_MAX; - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - ptrace_notify(SIGTRAP | - ((current->ptrace & PT_TRACESYSGOOD) && - !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0)); + return regs->orig_d0; +} - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } +/* + * handle tracing of system call exit + */ +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + tracehook_report_syscall_exit(regs, 0); } diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 841ca9955a1..9f7572a0f57 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -23,6 +23,7 @@ #include <linux/tty.h> #include <linux/personality.h> #include <linux/suspend.h> +#include <linux/tracehook.h> #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/uaccess.h> @@ -511,6 +512,9 @@ static void do_signal(struct pt_regs *regs) * clear the TIF_RESTORE_SIGMASK flag */ if (test_thread_flag(TIF_RESTORE_SIGMASK)) clear_thread_flag(TIF_RESTORE_SIGMASK); + + tracehook_signal_handler(signr, &info, &ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } return; @@ -561,4 +565,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags) /* deal with pending signal delivery */ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK)) do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(__frame); + } } diff --git a/arch/mn10300/mm/tlb-mn10300.S b/arch/mn10300/mm/tlb-mn10300.S index 789208094e9..7095147dcb8 100644 --- a/arch/mn10300/mm/tlb-mn10300.S +++ b/arch/mn10300/mm/tlb-mn10300.S @@ -165,24 +165,6 @@ ENTRY(itlb_aerror) ENTRY(dtlb_aerror) and ~EPSW_NMID,epsw add -4,sp - mov d1,(sp) - - movhu (MMUFCR_DFC),d1 # is it the initial valid write - # to this page? - and MMUFCR_xFC_INITWR,d1 - beq dtlb_pagefault # jump if not - - mov (DPTEL),d1 # set the dirty bit - # (don't replace with BSET!) - or _PAGE_DIRTY,d1 - mov d1,(DPTEL) - mov (sp),d1 - add 4,sp - rti - - ALIGN -dtlb_pagefault: - mov (sp),d1 SAVE_ALL add -4,sp # need to pass three params diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 9057335fdc6..2cf915e51e7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) return !!(v->arch.pending_exceptions); } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { return !(v->arch.msr & MSR_WE); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 54ea39f96ec..a27d0d5a6f8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,6 +13,8 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H +#include <linux/hrtimer.h> +#include <linux/interrupt.h> #include <linux/kvm_host.h> #include <asm/debug.h> #include <asm/cpuid.h> @@ -210,7 +212,8 @@ struct kvm_vcpu_arch { s390_fp_regs guest_fpregs; unsigned int guest_acrs[NUM_ACRS]; struct kvm_s390_local_interrupt local_int; - struct timer_list ckc_timer; + struct hrtimer ckc_timer; + struct tasklet_struct tasklet; union { cpuid_t cpu_id; u64 stidp_data; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 9d19803111b..98997ccba50 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu) static int handle_validity(struct kvm_vcpu *vcpu) { int viwhy = vcpu->arch.sie_block->ipb >> 16; + int rc; + vcpu->stat.exit_validity++; - if (viwhy == 0x37) { - fault_in_pages_writeable((char __user *) - vcpu->kvm->arch.guest_origin + - vcpu->arch.sie_block->prefix, - PAGE_SIZE); - return 0; - } - VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", - viwhy); - return -ENOTSUPP; + if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix + <= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){ + rc = fault_in_pages_writeable((char __user *) + vcpu->kvm->arch.guest_origin + + vcpu->arch.sie_block->prefix, + 2*PAGE_SIZE); + if (rc) + /* user will receive sigsegv, exit to user */ + rc = -ENOTSUPP; + } else + rc = -ENOTSUPP; + + if (rc) + VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", + viwhy); + return rc; } static int handle_instruction(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 0189356fe20..f04f5301b1b 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -12,6 +12,8 @@ #include <asm/lowcore.h> #include <asm/uaccess.h> +#include <linux/hrtimer.h> +#include <linux/interrupt.h> #include <linux/kvm_host.h> #include <linux/signal.h> #include "kvm-s390.h" @@ -299,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) } if ((!rc) && atomic_read(&fi->active)) { - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); list_for_each_entry(inti, &fi->list, list) if (__interrupt_is_deliverable(vcpu, inti)) { rc = 1; break; } - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); } if ((!rc) && (vcpu->arch.sie_block->ckc < @@ -318,6 +320,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) return rc; } +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + /* do real check here */ + return 1; +} + int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { return 0; @@ -355,14 +363,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) return 0; } - sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; + sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9; - vcpu->arch.ckc_timer.expires = jiffies + sltime; - - add_timer(&vcpu->arch.ckc_timer); - VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime); + hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL); + VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime); no_timer: - spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); add_wait_queue(&vcpu->arch.local_int.wq, &wait); while (list_empty(&vcpu->arch.local_int.list) && @@ -371,33 +377,46 @@ no_timer: !signal_pending(current)) { set_current_state(TASK_INTERRUPTIBLE); spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + spin_unlock(&vcpu->arch.local_int.float_int->lock); vcpu_put(vcpu); schedule(); vcpu_load(vcpu); - spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock(&vcpu->arch.local_int.float_int->lock); spin_lock_bh(&vcpu->arch.local_int.lock); } __unset_cpu_idle(vcpu); __set_current_state(TASK_RUNNING); remove_wait_queue(&vcpu->wq, &wait); spin_unlock_bh(&vcpu->arch.local_int.lock); - spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); - del_timer(&vcpu->arch.ckc_timer); + spin_unlock(&vcpu->arch.local_int.float_int->lock); + hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); return 0; } -void kvm_s390_idle_wakeup(unsigned long data) +void kvm_s390_tasklet(unsigned long parm) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm; - spin_lock_bh(&vcpu->arch.local_int.lock); + spin_lock(&vcpu->arch.local_int.lock); vcpu->arch.local_int.timer_due = 1; if (waitqueue_active(&vcpu->arch.local_int.wq)) wake_up_interruptible(&vcpu->arch.local_int.wq); - spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock(&vcpu->arch.local_int.lock); } +/* + * low level hrtimer wake routine. Because this runs in hardirq context + * we schedule a tasklet to do the real work. + */ +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) +{ + struct kvm_vcpu *vcpu; + + vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer); + tasklet_schedule(&vcpu->arch.tasklet); + + return HRTIMER_NORESTART; +} void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { @@ -436,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) if (atomic_read(&fi->active)) { do { deliver = 0; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); list_for_each_entry_safe(inti, n, &fi->list, list) { if (__interrupt_is_deliverable(vcpu, inti)) { list_del(&inti->list); @@ -447,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) } if (list_empty(&fi->list)) atomic_set(&fi->active, 0); - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); if (deliver) { __do_deliver_interrupt(vcpu, inti); kfree(inti); @@ -512,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, mutex_lock(&kvm->lock); fi = &kvm->arch.float_int; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); list_add_tail(&inti->list, &fi->list); atomic_set(&fi->active, 1); sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); @@ -529,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm, if (waitqueue_active(&li->wq)) wake_up_interruptible(&li->wq); spin_unlock_bh(&li->lock); - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); mutex_unlock(&kvm->lock); return 0; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f4d56e9939c..10bccd1f8ae 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -15,6 +15,7 @@ #include <linux/compiler.h> #include <linux/err.h> #include <linux/fs.h> +#include <linux/hrtimer.h> #include <linux/init.h> #include <linux/kvm.h> #include <linux/kvm_host.h> @@ -195,6 +196,10 @@ out_nokvm: void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 3, "%s", "free cpu"); + if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == + (__u64) vcpu->arch.sie_block) + vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + smp_mb(); free_page((unsigned long)(vcpu->arch.sie_block)); kvm_vcpu_uninit(vcpu); kfree(vcpu); @@ -283,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin; vcpu->arch.sie_block->ecb = 2; vcpu->arch.sie_block->eca = 0xC1002001U; - setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, - (unsigned long) vcpu); + hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); + tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet, + (unsigned long) vcpu); + vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; get_cpu_id(&vcpu->arch.cpu_id); vcpu->arch.cpu_id.version = 0xff; return 0; @@ -307,19 +314,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->icpua = id; BUG_ON(!kvm->arch.sca); - BUG_ON(kvm->arch.sca->cpu[id].sda); - kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + if (!kvm->arch.sca->cpu[id].sda) + kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + else + BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */ vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; spin_lock_init(&vcpu->arch.local_int.lock); INIT_LIST_HEAD(&vcpu->arch.local_int.list); vcpu->arch.local_int.float_int = &kvm->arch.float_int; - spin_lock_bh(&kvm->arch.float_int.lock); + spin_lock(&kvm->arch.float_int.lock); kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; init_waitqueue_head(&vcpu->arch.local_int.wq); vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; - spin_unlock_bh(&kvm->arch.float_int.lock); + spin_unlock(&kvm->arch.float_int.lock); rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) @@ -478,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vcpu_load(vcpu); + /* verify, that memory has been registered */ + if (!vcpu->kvm->arch.guest_memsize) { + vcpu_put(vcpu); + return -EINVAL; + } + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -657,6 +672,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm, struct kvm_memory_slot old, int user_alloc) { + int i; + /* A few sanity checks. We can have exactly one memory slot which has to start at guest virtual zero and which has to be located at a page boundary in userland and which has to end at a page boundary. @@ -664,7 +681,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, vmas. It is okay to mmap() and munmap() stuff in this slot after doing this call at any time */ - if (mem->slot) + if (mem->slot || kvm->arch.guest_memsize) return -EINVAL; if (mem->guest_phys_addr) @@ -676,15 +693,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (mem->memory_size & (PAGE_SIZE - 1)) return -EINVAL; + if (!user_alloc) + return -EINVAL; + + /* lock all vcpus */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (!kvm->vcpus[i]) + continue; + if (!mutex_trylock(&kvm->vcpus[i]->mutex)) + goto fail_out; + } + kvm->arch.guest_origin = mem->userspace_addr; kvm->arch.guest_memsize = mem->memory_size; - /* FIXME: we do want to interrupt running CPUs and update their memory - configuration now to avoid race conditions. But hey, changing the - memory layout while virtual CPUs are running is usually bad - programming practice. */ + /* update sie control blocks, and unlock all vcpus */ + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm->vcpus[i]->arch.sie_block->gmsor = + kvm->arch.guest_origin; + kvm->vcpus[i]->arch.sie_block->gmslm = + kvm->arch.guest_memsize + + kvm->arch.guest_origin + + VIRTIODESCSPACE - 1ul; + mutex_unlock(&kvm->vcpus[i]->mutex); + } + } return 0; + +fail_out: + for (; i >= 0; i--) + mutex_unlock(&kvm->vcpus[i]->mutex); + return -EINVAL; } void kvm_arch_flush_shadow(struct kvm *kvm) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 00bbe69b78d..748fee87232 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -14,6 +14,7 @@ #ifndef ARCH_S390_KVM_S390_H #define ARCH_S390_KVM_S390_H +#include <linux/hrtimer.h> #include <linux/kvm.h> #include <linux/kvm_host.h> @@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) } int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); -void kvm_s390_idle_wakeup(unsigned long data); +enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer); +void kvm_s390_tasklet(unsigned long parm); void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 4b88834b8dd..93ecd06e1a7 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) int cpus = 0; int n; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); for (n = 0; n < KVM_MAX_VCPUS; n++) if (fi->local_int[n]) cpus++; - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); /* deal with other level 3 hypervisors */ if (stsi(mem, 3, 2, 2) == -ENOSYS) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index f27dbedf086..36678835034 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, if (cpu_addr >= KVM_MAX_VCPUS) return 3; /* not operational */ - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); if (fi->local_int[cpu_addr] == NULL) rc = 3; /* not operational */ else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) @@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, *reg |= SIGP_STAT_STOPPED; rc = 1; /* status stored */ } - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); return rc; @@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) inti->type = KVM_S390_INT_EMERGENCY; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; if (li == NULL) { rc = 3; /* not operational */ @@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) spin_unlock_bh(&li->lock); rc = 0; /* order accepted */ unlock: - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); return rc; } @@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) inti->type = KVM_S390_SIGP_STOP; - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; if (li == NULL) { rc = 3; /* not operational */ @@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) spin_unlock_bh(&li->lock); rc = 0; /* order accepted */ unlock: - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); return rc; } @@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, if (!inti) return 2; /* busy */ - spin_lock_bh(&fi->lock); + spin_lock(&fi->lock); li = fi->local_int[cpu_addr]; if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { @@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, out_li: spin_unlock_bh(&li->lock); out_fi: - spin_unlock_bh(&fi->lock); + spin_unlock(&fi->lock); return rc; } diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 19af42138f7..4a28d22d479 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -116,6 +116,8 @@ #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ +#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ #define X86_FEATURE_AES (4*32+25) /* AES instructions */ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index dc3f6cf1170..125be8b1956 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -16,6 +16,7 @@ #define __KVM_HAVE_MSI #define __KVM_HAVE_USER_NMI #define __KVM_HAVE_GUEST_DEBUG +#define __KVM_HAVE_MSIX /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f0faf58044f..eabdc1cfab5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -185,6 +185,7 @@ union kvm_mmu_page_role { unsigned access:3; unsigned invalid:1; unsigned cr4_pge:1; + unsigned nxe:1; }; }; @@ -212,7 +213,6 @@ struct kvm_mmu_page { int multimapped; /* More than one parent_pte? */ int root_count; /* Currently serving as active root */ bool unsync; - bool global; unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ @@ -261,13 +261,11 @@ struct kvm_mmu { union kvm_mmu_page_role base_role; u64 *pae_root; + u64 rsvd_bits_mask[2][4]; }; struct kvm_vcpu_arch { u64 host_tsc; - int interrupt_window_open; - unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ - DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); /* * rip and regs accesses must go through * kvm_{register,rip}_{read,write} functions. @@ -286,6 +284,7 @@ struct kvm_vcpu_arch { u64 shadow_efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ + int32_t apic_arb_prio; int mp_state; int sipi_vector; u64 ia32_misc_enable_msr; @@ -320,6 +319,8 @@ struct kvm_vcpu_arch { struct kvm_pio_request pio; void *pio_data; + u8 event_exit_inst_len; + struct kvm_queued_exception { bool pending; bool has_error_code; @@ -329,11 +330,12 @@ struct kvm_vcpu_arch { struct kvm_queued_interrupt { bool pending; + bool soft; u8 nr; } interrupt; struct { - int active; + int vm86_active; u8 save_iopl; struct kvm_save_segment { u16 selector; @@ -356,9 +358,9 @@ struct kvm_vcpu_arch { unsigned int time_offset; struct page *time_page; + bool singlestep; /* guest is single stepped by KVM */ bool nmi_pending; bool nmi_injected; - bool nmi_window_open; struct mtrr_state_type mtrr_state; u32 pat; @@ -392,15 +394,14 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; - struct list_head oos_global_pages; struct iommu_domain *iommu_domain; + int iommu_flags; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; struct hlist_head irq_ack_notifier_list; int vapics_in_nmi_mode; - int round_robin_prev_vcpu; unsigned int tss_addr; struct page *apic_access_page; @@ -423,7 +424,6 @@ struct kvm_vm_stat { u32 mmu_recycled; u32 mmu_cache_miss; u32 mmu_unsync; - u32 mmu_unsync_global; u32 remote_tlb_flush; u32 lpages; }; @@ -443,7 +443,6 @@ struct kvm_vcpu_stat { u32 halt_exits; u32 halt_wakeup; u32 request_irq_exits; - u32 request_nmi_exits; u32 irq_exits; u32 host_state_reload; u32 efer_reload; @@ -511,20 +510,22 @@ struct kvm_x86_ops { void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); + void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); + u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); - int (*get_irq)(struct kvm_vcpu *vcpu); - void (*set_irq)(struct kvm_vcpu *vcpu, int vec); + void (*set_irq)(struct kvm_vcpu *vcpu); + void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, bool has_error_code, u32 error_code); - bool (*exception_injected)(struct kvm_vcpu *vcpu); - void (*inject_pending_irq)(struct kvm_vcpu *vcpu); - void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, - struct kvm_run *run); - + int (*interrupt_allowed)(struct kvm_vcpu *vcpu); + int (*nmi_allowed)(struct kvm_vcpu *vcpu); + void (*enable_nmi_window)(struct kvm_vcpu *vcpu); + void (*enable_irq_window)(struct kvm_vcpu *vcpu); + void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); - int (*get_mt_mask_shift)(void); + u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); }; extern struct kvm_x86_ops *kvm_x86_ops; @@ -538,7 +539,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); void kvm_mmu_set_base_ptes(u64 base_pte); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); @@ -552,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, gpa_t addr, unsigned long *ret); +u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; @@ -563,6 +565,7 @@ enum emulation_result { #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) +#define EMULTYPE_SKIP (1 << 2) int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long cr2, u16 error_code, int emulation_type); void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); @@ -638,7 +641,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); -void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); @@ -769,6 +771,8 @@ enum { #define HF_GIF_MASK (1 << 0) #define HF_HIF_MASK (1 << 1) #define HF_VINTR_MASK (1 << 2) +#define HF_NMI_MASK (1 << 3) +#define HF_IRET_MASK (1 << 4) /* * Hardware virtualization extension instructions may fault if a @@ -791,5 +795,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void); #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 6a159732881..b7ed2c42311 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h @@ -143,6 +143,9 @@ struct decode_cache { struct fetch_cache fetch; }; +#define X86_SHADOW_INT_MOV_SS 1 +#define X86_SHADOW_INT_STI 2 + struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; @@ -152,6 +155,9 @@ struct x86_emulate_ctxt { int mode; u32 cs_base; + /* interruptibility state, as a result of execution of STI or MOV SS */ + int interruptibility; + /* decode cache */ struct decode_cache decode; }; diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 82ada75f3eb..85574b7c1bc 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EVTINJ_VALID_ERR (1 << 11) #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK +#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index f72956331c4..c4ee8056bac 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h @@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios, SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); + get_user(termios->c_line, &termio->c_line); return copy_from_user(termios->c_cc, termio->c_cc, NCC); } diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 498f944010b..11be5ad2e0e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -247,6 +247,7 @@ enum vmcs_field { #define EXIT_REASON_MSR_READ 31 #define EXIT_REASON_MSR_WRITE 32 #define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 #define EXIT_REASON_EPT_VIOLATION 48 diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 09dd1d414fc..289cc481502 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -420,6 +420,7 @@ void do_machine_check(struct pt_regs * regs, long error_code) out2: atomic_dec(&mce_entry); } +EXPORT_SYMBOL_GPL(do_machine_check); #ifdef CONFIG_X86_MCE_INTEL /*** diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6551dedee20..a78ecad0c90 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -27,6 +27,7 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/hardirq.h> +#include <asm/timer.h> #define MMU_QUEUE_SIZE 1024 @@ -230,6 +231,9 @@ static void paravirt_ops_setup(void) pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; } +#ifdef CONFIG_X86_IO_APIC + no_timer_check = 1; +#endif } void __init kvm_guest_init(void) diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index f6db48c405b..28f5fb495a6 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); inc_irq_stat(irq_resched_count); + /* + * KVM uses this interrupt to force a cpu out of guest mode + */ } void smp_call_function_interrupt(struct pt_regs *regs) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a58504ea78c..8600a09e0c6 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -50,6 +50,9 @@ config KVM_INTEL Provides support for KVM on Intel processors equipped with the VT extensions. + To compile this as a module, choose M here: the module + will be called kvm-intel. + config KVM_AMD tristate "KVM for AMD processors support" depends on KVM @@ -57,6 +60,9 @@ config KVM_AMD Provides support for KVM on AMD processors equipped with the AMD-V (SVM) extensions. + To compile this as a module, choose M here: the module + will be called kvm-amd. + config KVM_TRACE bool "KVM trace support" depends on KVM && SYSFS diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d3ec292f00f..b43c4efafe8 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -14,7 +14,7 @@ endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ - i8254.o + i8254.o timer.o obj-$(CONFIG_KVM) += kvm.o kvm-intel-objs = vmx.o obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c13bb92d315..4d6f0d293ee 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel) return kvm->arch.vpit->pit_state.channels[channel].gate; } +static s64 __kpit_elapsed(struct kvm *kvm) +{ + s64 elapsed; + ktime_t remaining; + struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; + + /* + * The Counter does not stop when it reaches zero. In + * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to + * the highest count, either FFFF hex for binary counting + * or 9999 for BCD counting, and continues counting. + * Modes 2 and 3 are periodic; the Counter reloads + * itself with the initial count and continues counting + * from there. + */ + remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); + elapsed = ps->pit_timer.period - ktime_to_ns(remaining); + elapsed = mod_64(elapsed, ps->pit_timer.period); + + return elapsed; +} + +static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c, + int channel) +{ + if (channel == 0) + return __kpit_elapsed(kvm); + + return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); +} + static int pit_get_count(struct kvm *kvm, int channel) { struct kvm_kpit_channel_state *c = @@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel) WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); - t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); + t = kpit_elapsed(kvm, c, channel); d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); switch (c->mode) { @@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel) WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); - t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); + t = kpit_elapsed(kvm, c, channel); d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); switch (c->mode) { @@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel) } } -static int __pit_timer_fn(struct kvm_kpit_state *ps) -{ - struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; - struct kvm_kpit_timer *pt = &ps->pit_timer; - - if (!atomic_inc_and_test(&pt->pending)) - set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); - - if (!pt->reinject) - atomic_set(&pt->pending, 1); - - if (vcpu0 && waitqueue_active(&vcpu0->wq)) - wake_up_interruptible(&vcpu0->wq); - - hrtimer_add_expires_ns(&pt->timer, pt->period); - pt->scheduled = hrtimer_get_expires_ns(&pt->timer); - if (pt->period) - ps->channels[0].count_load_time = ktime_get(); - - return (pt->period == 0 ? 0 : 1); -} - int pit_has_pending_timer(struct kvm_vcpu *vcpu) { struct kvm_pit *pit = vcpu->kvm->arch.vpit; @@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) spin_unlock(&ps->inject_lock); } -static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) -{ - struct kvm_kpit_state *ps; - int restart_timer = 0; - - ps = container_of(data, struct kvm_kpit_state, pit_timer.timer); - - restart_timer = __pit_timer_fn(ps); - - if (restart_timer) - return HRTIMER_RESTART; - else - return HRTIMER_NORESTART; -} - void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) { struct kvm_pit *pit = vcpu->kvm->arch.vpit; @@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } -static void destroy_pit_timer(struct kvm_kpit_timer *pt) +static void destroy_pit_timer(struct kvm_timer *pt) { pr_debug("pit: execute del timer!\n"); hrtimer_cancel(&pt->timer); } +static bool kpit_is_periodic(struct kvm_timer *ktimer) +{ + struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state, + pit_timer); + return ps->is_periodic; +} + +static struct kvm_timer_ops kpit_ops = { + .is_periodic = kpit_is_periodic, +}; + static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) { - struct kvm_kpit_timer *pt = &ps->pit_timer; + struct kvm_timer *pt = &ps->pit_timer; s64 interval; interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); @@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) /* TODO The new value only affected after the retriggered */ hrtimer_cancel(&pt->timer); - pt->period = (is_period == 0) ? 0 : interval; - pt->timer.function = pit_timer_fn; + pt->period = interval; + ps->is_periodic = is_period; + + pt->timer.function = kvm_timer_fn; + pt->t_ops = &kpit_ops; + pt->kvm = ps->pit->kvm; + pt->vcpu_id = 0; + atomic_set(&pt->pending, 0); ps->irq_ack = 1; @@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); /* - * Though spec said the state of 8254 is undefined after power-up, - * seems some tricky OS like Windows XP depends on IRQ0 interrupt - * when booting up. - * So here setting initialize rate for it, and not a specific number + * The largest possible initial count is 0; this is equivalent + * to 216 for binary counting and 104 for BCD counting. */ if (val == 0) val = 0x10000; - ps->channels[channel].count_load_time = ktime_get(); ps->channels[channel].count = val; - if (channel != 0) + if (channel != 0) { + ps->channels[channel].count_load_time = ktime_get(); return; + } /* Two types of timer * mode 1 is one shot, mode 2 is period, otherwise del timer */ switch (ps->channels[0].mode) { + case 0: case 1: /* FIXME: enhance mode 4 precision */ case 4: diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 6acbe4b505d..bbd863ff60b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -3,15 +3,6 @@ #include "iodev.h" -struct kvm_kpit_timer { - struct hrtimer timer; - int irq; - s64 period; /* unit: ns */ - s64 scheduled; - atomic_t pending; - bool reinject; -}; - struct kvm_kpit_channel_state { u32 count; /* can be 65536 */ u16 latched_count; @@ -30,7 +21,8 @@ struct kvm_kpit_channel_state { struct kvm_kpit_state { struct kvm_kpit_channel_state channels[3]; - struct kvm_kpit_timer pit_timer; + struct kvm_timer pit_timer; + bool is_periodic; u32 speaker_data_on; struct mutex lock; struct kvm_pit *pit; diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index cf17ed52f6f..96dfbb6ad2a 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -24,6 +24,7 @@ #include "irq.h" #include "i8254.h" +#include "x86.h" /* * check if there are pending timer events @@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { struct kvm_pic *s; + if (!irqchip_in_kernel(v->kvm)) + return v->arch.interrupt.pending; + if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ if (kvm_apic_accept_pic_intr(v)) { s = pic_irqchip(v->kvm); /* PIC */ @@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) struct kvm_pic *s; int vector; + if (!irqchip_in_kernel(v->kvm)) + return v->arch.interrupt.nr; + vector = kvm_get_apic_interrupt(v); /* APIC */ if (vector == -1) { if (kvm_apic_accept_pic_intr(v)) { diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h new file mode 100644 index 00000000000..26bd6ba74e1 --- /dev/null +++ b/arch/x86/kvm/kvm_timer.h @@ -0,0 +1,18 @@ + +struct kvm_timer { + struct hrtimer timer; + s64 period; /* unit: ns */ + atomic_t pending; /* accumulated triggered timers */ + bool reinject; + struct kvm_timer_ops *t_ops; + struct kvm *kvm; + int vcpu_id; +}; + +struct kvm_timer_ops { + bool (*is_periodic)(struct kvm_timer *); +}; + + +enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); + diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f0b67f2cdd6..ae99d83f81a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -196,20 +196,15 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, + int vector, int level, int trig_mode); + +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) { struct kvm_lapic *apic = vcpu->arch.apic; - if (!apic_test_and_set_irr(vec, apic)) { - /* a new pending irq is set in IRR */ - if (trig) - apic_set_vector(vec, apic->regs + APIC_TMR); - else - apic_clear_vector(vec, apic->regs + APIC_TMR); - kvm_vcpu_kick(apic->vcpu); - return 1; - } - return 0; + return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, + irq->level, irq->trig_mode); } static inline int apic_find_highest_isr(struct kvm_lapic *apic) @@ -250,7 +245,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) { - return kvm_apic_id(apic) == dest; + return dest == 0xff || kvm_apic_id(apic) == dest; } int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) @@ -279,37 +274,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) return result; } -static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, int dest, int dest_mode) { int result = 0; struct kvm_lapic *target = vcpu->arch.apic; apic_debug("target %p, source %p, dest 0x%x, " - "dest_mode 0x%x, short_hand 0x%x", + "dest_mode 0x%x, short_hand 0x%x\n", target, source, dest, dest_mode, short_hand); ASSERT(!target); switch (short_hand) { case APIC_DEST_NOSHORT: - if (dest_mode == 0) { + if (dest_mode == 0) /* Physical mode. */ - if ((dest == 0xFF) || (dest == kvm_apic_id(target))) - result = 1; - } else + result = kvm_apic_match_physical_addr(target, dest); + else /* Logical mode. */ result = kvm_apic_match_logical_addr(target, dest); break; case APIC_DEST_SELF: - if (target == source) - result = 1; + result = (target == source); break; case APIC_DEST_ALLINC: result = 1; break; case APIC_DEST_ALLBUT: - if (target != source) - result = 1; + result = (target != source); break; default: printk(KERN_WARNING "Bad dest shorthand value %x\n", @@ -327,20 +319,22 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode) { - int orig_irr, result = 0; + int result = 0; struct kvm_vcpu *vcpu = apic->vcpu; switch (delivery_mode) { - case APIC_DM_FIXED: case APIC_DM_LOWEST: + vcpu->arch.apic_arb_prio++; + case APIC_DM_FIXED: /* FIXME add logic for vcpu on reset */ if (unlikely(!apic_enabled(apic))) break; - orig_irr = apic_test_and_set_irr(vector, apic); - if (orig_irr && trig_mode) { - apic_debug("level trig mode repeatedly for vector %d", - vector); + result = !apic_test_and_set_irr(vector, apic); + if (!result) { + if (trig_mode) + apic_debug("level trig mode repeatedly for " + "vector %d", vector); break; } @@ -349,10 +343,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_set_vector(vector, apic->regs + APIC_TMR); } else apic_clear_vector(vector, apic->regs + APIC_TMR); - kvm_vcpu_kick(vcpu); - - result = (orig_irr == 0); break; case APIC_DM_REMRD: @@ -364,12 +355,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; case APIC_DM_NMI: + result = 1; kvm_inject_nmi(vcpu); kvm_vcpu_kick(vcpu); break; case APIC_DM_INIT: if (level) { + result = 1; if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) printk(KERN_DEBUG "INIT on a runnable vcpu %d\n", @@ -386,6 +379,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_debug("SIPI to vcpu %d vector 0x%02x\n", vcpu->vcpu_id, vector); if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { + result = 1; vcpu->arch.sipi_vector = vector; vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; kvm_vcpu_kick(vcpu); @@ -408,43 +402,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, return result; } -static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, - unsigned long bitmap) -{ - int last; - int next; - struct kvm_lapic *apic = NULL; - - last = kvm->arch.round_robin_prev_vcpu; - next = last; - - do { - if (++next == KVM_MAX_VCPUS) - next = 0; - if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap)) - continue; - apic = kvm->vcpus[next]->arch.apic; - if (apic && apic_enabled(apic)) - break; - apic = NULL; - } while (next != last); - kvm->arch.round_robin_prev_vcpu = next; - - if (!apic) - printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n"); - - return apic; -} - -struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, - unsigned long bitmap) +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) { - struct kvm_lapic *apic; - - apic = kvm_apic_round_robin(kvm, vector, bitmap); - if (apic) - return apic->vcpu; - return NULL; + return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; } static void apic_set_eoi(struct kvm_lapic *apic) @@ -472,47 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic) { u32 icr_low = apic_get_reg(apic, APIC_ICR); u32 icr_high = apic_get_reg(apic, APIC_ICR2); + struct kvm_lapic_irq irq; - unsigned int dest = GET_APIC_DEST_FIELD(icr_high); - unsigned int short_hand = icr_low & APIC_SHORT_MASK; - unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; - unsigned int level = icr_low & APIC_INT_ASSERT; - unsigned int dest_mode = icr_low & APIC_DEST_MASK; - unsigned int delivery_mode = icr_low & APIC_MODE_MASK; - unsigned int vector = icr_low & APIC_VECTOR_MASK; - - struct kvm_vcpu *target; - struct kvm_vcpu *vcpu; - unsigned long lpr_map = 0; - int i; + irq.vector = icr_low & APIC_VECTOR_MASK; + irq.delivery_mode = icr_low & APIC_MODE_MASK; + irq.dest_mode = icr_low & APIC_DEST_MASK; + irq.level = icr_low & APIC_INT_ASSERT; + irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; + irq.shorthand = icr_low & APIC_SHORT_MASK; + irq.dest_id = GET_APIC_DEST_FIELD(icr_high); apic_debug("icr_high 0x%x, icr_low 0x%x, " "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", - icr_high, icr_low, short_hand, dest, - trig_mode, level, dest_mode, delivery_mode, vector); - - for (i = 0; i < KVM_MAX_VCPUS; i++) { - vcpu = apic->vcpu->kvm->vcpus[i]; - if (!vcpu) - continue; - - if (vcpu->arch.apic && - apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) { - if (delivery_mode == APIC_DM_LOWEST) - set_bit(vcpu->vcpu_id, &lpr_map); - else - __apic_accept_irq(vcpu->arch.apic, delivery_mode, - vector, level, trig_mode); - } - } + icr_high, icr_low, irq.shorthand, irq.dest_id, + irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, + irq.vector); - if (delivery_mode == APIC_DM_LOWEST) { - target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map); - if (target != NULL) - __apic_accept_irq(target->arch.apic, delivery_mode, - vector, level, trig_mode); - } + kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); } static u32 apic_get_tmcct(struct kvm_lapic *apic) @@ -527,12 +464,13 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) if (apic_get_reg(apic, APIC_TMICT) == 0) return 0; - remaining = hrtimer_expires_remaining(&apic->timer.dev); + remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); if (ktime_to_ns(remaining) < 0) remaining = ktime_set(0, 0); - ns = mod_64(ktime_to_ns(remaining), apic->timer.period); - tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); + ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); + tmcct = div64_u64(ns, + (APIC_BUS_CYCLE_NS * apic->divide_count)); return tmcct; } @@ -619,25 +557,25 @@ static void update_divide_count(struct kvm_lapic *apic) tdcr = apic_get_reg(apic, APIC_TDCR); tmp1 = tdcr & 0xf; tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; - apic->timer.divide_count = 0x1 << (tmp2 & 0x7); + apic->divide_count = 0x1 << (tmp2 & 0x7); apic_debug("timer divide count is 0x%x\n", - apic->timer.divide_count); + apic->divide_count); } static void start_apic_timer(struct kvm_lapic *apic) { - ktime_t now = apic->timer.dev.base->get_time(); + ktime_t now = apic->lapic_timer.timer.base->get_time(); - apic->timer.period = apic_get_reg(apic, APIC_TMICT) * - APIC_BUS_CYCLE_NS * apic->timer.divide_count; - atomic_set(&apic->timer.pending, 0); + apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * + APIC_BUS_CYCLE_NS * apic->divide_count; + atomic_set(&apic->lapic_timer.pending, 0); - if (!apic->timer.period) + if (!apic->lapic_timer.period) return; - hrtimer_start(&apic->timer.dev, - ktime_add_ns(now, apic->timer.period), + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, apic->lapic_timer.period), HRTIMER_MODE_ABS); apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" @@ -646,9 +584,9 @@ static void start_apic_timer(struct kvm_lapic *apic) "expire @ 0x%016" PRIx64 ".\n", __func__, APIC_BUS_CYCLE_NS, ktime_to_ns(now), apic_get_reg(apic, APIC_TMICT), - apic->timer.period, + apic->lapic_timer.period, ktime_to_ns(ktime_add_ns(now, - apic->timer.period))); + apic->lapic_timer.period))); } static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) @@ -730,7 +668,7 @@ static void apic_mmio_write(struct kvm_io_device *this, apic_set_reg(apic, APIC_LVTT + 0x10 * i, lvt_val | APIC_LVT_MASKED); } - atomic_set(&apic->timer.pending, 0); + atomic_set(&apic->lapic_timer.pending, 0); } break; @@ -762,7 +700,7 @@ static void apic_mmio_write(struct kvm_io_device *this, break; case APIC_TMICT: - hrtimer_cancel(&apic->timer.dev); + hrtimer_cancel(&apic->lapic_timer.timer); apic_set_reg(apic, APIC_TMICT, val); start_apic_timer(apic); return; @@ -802,7 +740,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) if (!vcpu->arch.apic) return; - hrtimer_cancel(&vcpu->arch.apic->timer.dev); + hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); if (vcpu->arch.apic->regs_page) __free_page(vcpu->arch.apic->regs_page); @@ -880,7 +818,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) ASSERT(apic != NULL); /* Stop the timer in case it's a reset to an active apic */ - hrtimer_cancel(&apic->timer.dev); + hrtimer_cancel(&apic->lapic_timer.timer); apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); apic_set_reg(apic, APIC_LVR, APIC_VERSION); @@ -905,11 +843,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } update_divide_count(apic); - atomic_set(&apic->timer.pending, 0); + atomic_set(&apic->lapic_timer.pending, 0); if (vcpu->vcpu_id == 0) vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; apic_update_ppr(apic); + vcpu->arch.apic_arb_prio = 0; + apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, vcpu, kvm_apic_id(apic), @@ -917,16 +857,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_reset); -int kvm_lapic_enabled(struct kvm_vcpu *vcpu) +bool kvm_apic_present(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = vcpu->arch.apic; - int ret = 0; - - if (!apic) - return 0; - ret = apic_enabled(apic); + return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); +} - return ret; +int kvm_lapic_enabled(struct kvm_vcpu *vcpu) +{ + return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); } EXPORT_SYMBOL_GPL(kvm_lapic_enabled); @@ -936,22 +874,11 @@ EXPORT_SYMBOL_GPL(kvm_lapic_enabled); *---------------------------------------------------------------------- */ -/* TODO: make sure __apic_timer_fn runs in current pCPU */ -static int __apic_timer_fn(struct kvm_lapic *apic) +static bool lapic_is_periodic(struct kvm_timer *ktimer) { - int result = 0; - wait_queue_head_t *q = &apic->vcpu->wq; - - if(!atomic_inc_and_test(&apic->timer.pending)) - set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); - if (waitqueue_active(q)) - wake_up_interruptible(q); - - if (apic_lvtt_period(apic)) { - result = 1; - hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period); - } - return result; + struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, + lapic_timer); + return apic_lvtt_period(apic); } int apic_has_pending_timer(struct kvm_vcpu *vcpu) @@ -959,7 +886,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) struct kvm_lapic *lapic = vcpu->arch.apic; if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) - return atomic_read(&lapic->timer.pending); + return atomic_read(&lapic->lapic_timer.pending); return 0; } @@ -986,20 +913,9 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) kvm_apic_local_deliver(apic, APIC_LVT0); } -static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) -{ - struct kvm_lapic *apic; - int restart_timer = 0; - - apic = container_of(data, struct kvm_lapic, timer.dev); - - restart_timer = __apic_timer_fn(apic); - - if (restart_timer) - return HRTIMER_RESTART; - else - return HRTIMER_NORESTART; -} +static struct kvm_timer_ops lapic_timer_ops = { + .is_periodic = lapic_is_periodic, +}; int kvm_create_lapic(struct kvm_vcpu *vcpu) { @@ -1024,8 +940,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) memset(apic->regs, 0, PAGE_SIZE); apic->vcpu = vcpu; - hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - apic->timer.dev.function = apic_timer_fn; + hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, + HRTIMER_MODE_ABS); + apic->lapic_timer.timer.function = kvm_timer_fn; + apic->lapic_timer.t_ops = &lapic_timer_ops; + apic->lapic_timer.kvm = vcpu->kvm; + apic->lapic_timer.vcpu_id = vcpu->vcpu_id; + apic->base_address = APIC_DEFAULT_PHYS_BASE; vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; @@ -1078,9 +999,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - if (apic && atomic_read(&apic->timer.pending) > 0) { + if (apic && atomic_read(&apic->lapic_timer.pending) > 0) { if (kvm_apic_local_deliver(apic, APIC_LVTT)) - atomic_dec(&apic->timer.pending); + atomic_dec(&apic->lapic_timer.pending); } } @@ -1106,7 +1027,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) MSR_IA32_APICBASE_BASE; apic_set_reg(apic, APIC_LVR, APIC_VERSION); apic_update_ppr(apic); - hrtimer_cancel(&apic->timer.dev); + hrtimer_cancel(&apic->lapic_timer.timer); update_divide_count(apic); start_apic_timer(apic); } @@ -1119,7 +1040,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) if (!apic) return; - timer = &apic->timer.dev; + timer = &apic->lapic_timer.timer; if (hrtimer_cancel(timer)) hrtimer_start_expires(timer, HRTIMER_MODE_ABS); } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 45ab6ee7120..a587f8349c4 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -2,18 +2,15 @@ #define __KVM_X86_LAPIC_H #include "iodev.h" +#include "kvm_timer.h" #include <linux/kvm_host.h> struct kvm_lapic { unsigned long base_address; struct kvm_io_device dev; - struct { - atomic_t pending; - s64 period; /* unit: ns */ - u32 divide_count; - struct hrtimer dev; - } timer; + struct kvm_timer lapic_timer; + u32 divide_count; struct kvm_vcpu *vcpu; struct page *regs_page; void *regs; @@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); -int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); int kvm_lapic_enabled(struct kvm_vcpu *vcpu); +bool kvm_apic_present(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 32cf11e5728..5c3d6e81a7d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644); #define PFERR_PRESENT_MASK (1U << 0) #define PFERR_WRITE_MASK (1U << 1) #define PFERR_USER_MASK (1U << 2) +#define PFERR_RSVD_MASK (1U << 3) #define PFERR_FETCH_MASK (1U << 4) #define PT_DIRECTORY_LEVEL 2 @@ -177,7 +178,11 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; -static u64 __read_mostly shadow_mt_mask; + +static inline u64 rsvd_bits(int s, int e) +{ + return ((1ULL << (e - s + 1)) - 1) << s; +} void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) { @@ -193,14 +198,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte) EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; - shadow_mt_mask = mt_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); @@ -219,11 +223,6 @@ static int is_nx(struct kvm_vcpu *vcpu) return vcpu->arch.shadow_efer & EFER_NX; } -static int is_present_pte(unsigned long pte) -{ - return pte & PT_PRESENT_MASK; -} - static int is_shadow_present_pte(u64 pte) { return pte != shadow_trap_nonpresent_pte @@ -1074,18 +1073,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) return NULL; } -static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) -{ - list_del(&sp->oos_link); - --kvm->stat.mmu_unsync_global; -} - static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) { WARN_ON(!sp->unsync); sp->unsync = 0; - if (sp->global) - kvm_unlink_unsync_global(kvm, sp); --kvm->stat.mmu_unsync; } @@ -1248,7 +1239,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); sp->gfn = gfn; sp->role = role; - sp->global = 0; hlist_add_head(&sp->hash_link, bucket); if (!direct) { if (rmap_write_protect(vcpu->kvm, gfn)) @@ -1616,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state, return mtrr_state->def_type; } -static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) +u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) { u8 mtrr; @@ -1626,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) mtrr = MTRR_TYPE_WRBACK; return mtrr; } +EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { @@ -1646,11 +1637,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; - if (sp->global) { - list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); - ++vcpu->kvm->stat.mmu_unsync_global; - } else - kvm_mmu_mark_parents_unsync(vcpu, sp); + kvm_mmu_mark_parents_unsync(vcpu, sp); mmu_convert_notrap(sp); return 0; @@ -1677,21 +1664,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pte_access, int user_fault, int write_fault, int dirty, int largepage, - int global, gfn_t gfn, pfn_t pfn, bool speculative, + gfn_t gfn, pfn_t pfn, bool speculative, bool can_unsync) { u64 spte; int ret = 0; - u64 mt_mask = shadow_mt_mask; - struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); - - if (!global && sp->global) { - sp->global = 0; - if (sp->unsync) { - kvm_unlink_unsync_global(vcpu->kvm, sp); - kvm_mmu_mark_parents_unsync(vcpu, sp); - } - } /* * We don't set the accessed bit, since we sometimes want to see @@ -1711,16 +1688,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, spte |= shadow_user_mask; if (largepage) spte |= PT_PAGE_SIZE_MASK; - if (mt_mask) { - if (!kvm_is_mmio_pfn(pfn)) { - mt_mask = get_memory_type(vcpu, gfn) << - kvm_x86_ops->get_mt_mask_shift(); - mt_mask |= VMX_EPT_IGMT_BIT; - } else - mt_mask = MTRR_TYPE_UNCACHABLE << - kvm_x86_ops->get_mt_mask_shift(); - spte |= mt_mask; - } + if (tdp_enabled) + spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, + kvm_is_mmio_pfn(pfn)); spte |= (u64)pfn << PAGE_SHIFT; @@ -1765,8 +1735,8 @@ set_pte: static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, unsigned pt_access, unsigned pte_access, int user_fault, int write_fault, int dirty, - int *ptwrite, int largepage, int global, - gfn_t gfn, pfn_t pfn, bool speculative) + int *ptwrite, int largepage, gfn_t gfn, + pfn_t pfn, bool speculative) { int was_rmapped = 0; int was_writeble = is_writeble_pte(*shadow_pte); @@ -1795,7 +1765,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, was_rmapped = 1; } if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, - dirty, largepage, global, gfn, pfn, speculative, true)) { + dirty, largepage, gfn, pfn, speculative, true)) { if (write_fault) *ptwrite = 1; kvm_x86_ops->tlb_flush(vcpu); @@ -1843,7 +1813,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, 0, write, 1, &pt_write, - largepage, 0, gfn, pfn, false); + largepage, gfn, pfn, false); ++vcpu->stat.pf_fixed; break; } @@ -1942,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) vcpu->arch.mmu.root_hpa = INVALID_PAGE; } -static void mmu_alloc_roots(struct kvm_vcpu *vcpu) +static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) +{ + int ret = 0; + + if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + ret = 1; + } + + return ret; +} + +static int mmu_alloc_roots(struct kvm_vcpu *vcpu) { int i; gfn_t root_gfn; @@ -1957,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); if (tdp_enabled) direct = 1; + if (mmu_check_root(vcpu, root_gfn)) + return 1; sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, direct, ACC_ALL, NULL); root = __pa(sp->spt); ++sp->root_count; vcpu->arch.mmu.root_hpa = root; - return; + return 0; } direct = !is_paging(vcpu); if (tdp_enabled) @@ -1980,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; } else if (vcpu->arch.mmu.root_level == 0) root_gfn = 0; + if (mmu_check_root(vcpu, root_gfn)) + return 1; sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, direct, ACC_ALL, NULL); @@ -1988,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; } vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); + return 0; } static void mmu_sync_roots(struct kvm_vcpu *vcpu) @@ -2006,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) for (i = 0; i < 4; ++i) { hpa_t root = vcpu->arch.mmu.pae_root[i]; - if (root) { + if (root && VALID_PAGE(root)) { root &= PT64_BASE_ADDR_MASK; sp = page_header(root); mmu_sync_children(vcpu, sp); @@ -2014,15 +2001,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) } } -static void mmu_sync_global(struct kvm_vcpu *vcpu) -{ - struct kvm *kvm = vcpu->kvm; - struct kvm_mmu_page *sp, *n; - - list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) - kvm_sync_page(vcpu, sp); -} - void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) { spin_lock(&vcpu->kvm->mmu_lock); @@ -2030,13 +2008,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) spin_unlock(&vcpu->kvm->mmu_lock); } -void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) -{ - spin_lock(&vcpu->kvm->mmu_lock); - mmu_sync_global(vcpu); - spin_unlock(&vcpu->kvm->mmu_lock); -} - static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) { return vaddr; @@ -2151,6 +2122,14 @@ static void paging_free(struct kvm_vcpu *vcpu) nonpaging_free(vcpu); } +static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) +{ + int bit7; + + bit7 = (gpte >> 7) & 1; + return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; +} + #define PTTYPE 64 #include "paging_tmpl.h" #undef PTTYPE @@ -2159,6 +2138,59 @@ static void paging_free(struct kvm_vcpu *vcpu) #include "paging_tmpl.h" #undef PTTYPE +static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) +{ + struct kvm_mmu *context = &vcpu->arch.mmu; + int maxphyaddr = cpuid_maxphyaddr(vcpu); + u64 exb_bit_rsvd = 0; + + if (!is_nx(vcpu)) + exb_bit_rsvd = rsvd_bits(63, 63); + switch (level) { + case PT32_ROOT_LEVEL: + /* no rsvd bits for 2 level 4K page table entries */ + context->rsvd_bits_mask[0][1] = 0; + context->rsvd_bits_mask[0][0] = 0; + if (is_cpuid_PSE36()) + /* 36bits PSE 4MB page */ + context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); + else + /* 32 bits PSE 4MB page */ + context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); + context->rsvd_bits_mask[1][0] = ~0ull; + break; + case PT32E_ROOT_LEVEL: + context->rsvd_bits_mask[0][2] = + rsvd_bits(maxphyaddr, 63) | + rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */ + context->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 62); /* PDE */ + context->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 62); /* PTE */ + context->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 62) | + rsvd_bits(13, 20); /* large page */ + context->rsvd_bits_mask[1][0] = ~0ull; + break; + case PT64_ROOT_LEVEL: + context->rsvd_bits_mask[0][3] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); + context->rsvd_bits_mask[0][2] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); + context->rsvd_bits_mask[0][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + context->rsvd_bits_mask[0][0] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51); + context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; + context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2]; + context->rsvd_bits_mask[1][1] = exb_bit_rsvd | + rsvd_bits(maxphyaddr, 51) | + rsvd_bits(13, 20); /* large page */ + context->rsvd_bits_mask[1][0] = ~0ull; + break; + } +} + static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) { struct kvm_mmu *context = &vcpu->arch.mmu; @@ -2179,6 +2211,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) static int paging64_init_context(struct kvm_vcpu *vcpu) { + reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); } @@ -2186,6 +2219,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) { struct kvm_mmu *context = &vcpu->arch.mmu; + reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); context->new_cr3 = paging_new_cr3; context->page_fault = paging32_page_fault; context->gva_to_gpa = paging32_gva_to_gpa; @@ -2201,6 +2235,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) static int paging32E_init_context(struct kvm_vcpu *vcpu) { + reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); } @@ -2221,12 +2256,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->gva_to_gpa = nonpaging_gva_to_gpa; context->root_level = 0; } else if (is_long_mode(vcpu)) { + reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); context->gva_to_gpa = paging64_gva_to_gpa; context->root_level = PT64_ROOT_LEVEL; } else if (is_pae(vcpu)) { + reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); context->gva_to_gpa = paging64_gva_to_gpa; context->root_level = PT32E_ROOT_LEVEL; } else { + reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); context->gva_to_gpa = paging32_gva_to_gpa; context->root_level = PT32_ROOT_LEVEL; } @@ -2290,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) goto out; spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); - mmu_alloc_roots(vcpu); + r = mmu_alloc_roots(vcpu); mmu_sync_roots(vcpu); spin_unlock(&vcpu->kvm->mmu_lock); + if (r) + goto out; kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); kvm_mmu_flush_tlb(vcpu); out: @@ -2638,14 +2678,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp); static void free_mmu_pages(struct kvm_vcpu *vcpu) { - struct kvm_mmu_page *sp; - - while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) { - sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, - struct kvm_mmu_page, link); - kvm_mmu_zap_page(vcpu->kvm, sp); - cond_resched(); - } free_page((unsigned long)vcpu->arch.mmu.pae_root); } @@ -2710,7 +2742,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) { struct kvm_mmu_page *sp; - spin_lock(&kvm->mmu_lock); list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { int i; u64 *pt; @@ -2725,7 +2756,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) pt[i] &= ~PT_WRITABLE_MASK; } kvm_flush_remote_tlbs(kvm); - spin_unlock(&kvm->mmu_lock); } void kvm_mmu_zap_all(struct kvm *kvm) @@ -3007,11 +3037,13 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, " in nonleaf level: levels %d gva %lx" " level %d pte %llx\n", audit_msg, vcpu->arch.mmu.root_level, va, level, ent); - - audit_mappings_page(vcpu, ent, va, level - 1); + else + audit_mappings_page(vcpu, ent, va, level - 1); } else { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); - hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; + gfn_t gfn = gpa >> PAGE_SHIFT; + pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); + hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; if (is_shadow_present_pte(ent) && (ent & PT64_BASE_ADDR_MASK) != hpa) diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index eaab2145f62..3494a2fb136 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return vcpu->arch.cr0 & X86_CR0_PG; } +static inline int is_present_pte(unsigned long pte) +{ + return pte & PT_PRESENT_MASK; +} + #endif diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bd70206c56..258e4591e1c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, gfn_t table_gfn; unsigned index, pt_access, pte_access; gpa_t pte_gpa; + int rsvd_fault = 0; pgprintk("%s: addr %lx\n", __func__, addr); walk: @@ -157,6 +158,10 @@ walk: if (!is_present_pte(pte)) goto not_present; + rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); + if (rsvd_fault) + goto access_error; + if (write_fault && !is_writeble_pte(pte)) if (user_fault || is_write_protection(vcpu)) goto access_error; @@ -209,7 +214,6 @@ walk: if (ret) goto walk; pte |= PT_DIRTY_MASK; - kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); walker->ptes[walker->level - 1] = pte; } @@ -233,6 +237,8 @@ err: walker->error_code |= PFERR_USER_MASK; if (fetch_fault) walker->error_code |= PFERR_FETCH_MASK; + if (rsvd_fault) + walker->error_code |= PFERR_RSVD_MASK; return 0; } @@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, kvm_get_pfn(pfn); mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, gpte & PT_DIRTY_MASK, NULL, largepage, - gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), - pfn, true); + gpte_to_gfn(gpte), pfn, true); } /* @@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, user_fault, write_fault, gw->ptes[gw->level-1] & PT_DIRTY_MASK, ptwrite, largepage, - gw->ptes[gw->level-1] & PT_GLOBAL_MASK, gw->gfn, pfn, false); break; } @@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, return r; /* - * Look up the shadow pte for the faulting address. + * Look up the guest pte for the faulting address. */ r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, fetch_fault); @@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) nr_present++; pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, - is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, + is_dirty_pte(gpte), 0, gfn, spte_to_pfn(sp->spt[i]), true, false); } diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1f8510c51d6..71510e07e69 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -19,6 +19,7 @@ #include "irq.h" #include "mmu.h" #include "kvm_cache_regs.h" +#include "x86.h" #include <linux/module.h> #include <linux/kernel.h> @@ -69,7 +70,6 @@ module_param(npt, int, S_IRUGO); static int nested = 0; module_param(nested, int, S_IRUGO); -static void kvm_reput_irq(struct vcpu_svm *svm); static void svm_flush_tlb(struct kvm_vcpu *vcpu); static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); @@ -132,24 +132,6 @@ static inline u32 svm_has(u32 feat) return svm_features & feat; } -static inline u8 pop_irq(struct kvm_vcpu *vcpu) -{ - int word_index = __ffs(vcpu->arch.irq_summary); - int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); - int irq = word_index * BITS_PER_LONG + bit_index; - - clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); - if (!vcpu->arch.irq_pending[word_index]) - clear_bit(word_index, &vcpu->arch.irq_summary); - return irq; -} - -static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) -{ - set_bit(irq, vcpu->arch.irq_pending); - set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); -} - static inline void clgi(void) { asm volatile (__ex(SVM_CLGI)); @@ -214,17 +196,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, svm->vmcb->control.event_inj_err = error_code; } -static bool svm_exception_injected(struct kvm_vcpu *vcpu) +static int is_external_interrupt(u32 info) +{ + info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; + return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); +} + +static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) { struct vcpu_svm *svm = to_svm(vcpu); + u32 ret = 0; - return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID); + if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) + ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; + return ret & mask; } -static int is_external_interrupt(u32 info) +static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) { - info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; - return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); + struct vcpu_svm *svm = to_svm(vcpu); + + if (mask == 0) + svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; + else + svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; + } static void skip_emulated_instruction(struct kvm_vcpu *vcpu) @@ -232,7 +228,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (!svm->next_rip) { - printk(KERN_DEBUG "%s: NOP\n", __func__); + if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != + EMULATE_DONE) + printk(KERN_DEBUG "%s: NOP\n", __func__); return; } if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) @@ -240,9 +238,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) __func__, kvm_rip_read(vcpu), svm->next_rip); kvm_rip_write(vcpu, svm->next_rip); - svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; - - vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK); + svm_set_interrupt_shadow(vcpu, 0); } static int has_svm(void) @@ -830,6 +826,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, if (!var->unusable) var->type |= 0x1; break; + case VCPU_SREG_SS: + /* On AMD CPUs sometimes the DB bit in the segment + * descriptor is left as 1, although the whole segment has + * been made unusable. Clear it here to pass an Intel VMX + * entry check when cross vendor migrating. + */ + if (var->unusable) + var->db = 0; + break; } } @@ -960,15 +965,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, } -static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +static void update_db_intercept(struct kvm_vcpu *vcpu) { - int old_debug = vcpu->guest_debug; struct vcpu_svm *svm = to_svm(vcpu); - vcpu->guest_debug = dbg->control; - svm->vmcb->control.intercept_exceptions &= ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); + + if (vcpu->arch.singlestep) + svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); + if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { if (vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) @@ -979,6 +985,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) 1 << BP_VECTOR; } else vcpu->guest_debug = 0; +} + +static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) +{ + int old_debug = vcpu->guest_debug; + struct vcpu_svm *svm = to_svm(vcpu); + + vcpu->guest_debug = dbg->control; + + update_db_intercept(vcpu); if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; @@ -993,16 +1009,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) return 0; } -static int svm_get_irq(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - u32 exit_int_info = svm->vmcb->control.exit_int_info; - - if (is_external_interrupt(exit_int_info)) - return exit_int_info & SVM_EVTINJ_VEC_MASK; - return -1; -} - static void load_host_msrs(struct kvm_vcpu *vcpu) { #ifdef CONFIG_X86_64 @@ -1107,17 +1113,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { - u32 exit_int_info = svm->vmcb->control.exit_int_info; - struct kvm *kvm = svm->vcpu.kvm; u64 fault_address; u32 error_code; - bool event_injection = false; - - if (!irqchip_in_kernel(kvm) && - is_external_interrupt(exit_int_info)) { - event_injection = true; - push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); - } fault_address = svm->vmcb->control.exit_info_2; error_code = svm->vmcb->control.exit_info_1; @@ -1137,23 +1134,40 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) */ if (npt_enabled) svm_flush_tlb(&svm->vcpu); - - if (!npt_enabled && event_injection) - kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); + else { + if (kvm_event_needs_reinjection(&svm->vcpu)) + kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); + } return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { if (!(svm->vcpu.guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && + !svm->vcpu.arch.singlestep) { kvm_queue_exception(&svm->vcpu, DB_VECTOR); return 1; } - kvm_run->exit_reason = KVM_EXIT_DEBUG; - kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; - kvm_run->debug.arch.exception = DB_VECTOR; - return 0; + + if (svm->vcpu.arch.singlestep) { + svm->vcpu.arch.singlestep = false; + if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) + svm->vmcb->save.rflags &= + ~(X86_EFLAGS_TF | X86_EFLAGS_RF); + update_db_intercept(&svm->vcpu); + } + + if (svm->vcpu.guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ + kvm_run->exit_reason = KVM_EXIT_DEBUG; + kvm_run->debug.arch.pc = + svm->vmcb->save.cs.base + svm->vmcb->save.rip; + kvm_run->debug.arch.exception = DB_VECTOR; + return 0; + } + + return 1; } static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) @@ -1842,17 +1856,51 @@ static int task_switch_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { u16 tss_selector; + int reason; + int int_type = svm->vmcb->control.exit_int_info & + SVM_EXITINTINFO_TYPE_MASK; + int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; + uint32_t type = + svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; + uint32_t idt_v = + svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; tss_selector = (u16)svm->vmcb->control.exit_info_1; + if (svm->vmcb->control.exit_info_2 & (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) - return kvm_task_switch(&svm->vcpu, tss_selector, - TASK_SWITCH_IRET); - if (svm->vmcb->control.exit_info_2 & - (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) - return kvm_task_switch(&svm->vcpu, tss_selector, - TASK_SWITCH_JMP); - return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); + reason = TASK_SWITCH_IRET; + else if (svm->vmcb->control.exit_info_2 & + (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) + reason = TASK_SWITCH_JMP; + else if (idt_v) + reason = TASK_SWITCH_GATE; + else + reason = TASK_SWITCH_CALL; + + if (reason == TASK_SWITCH_GATE) { + switch (type) { + case SVM_EXITINTINFO_TYPE_NMI: + svm->vcpu.arch.nmi_injected = false; + break; + case SVM_EXITINTINFO_TYPE_EXEPT: + kvm_clear_exception_queue(&svm->vcpu); + break; + case SVM_EXITINTINFO_TYPE_INTR: + kvm_clear_interrupt_queue(&svm->vcpu); + break; + default: + break; + } + } + + if (reason != TASK_SWITCH_GATE || + int_type == SVM_EXITINTINFO_TYPE_SOFT || + (int_type == SVM_EXITINTINFO_TYPE_EXEPT && + (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) + skip_emulated_instruction(&svm->vcpu); + + return kvm_task_switch(&svm->vcpu, tss_selector, reason); } static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) @@ -1862,6 +1910,14 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) return 1; } +static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) +{ + ++svm->vcpu.stat.nmi_window_exits; + svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); + svm->vcpu.arch.hflags |= HF_IRET_MASK; + return 1; +} + static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) @@ -1879,8 +1935,14 @@ static int emulate_on_interception(struct vcpu_svm *svm, static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) { + u8 cr8_prev = kvm_get_cr8(&svm->vcpu); + /* instruction emulation calls kvm_set_cr8() */ emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); - if (irqchip_in_kernel(svm->vcpu.kvm)) + if (irqchip_in_kernel(svm->vcpu.kvm)) { + svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; + return 1; + } + if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) return 1; kvm_run->exit_reason = KVM_EXIT_SET_TPR; return 0; @@ -2090,8 +2152,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm, * If the user space waits to inject interrupts, exit as soon as * possible */ - if (kvm_run->request_interrupt_window && - !svm->vcpu.arch.irq_summary) { + if (!irqchip_in_kernel(svm->vcpu.kvm) && + kvm_run->request_interrupt_window && + !kvm_cpu_has_interrupt(&svm->vcpu)) { ++svm->vcpu.stat.irq_window_exits; kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; @@ -2134,6 +2197,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, [SVM_EXIT_VINTR] = interrupt_window_interception, /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ [SVM_EXIT_CPUID] = cpuid_interception, + [SVM_EXIT_IRET] = iret_interception, [SVM_EXIT_INVD] = emulate_on_interception, [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, @@ -2194,7 +2258,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) } } - kvm_reput_irq(svm); if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; @@ -2205,7 +2268,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) if (is_external_interrupt(svm->vmcb->control.exit_int_info) && exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && - exit_code != SVM_EXIT_NPF) + exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH) printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " "exit_code 0x%x\n", __func__, svm->vmcb->control.exit_int_info, @@ -2242,6 +2305,15 @@ static void pre_svm_run(struct vcpu_svm *svm) new_asid(svm, svm_data); } +static void svm_inject_nmi(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; + vcpu->arch.hflags |= HF_NMI_MASK; + svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); + ++vcpu->stat.nmi_injections; +} static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) { @@ -2257,134 +2329,71 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); } -static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) +static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr) { struct vcpu_svm *svm = to_svm(vcpu); - nested_svm_intr(svm); - - svm_inject_irq(svm, irq); + svm->vmcb->control.event_inj = nr | + SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; } -static void update_cr8_intercept(struct kvm_vcpu *vcpu) +static void svm_set_irq(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; - int max_irr, tpr; - if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr) - return; + nested_svm_intr(svm); - vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; + svm_queue_irq(vcpu, vcpu->arch.interrupt.nr); +} - max_irr = kvm_lapic_find_highest_irr(vcpu); - if (max_irr == -1) - return; +static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) +{ + struct vcpu_svm *svm = to_svm(vcpu); - tpr = kvm_lapic_get_cr8(vcpu) << 4; + if (irr == -1) + return; - if (tpr >= (max_irr & 0xf0)) - vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; + if (tpr >= irr) + svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; } -static void svm_intr_assist(struct kvm_vcpu *vcpu) +static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - int intr_vector = -1; - - if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && - ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) { - intr_vector = vmcb->control.exit_int_info & - SVM_EVTINJ_VEC_MASK; - vmcb->control.exit_int_info = 0; - svm_inject_irq(svm, intr_vector); - goto out; - } - - if (vmcb->control.int_ctl & V_IRQ_MASK) - goto out; - - if (!kvm_cpu_has_interrupt(vcpu)) - goto out; - - if (nested_svm_intr(svm)) - goto out; - - if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) - goto out; - - if (!(vmcb->save.rflags & X86_EFLAGS_IF) || - (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || - (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { - /* unable to deliver irq, set pending irq */ - svm_set_vintr(svm); - svm_inject_irq(svm, 0x0); - goto out; - } - /* Okay, we can deliver the interrupt: grab it and update PIC state. */ - intr_vector = kvm_cpu_get_interrupt(vcpu); - svm_inject_irq(svm, intr_vector); -out: - update_cr8_intercept(vcpu); + return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && + !(svm->vcpu.arch.hflags & HF_NMI_MASK); } -static void kvm_reput_irq(struct vcpu_svm *svm) +static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) { - struct vmcb_control_area *control = &svm->vmcb->control; - - if ((control->int_ctl & V_IRQ_MASK) - && !irqchip_in_kernel(svm->vcpu.kvm)) { - control->int_ctl &= ~V_IRQ_MASK; - push_irq(&svm->vcpu, control->int_vector); - } - - svm->vcpu.arch.interrupt_window_open = - !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && - (svm->vcpu.arch.hflags & HF_GIF_MASK); + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb = svm->vmcb; + return (vmcb->save.rflags & X86_EFLAGS_IF) && + !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && + (svm->vcpu.arch.hflags & HF_GIF_MASK); } -static void svm_do_inject_vector(struct vcpu_svm *svm) +static void enable_irq_window(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = &svm->vcpu; - int word_index = __ffs(vcpu->arch.irq_summary); - int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); - int irq = word_index * BITS_PER_LONG + bit_index; - - clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); - if (!vcpu->arch.irq_pending[word_index]) - clear_bit(word_index, &vcpu->arch.irq_summary); - svm_inject_irq(svm, irq); + svm_set_vintr(to_svm(vcpu)); + svm_inject_irq(to_svm(vcpu), 0x0); } -static void do_interrupt_requests(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static void enable_nmi_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb_control_area *control = &svm->vmcb->control; - - if (nested_svm_intr(svm)) - return; - svm->vcpu.arch.interrupt_window_open = - (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && - (svm->vmcb->save.rflags & X86_EFLAGS_IF) && - (svm->vcpu.arch.hflags & HF_GIF_MASK)); + if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) + == HF_NMI_MASK) + return; /* IRET will cause a vm exit */ - if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) - /* - * If interrupts enabled, and not blocked by sti or mov ss. Good. - */ - svm_do_inject_vector(svm); - - /* - * Interrupts blocked. Wait for unblock. - */ - if (!svm->vcpu.arch.interrupt_window_open && - (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) - svm_set_vintr(svm); - else - svm_clear_vintr(svm); + /* Something prevents NMI from been injected. Single step over + possible problem (IRET or exception injection or interrupt + shadow) */ + vcpu->arch.singlestep = true; + svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); + update_db_intercept(vcpu); } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -2407,7 +2416,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; - kvm_lapic_set_tpr(vcpu, cr8); + kvm_set_cr8(vcpu, cr8); } } @@ -2416,14 +2425,54 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u64 cr8; - if (!irqchip_in_kernel(vcpu->kvm)) - return; - cr8 = kvm_get_cr8(vcpu); svm->vmcb->control.int_ctl &= ~V_TPR_MASK; svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; } +static void svm_complete_interrupts(struct vcpu_svm *svm) +{ + u8 vector; + int type; + u32 exitintinfo = svm->vmcb->control.exit_int_info; + + if (svm->vcpu.arch.hflags & HF_IRET_MASK) + svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); + + svm->vcpu.arch.nmi_injected = false; + kvm_clear_exception_queue(&svm->vcpu); + kvm_clear_interrupt_queue(&svm->vcpu); + + if (!(exitintinfo & SVM_EXITINTINFO_VALID)) + return; + + vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; + type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; + + switch (type) { + case SVM_EXITINTINFO_TYPE_NMI: + svm->vcpu.arch.nmi_injected = true; + break; + case SVM_EXITINTINFO_TYPE_EXEPT: + /* In case of software exception do not reinject an exception + vector, but re-execute and instruction instead */ + if (kvm_exception_is_soft(vector)) + break; + if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { + u32 err = svm->vmcb->control.exit_int_info_err; + kvm_queue_exception_e(&svm->vcpu, vector, err); + + } else + kvm_queue_exception(&svm->vcpu, vector); + break; + case SVM_EXITINTINFO_TYPE_INTR: + kvm_queue_interrupt(&svm->vcpu, vector, false); + break; + default: + break; + } +} + #ifdef CONFIG_X86_64 #define R "r" #else @@ -2552,6 +2601,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) sync_cr8_to_lapic(vcpu); svm->next_rip = 0; + + svm_complete_interrupts(svm); } #undef R @@ -2617,7 +2668,7 @@ static int get_npt_level(void) #endif } -static int svm_get_mt_mask_shift(void) +static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; } @@ -2667,17 +2718,21 @@ static struct kvm_x86_ops svm_x86_ops = { .run = svm_vcpu_run, .handle_exit = handle_exit, .skip_emulated_instruction = skip_emulated_instruction, + .set_interrupt_shadow = svm_set_interrupt_shadow, + .get_interrupt_shadow = svm_get_interrupt_shadow, .patch_hypercall = svm_patch_hypercall, - .get_irq = svm_get_irq, .set_irq = svm_set_irq, + .set_nmi = svm_inject_nmi, .queue_exception = svm_queue_exception, - .exception_injected = svm_exception_injected, - .inject_pending_irq = svm_intr_assist, - .inject_pending_vectors = do_interrupt_requests, + .interrupt_allowed = svm_interrupt_allowed, + .nmi_allowed = svm_nmi_allowed, + .enable_nmi_window = enable_nmi_window, + .enable_irq_window = enable_irq_window, + .update_cr8_intercept = update_cr8_intercept, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, - .get_mt_mask_shift = svm_get_mt_mask_shift, + .get_mt_mask = svm_get_mt_mask, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c new file mode 100644 index 00000000000..86dbac072d0 --- /dev/null +++ b/arch/x86/kvm/timer.c @@ -0,0 +1,46 @@ +#include <linux/kvm_host.h> +#include <linux/kvm.h> +#include <linux/hrtimer.h> +#include <asm/atomic.h> +#include "kvm_timer.h" + +static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) +{ + int restart_timer = 0; + wait_queue_head_t *q = &vcpu->wq; + + /* FIXME: this code should not know anything about vcpus */ + if (!atomic_inc_and_test(&ktimer->pending)) + set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); + + if (!ktimer->reinject) + atomic_set(&ktimer->pending, 1); + + if (waitqueue_active(q)) + wake_up_interruptible(q); + + if (ktimer->t_ops->is_periodic(ktimer)) { + hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); + restart_timer = 1; + } + + return restart_timer; +} + +enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) +{ + int restart_timer; + struct kvm_vcpu *vcpu; + struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); + + vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id]; + if (!vcpu) + return HRTIMER_NORESTART; + + restart_timer = __kvm_timer_fn(vcpu, ktimer); + if (restart_timer) + return HRTIMER_RESTART; + else + return HRTIMER_NORESTART; +} + diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bb481330716..32d6ae8fb60 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -32,26 +32,27 @@ #include <asm/desc.h> #include <asm/vmx.h> #include <asm/virtext.h> +#include <asm/mce.h> #define __ex(x) __kvm_handle_fault_on_reboot(x) MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -static int bypass_guest_pf = 1; -module_param(bypass_guest_pf, bool, 0); +static int __read_mostly bypass_guest_pf = 1; +module_param(bypass_guest_pf, bool, S_IRUGO); -static int enable_vpid = 1; -module_param(enable_vpid, bool, 0); +static int __read_mostly enable_vpid = 1; +module_param_named(vpid, enable_vpid, bool, 0444); -static int flexpriority_enabled = 1; -module_param(flexpriority_enabled, bool, 0); +static int __read_mostly flexpriority_enabled = 1; +module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); -static int enable_ept = 1; -module_param(enable_ept, bool, 0); +static int __read_mostly enable_ept = 1; +module_param_named(ept, enable_ept, bool, S_IRUGO); -static int emulate_invalid_guest_state = 0; -module_param(emulate_invalid_guest_state, bool, 0); +static int __read_mostly emulate_invalid_guest_state = 0; +module_param(emulate_invalid_guest_state, bool, S_IRUGO); struct vmcs { u32 revision_id; @@ -97,6 +98,7 @@ struct vcpu_vmx { int soft_vnmi_blocked; ktime_t entry_time; s64 vnmi_blocked_time; + u32 exit_reason; }; static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) @@ -111,9 +113,10 @@ static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); -static struct page *vmx_io_bitmap_a; -static struct page *vmx_io_bitmap_b; -static struct page *vmx_msr_bitmap; +static unsigned long *vmx_io_bitmap_a; +static unsigned long *vmx_io_bitmap_b; +static unsigned long *vmx_msr_bitmap_legacy; +static unsigned long *vmx_msr_bitmap_longmode; static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); @@ -213,70 +216,78 @@ static inline int is_external_interrupt(u32 intr_info) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } +static inline int is_machine_check(u32 intr_info) +{ + return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | + INTR_INFO_VALID_MASK)) == + (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); +} + static inline int cpu_has_vmx_msr_bitmap(void) { - return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS); + return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; } static inline int cpu_has_vmx_tpr_shadow(void) { - return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); + return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; } static inline int vm_need_tpr_shadow(struct kvm *kvm) { - return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm))); + return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); } static inline int cpu_has_secondary_exec_ctrls(void) { - return (vmcs_config.cpu_based_exec_ctrl & - CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); + return vmcs_config.cpu_based_exec_ctrl & + CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; } static inline bool cpu_has_vmx_virtualize_apic_accesses(void) { - return flexpriority_enabled - && (vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; +} + +static inline bool cpu_has_vmx_flexpriority(void) +{ + return cpu_has_vmx_tpr_shadow() && + cpu_has_vmx_virtualize_apic_accesses(); } static inline int cpu_has_vmx_invept_individual_addr(void) { - return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT)); + return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); } static inline int cpu_has_vmx_invept_context(void) { - return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT)); + return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); } static inline int cpu_has_vmx_invept_global(void) { - return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT)); + return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); } static inline int cpu_has_vmx_ept(void) { - return (vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_ENABLE_EPT); -} - -static inline int vm_need_ept(void) -{ - return (cpu_has_vmx_ept() && enable_ept); + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_EPT; } static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) { - return ((cpu_has_vmx_virtualize_apic_accesses()) && - (irqchip_in_kernel(kvm))); + return flexpriority_enabled && + (cpu_has_vmx_virtualize_apic_accesses()) && + (irqchip_in_kernel(kvm)); } static inline int cpu_has_vmx_vpid(void) { - return (vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_ENABLE_VPID); + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_ENABLE_VPID; } static inline int cpu_has_virtual_nmis(void) @@ -284,6 +295,11 @@ static inline int cpu_has_virtual_nmis(void) return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; } +static inline bool report_flexpriority(void) +{ + return flexpriority_enabled; +} + static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) { int i; @@ -381,7 +397,7 @@ static inline void ept_sync_global(void) static inline void ept_sync_context(u64 eptp) { - if (vm_need_ept()) { + if (enable_ept) { if (cpu_has_vmx_invept_context()) __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); else @@ -391,7 +407,7 @@ static inline void ept_sync_context(u64 eptp) static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) { - if (vm_need_ept()) { + if (enable_ept) { if (cpu_has_vmx_invept_individual_addr()) __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, eptp, gpa); @@ -478,7 +494,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); + eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); if (!vcpu->fpu_active) eb |= 1u << NM_VECTOR; if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { @@ -488,9 +504,9 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) eb |= 1u << BP_VECTOR; } - if (vcpu->arch.rmode.active) + if (vcpu->arch.rmode.vm86_active) eb = ~0; - if (vm_need_ept()) + if (enable_ept) eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -724,29 +740,50 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { - if (vcpu->arch.rmode.active) + if (vcpu->arch.rmode.vm86_active) rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; vmcs_writel(GUEST_RFLAGS, rflags); } +static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) +{ + u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + int ret = 0; + + if (interruptibility & GUEST_INTR_STATE_STI) + ret |= X86_SHADOW_INT_STI; + if (interruptibility & GUEST_INTR_STATE_MOV_SS) + ret |= X86_SHADOW_INT_MOV_SS; + + return ret & mask; +} + +static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) +{ + u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); + u32 interruptibility = interruptibility_old; + + interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); + + if (mask & X86_SHADOW_INT_MOV_SS) + interruptibility |= GUEST_INTR_STATE_MOV_SS; + if (mask & X86_SHADOW_INT_STI) + interruptibility |= GUEST_INTR_STATE_STI; + + if ((interruptibility != interruptibility_old)) + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); +} + static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { unsigned long rip; - u32 interruptibility; rip = kvm_rip_read(vcpu); rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); kvm_rip_write(vcpu, rip); - /* - * We emulated an instruction, so temporary interrupt blocking - * should be removed, if set. - */ - interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - if (interruptibility & 3) - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, - interruptibility & ~3); - vcpu->arch.interrupt_window_open = 1; + /* skipping an emulated instruction also counts */ + vmx_set_interrupt_shadow(vcpu, 0); } static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, @@ -760,7 +797,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, intr_info |= INTR_INFO_DELIVER_CODE_MASK; } - if (vcpu->arch.rmode.active) { + if (vcpu->arch.rmode.vm86_active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = nr; vmx->rmode.irq.rip = kvm_rip_read(vcpu); @@ -773,8 +810,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, return; } - if (nr == BP_VECTOR || nr == OF_VECTOR) { - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); + if (kvm_exception_is_soft(nr)) { + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmx->vcpu.arch.event_exit_inst_len); intr_info |= INTR_TYPE_SOFT_EXCEPTION; } else intr_info |= INTR_TYPE_HARD_EXCEPTION; @@ -782,11 +820,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); } -static bool vmx_exception_injected(struct kvm_vcpu *vcpu) -{ - return false; -} - /* * Swap MSR entry in host/guest MSR entry array. */ @@ -812,6 +845,7 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) static void setup_msrs(struct vcpu_vmx *vmx) { int save_nmsrs; + unsigned long *msr_bitmap; vmx_load_host_state(vmx); save_nmsrs = 0; @@ -847,6 +881,15 @@ static void setup_msrs(struct vcpu_vmx *vmx) __find_msr_index(vmx, MSR_KERNEL_GS_BASE); #endif vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); + + if (cpu_has_vmx_msr_bitmap()) { + if (is_long_mode(&vmx->vcpu)) + msr_bitmap = vmx_msr_bitmap_longmode; + else + msr_bitmap = vmx_msr_bitmap_legacy; + + vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); + } } /* @@ -1034,13 +1077,6 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) return 0; } -static int vmx_get_irq(struct kvm_vcpu *vcpu) -{ - if (!vcpu->arch.interrupt.pending) - return -1; - return vcpu->arch.interrupt.nr; -} - static __init int cpu_has_kvm_support(void) { return cpu_has_vmx(); @@ -1294,6 +1330,18 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); + if (!cpu_has_vmx_vpid()) + enable_vpid = 0; + + if (!cpu_has_vmx_ept()) + enable_ept = 0; + + if (!cpu_has_vmx_flexpriority()) + flexpriority_enabled = 0; + + if (!cpu_has_vmx_tpr_shadow()) + kvm_x86_ops->update_cr8_intercept = NULL; + return alloc_kvm_area(); } @@ -1324,7 +1372,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); vmx->emulation_required = 1; - vcpu->arch.rmode.active = 0; + vcpu->arch.rmode.vm86_active = 0; vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); @@ -1386,7 +1434,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); vmx->emulation_required = 1; - vcpu->arch.rmode.active = 1; + vcpu->arch.rmode.vm86_active = 1; vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); @@ -1485,7 +1533,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) static void vmx_flush_tlb(struct kvm_vcpu *vcpu) { vpid_sync_vcpu_all(to_vmx(vcpu)); - if (vm_need_ept()) + if (enable_ept) ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); } @@ -1555,10 +1603,10 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmx_fpu_deactivate(vcpu); - if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) + if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); - if (!vcpu->arch.rmode.active && !(cr0 & X86_CR0_PE)) + if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE)) enter_rmode(vcpu); #ifdef CONFIG_X86_64 @@ -1570,7 +1618,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } #endif - if (vm_need_ept()) + if (enable_ept) ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); vmcs_writel(CR0_READ_SHADOW, cr0); @@ -1599,7 +1647,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) u64 eptp; guest_cr3 = cr3; - if (vm_need_ept()) { + if (enable_ept) { eptp = construct_eptp(cr3); vmcs_write64(EPT_POINTER, eptp); ept_sync_context(eptp); @@ -1616,11 +1664,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { - unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? + unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ? KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); vcpu->arch.cr4 = cr4; - if (vm_need_ept()) + if (enable_ept) ept_update_paging_mode_cr4(&hw_cr4, vcpu); vmcs_writel(CR4_READ_SHADOW, cr4); @@ -1699,7 +1747,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; u32 ar; - if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) { + if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) { vcpu->arch.rmode.tr.selector = var->selector; vcpu->arch.rmode.tr.base = var->base; vcpu->arch.rmode.tr.limit = var->limit; @@ -1709,7 +1757,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - if (vcpu->arch.rmode.active && var->s) { + if (vcpu->arch.rmode.vm86_active && var->s) { /* * Hack real-mode segments into vm86 compatibility. */ @@ -1982,7 +2030,7 @@ static int init_rmode_identity_map(struct kvm *kvm) pfn_t identity_map_pfn; u32 tmp; - if (!vm_need_ept()) + if (!enable_ept) return 1; if (unlikely(!kvm->arch.ept_identity_pagetable)) { printk(KERN_ERR "EPT: identity-mapping pagetable " @@ -2071,7 +2119,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx) int vpid; vmx->vpid = 0; - if (!enable_vpid || !cpu_has_vmx_vpid()) + if (!enable_vpid) return; spin_lock(&vmx_vpid_lock); vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); @@ -2082,9 +2130,9 @@ static void allocate_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } -static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) { - void *va; + int f = sizeof(unsigned long); if (!cpu_has_vmx_msr_bitmap()) return; @@ -2094,16 +2142,21 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) * have the write-low and read-high bitmap offsets the wrong way round. * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. */ - va = kmap(msr_bitmap); if (msr <= 0x1fff) { - __clear_bit(msr, va + 0x000); /* read-low */ - __clear_bit(msr, va + 0x800); /* write-low */ + __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ + __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { msr &= 0x1fff; - __clear_bit(msr, va + 0x400); /* read-high */ - __clear_bit(msr, va + 0xc00); /* write-high */ + __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ + __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ } - kunmap(msr_bitmap); +} + +static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) +{ + if (!longmode_only) + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); } /* @@ -2121,11 +2174,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) u32 exec_control; /* I/O */ - vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); - vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); + vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); + vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); if (cpu_has_vmx_msr_bitmap()) - vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); + vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ @@ -2141,7 +2194,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) CPU_BASED_CR8_LOAD_EXITING; #endif } - if (!vm_need_ept()) + if (!enable_ept) exec_control |= CPU_BASED_CR3_STORE_EXITING | CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_INVLPG_EXITING; @@ -2154,7 +2207,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; if (vmx->vpid == 0) exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; - if (!vm_need_ept()) + if (!enable_ept) exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } @@ -2273,7 +2326,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) goto out; } - vmx->vcpu.arch.rmode.active = 0; + vmx->vcpu.arch.rmode.vm86_active = 0; vmx->soft_vnmi_blocked = 0; @@ -2402,14 +2455,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); } -static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) +static void vmx_inject_irq(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + uint32_t intr; + int irq = vcpu->arch.interrupt.nr; KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); ++vcpu->stat.irq_injections; - if (vcpu->arch.rmode.active) { + if (vcpu->arch.rmode.vm86_active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = irq; vmx->rmode.irq.rip = kvm_rip_read(vcpu); @@ -2419,8 +2474,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); return; } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); + intr = irq | INTR_INFO_VALID_MASK; + if (vcpu->arch.interrupt.soft) { + intr |= INTR_TYPE_SOFT_INTR; + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmx->vcpu.arch.event_exit_inst_len); + } else + intr |= INTR_TYPE_EXT_INTR; + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); } static void vmx_inject_nmi(struct kvm_vcpu *vcpu) @@ -2441,7 +2502,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) } ++vcpu->stat.nmi_injections; - if (vcpu->arch.rmode.active) { + if (vcpu->arch.rmode.vm86_active) { vmx->rmode.irq.pending = true; vmx->rmode.irq.vector = NMI_VECTOR; vmx->rmode.irq.rip = kvm_rip_read(vcpu); @@ -2456,76 +2517,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); } -static void vmx_update_window_states(struct kvm_vcpu *vcpu) +static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) { - u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); - - vcpu->arch.nmi_window_open = - !(guest_intr & (GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS | - GUEST_INTR_STATE_NMI)); if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) - vcpu->arch.nmi_window_open = 0; - - vcpu->arch.interrupt_window_open = - ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(guest_intr & (GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS))); -} - -static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) -{ - int word_index = __ffs(vcpu->arch.irq_summary); - int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); - int irq = word_index * BITS_PER_LONG + bit_index; + return 0; - clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); - if (!vcpu->arch.irq_pending[word_index]) - clear_bit(word_index, &vcpu->arch.irq_summary); - kvm_queue_interrupt(vcpu, irq); + return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | + GUEST_INTR_STATE_NMI)); } -static void do_interrupt_requests(struct kvm_vcpu *vcpu, - struct kvm_run *kvm_run) +static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { - vmx_update_window_states(vcpu); - - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS); - - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vcpu->arch.nmi_window_open) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_nmi_window(vcpu); - return; - } - } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (vcpu->arch.irq_summary - || kvm_run->request_interrupt_window) - enable_irq_window(vcpu); - return; - } - - if (vcpu->arch.interrupt_window_open) { - if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) - kvm_do_inject_irq(vcpu); - - if (vcpu->arch.interrupt.pending) - vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - } - if (!vcpu->arch.interrupt_window_open && - (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) - enable_irq_window(vcpu); + return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && + !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -2585,6 +2591,31 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, return 0; } +/* + * Trigger machine check on the host. We assume all the MSRs are already set up + * by the CPU and that we still run on the same CPU as the MCE occurred on. + * We pass a fake environment to the machine check handler because we want + * the guest to be always treated like user space, no matter what context + * it used internally. + */ +static void kvm_machine_check(void) +{ +#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) + struct pt_regs regs = { + .cs = 3, /* Fake ring 3 no matter what the guest ran on */ + .flags = X86_EFLAGS_IF, + }; + + do_machine_check(®s, 0); +#endif +} + +static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + /* already handled by vcpu_run */ + return 1; +} + static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2596,17 +2627,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) vect_info = vmx->idt_vectoring_info; intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + if (is_machine_check(intr_info)) + return handle_machine_check(vcpu, kvm_run); + if ((vect_info & VECTORING_INFO_VALID_MASK) && !is_page_fault(intr_info)) printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " "intr info 0x%x\n", __func__, vect_info, intr_info); - if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { - int irq = vect_info & VECTORING_INFO_VECTOR_MASK; - set_bit(irq, vcpu->arch.irq_pending); - set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); - } - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) return 1; /* already handled by vmx_vcpu_run() */ @@ -2628,17 +2656,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); if (is_page_fault(intr_info)) { /* EPT won't cause page fault directly */ - if (vm_need_ept()) + if (enable_ept) BUG(); cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); - if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending) + if (kvm_event_needs_reinjection(vcpu)) kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } - if (vcpu->arch.rmode.active && + if (vcpu->arch.rmode.vm86_active && handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, error_code)) { if (vcpu->arch.halt_request) { @@ -2753,13 +2781,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); skip_emulated_instruction(vcpu); return 1; - case 8: - kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg)); - skip_emulated_instruction(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) - return 1; - kvm_run->exit_reason = KVM_EXIT_SET_TPR; - return 0; + case 8: { + u8 cr8_prev = kvm_get_cr8(vcpu); + u8 cr8 = kvm_register_read(vcpu, reg); + kvm_set_cr8(vcpu, cr8); + skip_emulated_instruction(vcpu); + if (irqchip_in_kernel(vcpu->kvm)) + return 1; + if (cr8_prev <= cr8) + return 1; + kvm_run->exit_reason = KVM_EXIT_SET_TPR; + return 0; + } }; break; case 2: /* clts */ @@ -2957,8 +2990,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, * If the user space waits to inject interrupts, exit as soon as * possible */ - if (kvm_run->request_interrupt_window && - !vcpu->arch.irq_summary) { + if (!irqchip_in_kernel(vcpu->kvm) && + kvm_run->request_interrupt_window && + !kvm_cpu_has_interrupt(vcpu)) { kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; return 0; } @@ -2980,7 +3014,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); kvm_mmu_invlpg(vcpu, exit_qualification); skip_emulated_instruction(vcpu); @@ -2996,11 +3030,11 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u64 exit_qualification; + unsigned long exit_qualification; enum emulation_result er; unsigned long offset; - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); offset = exit_qualification & 0xffful; er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); @@ -3019,22 +3053,41 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; u16 tss_selector; - int reason; + int reason, type, idt_v; + + idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); + type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); exit_qualification = vmcs_readl(EXIT_QUALIFICATION); reason = (u32)exit_qualification >> 30; - if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && - (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && - (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) - == INTR_TYPE_NMI_INTR) { - vcpu->arch.nmi_injected = false; - if (cpu_has_virtual_nmis()) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + if (reason == TASK_SWITCH_GATE && idt_v) { + switch (type) { + case INTR_TYPE_NMI_INTR: + vcpu->arch.nmi_injected = false; + if (cpu_has_virtual_nmis()) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + break; + case INTR_TYPE_EXT_INTR: + case INTR_TYPE_SOFT_INTR: + kvm_clear_interrupt_queue(vcpu); + break; + case INTR_TYPE_HARD_EXCEPTION: + case INTR_TYPE_SOFT_EXCEPTION: + kvm_clear_exception_queue(vcpu); + break; + default: + break; + } } tss_selector = exit_qualification; + if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && + type != INTR_TYPE_EXT_INTR && + type != INTR_TYPE_NMI_INTR)) + skip_emulated_instruction(vcpu); + if (!kvm_task_switch(vcpu, tss_selector, reason)) return 0; @@ -3051,11 +3104,11 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - u64 exit_qualification; + unsigned long exit_qualification; gpa_t gpa; int gla_validity; - exit_qualification = vmcs_read64(EXIT_QUALIFICATION); + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); if (exit_qualification & (1 << 6)) { printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); @@ -3067,7 +3120,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), - (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); + vmcs_readl(GUEST_LINEAR_ADDRESS)); printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", (long unsigned int)exit_qualification); kvm_run->exit_reason = KVM_EXIT_UNKNOWN; @@ -3150,6 +3203,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, [EXIT_REASON_WBINVD] = handle_wbinvd, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, + [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, }; static const int kvm_vmx_max_exit_handlers = @@ -3159,10 +3213,10 @@ static const int kvm_vmx_max_exit_handlers = * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { - u32 exit_reason = vmcs_read32(VM_EXIT_REASON); struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), @@ -3178,7 +3232,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* Access CR3 don't cause VMExit in paging mode, so we need * to sync with guest real CR3. */ - if (vm_need_ept() && is_paging(vcpu)) { + if (enable_ept && is_paging(vcpu)) { vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); ept_load_pdptrs(vcpu); } @@ -3199,9 +3253,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) __func__, vectoring_info, exit_reason); if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { - if (vcpu->arch.interrupt_window_open) { + if (vmx_interrupt_allowed(vcpu)) { vmx->soft_vnmi_blocked = 0; - vcpu->arch.nmi_window_open = 1; } else if (vmx->vnmi_blocked_time > 1000000000LL && vcpu->arch.nmi_pending) { /* @@ -3214,7 +3267,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) "state on VCPU %d after 1 s timeout\n", __func__, vcpu->vcpu_id); vmx->soft_vnmi_blocked = 0; - vmx->vcpu.arch.nmi_window_open = 1; } } @@ -3228,122 +3280,107 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) return 0; } -static void update_tpr_threshold(struct kvm_vcpu *vcpu) +static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { - int max_irr, tpr; - - if (!vm_need_tpr_shadow(vcpu->kvm)) - return; - - if (!kvm_lapic_enabled(vcpu) || - ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) { + if (irr == -1 || tpr < irr) { vmcs_write32(TPR_THRESHOLD, 0); return; } - tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4; - vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); + vmcs_write32(TPR_THRESHOLD, irr); } static void vmx_complete_interrupts(struct vcpu_vmx *vmx) { u32 exit_intr_info; - u32 idt_vectoring_info; + u32 idt_vectoring_info = vmx->idt_vectoring_info; bool unblock_nmi; u8 vector; int type; bool idtv_info_valid; - u32 error; exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); + + /* Handle machine checks before interrupts are enabled */ + if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) + || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI + && is_machine_check(exit_intr_info))) + kvm_machine_check(); + + /* We need to handle NMIs before interrupts are enabled */ + if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && + (exit_intr_info & INTR_INFO_VALID_MASK)) { + KVMTRACE_0D(NMI, &vmx->vcpu, handler); + asm("int $2"); + } + + idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; + if (cpu_has_virtual_nmis()) { unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* - * SDM 3: 25.7.1.2 + * SDM 3: 27.7.1.2 (September 2008) * Re-set bit "block by NMI" before VM entry if vmexit caused by * a guest IRET fault. + * SDM 3: 23.2.2 (September 2008) + * Bit 12 is undefined in any of the following cases: + * If the VM exit sets the valid bit in the IDT-vectoring + * information field. + * If the VM exit is due to a double fault. */ - if (unblock_nmi && vector != DF_VECTOR) + if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && + vector != DF_VECTOR && !idtv_info_valid) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); } else if (unlikely(vmx->soft_vnmi_blocked)) vmx->vnmi_blocked_time += ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); - idt_vectoring_info = vmx->idt_vectoring_info; - idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; + vmx->vcpu.arch.nmi_injected = false; + kvm_clear_exception_queue(&vmx->vcpu); + kvm_clear_interrupt_queue(&vmx->vcpu); + + if (!idtv_info_valid) + return; + vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; - if (vmx->vcpu.arch.nmi_injected) { + + switch (type) { + case INTR_TYPE_NMI_INTR: + vmx->vcpu.arch.nmi_injected = true; /* - * SDM 3: 25.7.1.2 - * Clear bit "block by NMI" before VM entry if a NMI delivery - * faulted. + * SDM 3: 27.7.1.2 (September 2008) + * Clear bit "block by NMI" before VM entry if a NMI + * delivery faulted. */ - if (idtv_info_valid && type == INTR_TYPE_NMI_INTR) - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmx->vcpu.arch.nmi_injected = false; - } - kvm_clear_exception_queue(&vmx->vcpu); - if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || - type == INTR_TYPE_SOFT_EXCEPTION)) { + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + break; + case INTR_TYPE_SOFT_EXCEPTION: + vmx->vcpu.arch.event_exit_inst_len = + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + /* fall through */ + case INTR_TYPE_HARD_EXCEPTION: if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { - error = vmcs_read32(IDT_VECTORING_ERROR_CODE); - kvm_queue_exception_e(&vmx->vcpu, vector, error); + u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE); + kvm_queue_exception_e(&vmx->vcpu, vector, err); } else kvm_queue_exception(&vmx->vcpu, vector); - vmx->idt_vectoring_info = 0; - } - kvm_clear_interrupt_queue(&vmx->vcpu); - if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) { - kvm_queue_interrupt(&vmx->vcpu, vector); - vmx->idt_vectoring_info = 0; - } -} - -static void vmx_intr_assist(struct kvm_vcpu *vcpu) -{ - update_tpr_threshold(vcpu); - - vmx_update_window_states(vcpu); - - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_STI | - GUEST_INTR_STATE_MOV_SS); - - if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vcpu->arch.interrupt.pending) { - enable_nmi_window(vcpu); - } else if (vcpu->arch.nmi_window_open) { - vcpu->arch.nmi_pending = false; - vcpu->arch.nmi_injected = true; - } else { - enable_nmi_window(vcpu); - return; - } - } - if (vcpu->arch.nmi_injected) { - vmx_inject_nmi(vcpu); - if (vcpu->arch.nmi_pending) - enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); - return; - } - if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { - if (vcpu->arch.interrupt_window_open) - kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); - else - enable_irq_window(vcpu); - } - if (vcpu->arch.interrupt.pending) { - vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); - if (kvm_cpu_has_interrupt(vcpu)) - enable_irq_window(vcpu); + break; + case INTR_TYPE_SOFT_INTR: + vmx->vcpu.arch.event_exit_inst_len = + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + /* fall through */ + case INTR_TYPE_EXT_INTR: + kvm_queue_interrupt(&vmx->vcpu, vector, + type == INTR_TYPE_SOFT_INTR); + break; + default: + break; } } @@ -3381,7 +3418,6 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 intr_info; /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) @@ -3505,20 +3541,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (vmx->rmode.irq.pending) fixup_rmode_irq(vmx); - vmx_update_window_states(vcpu); - asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); vmx->launched = 1; - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - /* We need to handle NMIs before interrupts are enabled */ - if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && - (intr_info & INTR_INFO_VALID_MASK)) { - KVMTRACE_0D(NMI, vcpu, handler); - asm("int $2"); - } - vmx_complete_interrupts(vmx); } @@ -3593,7 +3618,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) if (alloc_apic_access_page(kvm) != 0) goto free_vmcs; - if (vm_need_ept()) + if (enable_ept) if (alloc_identity_pagetable(kvm) != 0) goto free_vmcs; @@ -3631,9 +3656,32 @@ static int get_ept_level(void) return VMX_EPT_DEFAULT_GAW + 1; } -static int vmx_get_mt_mask_shift(void) +static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { - return VMX_EPT_MT_EPTE_SHIFT; + u64 ret; + + /* For VT-d and EPT combination + * 1. MMIO: always map as UC + * 2. EPT with VT-d: + * a. VT-d without snooping control feature: can't guarantee the + * result, try to trust guest. + * b. VT-d with snooping control feature: snooping control feature of + * VT-d engine can guarantee the cache correctness. Just set it + * to WB to keep consistent with host. So the same as item 3. + * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep + * consistent with host MTRR + */ + if (is_mmio) + ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; + else if (vcpu->kvm->arch.iommu_domain && + !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) + ret = kvm_get_guest_memory_type(vcpu, gfn) << + VMX_EPT_MT_EPTE_SHIFT; + else + ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) + | VMX_EPT_IGMT_BIT; + + return ret; } static struct kvm_x86_ops vmx_x86_ops = { @@ -3644,7 +3692,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .check_processor_compatibility = vmx_check_processor_compat, .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, - .cpu_has_accelerated_tpr = cpu_has_vmx_virtualize_apic_accesses, + .cpu_has_accelerated_tpr = report_flexpriority, .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, @@ -3678,78 +3726,82 @@ static struct kvm_x86_ops vmx_x86_ops = { .tlb_flush = vmx_flush_tlb, .run = vmx_vcpu_run, - .handle_exit = kvm_handle_exit, + .handle_exit = vmx_handle_exit, .skip_emulated_instruction = skip_emulated_instruction, + .set_interrupt_shadow = vmx_set_interrupt_shadow, + .get_interrupt_shadow = vmx_get_interrupt_shadow, .patch_hypercall = vmx_patch_hypercall, - .get_irq = vmx_get_irq, .set_irq = vmx_inject_irq, + .set_nmi = vmx_inject_nmi, .queue_exception = vmx_queue_exception, - .exception_injected = vmx_exception_injected, - .inject_pending_irq = vmx_intr_assist, - .inject_pending_vectors = do_interrupt_requests, + .interrupt_allowed = vmx_interrupt_allowed, + .nmi_allowed = vmx_nmi_allowed, + .enable_nmi_window = enable_nmi_window, + .enable_irq_window = enable_irq_window, + .update_cr8_intercept = update_cr8_intercept, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, - .get_mt_mask_shift = vmx_get_mt_mask_shift, + .get_mt_mask = vmx_get_mt_mask, }; static int __init vmx_init(void) { - void *va; int r; - vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_io_bitmap_a) return -ENOMEM; - vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_io_bitmap_b) { r = -ENOMEM; goto out; } - vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); - if (!vmx_msr_bitmap) { + vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_legacy) { r = -ENOMEM; goto out1; } + vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode) { + r = -ENOMEM; + goto out2; + } + /* * Allow direct access to the PC debug port (it is often used for I/O * delays, but the vmexits simply slow things down). */ - va = kmap(vmx_io_bitmap_a); - memset(va, 0xff, PAGE_SIZE); - clear_bit(0x80, va); - kunmap(vmx_io_bitmap_a); + memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); + clear_bit(0x80, vmx_io_bitmap_a); - va = kmap(vmx_io_bitmap_b); - memset(va, 0xff, PAGE_SIZE); - kunmap(vmx_io_bitmap_b); + memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); - va = kmap(vmx_msr_bitmap); - memset(va, 0xff, PAGE_SIZE); - kunmap(vmx_msr_bitmap); + memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); + memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); if (r) - goto out2; + goto out3; - vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE); - vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE); - vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS); - vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); - vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); + vmx_disable_intercept_for_msr(MSR_FS_BASE, false); + vmx_disable_intercept_for_msr(MSR_GS_BASE, false); + vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); + vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); - if (vm_need_ept()) { + if (enable_ept) { bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, - VMX_EPT_EXECUTABLE_MASK, - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); } else kvm_disable_tdp(); @@ -3761,20 +3813,23 @@ static int __init vmx_init(void) return 0; +out3: + free_page((unsigned long)vmx_msr_bitmap_longmode); out2: - __free_page(vmx_msr_bitmap); + free_page((unsigned long)vmx_msr_bitmap_legacy); out1: - __free_page(vmx_io_bitmap_b); + free_page((unsigned long)vmx_io_bitmap_b); out: - __free_page(vmx_io_bitmap_a); + free_page((unsigned long)vmx_io_bitmap_a); return r; } static void __exit vmx_exit(void) { - __free_page(vmx_msr_bitmap); - __free_page(vmx_io_bitmap_b); - __free_page(vmx_io_bitmap_a); + free_page((unsigned long)vmx_msr_bitmap_legacy); + free_page((unsigned long)vmx_msr_bitmap_longmode); + free_page((unsigned long)vmx_io_bitmap_b); + free_page((unsigned long)vmx_io_bitmap_a); kvm_exit(); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3944e917e79..249540f9851 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -91,7 +91,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { "hypercalls", VCPU_STAT(hypercalls) }, { "request_irq", VCPU_STAT(request_irq_exits) }, - { "request_nmi", VCPU_STAT(request_nmi_exits) }, { "irq_exits", VCPU_STAT(irq_exits) }, { "host_state_reload", VCPU_STAT(host_state_reload) }, { "efer_reload", VCPU_STAT(efer_reload) }, @@ -108,7 +107,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_recycled", VM_STAT(mmu_recycled) }, { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, - { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages) }, { NULL } @@ -234,7 +232,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) goto out; } for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { - if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { + if (is_present_pte(pdpte[i]) && + (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { ret = 0; goto out; } @@ -321,7 +320,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) kvm_x86_ops->set_cr0(vcpu, cr0); vcpu->arch.cr0 = cr0; - kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); return; } @@ -370,7 +368,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_x86_ops->set_cr4(vcpu, cr4); vcpu->arch.cr4 = cr4; vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; - kvm_mmu_sync_global(vcpu); kvm_mmu_reset_context(vcpu); } EXPORT_SYMBOL_GPL(kvm_set_cr4); @@ -523,6 +520,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) efer |= vcpu->arch.shadow_efer & EFER_LMA; vcpu->arch.shadow_efer = efer; + + vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; + kvm_mmu_reset_context(vcpu); } void kvm_enable_efer_bits(u64 mask) @@ -630,14 +630,17 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) unsigned long flags; struct kvm_vcpu_arch *vcpu = &v->arch; void *shared_kaddr; + unsigned long this_tsc_khz; if ((!vcpu->time_page)) return; - if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { - kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); - vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); + this_tsc_khz = get_cpu_var(cpu_tsc_khz); + if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) { + kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = this_tsc_khz; } + put_cpu_var(cpu_tsc_khz); /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); @@ -893,6 +896,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: case MSR_VM_HSAVE_PA: + case MSR_P6_EVNTSEL0: + case MSR_P6_EVNTSEL1: data = 0; break; case MSR_MTRRcap: @@ -1024,6 +1029,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_SYNC_MMU: case KVM_CAP_REINJECT_CONTROL: case KVM_CAP_IRQ_INJECT_STATUS: + case KVM_CAP_ASSIGN_DEV_IRQ: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -1241,41 +1247,53 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->flags = 0; } +#define F(x) bit(X86_FEATURE_##x) + static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, u32 index, int *nent, int maxnent) { - const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | - bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | - bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | - bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | - bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | - bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) | - bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | - bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) | - bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) | - bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP); - const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) | - bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | - bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | - bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | - bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | - bit(X86_FEATURE_PGE) | - bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | - bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) | - bit(X86_FEATURE_SYSCALL) | - (is_efer_nx() ? bit(X86_FEATURE_NX) : 0) | + unsigned f_nx = is_efer_nx() ? F(NX) : 0; #ifdef CONFIG_X86_64 - bit(X86_FEATURE_LM) | + unsigned f_lm = F(LM); +#else + unsigned f_lm = 0; #endif - bit(X86_FEATURE_FXSR_OPT) | - bit(X86_FEATURE_MMXEXT) | - bit(X86_FEATURE_3DNOWEXT) | - bit(X86_FEATURE_3DNOW); - const u32 kvm_supported_word3_x86_features = - bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); + + /* cpuid 1.edx */ + const u32 kvm_supported_word0_x86_features = + F(FPU) | F(VME) | F(DE) | F(PSE) | + F(TSC) | F(MSR) | F(PAE) | F(MCE) | + F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | + F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | + F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | + 0 /* Reserved, DS, ACPI */ | F(MMX) | + F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | + 0 /* HTT, TM, Reserved, PBE */; + /* cpuid 0x80000001.edx */ + const u32 kvm_supported_word1_x86_features = + F(FPU) | F(VME) | F(DE) | F(PSE) | + F(TSC) | F(MSR) | F(PAE) | F(MCE) | + F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | + F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | + F(PAT) | F(PSE36) | 0 /* Reserved */ | + f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | + F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ | + 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); + /* cpuid 1.ecx */ + const u32 kvm_supported_word4_x86_features = + F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | + 0 /* DS-CPL, VMX, SMX, EST */ | + 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | + 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | + 0 /* Reserved, DCA */ | F(XMM4_1) | + F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) | + 0 /* Reserved, XSAVE, OSXSAVE */; + /* cpuid 0x80000001.ecx */ const u32 kvm_supported_word6_x86_features = - bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | - bit(X86_FEATURE_SVM); + F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | + F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | + F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | + 0 /* SKINIT */ | 0 /* WDT */; /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -1288,7 +1306,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, break; case 1: entry->edx &= kvm_supported_word0_x86_features; - entry->ecx &= kvm_supported_word3_x86_features; + entry->ecx &= kvm_supported_word4_x86_features; break; /* function 2 entries are STATEFUL. That is, repeated cpuid commands * may return different values. This forces us to get_cpu() before @@ -1350,6 +1368,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, put_cpu(); } +#undef F + static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { @@ -1421,8 +1441,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, return -ENXIO; vcpu_load(vcpu); - set_bit(irq->irq, vcpu->arch.irq_pending); - set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary); + kvm_queue_interrupt(vcpu, irq->irq, false); vcpu_put(vcpu); @@ -1584,8 +1603,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EINVAL; } out: - if (lapic) - kfree(lapic); + kfree(lapic); return r; } @@ -1606,10 +1624,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return -EINVAL; down_write(&kvm->slots_lock); + spin_lock(&kvm->mmu_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; + spin_unlock(&kvm->mmu_lock); up_write(&kvm->slots_lock); return 0; } @@ -1785,7 +1805,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { + spin_lock(&kvm->mmu_lock); kvm_mmu_slot_remove_write_access(kvm, log->slot); + spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); memslot = &kvm->memslots[log->slot]; n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; @@ -2360,7 +2382,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, u16 error_code, int emulation_type) { - int r; + int r, shadow_mask; struct decode_cache *c; kvm_clear_exception_queue(vcpu); @@ -2408,7 +2430,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu, } } + if (emulation_type & EMULTYPE_SKIP) { + kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip); + return EMULATE_DONE; + } + r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); + shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; + + if (r == 0) + kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); if (vcpu->arch.pio.string) return EMULATE_DO_MMIO; @@ -2761,7 +2792,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_nonpresent_ptes(0ull, 0ull); kvm_mmu_set_base_ptes(PT_PRESENT_MASK); kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, - PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); + PT_DIRTY_MASK, PT64_NX_MASK, 0); for_each_possible_cpu(cpu) per_cpu(cpu_tsc_khz, cpu) = tsc_khz; @@ -3012,6 +3043,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, return best; } +int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best) + return best->eax & 0xff; + return 36; +} + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 function, index; @@ -3048,10 +3089,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { - return (!vcpu->arch.irq_summary && + return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && kvm_run->request_interrupt_window && - vcpu->arch.interrupt_window_open && - (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); + kvm_arch_interrupt_allowed(vcpu)); } static void post_kvm_run_save(struct kvm_vcpu *vcpu, @@ -3064,8 +3104,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, kvm_run->ready_for_interrupt_injection = 1; else kvm_run->ready_for_interrupt_injection = - (vcpu->arch.interrupt_window_open && - vcpu->arch.irq_summary == 0); + kvm_arch_interrupt_allowed(vcpu) && + !kvm_cpu_has_interrupt(vcpu) && + !kvm_event_needs_reinjection(vcpu); } static void vapic_enter(struct kvm_vcpu *vcpu) @@ -3094,9 +3135,63 @@ static void vapic_exit(struct kvm_vcpu *vcpu) up_read(&vcpu->kvm->slots_lock); } +static void update_cr8_intercept(struct kvm_vcpu *vcpu) +{ + int max_irr, tpr; + + if (!kvm_x86_ops->update_cr8_intercept) + return; + + if (!vcpu->arch.apic->vapic_addr) + max_irr = kvm_lapic_find_highest_irr(vcpu); + else + max_irr = -1; + + if (max_irr != -1) + max_irr >>= 4; + + tpr = kvm_lapic_get_cr8(vcpu); + + kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); +} + +static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + kvm_x86_ops->set_interrupt_shadow(vcpu, 0); + + /* try to reinject previous events if any */ + if (vcpu->arch.nmi_injected) { + kvm_x86_ops->set_nmi(vcpu); + return; + } + + if (vcpu->arch.interrupt.pending) { + kvm_x86_ops->set_irq(vcpu); + return; + } + + /* try to inject new event if pending */ + if (vcpu->arch.nmi_pending) { + if (kvm_x86_ops->nmi_allowed(vcpu)) { + vcpu->arch.nmi_pending = false; + vcpu->arch.nmi_injected = true; + kvm_x86_ops->set_nmi(vcpu); + } + } else if (kvm_cpu_has_interrupt(vcpu)) { + if (kvm_x86_ops->interrupt_allowed(vcpu)) { + kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), + false); + kvm_x86_ops->set_irq(vcpu); + } + } +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; + bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && + kvm_run->request_interrupt_window; if (vcpu->requests) if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) @@ -3128,9 +3223,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } } - clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); - kvm_inject_pending_timer_irqs(vcpu); - preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); @@ -3138,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) local_irq_disable(); + clear_bit(KVM_REQ_KICK, &vcpu->requests); + smp_mb__after_clear_bit(); + if (vcpu->requests || need_resched() || signal_pending(current)) { local_irq_enable(); preempt_enable(); @@ -3145,21 +3240,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) goto out; } - vcpu->guest_mode = 1; - /* - * Make sure that guest_mode assignment won't happen after - * testing the pending IRQ vector bitmap. - */ - smp_wmb(); - if (vcpu->arch.exception.pending) __queue_exception(vcpu); - else if (irqchip_in_kernel(vcpu->kvm)) - kvm_x86_ops->inject_pending_irq(vcpu); else - kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); + inject_pending_irq(vcpu, kvm_run); - kvm_lapic_sync_to_vapic(vcpu); + /* enable NMI/IRQ window open exits if needed */ + if (vcpu->arch.nmi_pending) + kvm_x86_ops->enable_nmi_window(vcpu); + else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) + kvm_x86_ops->enable_irq_window(vcpu); + + if (kvm_lapic_enabled(vcpu)) { + update_cr8_intercept(vcpu); + kvm_lapic_sync_to_vapic(vcpu); + } up_read(&vcpu->kvm->slots_lock); @@ -3193,7 +3288,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) set_debugreg(vcpu->arch.host_dr6, 6); set_debugreg(vcpu->arch.host_dr7, 7); - vcpu->guest_mode = 0; + set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); ++vcpu->stat.exits; @@ -3220,8 +3315,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) profile_hit(KVM_PROFILING, (void *)rip); } - if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) - vcpu->arch.exception.pending = false; kvm_lapic_sync_from_vapic(vcpu); @@ -3230,6 +3323,7 @@ out: return r; } + static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int r; @@ -3256,29 +3350,42 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) kvm_vcpu_block(vcpu); down_read(&vcpu->kvm->slots_lock); if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) - if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) + { + switch(vcpu->arch.mp_state) { + case KVM_MP_STATE_HALTED: vcpu->arch.mp_state = - KVM_MP_STATE_RUNNABLE; - if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) - r = -EINTR; + KVM_MP_STATE_RUNNABLE; + case KVM_MP_STATE_RUNNABLE: + break; + case KVM_MP_STATE_SIPI_RECEIVED: + default: + r = -EINTR; + break; + } + } } - if (r > 0) { - if (dm_request_for_irq_injection(vcpu, kvm_run)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.request_irq_exits; - } - if (signal_pending(current)) { - r = -EINTR; - kvm_run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - } - if (need_resched()) { - up_read(&vcpu->kvm->slots_lock); - kvm_resched(vcpu); - down_read(&vcpu->kvm->slots_lock); - } + if (r <= 0) + break; + + clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); + if (kvm_cpu_has_pending_timer(vcpu)) + kvm_inject_pending_timer_irqs(vcpu); + + if (dm_request_for_irq_injection(vcpu, kvm_run)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.request_irq_exits; + } + if (signal_pending(current)) { + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + ++vcpu->stat.signal_exits; + } + if (need_resched()) { + up_read(&vcpu->kvm->slots_lock); + kvm_resched(vcpu); + down_read(&vcpu->kvm->slots_lock); } } @@ -3442,7 +3549,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct descriptor_table dt; - int pending_vec; vcpu_load(vcpu); @@ -3472,16 +3578,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->efer = vcpu->arch.shadow_efer; sregs->apic_base = kvm_get_apic_base(vcpu); - if (irqchip_in_kernel(vcpu->kvm)) { - memset(sregs->interrupt_bitmap, 0, - sizeof sregs->interrupt_bitmap); - pending_vec = kvm_x86_ops->get_irq(vcpu); - if (pending_vec >= 0) - set_bit(pending_vec, - (unsigned long *)sregs->interrupt_bitmap); - } else - memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending, - sizeof sregs->interrupt_bitmap); + memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); + + if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) + set_bit(vcpu->arch.interrupt.nr, + (unsigned long *)sregs->interrupt_bitmap); vcpu_put(vcpu); @@ -3688,7 +3789,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); - tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); } static int load_state_from_tss32(struct kvm_vcpu *vcpu, @@ -3785,8 +3885,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, } static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, - u32 old_tss_base, - struct desc_struct *nseg_desc) + u16 old_tss_sel, u32 old_tss_base, + struct desc_struct *nseg_desc) { struct tss_segment_16 tss_segment_16; int ret = 0; @@ -3805,6 +3905,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, &tss_segment_16, sizeof tss_segment_16)) goto out; + if (old_tss_sel != 0xffff) { + tss_segment_16.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_16.prev_task_link, + sizeof tss_segment_16.prev_task_link)) + goto out; + } + if (load_state_from_tss16(vcpu, &tss_segment_16)) goto out; @@ -3814,7 +3924,7 @@ out: } static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, - u32 old_tss_base, + u16 old_tss_sel, u32 old_tss_base, struct desc_struct *nseg_desc) { struct tss_segment_32 tss_segment_32; @@ -3834,6 +3944,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, &tss_segment_32, sizeof tss_segment_32)) goto out; + if (old_tss_sel != 0xffff) { + tss_segment_32.prev_task_link = old_tss_sel; + + if (kvm_write_guest(vcpu->kvm, + get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_32.prev_task_link, + sizeof tss_segment_32.prev_task_link)) + goto out; + } + if (load_state_from_tss32(vcpu, &tss_segment_32)) goto out; @@ -3887,14 +4007,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); } - kvm_x86_ops->skip_emulated_instruction(vcpu); + /* set back link to prev task only if NT bit is set in eflags + note that old_tss_sel is not used afetr this point */ + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) + old_tss_sel = 0xffff; + + /* set back link to prev task only if NT bit is set in eflags + note that old_tss_sel is not used afetr this point */ + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) + old_tss_sel = 0xffff; if (nseg_desc.type & 8) - ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, - &nseg_desc); + ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); else - ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, - &nseg_desc); + ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, + old_tss_base, &nseg_desc); if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { u32 eflags = kvm_x86_ops->get_rflags(vcpu); @@ -3920,7 +4048,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { int mmu_reset_needed = 0; - int i, pending_vec, max_bits; + int pending_vec, max_bits; struct descriptor_table dt; vcpu_load(vcpu); @@ -3934,7 +4062,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = sregs->cr2; mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; - vcpu->arch.cr3 = sregs->cr3; + + down_read(&vcpu->kvm->slots_lock); + if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT)) + vcpu->arch.cr3 = sregs->cr3; + else + set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); + up_read(&vcpu->kvm->slots_lock); kvm_set_cr8(vcpu, sregs->cr8); @@ -3956,25 +4090,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, if (mmu_reset_needed) kvm_mmu_reset_context(vcpu); - if (!irqchip_in_kernel(vcpu->kvm)) { - memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap, - sizeof vcpu->arch.irq_pending); - vcpu->arch.irq_summary = 0; - for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i) - if (vcpu->arch.irq_pending[i]) - __set_bit(i, &vcpu->arch.irq_summary); - } else { - max_bits = (sizeof sregs->interrupt_bitmap) << 3; - pending_vec = find_first_bit( - (const unsigned long *)sregs->interrupt_bitmap, - max_bits); - /* Only pending external irq is handled here */ - if (pending_vec < max_bits) { - kvm_x86_ops->set_irq(vcpu, pending_vec); - pr_debug("Set back pending irq %d\n", - pending_vec); - } - kvm_pic_clear_isr_ack(vcpu->kvm); + max_bits = (sizeof sregs->interrupt_bitmap) << 3; + pending_vec = find_first_bit( + (const unsigned long *)sregs->interrupt_bitmap, max_bits); + if (pending_vec < max_bits) { + kvm_queue_interrupt(vcpu, pending_vec, false); + pr_debug("Set back pending irq %d\n", pending_vec); + if (irqchip_in_kernel(vcpu->kvm)) + kvm_pic_clear_isr_ack(vcpu->kvm); } kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); @@ -4308,7 +4431,6 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&kvm->arch.oos_global_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ @@ -4411,12 +4533,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, } } + spin_lock(&kvm->mmu_lock); if (!kvm->arch.n_requested_mmu_pages) { unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); } kvm_mmu_slot_remove_write_access(kvm, mem->slot); + spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); return 0; @@ -4425,6 +4549,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, void kvm_arch_flush_shadow(struct kvm *kvm) { kvm_mmu_zap_all(kvm); + kvm_reload_remote_mmus(kvm); } int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) @@ -4434,28 +4559,24 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) || vcpu->arch.nmi_pending; } -static void vcpu_kick_intr(void *info) -{ -#ifdef DEBUG - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; - printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); -#endif -} - void kvm_vcpu_kick(struct kvm_vcpu *vcpu) { - int ipi_pcpu = vcpu->cpu; - int cpu = get_cpu(); + int me; + int cpu = vcpu->cpu; if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); ++vcpu->stat.halt_wakeup; } - /* - * We may be called synchronously with irqs disabled in guest mode, - * So need not to call smp_call_function_single() in that case. - */ - if (vcpu->guest_mode && vcpu->cpu != cpu) - smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); + + me = get_cpu(); + if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) + if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) + smp_send_reschedule(cpu); put_cpu(); } + +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) +{ + return kvm_x86_ops->interrupt_allowed(vcpu); +} diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 6a4be78a738..4c8e10af78e 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) vcpu->arch.exception.pending = false; } -static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) +static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, + bool soft) { vcpu->arch.interrupt.pending = true; + vcpu->arch.interrupt.soft = soft; vcpu->arch.interrupt.nr = vector; } @@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) vcpu->arch.interrupt.pending = false; } +static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending || + vcpu->arch.nmi_injected; +} + +static inline bool kvm_exception_is_soft(unsigned int nr) +{ + return (nr == BP_VECTOR) || (nr == OF_VECTOR); +} #endif diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ca91749d208..c1b6c232e02 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c @@ -59,13 +59,14 @@ #define SrcImm (5<<4) /* Immediate operand. */ #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ #define SrcOne (7<<4) /* Implied '1' */ -#define SrcMask (7<<4) +#define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ +#define SrcMask (0xf<<4) /* Generic ModRM decode. */ -#define ModRM (1<<7) +#define ModRM (1<<8) /* Destination is only written; never read. */ -#define Mov (1<<8) -#define BitOp (1<<9) -#define MemAbs (1<<10) /* Memory operand is absolute displacement */ +#define Mov (1<<9) +#define BitOp (1<<10) +#define MemAbs (1<<11) /* Memory operand is absolute displacement */ #define String (1<<12) /* String instruction (rep capable) */ #define Stack (1<<13) /* Stack instruction (push/pop) */ #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ @@ -76,6 +77,7 @@ #define Src2CL (1<<29) #define Src2ImmByte (2<<29) #define Src2One (3<<29) +#define Src2Imm16 (4<<29) #define Src2Mask (7<<29) enum { @@ -135,11 +137,11 @@ static u32 opcode_table[256] = { SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ /* 0x70 - 0x77 */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, /* 0x78 - 0x7F */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, + SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, /* 0x80 - 0x87 */ Group | Group1_80, Group | Group1_81, Group | Group1_82, Group | Group1_83, @@ -153,7 +155,8 @@ static u32 opcode_table[256] = { /* 0x90 - 0x97 */ DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, /* 0x98 - 0x9F */ - 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, + 0, 0, SrcImm | Src2Imm16, 0, + ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, @@ -178,7 +181,8 @@ static u32 opcode_table[256] = { 0, ImplicitOps | Stack, 0, 0, ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, /* 0xC8 - 0xCF */ - 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0, + 0, 0, 0, ImplicitOps | Stack, + ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, /* 0xD0 - 0xD7 */ ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, @@ -187,11 +191,11 @@ static u32 opcode_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xE7 */ 0, 0, 0, 0, - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, + ByteOp | SrcImmUByte, SrcImmUByte, + ByteOp | SrcImmUByte, SrcImmUByte, /* 0xE8 - 0xEF */ - ImplicitOps | Stack, SrcImm | ImplicitOps, - ImplicitOps, SrcImmByte | ImplicitOps, + SrcImm | Stack, SrcImm | ImplicitOps, + SrcImm | Src2Imm16, SrcImmByte | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* 0xF0 - 0xF7 */ @@ -230,10 +234,8 @@ static u32 twobyte_table[256] = { /* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 - 0x8F */ - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, - ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, + SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, + SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 - 0xA7 */ @@ -1044,10 +1046,14 @@ done_prefixes: } break; case SrcImmByte: + case SrcImmUByte: c->src.type = OP_IMM; c->src.ptr = (unsigned long *)c->eip; c->src.bytes = 1; - c->src.val = insn_fetch(s8, 1, c->eip); + if ((c->d & SrcMask) == SrcImmByte) + c->src.val = insn_fetch(s8, 1, c->eip); + else + c->src.val = insn_fetch(u8, 1, c->eip); break; case SrcOne: c->src.bytes = 1; @@ -1072,6 +1078,12 @@ done_prefixes: c->src2.bytes = 1; c->src2.val = insn_fetch(u8, 1, c->eip); break; + case Src2Imm16: + c->src2.type = OP_IMM; + c->src2.ptr = (unsigned long *)c->eip; + c->src2.bytes = 2; + c->src2.val = insn_fetch(u16, 2, c->eip); + break; case Src2One: c->src2.bytes = 1; c->src2.val = 1; @@ -1349,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, return 0; } +void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) +{ + u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); + /* + * an sti; sti; sequence only disable interrupts for the first + * instruction. So, if the last instruction, be it emulated or + * not, left the system with the INT_STI flag enabled, it + * means that the last instruction is an sti. We should not + * leave the flag on in this case. The same goes for mov ss + */ + if (!(int_shadow & mask)) + ctxt->interruptibility = mask; +} + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { @@ -1360,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) int io_dir_in; int rc = 0; + ctxt->interruptibility = 0; + /* Shadow copy of register state. Committed on successful emulation. * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't * modify them. @@ -1531,13 +1559,10 @@ special_insn: return -1; } return 0; - case 0x70 ... 0x7f: /* jcc (short) */ { - int rel = insn_fetch(s8, 1, c->eip); - + case 0x70 ... 0x7f: /* jcc (short) */ if (test_cc(c->b, ctxt->eflags)) - jmp_rel(c, rel); + jmp_rel(c, c->src.val); break; - } case 0x80 ... 0x83: /* Grp1 */ switch (c->modrm_reg) { case 0: @@ -1609,6 +1634,9 @@ special_insn: int err; sel = c->src.val; + if (c->modrm_reg == VCPU_SREG_SS) + toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); + if (c->modrm_reg <= 5) { type_bits = (c->modrm_reg == 1) ? 9 : 1; err = kvm_load_segment_descriptor(ctxt->vcpu, sel, @@ -1769,59 +1797,32 @@ special_insn: break; case 0xe4: /* inb */ case 0xe5: /* in */ - port = insn_fetch(u8, 1, c->eip); + port = c->src.val; io_dir_in = 1; goto do_io; case 0xe6: /* outb */ case 0xe7: /* out */ - port = insn_fetch(u8, 1, c->eip); + port = c->src.val; io_dir_in = 0; goto do_io; case 0xe8: /* call (near) */ { - long int rel; - switch (c->op_bytes) { - case 2: - rel = insn_fetch(s16, 2, c->eip); - break; - case 4: - rel = insn_fetch(s32, 4, c->eip); - break; - default: - DPRINTF("Call: Invalid op_bytes\n"); - goto cannot_emulate; - } + long int rel = c->src.val; c->src.val = (unsigned long) c->eip; jmp_rel(c, rel); - c->op_bytes = c->ad_bytes; emulate_push(ctxt); break; } case 0xe9: /* jmp rel */ goto jmp; - case 0xea: /* jmp far */ { - uint32_t eip; - uint16_t sel; - - switch (c->op_bytes) { - case 2: - eip = insn_fetch(u16, 2, c->eip); - break; - case 4: - eip = insn_fetch(u32, 4, c->eip); - break; - default: - DPRINTF("jmp far: Invalid op_bytes\n"); - goto cannot_emulate; - } - sel = insn_fetch(u16, 2, c->eip); - if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) { + case 0xea: /* jmp far */ + if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, + VCPU_SREG_CS) < 0) { DPRINTF("jmp far: Failed to load CS descriptor\n"); goto cannot_emulate; } - c->eip = eip; + c->eip = c->src.val; break; - } case 0xeb: jmp: /* jmp rel short */ jmp_rel(c, c->src.val); @@ -1865,6 +1866,7 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xfb: /* sti */ + toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); ctxt->eflags |= X86_EFLAGS_IF; c->dst.type = OP_NONE; /* Disable writeback. */ break; @@ -2039,28 +2041,11 @@ twobyte_insn: if (!test_cc(c->b, ctxt->eflags)) c->dst.type = OP_NONE; /* no writeback */ break; - case 0x80 ... 0x8f: /* jnz rel, etc*/ { - long int rel; - - switch (c->op_bytes) { - case 2: - rel = insn_fetch(s16, 2, c->eip); - break; - case 4: - rel = insn_fetch(s32, 4, c->eip); - break; - case 8: - rel = insn_fetch(s64, 8, c->eip); - break; - default: - DPRINTF("jnz: Invalid op_bytes\n"); - goto cannot_emulate; - } + case 0x80 ... 0x8f: /* jnz rel, etc*/ if (test_cc(c->b, ctxt->eflags)) - jmp_rel(c, rel); + jmp_rel(c, c->src.val); c->dst.type = OP_NONE; break; - } case 0xa3: bt: /* bt */ c->dst.type = OP_NONE; diff --git a/block/blk-core.c b/block/blk-core.c index 9475bf99b89..648f15cb41f 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1732,10 +1732,14 @@ static int __end_that_request_first(struct request *req, int error, trace_block_rq_complete(req->q, req); /* - * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual - * sense key with us all the way through + * For fs requests, rq is just carrier of independent bio's + * and each partial completion should be handled separately. + * Reset per-request error on each partial completion. + * + * TODO: tj: This is too subtle. It would be better to let + * low level drivers do what they see fit. */ - if (!blk_pc_request(req)) + if (blk_fs_request(req)) req->errors = 0; if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index af761dc434f..4895f0e0532 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty) /* FIXME: why is this needed. Note don't use ldisc_ref here as the open path is before the ldisc is referencable */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); return 0; @@ -463,7 +463,6 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file * file, clear_bit(HCI_UART_PROTO_SET, &hu->flags); return err; } - tty->low_latency = 1; } else return -EBUSY; break; diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 735bbe2be51..02ecfd5fa61 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -97,6 +97,19 @@ config DEVKMEM kind of kernel debugging operations. When in doubt, say "N". +config BFIN_JTAG_COMM + tristate "Blackfin JTAG Communication" + depends on BLACKFIN + help + Add support for emulating a TTY device over the Blackfin JTAG. + + To compile this driver as a module, choose M here: the + module will be called bfin_jtag_comm. + +config BFIN_JTAG_COMM_CONSOLE + bool "Console on Blackfin JTAG" + depends on BFIN_JTAG_COMM=y + config SERIAL_NONSTANDARD bool "Non-standard serial port support" depends on HAS_IOMEM diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 9caf5b5ad1c..189efcff08c 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_LEGACY_PTYS) += pty.o obj-$(CONFIG_UNIX98_PTYS) += pty.o obj-y += misc.o obj-$(CONFIG_VT) += vt_ioctl.o vc_screen.o selection.o keyboard.o +obj-$(CONFIG_BFIN_JTAG_COMM) += bfin_jtag_comm.o obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o obj-$(CONFIG_HW_CONSOLE) += vt.o defkeymap.o obj-$(CONFIG_AUDIT) += tty_audit.o diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c new file mode 100644 index 00000000000..44c113d5604 --- /dev/null +++ b/drivers/char/bfin_jtag_comm.c @@ -0,0 +1,365 @@ +/* + * TTY over Blackfin JTAG Communication + * + * Copyright 2008-2009 Analog Devices Inc. + * + * Enter bugs at http://blackfin.uclinux.org/ + * + * Licensed under the GPL-2 or later. + */ + +#include <linux/circ_buf.h> +#include <linux/console.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/tty.h> +#include <linux/tty_driver.h> +#include <linux/tty_flip.h> +#include <asm/atomic.h> + +/* See the Debug/Emulation chapter in the HRM */ +#define EMUDOF 0x00000001 /* EMUDAT_OUT full & valid */ +#define EMUDIF 0x00000002 /* EMUDAT_IN full & valid */ +#define EMUDOOVF 0x00000004 /* EMUDAT_OUT overflow */ +#define EMUDIOVF 0x00000008 /* EMUDAT_IN overflow */ + +#define DRV_NAME "bfin-jtag-comm" +#define DEV_NAME "ttyBFJC" + +#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); }) +#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args) + +static inline uint32_t bfin_write_emudat(uint32_t emudat) +{ + __asm__ __volatile__("emudat = %0;" : : "d"(emudat)); + return emudat; +} + +static inline uint32_t bfin_read_emudat(void) +{ + uint32_t emudat; + __asm__ __volatile__("%0 = emudat;" : "=d"(emudat)); + return emudat; +} + +static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d) +{ + return bfin_write_emudat((a << 0) | (b << 8) | (c << 16) | (d << 24)); +} + +#define CIRC_SIZE 2048 /* see comment in tty_io.c:do_tty_write() */ +#define CIRC_MASK (CIRC_SIZE - 1) +#define circ_empty(circ) ((circ)->head == (circ)->tail) +#define circ_free(circ) CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE) +#define circ_cnt(circ) CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE) +#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK]) + +static struct tty_driver *bfin_jc_driver; +static struct task_struct *bfin_jc_kthread; +static struct tty_struct * volatile bfin_jc_tty; +static unsigned long bfin_jc_count; +static DEFINE_MUTEX(bfin_jc_tty_mutex); +static volatile struct circ_buf bfin_jc_write_buf; + +static int +bfin_jc_emudat_manager(void *arg) +{ + uint32_t inbound_len = 0, outbound_len = 0; + + while (!kthread_should_stop()) { + /* no one left to give data to, so sleep */ + if (bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) { + debug("waiting for readers\n"); + __set_current_state(TASK_UNINTERRUPTIBLE); + schedule(); + __set_current_state(TASK_RUNNING); + } + + /* no data available, so just chill */ + if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) { + debug("waiting for data (in_len = %i) (circ: %i %i)\n", + inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head); + if (inbound_len) + schedule(); + else + schedule_timeout_interruptible(HZ); + continue; + } + + /* if incoming data is ready, eat it */ + if (bfin_read_DBGSTAT() & EMUDIF) { + struct tty_struct *tty; + mutex_lock(&bfin_jc_tty_mutex); + tty = (struct tty_struct *)bfin_jc_tty; + if (tty != NULL) { + uint32_t emudat = bfin_read_emudat(); + if (inbound_len == 0) { + debug("incoming length: 0x%08x\n", emudat); + inbound_len = emudat; + } else { + size_t num_chars = (4 <= inbound_len ? 4 : inbound_len); + debug(" incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars); + inbound_len -= num_chars; + tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars); + tty_flip_buffer_push(tty); + } + } + mutex_unlock(&bfin_jc_tty_mutex); + } + + /* if outgoing data is ready, post it */ + if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) { + if (outbound_len == 0) { + outbound_len = circ_cnt(&bfin_jc_write_buf); + bfin_write_emudat(outbound_len); + debug("outgoing length: 0x%08x\n", outbound_len); + } else { + struct tty_struct *tty; + int tail = bfin_jc_write_buf.tail; + size_t ate = (4 <= outbound_len ? 4 : outbound_len); + uint32_t emudat = + bfin_write_emudat_chars( + circ_byte(&bfin_jc_write_buf, tail + 0), + circ_byte(&bfin_jc_write_buf, tail + 1), + circ_byte(&bfin_jc_write_buf, tail + 2), + circ_byte(&bfin_jc_write_buf, tail + 3) + ); + bfin_jc_write_buf.tail += ate; + outbound_len -= ate; + mutex_lock(&bfin_jc_tty_mutex); + tty = (struct tty_struct *)bfin_jc_tty; + if (tty) + tty_wakeup(tty); + mutex_unlock(&bfin_jc_tty_mutex); + debug(" outgoing data: 0x%08x (pushing %zu)\n", emudat, ate); + } + } + } + + __set_current_state(TASK_RUNNING); + return 0; +} + +static int +bfin_jc_open(struct tty_struct *tty, struct file *filp) +{ + mutex_lock(&bfin_jc_tty_mutex); + debug("open %lu\n", bfin_jc_count); + ++bfin_jc_count; + bfin_jc_tty = tty; + wake_up_process(bfin_jc_kthread); + mutex_unlock(&bfin_jc_tty_mutex); + return 0; +} + +static void +bfin_jc_close(struct tty_struct *tty, struct file *filp) +{ + mutex_lock(&bfin_jc_tty_mutex); + debug("close %lu\n", bfin_jc_count); + if (--bfin_jc_count == 0) + bfin_jc_tty = NULL; + wake_up_process(bfin_jc_kthread); + mutex_unlock(&bfin_jc_tty_mutex); +} + +/* XXX: we dont handle the put_char() case where we must handle count = 1 */ +static int +bfin_jc_circ_write(const unsigned char *buf, int count) +{ + int i; + count = min(count, circ_free(&bfin_jc_write_buf)); + debug("going to write chunk of %i bytes\n", count); + for (i = 0; i < count; ++i) + circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i]; + bfin_jc_write_buf.head += i; + return i; +} + +#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE +# define acquire_console_sem() +# define release_console_sem() +#endif +static int +bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count) +{ + int i; + acquire_console_sem(); + i = bfin_jc_circ_write(buf, count); + release_console_sem(); + wake_up_process(bfin_jc_kthread); + return i; +} + +static void +bfin_jc_flush_chars(struct tty_struct *tty) +{ + wake_up_process(bfin_jc_kthread); +} + +static int +bfin_jc_write_room(struct tty_struct *tty) +{ + return circ_free(&bfin_jc_write_buf); +} + +static int +bfin_jc_chars_in_buffer(struct tty_struct *tty) +{ + return circ_cnt(&bfin_jc_write_buf); +} + +static void +bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout) +{ + unsigned long expire = jiffies + timeout; + while (!circ_empty(&bfin_jc_write_buf)) { + if (signal_pending(current)) + break; + if (time_after(jiffies, expire)) + break; + } +} + +static struct tty_operations bfin_jc_ops = { + .open = bfin_jc_open, + .close = bfin_jc_close, + .write = bfin_jc_write, + /*.put_char = bfin_jc_put_char,*/ + .flush_chars = bfin_jc_flush_chars, + .write_room = bfin_jc_write_room, + .chars_in_buffer = bfin_jc_chars_in_buffer, + .wait_until_sent = bfin_jc_wait_until_sent, +}; + +static int __init bfin_jc_init(void) +{ + int ret; + + bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME); + if (IS_ERR(bfin_jc_kthread)) + return PTR_ERR(bfin_jc_kthread); + + ret = -ENOMEM; + + bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0; + bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL); + if (!bfin_jc_write_buf.buf) + goto err; + + bfin_jc_driver = alloc_tty_driver(1); + if (!bfin_jc_driver) + goto err; + + bfin_jc_driver->owner = THIS_MODULE; + bfin_jc_driver->driver_name = DRV_NAME; + bfin_jc_driver->name = DEV_NAME; + bfin_jc_driver->type = TTY_DRIVER_TYPE_SERIAL; + bfin_jc_driver->subtype = SERIAL_TYPE_NORMAL; + bfin_jc_driver->init_termios = tty_std_termios; + tty_set_operations(bfin_jc_driver, &bfin_jc_ops); + + ret = tty_register_driver(bfin_jc_driver); + if (ret) + goto err; + + pr_init(KERN_INFO DRV_NAME ": initialized\n"); + + return 0; + + err: + put_tty_driver(bfin_jc_driver); + kfree(bfin_jc_write_buf.buf); + kthread_stop(bfin_jc_kthread); + return ret; +} +module_init(bfin_jc_init); + +static void __exit bfin_jc_exit(void) +{ + kthread_stop(bfin_jc_kthread); + kfree(bfin_jc_write_buf.buf); + tty_unregister_driver(bfin_jc_driver); + put_tty_driver(bfin_jc_driver); +} +module_exit(bfin_jc_exit); + +#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK) +static void +bfin_jc_straight_buffer_write(const char *buf, unsigned count) +{ + unsigned ate = 0; + while (bfin_read_DBGSTAT() & EMUDOF) + continue; + bfin_write_emudat(count); + while (ate < count) { + while (bfin_read_DBGSTAT() & EMUDOF) + continue; + bfin_write_emudat_chars(buf[ate], buf[ate+1], buf[ate+2], buf[ate+3]); + ate += 4; + } +} +#endif + +#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE +static void +bfin_jc_console_write(struct console *co, const char *buf, unsigned count) +{ + if (bfin_jc_kthread == NULL) + bfin_jc_straight_buffer_write(buf, count); + else + bfin_jc_circ_write(buf, count); +} + +static struct tty_driver * +bfin_jc_console_device(struct console *co, int *index) +{ + *index = co->index; + return bfin_jc_driver; +} + +static struct console bfin_jc_console = { + .name = DEV_NAME, + .write = bfin_jc_console_write, + .device = bfin_jc_console_device, + .flags = CON_ANYTIME | CON_PRINTBUFFER, + .index = -1, +}; + +static int __init bfin_jc_console_init(void) +{ + register_console(&bfin_jc_console); + return 0; +} +console_initcall(bfin_jc_console_init); +#endif + +#ifdef CONFIG_EARLY_PRINTK +static void __init +bfin_jc_early_write(struct console *co, const char *buf, unsigned int count) +{ + bfin_jc_straight_buffer_write(buf, count); +} + +static struct __initdata console bfin_jc_early_console = { + .name = "early_BFJC", + .write = bfin_jc_early_write, + .flags = CON_ANYTIME | CON_PRINTBUFFER, + .index = -1, +}; + +struct console * __init +bfin_jc_early_init(unsigned int port, unsigned int cflag) +{ + return &bfin_jc_early_console; +} +#endif + +MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>"); +MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication"); +MODULE_LICENSE("GPL"); diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index 1fdb9f657d8..f3366d3f06c 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -604,7 +604,6 @@ #define NR_PORTS 256 -#define ZE_V1_NPORTS 64 #define ZO_V1 0 #define ZO_V2 1 #define ZE_V1 2 @@ -663,18 +662,6 @@ static void cy_throttle(struct tty_struct *tty); static void cy_send_xchar(struct tty_struct *tty, char ch); -#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1) - -#define Z_FPGA_CHECK(card) \ - ((readl(&((struct RUNTIME_9060 __iomem *) \ - ((card).ctl_addr))->init_ctrl) & (1<<17)) != 0) - -#define ISZLOADED(card) (((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \ - ((card).ctl_addr))->mail_box_0)) || \ - Z_FPGA_CHECK(card)) && \ - (ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \ - ((card).base_addr+ID_ADDRESS))->signature))) - #ifndef SERIAL_XMIT_SIZE #define SERIAL_XMIT_SIZE (min(PAGE_SIZE, 4096)) #endif @@ -687,8 +674,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch); #define DRIVER_VERSION 0x02010203 #define RAM_SIZE 0x80000 -#define Z_FPGA_LOADED(X) ((readl(&(X)->init_ctrl) & (1<<17)) != 0) - enum zblock_type { ZBLOCK_PRG = 0, ZBLOCK_FPGA = 1 @@ -883,6 +868,29 @@ static void cyz_rx_restart(unsigned long); static struct timer_list cyz_rx_full_timer[NR_PORTS]; #endif /* CONFIG_CYZ_INTR */ +static inline bool cy_is_Z(struct cyclades_card *card) +{ + return card->num_chips == (unsigned int)-1; +} + +static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr) +{ + return readl(&ctl_addr->init_ctrl) & (1 << 17); +} + +static inline bool cyz_fpga_loaded(struct cyclades_card *card) +{ + return __cyz_fpga_loaded(card->ctl_addr.p9060); +} + +static inline bool cyz_is_loaded(struct cyclades_card *card) +{ + struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS; + + return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) && + readl(&fw_id->signature) == ZFIRM_ID; +} + static inline int serial_paranoia_check(struct cyclades_port *info, char *name, const char *routine) { @@ -1395,19 +1403,15 @@ cyz_fetch_msg(struct cyclades_card *cinfo, unsigned long loc_doorbell; firm_id = cinfo->base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) - return -1; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; - loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *) - (cinfo->ctl_addr))->loc_doorbell); + loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell); if (loc_doorbell) { *cmd = (char)(0xff & loc_doorbell); *channel = readl(&board_ctrl->fwcmd_channel); *param = (__u32) readl(&board_ctrl->fwcmd_param); - cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))-> - loc_doorbell, 0xffffffff); + cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff); return 1; } return 0; @@ -1424,15 +1428,14 @@ cyz_issue_cmd(struct cyclades_card *cinfo, unsigned int index; firm_id = cinfo->base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) + if (!cyz_is_loaded(cinfo)) return -1; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; index = 0; - pci_doorbell = - &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell; + pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell; while ((readl(pci_doorbell) & 0xff) != 0) { if (index++ == 1000) return (int)(readl(pci_doorbell) & 0xff); @@ -1624,10 +1627,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) static struct BOARD_CTRL __iomem *board_ctrl; static struct CH_CTRL __iomem *ch_ctrl; static struct BUF_CTRL __iomem *buf_ctrl; - __u32 channel; + __u32 channel, param, fw_ver; __u8 cmd; - __u32 param; - __u32 hw_ver, fw_ver; int special_count; int delta_count; @@ -1635,8 +1636,6 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo) zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; fw_ver = readl(&board_ctrl->fw_version); - hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))-> - mail_box_0); while (cyz_fetch_msg(cinfo, &channel, &cmd, ¶m) == 1) { special_count = 0; @@ -1737,15 +1736,7 @@ static irqreturn_t cyz_interrupt(int irq, void *dev_id) { struct cyclades_card *cinfo = dev_id; - if (unlikely(cinfo == NULL)) { -#ifdef CY_DEBUG_INTERRUPTS - printk(KERN_DEBUG "cyz_interrupt: spurious interrupt %d\n", - irq); -#endif - return IRQ_NONE; /* spurious interrupt */ - } - - if (unlikely(!ISZLOADED(*cinfo))) { + if (unlikely(!cyz_is_loaded(cinfo))) { #ifdef CY_DEBUG_INTERRUPTS printk(KERN_DEBUG "cyz_interrupt: board not yet loaded " "(IRQ%d).\n", irq); @@ -1785,7 +1776,6 @@ static void cyz_poll(unsigned long arg) struct tty_struct *tty; struct FIRM_ID __iomem *firm_id; struct ZFW_CTRL __iomem *zfw_ctrl; - struct BOARD_CTRL __iomem *board_ctrl; struct BUF_CTRL __iomem *buf_ctrl; unsigned long expires = jiffies + HZ; unsigned int port, card; @@ -1793,19 +1783,17 @@ static void cyz_poll(unsigned long arg) for (card = 0; card < NR_CARDS; card++) { cinfo = &cy_card[card]; - if (!IS_CYC_Z(*cinfo)) + if (!cy_is_Z(cinfo)) continue; - if (!ISZLOADED(*cinfo)) + if (!cyz_is_loaded(cinfo)) continue; firm_id = cinfo->base_addr + ID_ADDRESS; zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); - board_ctrl = &(zfw_ctrl->board_ctrl); /* Skip first polling cycle to avoid racing conditions with the FW */ if (!cinfo->intr_enabled) { - cinfo->nports = (int)readl(&board_ctrl->n_channel); cinfo->intr_enabled = 1; continue; } @@ -1874,7 +1862,7 @@ static int startup(struct cyclades_port *info) set_line_char(info); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -1931,7 +1919,7 @@ static int startup(struct cyclades_port *info) base_addr = card->base_addr; firm_id = base_addr + ID_ADDRESS; - if (!ISZLOADED(*card)) + if (!cyz_is_loaded(card)) return -ENODEV; zfw_ctrl = card->base_addr + @@ -2026,7 +2014,7 @@ static void start_xmit(struct cyclades_port *info) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -2070,7 +2058,7 @@ static void shutdown(struct cyclades_port *info) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -2126,7 +2114,7 @@ static void shutdown(struct cyclades_port *info) #endif firm_id = base_addr + ID_ADDRESS; - if (!ISZLOADED(*card)) + if (!cyz_is_loaded(card)) return; zfw_ctrl = card->base_addr + @@ -2233,7 +2221,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp, #endif info->port.blocked_open++; - if (!IS_CYC_Z(*cinfo)) { + if (!cy_is_Z(cinfo)) { chip = channel >> 2; channel &= 0x03; index = cinfo->bus_index; @@ -2296,7 +2284,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp, base_addr = cinfo->base_addr; firm_id = base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) { + if (!cyz_is_loaded(cinfo)) { __set_current_state(TASK_RUNNING); remove_wait_queue(&info->port.open_wait, &wait); return -EINVAL; @@ -2397,16 +2385,14 @@ static int cy_open(struct tty_struct *tty, struct file *filp) treat it as absent from the system. This will make the user pay attention. */ - if (IS_CYC_Z(*info->card)) { + if (cy_is_Z(info->card)) { struct cyclades_card *cinfo = info->card; struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS; - if (!ISZLOADED(*cinfo)) { - if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *) - (cinfo->ctl_addr))->mail_box_0)) && - Z_FPGA_CHECK(*cinfo)) && - (ZFIRM_HLT == readl( - &firm_id->signature))) { + if (!cyz_is_loaded(cinfo)) { + if (cinfo->hw_ver == ZE_V1 && cyz_fpga_loaded(cinfo) && + readl(&firm_id->signature) == + ZFIRM_HLT) { printk(KERN_ERR "cyc:Cyclades-Z Error: you " "need an external power supply for " "this number of ports.\nFirmware " @@ -2423,18 +2409,13 @@ static int cy_open(struct tty_struct *tty, struct file *filp) interrupts should be enabled as soon as the first open happens to one of its ports. */ if (!cinfo->intr_enabled) { - struct ZFW_CTRL __iomem *zfw_ctrl; - struct BOARD_CTRL __iomem *board_ctrl; - - zfw_ctrl = cinfo->base_addr + - (readl(&firm_id->zfwctrl_addr) & - 0xfffff); - - board_ctrl = &zfw_ctrl->board_ctrl; + u16 intr; /* Enable interrupts on the PLX chip */ - cy_writew(cinfo->ctl_addr + 0x68, - readw(cinfo->ctl_addr + 0x68) | 0x0900); + intr = readw(&cinfo->ctl_addr.p9060-> + intr_ctrl_stat) | 0x0900; + cy_writew(&cinfo->ctl_addr.p9060-> + intr_ctrl_stat, intr); /* Enable interrupts on the FW */ retval = cyz_issue_cmd(cinfo, 0, C_CM_IRQ_ENBL, 0L); @@ -2442,8 +2423,6 @@ static int cy_open(struct tty_struct *tty, struct file *filp) printk(KERN_ERR "cyc:IRQ enable retval " "was %x\n", retval); } - cinfo->nports = - (int)readl(&board_ctrl->n_channel); cinfo->intr_enabled = 1; } } @@ -2556,7 +2535,7 @@ static void cy_wait_until_sent(struct tty_struct *tty, int timeout) #endif card = info->card; channel = (info->line) - (card->first_line); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -2601,7 +2580,7 @@ static void cy_flush_buffer(struct tty_struct *tty) info->xmit_cnt = info->xmit_head = info->xmit_tail = 0; spin_unlock_irqrestore(&card->card_lock, flags); - if (IS_CYC_Z(*card)) { /* If it is a Z card, flush the on-board + if (cy_is_Z(card)) { /* If it is a Z card, flush the on-board buffers as well */ spin_lock_irqsave(&card->card_lock, flags); retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L); @@ -2682,7 +2661,7 @@ static void cy_close(struct tty_struct *tty, struct file *filp) spin_lock_irqsave(&card->card_lock, flags); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { int channel = info->line - card->first_line; int index = card->bus_index; void __iomem *base_addr = card->base_addr + @@ -2902,7 +2881,7 @@ static int cy_chars_in_buffer(struct tty_struct *tty) channel = (info->line) - (card->first_line); #ifdef Z_EXT_CHARS_IN_BUFFER - if (!IS_CYC_Z(cy_card[card])) { + if (!cy_is_Z(card)) { #endif /* Z_EXT_CHARS_IN_BUFFER */ #ifdef CY_DEBUG_IO printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n", @@ -2984,7 +2963,6 @@ static void set_line_char(struct cyclades_port *info) void __iomem *base_addr; int chip, channel, index; unsigned cflag, iflag; - unsigned short chip_number; int baud, baud_rate = 0; int i; @@ -3013,9 +2991,8 @@ static void set_line_char(struct cyclades_port *info) card = info->card; channel = info->line - card->first_line; - chip_number = channel / 4; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { index = card->bus_index; @@ -3233,21 +3210,17 @@ static void set_line_char(struct cyclades_port *info) } else { struct FIRM_ID __iomem *firm_id; struct ZFW_CTRL __iomem *zfw_ctrl; - struct BOARD_CTRL __iomem *board_ctrl; struct CH_CTRL __iomem *ch_ctrl; - struct BUF_CTRL __iomem *buf_ctrl; __u32 sw_flow; int retval; firm_id = card->base_addr + ID_ADDRESS; - if (!ISZLOADED(*card)) + if (!cyz_is_loaded(card)) return; zfw_ctrl = card->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); - board_ctrl = &zfw_ctrl->board_ctrl; ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]); - buf_ctrl = &zfw_ctrl->buf_ctrl[channel]; /* baud rate */ baud = tty_get_baud_rate(info->port.tty); @@ -3457,7 +3430,7 @@ static int get_lsr_info(struct cyclades_port *info, unsigned int __user *value) card = info->card; channel = (info->line) - (card->first_line); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3497,7 +3470,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3523,7 +3496,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file) } else { base_addr = card->base_addr; firm_id = card->base_addr + ID_ADDRESS; - if (ISZLOADED(*card)) { + if (cyz_is_loaded(card)) { zfw_ctrl = card->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; @@ -3566,7 +3539,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file, card = info->card; channel = (info->line) - (card->first_line); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3641,7 +3614,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file, base_addr = card->base_addr; firm_id = card->base_addr + ID_ADDRESS; - if (ISZLOADED(*card)) { + if (cyz_is_loaded(card)) { zfw_ctrl = card->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff); board_ctrl = &zfw_ctrl->board_ctrl; @@ -3713,7 +3686,7 @@ static int cy_break(struct tty_struct *tty, int break_state) card = info->card; spin_lock_irqsave(&card->card_lock, flags); - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { /* Let the transmit ISR take care of this (since it requires stuffing characters into the output stream). */ @@ -3782,7 +3755,7 @@ static int set_threshold(struct cyclades_port *info, unsigned long value) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3810,7 +3783,7 @@ static int get_threshold(struct cyclades_port *info, card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3844,7 +3817,7 @@ static int set_timeout(struct cyclades_port *info, unsigned long value) card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -3867,7 +3840,7 @@ static int get_timeout(struct cyclades_port *info, card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -4121,7 +4094,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch) card = info->card; channel = info->line - card->first_line; - if (IS_CYC_Z(*card)) { + if (cy_is_Z(card)) { if (ch == STOP_CHAR(tty)) cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L); else if (ch == START_CHAR(tty)) @@ -4154,7 +4127,7 @@ static void cy_throttle(struct tty_struct *tty) card = info->card; if (I_IXOFF(tty)) { - if (!IS_CYC_Z(*card)) + if (!cy_is_Z(card)) cy_send_xchar(tty, STOP_CHAR(tty)); else info->throttle = 1; @@ -4162,7 +4135,7 @@ static void cy_throttle(struct tty_struct *tty) if (tty->termios->c_cflag & CRTSCTS) { channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -4219,7 +4192,7 @@ static void cy_unthrottle(struct tty_struct *tty) if (tty->termios->c_cflag & CRTSCTS) { card = info->card; channel = info->line - card->first_line; - if (!IS_CYC_Z(*card)) { + if (!cy_is_Z(card)) { chip = channel >> 2; channel &= 0x03; index = card->bus_index; @@ -4263,7 +4236,7 @@ static void cy_stop(struct tty_struct *tty) cinfo = info->card; channel = info->line - cinfo->first_line; - if (!IS_CYC_Z(*cinfo)) { + if (!cy_is_Z(cinfo)) { index = cinfo->bus_index; chip = channel >> 2; channel &= 0x03; @@ -4296,7 +4269,7 @@ static void cy_start(struct tty_struct *tty) cinfo = info->card; channel = info->line - cinfo->first_line; index = cinfo->bus_index; - if (!IS_CYC_Z(*cinfo)) { + if (!cy_is_Z(cinfo)) { chip = channel >> 2; channel &= 0x03; base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index); @@ -4347,33 +4320,20 @@ static void cy_hangup(struct tty_struct *tty) static int __devinit cy_init_card(struct cyclades_card *cinfo) { struct cyclades_port *info; - u32 uninitialized_var(mailbox); - unsigned int nports, port; + unsigned int port; unsigned short chip_number; - int uninitialized_var(index); spin_lock_init(&cinfo->card_lock); + cinfo->intr_enabled = 0; - if (IS_CYC_Z(*cinfo)) { /* Cyclades-Z */ - mailbox = readl(&((struct RUNTIME_9060 __iomem *) - cinfo->ctl_addr)->mail_box_0); - nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8; - cinfo->intr_enabled = 0; - cinfo->nports = 0; /* Will be correctly set later, after - Z FW is loaded */ - } else { - index = cinfo->bus_index; - nports = cinfo->nports = CyPORTS_PER_CHIP * cinfo->num_chips; - } - - cinfo->ports = kzalloc(sizeof(*cinfo->ports) * nports, GFP_KERNEL); + cinfo->ports = kcalloc(cinfo->nports, sizeof(*cinfo->ports), + GFP_KERNEL); if (cinfo->ports == NULL) { printk(KERN_ERR "Cyclades: cannot allocate ports\n"); - cinfo->nports = 0; return -ENOMEM; } - for (port = cinfo->first_line; port < cinfo->first_line + nports; + for (port = cinfo->first_line; port < cinfo->first_line + cinfo->nports; port++) { info = &cinfo->ports[port - cinfo->first_line]; tty_port_init(&info->port); @@ -4387,9 +4347,9 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) init_completion(&info->shutdown_wait); init_waitqueue_head(&info->delta_msr_wait); - if (IS_CYC_Z(*cinfo)) { + if (cy_is_Z(cinfo)) { info->type = PORT_STARTECH; - if (mailbox == ZO_V1) + if (cinfo->hw_ver == ZO_V1) info->xmit_fifo_size = CYZ_FIFO_SIZE; else info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE; @@ -4398,6 +4358,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) cyz_rx_restart, (unsigned long)info); #endif } else { + int index = cinfo->bus_index; info->type = PORT_CIRRUS; info->xmit_fifo_size = CyMAX_CHAR_FIFO; info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS; @@ -4430,7 +4391,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo) } #ifndef CONFIG_CYZ_INTR - if (IS_CYC_Z(*cinfo) && !timer_pending(&cyz_timerlist)) { + if (cy_is_Z(cinfo) && !timer_pending(&cyz_timerlist)) { mod_timer(&cyz_timerlist, jiffies + 1); #ifdef CY_PCI_DEBUG printk(KERN_DEBUG "Cyclades-Z polling initialized\n"); @@ -4621,11 +4582,12 @@ static int __init cy_detect_isa(void) /* set cy_card */ cy_card[j].base_addr = cy_isa_address; - cy_card[j].ctl_addr = NULL; + cy_card[j].ctl_addr.p9050 = NULL; cy_card[j].irq = (int)cy_isa_irq; cy_card[j].bus_index = 0; cy_card[j].first_line = cy_next_channel; - cy_card[j].num_chips = cy_isa_nchan / 4; + cy_card[j].num_chips = cy_isa_nchan / CyPORTS_PER_CHIP; + cy_card[j].nports = cy_isa_nchan; if (cy_init_card(&cy_card[j])) { cy_card[j].base_addr = NULL; free_irq(cy_isa_irq, &cy_card[j]); @@ -4781,7 +4743,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, struct CUSTOM_REG __iomem *cust = base_addr; struct ZFW_CTRL __iomem *pt_zfwctrl; void __iomem *tmp; - u32 mailbox, status; + u32 mailbox, status, nchan; unsigned int i; int retval; @@ -4793,7 +4755,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, /* Check whether the firmware is already loaded and running. If positive, skip this board */ - if (Z_FPGA_LOADED(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) { + if (__cyz_fpga_loaded(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) { u32 cntval = readl(base_addr + 0x190); udelay(100); @@ -4812,7 +4774,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, mailbox = readl(&ctl_addr->mail_box_0); - if (mailbox == 0 || Z_FPGA_LOADED(ctl_addr)) { + if (mailbox == 0 || __cyz_fpga_loaded(ctl_addr)) { /* stops CPU and set window to beginning of RAM */ cy_writel(&ctl_addr->loc_addr_base, WIN_CREG); cy_writel(&cust->cpu_stop, 0); @@ -4828,7 +4790,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, base_addr); if (retval) goto err_rel; - if (!Z_FPGA_LOADED(ctl_addr)) { + if (!__cyz_fpga_loaded(ctl_addr)) { dev_err(&pdev->dev, "fw upload successful, but fw is " "not loaded\n"); goto err_rel; @@ -4887,7 +4849,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, "system before loading the new FW to the " "Cyclades-Z.\n"); - if (Z_FPGA_LOADED(ctl_addr)) + if (__cyz_fpga_loaded(ctl_addr)) plx_init(pdev, irq, ctl_addr); retval = -EIO; @@ -4902,16 +4864,16 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr), base_addr + readl(&fid->zfwctrl_addr)); + nchan = readl(&pt_zfwctrl->board_ctrl.n_channel); dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n", - readl(&pt_zfwctrl->board_ctrl.fw_version), - readl(&pt_zfwctrl->board_ctrl.n_channel)); + readl(&pt_zfwctrl->board_ctrl.fw_version), nchan); - if (readl(&pt_zfwctrl->board_ctrl.n_channel) == 0) { + if (nchan == 0) { dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please " "check the connection between the Z host card and the " "serial expanders.\n"); - if (Z_FPGA_LOADED(ctl_addr)) + if (__cyz_fpga_loaded(ctl_addr)) plx_init(pdev, irq, ctl_addr); dev_info(&pdev->dev, "Null number of ports detected. Board " @@ -4932,9 +4894,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr, cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) | 0x00030800UL); - plx_init(pdev, irq, ctl_addr); - - return 0; + return nchan; err_rel: release_firmware(fw); err: @@ -4946,7 +4906,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, { void __iomem *addr0 = NULL, *addr2 = NULL; char *card_name = NULL; - u32 mailbox; + u32 uninitialized_var(mailbox); unsigned int device_id, nchan = 0, card_no, i; unsigned char plx_ver; int retval, irq; @@ -5023,11 +4983,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, } /* Disable interrupts on the PLX before resetting it */ - cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900); + cy_writew(&ctl_addr->intr_ctrl_stat, + readw(&ctl_addr->intr_ctrl_stat) & ~0x0900); plx_init(pdev, irq, addr0); - mailbox = (u32)readl(&ctl_addr->mail_box_0); + mailbox = readl(&ctl_addr->mail_box_0); addr2 = ioremap_nocache(pci_resource_start(pdev, 2), mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin); @@ -5038,12 +4999,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, if (mailbox == ZE_V1) { card_name = "Cyclades-Ze"; - - readl(&ctl_addr->mail_box_0); - nchan = ZE_V1_NPORTS; } else { card_name = "Cyclades-8Zo"; - #ifdef CY_PCI_DEBUG if (mailbox == ZO_V1) { cy_writel(&ctl_addr->loc_addr_base, WIN_CREG); @@ -5065,15 +5022,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, */ if ((mailbox == ZO_V1) || (mailbox == ZO_V2)) cy_writel(addr2 + ID_ADDRESS, 0L); - - retval = cyz_load_fw(pdev, addr2, addr0, irq); - if (retval) - goto err_unmap; - /* This must be a Cyclades-8Zo/PCI. The extendable - version will have a different device_id and will - be allocated its maximum number of ports. */ - nchan = 8; } + + retval = cyz_load_fw(pdev, addr2, addr0, irq); + if (retval <= 0) + goto err_unmap; + nchan = retval; } if ((cy_next_channel + nchan) > NR_PORTS) { @@ -5103,8 +5057,10 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "could not allocate IRQ\n"); goto err_unmap; } - cy_card[card_no].num_chips = nchan / 4; + cy_card[card_no].num_chips = nchan / CyPORTS_PER_CHIP; } else { + cy_card[card_no].hw_ver = mailbox; + cy_card[card_no].num_chips = (unsigned int)-1; #ifdef CONFIG_CYZ_INTR /* allocate IRQ only if board has an IRQ */ if (irq != 0 && irq != 255) { @@ -5117,15 +5073,15 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, } } #endif /* CONFIG_CYZ_INTR */ - cy_card[card_no].num_chips = (unsigned int)-1; } /* set cy_card */ cy_card[card_no].base_addr = addr2; - cy_card[card_no].ctl_addr = addr0; + cy_card[card_no].ctl_addr.p9050 = addr0; cy_card[card_no].irq = irq; cy_card[card_no].bus_index = 1; cy_card[card_no].first_line = cy_next_channel; + cy_card[card_no].nports = nchan; retval = cy_init_card(&cy_card[card_no]); if (retval) goto err_null; @@ -5138,17 +5094,20 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev, plx_ver = readb(addr2 + CyPLX_VER) & 0x0f; switch (plx_ver) { case PLX_9050: - cy_writeb(addr0 + 0x4c, 0x43); break; case PLX_9060: case PLX_9080: default: /* Old boards, use PLX_9060 */ - plx_init(pdev, irq, addr0); - cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900); + { + struct RUNTIME_9060 __iomem *ctl_addr = addr0; + plx_init(pdev, irq, ctl_addr); + cy_writew(&ctl_addr->intr_ctrl_stat, + readw(&ctl_addr->intr_ctrl_stat) | 0x0900); break; } + } } dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from " @@ -5179,22 +5138,23 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev) unsigned int i; /* non-Z with old PLX */ - if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) == + if (!cy_is_Z(cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) == PLX_9050) - cy_writeb(cinfo->ctl_addr + 0x4c, 0); + cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0); else #ifndef CONFIG_CYZ_INTR - if (!IS_CYC_Z(*cinfo)) + if (!cy_is_Z(cinfo)) #endif - cy_writew(cinfo->ctl_addr + 0x68, - readw(cinfo->ctl_addr + 0x68) & ~0x0900); + cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat, + readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) & + ~0x0900); iounmap(cinfo->base_addr); - if (cinfo->ctl_addr) - iounmap(cinfo->ctl_addr); + if (cinfo->ctl_addr.p9050) + iounmap(cinfo->ctl_addr.p9050); if (cinfo->irq #ifndef CONFIG_CYZ_INTR - && !IS_CYC_Z(*cinfo) + && !cy_is_Z(cinfo) #endif /* CONFIG_CYZ_INTR */ ) free_irq(cinfo->irq, cinfo); @@ -5240,7 +5200,7 @@ static int cyclades_proc_show(struct seq_file *m, void *v) (cur_jifs - info->idle_stats.recv_idle)/ HZ, info->idle_stats.overruns, /* FIXME: double check locking */ - (long)info->port.tty->ldisc.ops->num); + (long)info->port.tty->ldisc->ops->num); else seq_printf(m, "%3d %8lu %10lu %8lu " "%10lu %8lu %9lu %6ld\n", @@ -5386,11 +5346,11 @@ static void __exit cy_cleanup_module(void) /* clear interrupt */ cy_writeb(card->base_addr + Cy_ClrIntr, 0); iounmap(card->base_addr); - if (card->ctl_addr) - iounmap(card->ctl_addr); + if (card->ctl_addr.p9050) + iounmap(card->ctl_addr.p9050); if (card->irq #ifndef CONFIG_CYZ_INTR - && !IS_CYC_Z(*card) + && !cy_is_Z(card) #endif /* CONFIG_CYZ_INTR */ ) free_irq(card->irq, card); diff --git a/drivers/char/epca.c b/drivers/char/epca.c index af7c13ca949..abef1f7d84f 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -745,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port) return 0; } -static void epca_raise_dtr_rts(struct tty_port *port) +static void epca_dtr_rts(struct tty_port *port, int onoff) { } @@ -925,7 +925,7 @@ static const struct tty_operations pc_ops = { static const struct tty_port_operations epca_port_ops = { .carrier_raised = epca_carrier_raised, - .raise_dtr_rts = epca_raise_dtr_rts, + .dtr_rts = epca_dtr_rts, }; static int info_open(struct tty_struct *tty, struct file *filp) @@ -1518,7 +1518,7 @@ static void doevent(int crd) if (event & MODEMCHG_IND) { /* A modem signal change has been indicated */ ch->imodem = mstat; - if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) { + if (test_bit(ASYNCB_CHECK_CD, &ch->port.flags)) { /* We are now receiving dcd */ if (mstat & ch->dcd) wake_up_interruptible(&ch->port.open_wait); @@ -1765,9 +1765,9 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch) * that the driver will wait on carrier detect. */ if (ts->c_cflag & CLOCAL) - clear_bit(ASYNC_CHECK_CD, &ch->port.flags); + clear_bit(ASYNCB_CHECK_CD, &ch->port.flags); else - set_bit(ASYNC_CHECK_CD, &ch->port.flags); + set_bit(ASYNCB_CHECK_CD, &ch->port.flags); mval = ch->m_dtr | ch->m_rts; } /* End CBAUD not detected */ iflag = termios2digi_i(ch, ts->c_iflag); @@ -2114,8 +2114,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file, tty_wait_until_sent(tty, 0); } else { /* ldisc lock already held in ioctl */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); } unlock_kernel(); /* Fall Thru */ @@ -2244,7 +2244,8 @@ static void do_softint(struct work_struct *work) if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) { tty_hangup(tty); wake_up_interruptible(&ch->port.open_wait); - clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags); + clear_bit(ASYNCB_NORMAL_ACTIVE, + &ch->port.flags); } } tty_kref_put(tty); diff --git a/drivers/char/ip2/i2lib.c b/drivers/char/ip2/i2lib.c index 0061e18aff6..0d10b89218e 100644 --- a/drivers/char/ip2/i2lib.c +++ b/drivers/char/ip2/i2lib.c @@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh) amountToMove = count; } // Move the first block - pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, + pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY, &(pCh->Ibuf[stripIndex]), NULL, amountToMove ); // If we needed to wrap, do the second data move if (count > amountToMove) { - pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY, + pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY, pCh->Ibuf, NULL, count - amountToMove ); } // Bump and wrap the stripIndex all at once by the amount of data read. This diff --git a/drivers/char/ip2/ip2main.c b/drivers/char/ip2/ip2main.c index afd9247cf08..517271c762e 100644 --- a/drivers/char/ip2/ip2main.c +++ b/drivers/char/ip2/ip2main.c @@ -1315,8 +1315,8 @@ static inline void isig(int sig, struct tty_struct *tty, int flush) if (tty->pgrp) kill_pgrp(tty->pgrp, sig, 1); if (flush || !L_NOFLSH(tty)) { - if ( tty->ldisc.ops->flush_buffer ) - tty->ldisc.ops->flush_buffer(tty); + if ( tty->ldisc->ops->flush_buffer ) + tty->ldisc->ops->flush_buffer(tty); i2InputFlush( tty->driver_data ); } } diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index a59eac584d1..4d745a89504 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -329,7 +329,7 @@ static inline void drop_rts(struct isi_port *port) /* card->lock MUST NOT be held */ -static void isicom_raise_dtr_rts(struct tty_port *port) +static void isicom_dtr_rts(struct tty_port *port, int on) { struct isi_port *ip = container_of(port, struct isi_port, port); struct isi_board *card = ip->card; @@ -339,10 +339,17 @@ static void isicom_raise_dtr_rts(struct tty_port *port) if (!lock_card(card)) return; - outw(0x8000 | (channel << card->shift_count) | 0x02, base); - outw(0x0f04, base); - InterruptTheCard(base); - ip->status |= (ISI_DTR | ISI_RTS); + if (on) { + outw(0x8000 | (channel << card->shift_count) | 0x02, base); + outw(0x0f04, base); + InterruptTheCard(base); + ip->status |= (ISI_DTR | ISI_RTS); + } else { + outw(0x8000 | (channel << card->shift_count) | 0x02, base); + outw(0x0C04, base); + InterruptTheCard(base); + ip->status &= ~(ISI_DTR | ISI_RTS); + } unlock_card(card); } @@ -1339,7 +1346,7 @@ static const struct tty_operations isicom_ops = { static const struct tty_port_operations isicom_port_ops = { .carrier_raised = isicom_carrier_raised, - .raise_dtr_rts = isicom_raise_dtr_rts, + .dtr_rts = isicom_dtr_rts, }; static int __devinit reset_card(struct pci_dev *pdev, diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index fff19f7e29d..e18800c400b 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -1140,14 +1140,14 @@ static int stli_carrier_raised(struct tty_port *port) return (portp->sigs & TIOCM_CD) ? 1 : 0; } -static void stli_raise_dtr_rts(struct tty_port *port) +static void stli_dtr_rts(struct tty_port *port, int on) { struct stliport *portp = container_of(port, struct stliport, port); struct stlibrd *brdp = stli_brds[portp->brdnr]; - stli_mkasysigs(&portp->asig, 1, 1); + stli_mkasysigs(&portp->asig, on, on); if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig, sizeof(asysigs_t), 0) < 0) - printk(KERN_WARNING "istallion: dtr raise failed.\n"); + printk(KERN_WARNING "istallion: dtr set failed.\n"); } @@ -4417,7 +4417,7 @@ static const struct tty_operations stli_ops = { static const struct tty_port_operations stli_port_ops = { .carrier_raised = stli_carrier_raised, - .raise_dtr_rts = stli_raise_dtr_rts, + .dtr_rts = stli_dtr_rts, }; /*****************************************************************************/ diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 4a4cab73d0b..65b6ff2442c 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -1184,6 +1184,11 @@ static int moxa_open(struct tty_struct *tty, struct file *filp) return -ENODEV; } + if (port % MAX_PORTS_PER_BOARD >= brd->numPorts) { + mutex_unlock(&moxa_openlock); + return -ENODEV; + } + ch = &brd->ports[port % MAX_PORTS_PER_BOARD]; ch->port.count++; tty->driver_data = ch; diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 13f8871e5b2..9533f43a30b 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -547,14 +547,18 @@ static int mxser_carrier_raised(struct tty_port *port) return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0; } -static void mxser_raise_dtr_rts(struct tty_port *port) +static void mxser_dtr_rts(struct tty_port *port, int on) { struct mxser_port *mp = container_of(port, struct mxser_port, port); unsigned long flags; spin_lock_irqsave(&mp->slock, flags); - outb(inb(mp->ioaddr + UART_MCR) | - UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); + if (on) + outb(inb(mp->ioaddr + UART_MCR) | + UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR); + else + outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS), + mp->ioaddr + UART_MCR); spin_unlock_irqrestore(&mp->slock, flags); } @@ -2356,7 +2360,7 @@ static const struct tty_operations mxser_ops = { struct tty_port_operations mxser_port_ops = { .carrier_raised = mxser_carrier_raised, - .raise_dtr_rts = mxser_raise_dtr_rts, + .dtr_rts = mxser_dtr_rts, }; /* diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index bacb3e2872a..461ece591a5 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty) #endif /* Flush any pending characters in the driver and discipline. */ - if (tty->ldisc.ops->flush_buffer) - tty->ldisc.ops->flush_buffer(tty); + if (tty->ldisc->ops->flush_buffer) + tty->ldisc->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index f6f0e4ec2b5..94a5d5020ab 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -73,24 +73,6 @@ #define ECHO_OP_SET_CANON_COL 0x81 #define ECHO_OP_ERASE_TAB 0x82 -static inline unsigned char *alloc_buf(void) -{ - gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; - - if (PAGE_SIZE != N_TTY_BUF_SIZE) - return kmalloc(N_TTY_BUF_SIZE, prio); - else - return (unsigned char *)__get_free_page(prio); -} - -static inline void free_buf(unsigned char *buf) -{ - if (PAGE_SIZE != N_TTY_BUF_SIZE) - kfree(buf); - else - free_page((unsigned long) buf); -} - static inline int tty_put_user(struct tty_struct *tty, unsigned char x, unsigned char __user *ptr) { @@ -1558,11 +1540,11 @@ static void n_tty_close(struct tty_struct *tty) { n_tty_flush_buffer(tty); if (tty->read_buf) { - free_buf(tty->read_buf); + kfree(tty->read_buf); tty->read_buf = NULL; } if (tty->echo_buf) { - free_buf(tty->echo_buf); + kfree(tty->echo_buf); tty->echo_buf = NULL; } } @@ -1584,17 +1566,16 @@ static int n_tty_open(struct tty_struct *tty) /* These are ugly. Currently a malloc failure here can panic */ if (!tty->read_buf) { - tty->read_buf = alloc_buf(); + tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL); if (!tty->read_buf) return -ENOMEM; } if (!tty->echo_buf) { - tty->echo_buf = alloc_buf(); + tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL); + if (!tty->echo_buf) return -ENOMEM; } - memset(tty->read_buf, 0, N_TTY_BUF_SIZE); - memset(tty->echo_buf, 0, N_TTY_BUF_SIZE); reset_buffer_flags(tty); tty->column = 0; n_tty_set_termios(tty, NULL); diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 19d79fc5446..77b36488922 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -383,7 +383,7 @@ static void async_mode(MGSLPC_INFO *info); static void tx_timeout(unsigned long context); static int carrier_raised(struct tty_port *port); -static void raise_dtr_rts(struct tty_port *port); +static void dtr_rts(struct tty_port *port, int onoff); #if SYNCLINK_GENERIC_HDLC #define dev_to_port(D) (dev_to_hdlc(D)->priv) @@ -513,7 +513,7 @@ static void ldisc_receive_buf(struct tty_struct *tty, static const struct tty_port_operations mgslpc_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts + .dtr_rts = dtr_rts }; static int mgslpc_probe(struct pcmcia_device *link) @@ -2528,13 +2528,16 @@ static int carrier_raised(struct tty_port *port) return 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int onoff) { MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port); unsigned long flags; spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (onoff) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~SerialSignal_RTS + SerialSignal_DTR; set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 31038a0052a..5acd29e6e04 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -30,7 +30,6 @@ #include <asm/system.h> -/* These are global because they are accessed in tty_io.c */ #ifdef CONFIG_UNIX98_PTYS static struct tty_driver *ptm_driver; static struct tty_driver *pts_driver; @@ -111,7 +110,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf, c = to->receive_room; if (c > count) c = count; - to->ldisc.ops->receive_buf(to, buf, NULL, c); + to->ldisc->ops->receive_buf(to, buf, NULL, c); return c; } @@ -149,11 +148,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty) int count; /* We should get the line discipline lock for "tty->link" */ - if (!to || !to->ldisc.ops->chars_in_buffer) + if (!to || !to->ldisc->ops->chars_in_buffer) return 0; /* The ldisc must report 0 if no characters available to be read */ - count = to->ldisc.ops->chars_in_buffer(to); + count = to->ldisc->ops->chars_in_buffer(to); if (tty->driver->subtype == PTY_TYPE_SLAVE) return count; @@ -187,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty) if (!to) return; - if (to->ldisc.ops->flush_buffer) - to->ldisc.ops->flush_buffer(to); + if (to->ldisc->ops->flush_buffer) + to->ldisc->ops->flush_buffer(to); if (to->packet) { spin_lock_irqsave(&tty->ctrl_lock, flags); diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index f59fc5cea06..63d5b628477 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -872,11 +872,16 @@ static int carrier_raised(struct tty_port *port) return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { struct r_port *info = container_of(port, struct r_port, port); - sSetDTR(&info->channel); - sSetRTS(&info->channel); + if (on) { + sSetDTR(&info->channel); + sSetRTS(&info->channel); + } else { + sClrDTR(&info->channel); + sClrRTS(&info->channel); + } } /* @@ -934,7 +939,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp) /* * Info->count is now 1; so it's safe to sleep now. */ - if (!test_bit(ASYNC_INITIALIZED, &port->flags)) { + if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) { cp = &info->channel; sSetRxTrigger(cp, TRIG_1); if (sGetChanStatus(cp) & CD_ACT) @@ -958,7 +963,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp) sEnRxFIFO(cp); sEnTransmit(cp); - set_bit(ASYNC_INITIALIZED, &info->port.flags); + set_bit(ASYNCB_INITIALIZED, &info->port.flags); /* * Set up the tty->alt_speed kludge @@ -1641,7 +1646,7 @@ static int rp_write(struct tty_struct *tty, /* Write remaining data into the port's xmit_buf */ while (1) { /* Hung up ? */ - if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags)) + if (!test_bit(ASYNCB_NORMAL_ACTIVE, &info->port.flags)) goto end; c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1); c = min(c, XMIT_BUF_SIZE - info->xmit_head); @@ -2250,7 +2255,7 @@ static const struct tty_operations rocket_ops = { static const struct tty_port_operations rocket_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* diff --git a/drivers/char/selection.c b/drivers/char/selection.c index cb8ca569896..f97b9e84806 100644 --- a/drivers/char/selection.c +++ b/drivers/char/selection.c @@ -327,7 +327,7 @@ int paste_selection(struct tty_struct *tty) } count = sel_buffer_lth - pasted; count = min(count, tty->receive_room); - tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted, + tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff --git a/drivers/char/stallion.c b/drivers/char/stallion.c index 2ad813a801d..53e504f41b2 100644 --- a/drivers/char/stallion.c +++ b/drivers/char/stallion.c @@ -772,11 +772,11 @@ static int stl_carrier_raised(struct tty_port *port) return (portp->sigs & TIOCM_CD) ? 1 : 0; } -static void stl_raise_dtr_rts(struct tty_port *port) +static void stl_dtr_rts(struct tty_port *port, int on) { struct stlport *portp = container_of(port, struct stlport, port); /* Takes brd_lock internally */ - stl_setsignals(portp, 1, 1); + stl_setsignals(portp, on, on); } /*****************************************************************************/ @@ -2547,7 +2547,7 @@ static const struct tty_operations stl_ops = { static const struct tty_port_operations stl_port_ops = { .carrier_raised = stl_carrier_raised, - .raise_dtr_rts = stl_raise_dtr_rts, + .dtr_rts = stl_dtr_rts, }; /*****************************************************************************/ diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index afd0b26ca05..afded3a2379 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -3247,13 +3247,16 @@ static int carrier_raised(struct tty_port *port) return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { struct mgsl_struct *info = container_of(port, struct mgsl_struct, port); unsigned long flags; spin_lock_irqsave(&info->irq_spinlock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR); usc_set_serial_signals(info); spin_unlock_irqrestore(&info->irq_spinlock,flags); } @@ -4258,7 +4261,7 @@ static void mgsl_add_device( struct mgsl_struct *info ) static const struct tty_port_operations mgsl_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 5e256494686..1386625fc4c 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -214,6 +214,7 @@ struct slgt_desc #define set_desc_next(a,b) (a).next = cpu_to_le32((unsigned int)(b)) #define set_desc_count(a,b)(a).count = cpu_to_le16((unsigned short)(b)) #define set_desc_eof(a,b) (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0)) +#define set_desc_status(a, b) (a).status = cpu_to_le16((unsigned short)(b)) #define desc_count(a) (le16_to_cpu((a).count)) #define desc_status(a) (le16_to_cpu((a).status)) #define desc_complete(a) (le16_to_cpu((a).status) & BIT15) @@ -297,6 +298,7 @@ struct slgt_info { u32 max_frame_size; /* as set by device config */ unsigned int rbuf_fill_level; + unsigned int rx_pio; unsigned int if_mode; unsigned int base_clock; @@ -331,6 +333,8 @@ struct slgt_info { struct slgt_desc *rbufs; unsigned int rbuf_current; unsigned int rbuf_index; + unsigned int rbuf_fill_index; + unsigned short rbuf_fill_count; unsigned int tbuf_count; struct slgt_desc *tbufs; @@ -2110,6 +2114,40 @@ static void ri_change(struct slgt_info *info, unsigned short status) info->pending_bh |= BH_STATUS; } +static void isr_rxdata(struct slgt_info *info) +{ + unsigned int count = info->rbuf_fill_count; + unsigned int i = info->rbuf_fill_index; + unsigned short reg; + + while (rd_reg16(info, SSR) & IRQ_RXDATA) { + reg = rd_reg16(info, RDR); + DBGISR(("isr_rxdata %s RDR=%04X\n", info->device_name, reg)); + if (desc_complete(info->rbufs[i])) { + /* all buffers full */ + rx_stop(info); + info->rx_restart = 1; + continue; + } + info->rbufs[i].buf[count++] = (unsigned char)reg; + /* async mode saves status byte to buffer for each data byte */ + if (info->params.mode == MGSL_MODE_ASYNC) + info->rbufs[i].buf[count++] = (unsigned char)(reg >> 8); + if (count == info->rbuf_fill_level || (reg & BIT10)) { + /* buffer full or end of frame */ + set_desc_count(info->rbufs[i], count); + set_desc_status(info->rbufs[i], BIT15 | (reg >> 8)); + info->rbuf_fill_count = count = 0; + if (++i == info->rbuf_count) + i = 0; + info->pending_bh |= BH_RECEIVE; + } + } + + info->rbuf_fill_index = i; + info->rbuf_fill_count = count; +} + static void isr_serial(struct slgt_info *info) { unsigned short status = rd_reg16(info, SSR); @@ -2125,6 +2163,8 @@ static void isr_serial(struct slgt_info *info) if (info->tx_count) isr_txeom(info, status); } + if (info->rx_pio && (status & IRQ_RXDATA)) + isr_rxdata(info); if ((status & IRQ_RXBREAK) && (status & RXBREAK)) { info->icount.brk++; /* process break detection if tty control allows */ @@ -2141,7 +2181,8 @@ static void isr_serial(struct slgt_info *info) } else { if (status & (IRQ_TXIDLE + IRQ_TXUNDER)) isr_txeom(info, status); - + if (info->rx_pio && (status & IRQ_RXDATA)) + isr_rxdata(info); if (status & IRQ_RXIDLE) { if (status & RXIDLE) info->icount.rxidle++; @@ -2642,6 +2683,10 @@ static int rx_enable(struct slgt_info *info, int enable) return -EINVAL; } info->rbuf_fill_level = rbuf_fill_level; + if (rbuf_fill_level < 128) + info->rx_pio = 1; /* PIO mode */ + else + info->rx_pio = 0; /* DMA mode */ rx_stop(info); /* restart receiver to use new fill level */ } @@ -3099,13 +3144,16 @@ static int carrier_raised(struct tty_port *port) return (info->signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { unsigned long flags; struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - info->signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR); set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -3419,7 +3467,7 @@ static void add_device(struct slgt_info *info) static const struct tty_port_operations slgt_port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* @@ -3841,15 +3889,27 @@ static void rx_start(struct slgt_info *info) rdma_reset(info); reset_rbufs(info); - /* set 1st descriptor address */ - wr_reg32(info, RDDAR, info->rbufs[0].pdesc); - - if (info->params.mode != MGSL_MODE_ASYNC) { - /* enable rx DMA and DMA interrupt */ - wr_reg32(info, RDCSR, (BIT2 + BIT0)); + if (info->rx_pio) { + /* rx request when rx FIFO not empty */ + wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) & ~BIT14)); + slgt_irq_on(info, IRQ_RXDATA); + if (info->params.mode == MGSL_MODE_ASYNC) { + /* enable saving of rx status */ + wr_reg32(info, RDCSR, BIT6); + } } else { - /* enable saving of rx status, rx DMA and DMA interrupt */ - wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0)); + /* rx request when rx FIFO half full */ + wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT14)); + /* set 1st descriptor address */ + wr_reg32(info, RDDAR, info->rbufs[0].pdesc); + + if (info->params.mode != MGSL_MODE_ASYNC) { + /* enable rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT2 + BIT0)); + } else { + /* enable saving of rx status, rx DMA and DMA interrupt */ + wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0)); + } } slgt_irq_on(info, IRQ_RXOVER); @@ -4467,6 +4527,8 @@ static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last static void reset_rbufs(struct slgt_info *info) { free_rbufs(info, 0, info->rbuf_count - 1); + info->rbuf_fill_index = 0; + info->rbuf_fill_count = 0; } /* diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 26de60efe4b..6f727e3c53a 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -3277,13 +3277,16 @@ static int carrier_raised(struct tty_port *port) return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; } -static void raise_dtr_rts(struct tty_port *port) +static void dtr_rts(struct tty_port *port, int on) { SLMP_INFO *info = container_of(port, SLMP_INFO, port); unsigned long flags; spin_lock_irqsave(&info->lock,flags); - info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + if (on) + info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR; + else + info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR); set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } @@ -3746,7 +3749,7 @@ static void add_device(SLMP_INFO *info) static const struct tty_port_operations port_ops = { .carrier_raised = carrier_raised, - .raise_dtr_rts = raise_dtr_rts, + .dtr_rts = dtr_rts, }; /* Allocate and initialize a device instance structure diff --git a/drivers/char/tty_audit.c b/drivers/char/tty_audit.c index 55ba6f14288..ac16fbec72d 100644 --- a/drivers/char/tty_audit.c +++ b/drivers/char/tty_audit.c @@ -29,10 +29,7 @@ static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor, buf = kmalloc(sizeof(*buf), GFP_KERNEL); if (!buf) goto err; - if (PAGE_SIZE != N_TTY_BUF_SIZE) - buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); - else - buf->data = (unsigned char *)__get_free_page(GFP_KERNEL); + buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL); if (!buf->data) goto err_buf; atomic_set(&buf->count, 1); @@ -52,10 +49,7 @@ err: static void tty_audit_buf_free(struct tty_audit_buf *buf) { WARN_ON(buf->valid != 0); - if (PAGE_SIZE != N_TTY_BUF_SIZE) - kfree(buf->data); - else - free_page((unsigned long)buf->data); + kfree(buf->data); kfree(buf); } diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 66b99a2049e..939e198d767 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -295,7 +295,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) struct tty_driver *p, *res = NULL; int tty_line = 0; int len; - char *str; + char *str, *stp; for (str = name; *str; str++) if ((*str >= '0' && *str <= '9') || *str == ',') @@ -311,13 +311,14 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) list_for_each_entry(p, &tty_drivers, tty_drivers) { if (strncmp(name, p->name, len) != 0) continue; - if (*str == ',') - str++; - if (*str == '\0') - str = NULL; + stp = str; + if (*stp == ',') + stp++; + if (*stp == '\0') + stp = NULL; if (tty_line >= 0 && tty_line <= p->num && p->ops && - p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) { + p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) { res = tty_driver_kref_get(p); *line = tty_line; break; @@ -470,43 +471,6 @@ void tty_wakeup(struct tty_struct *tty) EXPORT_SYMBOL_GPL(tty_wakeup); /** - * tty_ldisc_flush - flush line discipline queue - * @tty: tty - * - * Flush the line discipline queue (if any) for this tty. If there - * is no line discipline active this is a no-op. - */ - -void tty_ldisc_flush(struct tty_struct *tty) -{ - struct tty_ldisc *ld = tty_ldisc_ref(tty); - if (ld) { - if (ld->ops->flush_buffer) - ld->ops->flush_buffer(tty); - tty_ldisc_deref(ld); - } - tty_buffer_flush(tty); -} - -EXPORT_SYMBOL_GPL(tty_ldisc_flush); - -/** - * tty_reset_termios - reset terminal state - * @tty: tty to reset - * - * Restore a terminal to the driver default state - */ - -static void tty_reset_termios(struct tty_struct *tty) -{ - mutex_lock(&tty->termios_mutex); - *tty->termios = tty->driver->init_termios; - tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios); - tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios); - mutex_unlock(&tty->termios_mutex); -} - -/** * do_tty_hangup - actual handler for hangup events * @work: tty device * @@ -535,7 +499,6 @@ static void do_tty_hangup(struct work_struct *work) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; - struct tty_ldisc *ld; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -566,40 +529,8 @@ static void do_tty_hangup(struct work_struct *work) filp->f_op = &hung_up_tty_fops; } file_list_unlock(); - /* - * FIXME! What are the locking issues here? This may me overdoing - * things... This question is especially important now that we've - * removed the irqlock. - */ - ld = tty_ldisc_ref(tty); - if (ld != NULL) { - /* We may have no line discipline at this point */ - if (ld->ops->flush_buffer) - ld->ops->flush_buffer(tty); - tty_driver_flush_buffer(tty); - if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && - ld->ops->write_wakeup) - ld->ops->write_wakeup(tty); - if (ld->ops->hangup) - ld->ops->hangup(tty); - } - /* - * FIXME: Once we trust the LDISC code better we can wait here for - * ldisc completion and fix the driver call race - */ - wake_up_interruptible_poll(&tty->write_wait, POLLOUT); - wake_up_interruptible_poll(&tty->read_wait, POLLIN); - /* - * Shutdown the current line discipline, and reset it to - * N_TTY. - */ - if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) - tty_reset_termios(tty); - /* Defer ldisc switch */ - /* tty_deferred_ldisc_switch(N_TTY); - This should get done automatically when the port closes and - tty_release is called */ + tty_ldisc_hangup(tty); read_lock(&tasklist_lock); if (tty->session) { @@ -628,12 +559,15 @@ static void do_tty_hangup(struct work_struct *work) read_unlock(&tasklist_lock); spin_lock_irqsave(&tty->ctrl_lock, flags); - tty->flags = 0; + clear_bit(TTY_THROTTLED, &tty->flags); + clear_bit(TTY_PUSH, &tty->flags); + clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); put_pid(tty->session); put_pid(tty->pgrp); tty->session = NULL; tty->pgrp = NULL; tty->ctrl_status = 0; + set_bit(TTY_HUPPED, &tty->flags); spin_unlock_irqrestore(&tty->ctrl_lock, flags); /* Account for the p->signal references we killed */ @@ -659,10 +593,7 @@ static void do_tty_hangup(struct work_struct *work) * can't yet guarantee all that. */ set_bit(TTY_HUPPED, &tty->flags); - if (ld) { - tty_ldisc_enable(tty); - tty_ldisc_deref(ld); - } + tty_ldisc_enable(tty); unlock_kernel(); if (f) fput(f); @@ -2480,6 +2411,24 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int return tty->ops->tiocmset(tty, file, set, clear); } +struct tty_struct *tty_pair_get_tty(struct tty_struct *tty) +{ + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER) + tty = tty->link; + return tty; +} +EXPORT_SYMBOL(tty_pair_get_tty); + +struct tty_struct *tty_pair_get_pty(struct tty_struct *tty) +{ + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER) + return tty; + return tty->link; +} +EXPORT_SYMBOL(tty_pair_get_pty); + /* * Split this up, as gcc can choke on it otherwise.. */ @@ -2495,11 +2444,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; - real_tty = tty; - if (tty->driver->type == TTY_DRIVER_TYPE_PTY && - tty->driver->subtype == PTY_TYPE_MASTER) - real_tty = tty->link; - + real_tty = tty_pair_get_tty(tty); /* * Factor out some common prep work @@ -2555,7 +2500,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case TIOCGSID: return tiocgsid(tty, real_tty, p); case TIOCGETD: - return put_user(tty->ldisc.ops->num, (int __user *)p); + return put_user(tty->ldisc->ops->num, (int __user *)p); case TIOCSETD: return tiocsetd(tty, p); /* @@ -2770,6 +2715,7 @@ void initialize_tty_struct(struct tty_struct *tty, tty->buf.head = tty->buf.tail = NULL; tty_buffer_init(tty); mutex_init(&tty->termios_mutex); + mutex_init(&tty->ldisc_mutex); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); INIT_WORK(&tty->hangup_work, do_tty_hangup); diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index 6f4c7d0a53b..8116bb1c8f8 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -97,14 +97,19 @@ EXPORT_SYMBOL(tty_driver_flush_buffer); * @tty: terminal * * Indicate that a tty should stop transmitting data down the stack. + * Takes the termios mutex to protect against parallel throttle/unthrottle + * and also to ensure the driver can consistently reference its own + * termios data at this point when implementing software flow control. */ void tty_throttle(struct tty_struct *tty) { + mutex_lock(&tty->termios_mutex); /* check TTY_THROTTLED first so it indicates our state */ if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) && tty->ops->throttle) tty->ops->throttle(tty); + mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_throttle); @@ -113,13 +118,21 @@ EXPORT_SYMBOL(tty_throttle); * @tty: terminal * * Indicate that a tty may continue transmitting data down the stack. + * Takes the termios mutex to protect against parallel throttle/unthrottle + * and also to ensure the driver can consistently reference its own + * termios data at this point when implementing software flow control. + * + * Drivers should however remember that the stack can issue a throttle, + * then change flow control method, then unthrottle. */ void tty_unthrottle(struct tty_struct *tty) { + mutex_lock(&tty->termios_mutex); if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) && tty->ops->unthrottle) tty->ops->unthrottle(tty); + mutex_unlock(&tty->termios_mutex); } EXPORT_SYMBOL(tty_unthrottle); @@ -613,9 +626,25 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt) return 0; } +static void copy_termios(struct tty_struct *tty, struct ktermios *kterm) +{ + mutex_lock(&tty->termios_mutex); + memcpy(kterm, tty->termios, sizeof(struct ktermios)); + mutex_unlock(&tty->termios_mutex); +} + +static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm) +{ + mutex_lock(&tty->termios_mutex); + memcpy(kterm, tty->termios_locked, sizeof(struct ktermios)); + mutex_unlock(&tty->termios_mutex); +} + static int get_termio(struct tty_struct *tty, struct termio __user *termio) { - if (kernel_termios_to_user_termio(termio, tty->termios)) + struct ktermios kterm; + copy_termios(tty, &kterm); + if (kernel_termios_to_user_termio(termio, &kterm)) return -EFAULT; return 0; } @@ -917,6 +946,8 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, struct tty_struct *real_tty; void __user *p = (void __user *)arg; int ret = 0; + struct ktermios kterm; + struct termiox ktermx; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -952,23 +983,20 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, return set_termios(real_tty, p, TERMIOS_OLD); #ifndef TCGETS2 case TCGETS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios)) + copy_termios(real_tty, &kterm); + if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; #else case TCGETS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios)) + copy_termios(real_tty, &kterm); + if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TCGETS2: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios((struct termios2 __user *)arg, real_tty->termios)) + copy_termios(real_tty, &kterm); + if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TCSETSF2: return set_termios(real_tty, p, TERMIOS_FLUSH | TERMIOS_WAIT); @@ -987,34 +1015,36 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, return set_termios(real_tty, p, TERMIOS_TERMIO); #ifndef TCGETS2 case TIOCGLCKTRMIOS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked)) + copy_termios_locked(real_tty, &kterm); + if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TIOCSLCKTRMIOS: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - mutex_lock(&real_tty->termios_mutex); - if (user_termios_to_kernel_termios(real_tty->termios_locked, + copy_termios_locked(real_tty, &kterm); + if (user_termios_to_kernel_termios(&kterm, (struct termios __user *) arg)) - ret = -EFAULT; + return -EFAULT; + mutex_lock(&real_tty->termios_mutex); + memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios)); mutex_unlock(&real_tty->termios_mutex); - return ret; + return 0; #else case TIOCGLCKTRMIOS: - mutex_lock(&real_tty->termios_mutex); - if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked)) + copy_termios_locked(real_tty, &kterm); + if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm)) ret = -EFAULT; - mutex_unlock(&real_tty->termios_mutex); return ret; case TIOCSLCKTRMIOS: if (!capable(CAP_SYS_ADMIN)) - ret = -EPERM; - mutex_lock(&real_tty->termios_mutex); - if (user_termios_to_kernel_termios_1(real_tty->termios_locked, + return -EPERM; + copy_termios_locked(real_tty, &kterm); + if (user_termios_to_kernel_termios_1(&kterm, (struct termios __user *) arg)) - ret = -EFAULT; + return -EFAULT; + mutex_lock(&real_tty->termios_mutex); + memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios)); mutex_unlock(&real_tty->termios_mutex); return ret; #endif @@ -1023,9 +1053,10 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, if (real_tty->termiox == NULL) return -EINVAL; mutex_lock(&real_tty->termios_mutex); - if (copy_to_user(p, real_tty->termiox, sizeof(struct termiox))) - ret = -EFAULT; + memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox)); mutex_unlock(&real_tty->termios_mutex); + if (copy_to_user(p, &ktermx, sizeof(struct termiox))) + ret = -EFAULT; return ret; case TCSETX: return set_termiox(real_tty, p, 0); @@ -1035,10 +1066,9 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file, return set_termiox(real_tty, p, TERMIOS_FLUSH); #endif case TIOCGSOFTCAR: - mutex_lock(&real_tty->termios_mutex); - ret = put_user(C_CLOCAL(real_tty) ? 1 : 0, + copy_termios(real_tty, &kterm); + ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0, (int __user *)arg); - mutex_unlock(&real_tty->termios_mutex); return ret; case TIOCSSOFTCAR: if (get_user(arg, (unsigned int __user *) arg)) diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index f78f5b0127a..39c8f86dedd 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -115,19 +115,22 @@ EXPORT_SYMBOL(tty_unregister_ldisc); /** * tty_ldisc_try_get - try and reference an ldisc * @disc: ldisc number - * @ld: tty ldisc structure to complete * * Attempt to open and lock a line discipline into place. Return - * the line discipline refcounted and assigned in ld. On an error - * report the error code back + * the line discipline refcounted or an error. */ -static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) +static struct tty_ldisc *tty_ldisc_try_get(int disc) { unsigned long flags; + struct tty_ldisc *ld; struct tty_ldisc_ops *ldops; int err = -EINVAL; - + + ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL); + if (ld == NULL) + return ERR_PTR(-ENOMEM); + spin_lock_irqsave(&tty_ldisc_lock, flags); ld->ops = NULL; ldops = tty_ldiscs[disc]; @@ -140,17 +143,19 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) /* lock it */ ldops->refcount++; ld->ops = ldops; + ld->refcount = 0; err = 0; } } spin_unlock_irqrestore(&tty_ldisc_lock, flags); - return err; + if (err) + return ERR_PTR(err); + return ld; } /** * tty_ldisc_get - take a reference to an ldisc * @disc: ldisc number - * @ld: tty line discipline structure to use * * Takes a reference to a line discipline. Deals with refcounts and * module locking counts. Returns NULL if the discipline is not available. @@ -161,52 +166,54 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld) * takes tty_ldisc_lock to guard against ldisc races */ -static int tty_ldisc_get(int disc, struct tty_ldisc *ld) +static struct tty_ldisc *tty_ldisc_get(int disc) { - int err; + struct tty_ldisc *ld; if (disc < N_TTY || disc >= NR_LDISCS) - return -EINVAL; - err = tty_ldisc_try_get(disc, ld); - if (err < 0) { + return ERR_PTR(-EINVAL); + ld = tty_ldisc_try_get(disc); + if (IS_ERR(ld)) { request_module("tty-ldisc-%d", disc); - err = tty_ldisc_try_get(disc, ld); + ld = tty_ldisc_try_get(disc); } - return err; + return ld; } /** * tty_ldisc_put - drop ldisc reference - * @disc: ldisc number + * @ld: ldisc * * Drop a reference to a line discipline. Manage refcounts and - * module usage counts + * module usage counts. Free the ldisc once the recount hits zero. * * Locking: * takes tty_ldisc_lock to guard against ldisc races */ -static void tty_ldisc_put(struct tty_ldisc_ops *ld) +static void tty_ldisc_put(struct tty_ldisc *ld) { unsigned long flags; - int disc = ld->num; + int disc = ld->ops->num; + struct tty_ldisc_ops *ldo; BUG_ON(disc < N_TTY || disc >= NR_LDISCS); spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = tty_ldiscs[disc]; - BUG_ON(ld->refcount == 0); - ld->refcount--; - module_put(ld->owner); + ldo = tty_ldiscs[disc]; + BUG_ON(ldo->refcount == 0); + ldo->refcount--; + module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); + kfree(ld); } -static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) +static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { return (*pos < NR_LDISCS) ? pos : NULL; } -static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) +static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return (*pos < NR_LDISCS) ? pos : NULL; @@ -219,12 +226,13 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) static int tty_ldiscs_seq_show(struct seq_file *m, void *v) { int i = *(loff_t *)v; - struct tty_ldisc ld; - - if (tty_ldisc_get(i, &ld) < 0) + struct tty_ldisc *ld; + + ld = tty_ldisc_try_get(i); + if (IS_ERR(ld)) return 0; - seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i); - tty_ldisc_put(ld.ops); + seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i); + tty_ldisc_put(ld); return 0; } @@ -263,8 +271,7 @@ const struct file_operations tty_ldiscs_proc_fops = { static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) { - ld->refcount = 0; - tty->ldisc = *ld; + tty->ldisc = ld; } /** @@ -286,7 +293,7 @@ static int tty_ldisc_try(struct tty_struct *tty) int ret = 0; spin_lock_irqsave(&tty_ldisc_lock, flags); - ld = &tty->ldisc; + ld = tty->ldisc; if (test_bit(TTY_LDISC, &tty->flags)) { ld->refcount++; ret = 1; @@ -315,10 +322,9 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { /* wait_event is a macro */ wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - WARN_ON(tty->ldisc.refcount == 0); - return &tty->ldisc; + WARN_ON(tty->ldisc->refcount == 0); + return tty->ldisc; } - EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); /** @@ -335,10 +341,9 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) { if (tty_ldisc_try(tty)) - return &tty->ldisc; + return tty->ldisc; return NULL; } - EXPORT_SYMBOL_GPL(tty_ldisc_ref); /** @@ -366,7 +371,6 @@ void tty_ldisc_deref(struct tty_ldisc *ld) wake_up(&tty_ldisc_wait); spin_unlock_irqrestore(&tty_ldisc_lock, flags); } - EXPORT_SYMBOL_GPL(tty_ldisc_deref); /** @@ -389,6 +393,26 @@ void tty_ldisc_enable(struct tty_struct *tty) } /** + * tty_ldisc_flush - flush line discipline queue + * @tty: tty + * + * Flush the line discipline queue (if any) for this tty. If there + * is no line discipline active this is a no-op. + */ + +void tty_ldisc_flush(struct tty_struct *tty) +{ + struct tty_ldisc *ld = tty_ldisc_ref(tty); + if (ld) { + if (ld->ops->flush_buffer) + ld->ops->flush_buffer(tty); + tty_ldisc_deref(ld); + } + tty_buffer_flush(tty); +} +EXPORT_SYMBOL_GPL(tty_ldisc_flush); + +/** * tty_set_termios_ldisc - set ldisc field * @tty: tty structure * @num: line discipline number @@ -407,6 +431,39 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) mutex_unlock(&tty->termios_mutex); } +/** + * tty_ldisc_open - open a line discipline + * @tty: tty we are opening the ldisc on + * @ld: discipline to open + * + * A helper opening method. Also a convenient debugging and check + * point. + */ + +static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld) +{ + WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags)); + if (ld->ops->open) + return ld->ops->open(tty); + return 0; +} + +/** + * tty_ldisc_close - close a line discipline + * @tty: tty we are opening the ldisc on + * @ld: discipline to close + * + * A helper close method. Also a convenient debugging and check + * point. + */ + +static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) +{ + WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); + clear_bit(TTY_LDISC_OPEN, &tty->flags); + if (ld->ops->close) + ld->ops->close(tty); +} /** * tty_ldisc_restore - helper for tty ldisc change @@ -420,66 +477,136 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) { char buf[64]; - struct tty_ldisc new_ldisc; + struct tty_ldisc *new_ldisc; + int r; /* There is an outstanding reference here so this is safe */ - tty_ldisc_get(old->ops->num, old); + old = tty_ldisc_get(old->ops->num); + WARN_ON(IS_ERR(old)); tty_ldisc_assign(tty, old); tty_set_termios_ldisc(tty, old->ops->num); - if (old->ops->open && (old->ops->open(tty) < 0)) { - tty_ldisc_put(old->ops); + if (tty_ldisc_open(tty, old) < 0) { + tty_ldisc_put(old); /* This driver is always present */ - if (tty_ldisc_get(N_TTY, &new_ldisc) < 0) + new_ldisc = tty_ldisc_get(N_TTY); + if (IS_ERR(new_ldisc)) panic("n_tty: get"); - tty_ldisc_assign(tty, &new_ldisc); + tty_ldisc_assign(tty, new_ldisc); tty_set_termios_ldisc(tty, N_TTY); - if (new_ldisc.ops->open) { - int r = new_ldisc.ops->open(tty); - if (r < 0) - panic("Couldn't open N_TTY ldisc for " - "%s --- error %d.", - tty_name(tty, buf), r); - } + r = tty_ldisc_open(tty, new_ldisc); + if (r < 0) + panic("Couldn't open N_TTY ldisc for " + "%s --- error %d.", + tty_name(tty, buf), r); } } /** + * tty_ldisc_halt - shut down the line discipline + * @tty: tty device + * + * Shut down the line discipline and work queue for this tty device. + * The TTY_LDISC flag being cleared ensures no further references can + * be obtained while the delayed work queue halt ensures that no more + * data is fed to the ldisc. + * + * In order to wait for any existing references to complete see + * tty_ldisc_wait_idle. + */ + +static int tty_ldisc_halt(struct tty_struct *tty) +{ + clear_bit(TTY_LDISC, &tty->flags); + return cancel_delayed_work(&tty->buf.work); +} + +/** + * tty_ldisc_wait_idle - wait for the ldisc to become idle + * @tty: tty to wait for + * + * Wait for the line discipline to become idle. The discipline must + * have been halted for this to guarantee it remains idle. + * + * tty_ldisc_lock protects the ref counts currently. + */ + +static int tty_ldisc_wait_idle(struct tty_struct *tty) +{ + unsigned long flags; + spin_lock_irqsave(&tty_ldisc_lock, flags); + while (tty->ldisc->refcount) { + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + if (wait_event_timeout(tty_ldisc_wait, + tty->ldisc->refcount == 0, 5 * HZ) == 0) + return -EBUSY; + spin_lock_irqsave(&tty_ldisc_lock, flags); + } + spin_unlock_irqrestore(&tty_ldisc_lock, flags); + return 0; +} + +/** * tty_set_ldisc - set line discipline * @tty: the terminal to set * @ldisc: the line discipline * * Set the discipline of a tty line. Must be called from a process - * context. + * context. The ldisc change logic has to protect itself against any + * overlapping ldisc change (including on the other end of pty pairs), + * the close of one side of a tty/pty pair, and eventually hangup. * - * Locking: takes tty_ldisc_lock. - * called functions take termios_mutex + * Locking: takes tty_ldisc_lock, termios_mutex */ int tty_set_ldisc(struct tty_struct *tty, int ldisc) { int retval; - struct tty_ldisc o_ldisc, new_ldisc; - int work; - unsigned long flags; + struct tty_ldisc *o_ldisc, *new_ldisc; + int work, o_work = 0; struct tty_struct *o_tty; -restart: - /* This is a bit ugly for now but means we can break the 'ldisc - is part of the tty struct' assumption later */ - retval = tty_ldisc_get(ldisc, &new_ldisc); - if (retval) - return retval; + new_ldisc = tty_ldisc_get(ldisc); + if (IS_ERR(new_ldisc)) + return PTR_ERR(new_ldisc); + + /* + * We need to look at the tty locking here for pty/tty pairs + * when both sides try to change in parallel. + */ + + o_tty = tty->link; /* o_tty is the pty side or NULL */ + + + /* + * Check the no-op case + */ + + if (tty->ldisc->ops->num == ldisc) { + tty_ldisc_put(new_ldisc); + return 0; + } /* * Problem: What do we do if this blocks ? + * We could deadlock here */ tty_wait_until_sent(tty, 0); - if (tty->ldisc.ops->num == ldisc) { - tty_ldisc_put(new_ldisc.ops); - return 0; + mutex_lock(&tty->ldisc_mutex); + + /* + * We could be midstream of another ldisc change which has + * dropped the lock during processing. If so we need to wait. + */ + + while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) { + mutex_unlock(&tty->ldisc_mutex); + wait_event(tty_ldisc_wait, + test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0); + mutex_lock(&tty->ldisc_mutex); } + set_bit(TTY_LDISC_CHANGING, &tty->flags); /* * No more input please, we are switching. The new ldisc @@ -489,8 +616,6 @@ restart: tty->receive_room = 0; o_ldisc = tty->ldisc; - o_tty = tty->link; - /* * Make sure we don't change while someone holds a * reference to the line discipline. The TTY_LDISC bit @@ -501,108 +626,181 @@ restart: * with a userspace app continually trying to use the tty in * parallel to the change and re-referencing the tty. */ - clear_bit(TTY_LDISC, &tty->flags); - if (o_tty) - clear_bit(TTY_LDISC, &o_tty->flags); - spin_lock_irqsave(&tty_ldisc_lock, flags); - if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) { - if (tty->ldisc.refcount) { - /* Free the new ldisc we grabbed. Must drop the lock - first. */ - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(o_ldisc.ops); - /* - * There are several reasons we may be busy, including - * random momentary I/O traffic. We must therefore - * retry. We could distinguish between blocking ops - * and retries if we made tty_ldisc_wait() smarter. - * That is up for discussion. - */ - if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - if (o_tty && o_tty->ldisc.refcount) { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(o_tty->ldisc.ops); - if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0) - return -ERESTARTSYS; - goto restart; - } - } - /* - * If the TTY_LDISC bit is set, then we are racing against - * another ldisc change - */ - if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) { - struct tty_ldisc *ld; - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - tty_ldisc_put(new_ldisc.ops); - ld = tty_ldisc_ref_wait(tty); - tty_ldisc_deref(ld); - goto restart; - } - /* - * This flag is used to avoid two parallel ldisc changes. Once - * open and close are fine grained locked this may work better - * as a mutex shared with the open/close/hup paths - */ - set_bit(TTY_LDISC_CHANGING, &tty->flags); + work = tty_ldisc_halt(tty); if (o_tty) - set_bit(TTY_LDISC_CHANGING, &o_tty->flags); - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - - /* - * From this point on we know nobody has an ldisc - * usage reference, nor can they obtain one until - * we say so later on. - */ + o_work = tty_ldisc_halt(o_tty); - work = cancel_delayed_work(&tty->buf.work); /* - * Wait for ->hangup_work and ->buf.work handlers to terminate - * MUST NOT hold locks here. + * Wait for ->hangup_work and ->buf.work handlers to terminate. + * We must drop the mutex here in case a hangup is also in process. */ + + mutex_unlock(&tty->ldisc_mutex); + flush_scheduled_work(); + + /* Let any existing reference holders finish */ + retval = tty_ldisc_wait_idle(tty); + if (retval < 0) { + clear_bit(TTY_LDISC_CHANGING, &tty->flags); + tty_ldisc_put(new_ldisc); + return retval; + } + + mutex_lock(&tty->ldisc_mutex); + if (test_bit(TTY_HUPPED, &tty->flags)) { + /* We were raced by the hangup method. It will have stomped + the ldisc data and closed the ldisc down */ + clear_bit(TTY_LDISC_CHANGING, &tty->flags); + mutex_unlock(&tty->ldisc_mutex); + tty_ldisc_put(new_ldisc); + return -EIO; + } + /* Shutdown the current discipline. */ - if (o_ldisc.ops->close) - (o_ldisc.ops->close)(tty); + tty_ldisc_close(tty, o_ldisc); /* Now set up the new line discipline. */ - tty_ldisc_assign(tty, &new_ldisc); + tty_ldisc_assign(tty, new_ldisc); tty_set_termios_ldisc(tty, ldisc); - if (new_ldisc.ops->open) - retval = (new_ldisc.ops->open)(tty); + + retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { - tty_ldisc_put(new_ldisc.ops); - tty_ldisc_restore(tty, &o_ldisc); + /* Back to the old one or N_TTY if we can't */ + tty_ldisc_put(new_ldisc); + tty_ldisc_restore(tty, o_ldisc); } + /* At this point we hold a reference to the new ldisc and a a reference to the old ldisc. If we ended up flipping back to the existing ldisc we have two references to it */ - if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc) + if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc) tty->ops->set_ldisc(tty); - tty_ldisc_put(o_ldisc.ops); + tty_ldisc_put(o_ldisc); /* - * Allow ldisc referencing to occur as soon as the driver - * ldisc callback completes. + * Allow ldisc referencing to occur again */ tty_ldisc_enable(tty); if (o_tty) tty_ldisc_enable(o_tty); - /* Restart it in case no characters kick it off. Safe if + /* Restart the work queue in case no characters kick it off. Safe if already running */ if (work) schedule_delayed_work(&tty->buf.work, 1); + if (o_work) + schedule_delayed_work(&o_tty->buf.work, 1); + mutex_unlock(&tty->ldisc_mutex); return retval; } +/** + * tty_reset_termios - reset terminal state + * @tty: tty to reset + * + * Restore a terminal to the driver default state. + */ + +static void tty_reset_termios(struct tty_struct *tty) +{ + mutex_lock(&tty->termios_mutex); + *tty->termios = tty->driver->init_termios; + tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios); + tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios); + mutex_unlock(&tty->termios_mutex); +} + + +/** + * tty_ldisc_reinit - reinitialise the tty ldisc + * @tty: tty to reinit + * + * Switch the tty back to N_TTY line discipline and leave the + * ldisc state closed + */ + +static void tty_ldisc_reinit(struct tty_struct *tty) +{ + struct tty_ldisc *ld; + + tty_ldisc_close(tty, tty->ldisc); + tty_ldisc_put(tty->ldisc); + tty->ldisc = NULL; + /* + * Switch the line discipline back + */ + ld = tty_ldisc_get(N_TTY); + BUG_ON(IS_ERR(ld)); + tty_ldisc_assign(tty, ld); + tty_set_termios_ldisc(tty, N_TTY); +} + +/** + * tty_ldisc_hangup - hangup ldisc reset + * @tty: tty being hung up + * + * Some tty devices reset their termios when they receive a hangup + * event. In that situation we must also switch back to N_TTY properly + * before we reset the termios data. + * + * Locking: We can take the ldisc mutex as the rest of the code is + * careful to allow for this. + * + * In the pty pair case this occurs in the close() path of the + * tty itself so we must be careful about locking rules. + */ + +void tty_ldisc_hangup(struct tty_struct *tty) +{ + struct tty_ldisc *ld; + + /* + * FIXME! What are the locking issues here? This may me overdoing + * things... This question is especially important now that we've + * removed the irqlock. + */ + ld = tty_ldisc_ref(tty); + if (ld != NULL) { + /* We may have no line discipline at this point */ + if (ld->ops->flush_buffer) + ld->ops->flush_buffer(tty); + tty_driver_flush_buffer(tty); + if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && + ld->ops->write_wakeup) + ld->ops->write_wakeup(tty); + if (ld->ops->hangup) + ld->ops->hangup(tty); + tty_ldisc_deref(ld); + } + /* + * FIXME: Once we trust the LDISC code better we can wait here for + * ldisc completion and fix the driver call race + */ + wake_up_interruptible_poll(&tty->write_wait, POLLOUT); + wake_up_interruptible_poll(&tty->read_wait, POLLIN); + /* + * Shutdown the current line discipline, and reset it to + * N_TTY. + */ + if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) { + /* Avoid racing set_ldisc */ + mutex_lock(&tty->ldisc_mutex); + /* Switch back to N_TTY */ + tty_ldisc_reinit(tty); + /* At this point we have a closed ldisc and we want to + reopen it. We could defer this to the next open but + it means auditing a lot of other paths so this is a FIXME */ + WARN_ON(tty_ldisc_open(tty, tty->ldisc)); + tty_ldisc_enable(tty); + mutex_unlock(&tty->ldisc_mutex); + tty_reset_termios(tty); + } +} /** * tty_ldisc_setup - open line discipline @@ -610,24 +808,23 @@ restart: * @o_tty: pair tty for pty/tty pairs * * Called during the initial open of a tty/pty pair in order to set up the - * line discplines and bind them to the tty. + * line disciplines and bind them to the tty. This has no locking issues + * as the device isn't yet active. */ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) { - struct tty_ldisc *ld = &tty->ldisc; + struct tty_ldisc *ld = tty->ldisc; int retval; - if (ld->ops->open) { - retval = (ld->ops->open)(tty); - if (retval) - return retval; - } - if (o_tty && o_tty->ldisc.ops->open) { - retval = (o_tty->ldisc.ops->open)(o_tty); + retval = tty_ldisc_open(tty, ld); + if (retval) + return retval; + + if (o_tty) { + retval = tty_ldisc_open(o_tty, o_tty->ldisc); if (retval) { - if (ld->ops->close) - (ld->ops->close)(tty); + tty_ldisc_close(tty, ld); return retval; } tty_ldisc_enable(o_tty); @@ -635,32 +832,25 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) tty_ldisc_enable(tty); return 0; } - /** * tty_ldisc_release - release line discipline * @tty: tty being shut down * @o_tty: pair tty for pty/tty pairs * - * Called during the final close of a tty/pty pair in order to shut down the - * line discpline layer. + * Called during the final close of a tty/pty pair in order to shut down + * the line discpline layer. On exit the ldisc assigned is N_TTY and the + * ldisc has not been opened. */ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) { - unsigned long flags; - struct tty_ldisc ld; /* * Prevent flush_to_ldisc() from rescheduling the work for later. Then * kill any delayed work. As this is the final close it does not * race with the set_ldisc code path. */ - clear_bit(TTY_LDISC, &tty->flags); - cancel_delayed_work(&tty->buf.work); - - /* - * Wait for ->hangup_work and ->buf.work handlers to terminate - */ + tty_ldisc_halt(tty); flush_scheduled_work(); /* @@ -668,38 +858,19 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) * side waiters as the file is closing so user count on the file * side is zero. */ - spin_lock_irqsave(&tty_ldisc_lock, flags); - while (tty->ldisc.refcount) { - spin_unlock_irqrestore(&tty_ldisc_lock, flags); - wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0); - spin_lock_irqsave(&tty_ldisc_lock, flags); - } - spin_unlock_irqrestore(&tty_ldisc_lock, flags); + + tty_ldisc_wait_idle(tty); + /* * Shutdown the current line discipline, and reset it to N_TTY. * * FIXME: this MUST get fixed for the new reflocking */ - if (tty->ldisc.ops->close) - (tty->ldisc.ops->close)(tty); - tty_ldisc_put(tty->ldisc.ops); - /* - * Switch the line discipline back - */ - WARN_ON(tty_ldisc_get(N_TTY, &ld)); - tty_ldisc_assign(tty, &ld); - tty_set_termios_ldisc(tty, N_TTY); - if (o_tty) { - /* FIXME: could o_tty be in setldisc here ? */ - clear_bit(TTY_LDISC, &o_tty->flags); - if (o_tty->ldisc.ops->close) - (o_tty->ldisc.ops->close)(o_tty); - tty_ldisc_put(o_tty->ldisc.ops); - WARN_ON(tty_ldisc_get(N_TTY, &ld)); - tty_ldisc_assign(o_tty, &ld); - tty_set_termios_ldisc(o_tty, N_TTY); - } + tty_ldisc_reinit(tty); + /* This will need doing differently if we need to lock */ + if (o_tty) + tty_ldisc_release(o_tty, NULL); } /** @@ -712,10 +883,10 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty) void tty_ldisc_init(struct tty_struct *tty) { - struct tty_ldisc ld; - if (tty_ldisc_get(N_TTY, &ld) < 0) + struct tty_ldisc *ld = tty_ldisc_get(N_TTY); + if (IS_ERR(ld)) panic("n_tty: init_tty"); - tty_ldisc_assign(tty, &ld); + tty_ldisc_assign(tty, ld); } void tty_ldisc_begin(void) diff --git a/drivers/char/tty_port.c b/drivers/char/tty_port.c index 9b8004c7268..62dadfc95e3 100644 --- a/drivers/char/tty_port.c +++ b/drivers/char/tty_port.c @@ -137,7 +137,7 @@ int tty_port_carrier_raised(struct tty_port *port) EXPORT_SYMBOL(tty_port_carrier_raised); /** - * tty_port_raise_dtr_rts - Riase DTR/RTS + * tty_port_raise_dtr_rts - Raise DTR/RTS * @port: tty port * * Wrapper for the DTR/RTS raise logic. For the moment this is used @@ -147,12 +147,28 @@ EXPORT_SYMBOL(tty_port_carrier_raised); void tty_port_raise_dtr_rts(struct tty_port *port) { - if (port->ops->raise_dtr_rts) - port->ops->raise_dtr_rts(port); + if (port->ops->dtr_rts) + port->ops->dtr_rts(port, 1); } EXPORT_SYMBOL(tty_port_raise_dtr_rts); /** + * tty_port_lower_dtr_rts - Lower DTR/RTS + * @port: tty port + * + * Wrapper for the DTR/RTS raise logic. For the moment this is used + * to hide some internal details. This will eventually become entirely + * internal to the tty port. + */ + +void tty_port_lower_dtr_rts(struct tty_port *port) +{ + if (port->ops->dtr_rts) + port->ops->dtr_rts(port, 0); +} +EXPORT_SYMBOL(tty_port_lower_dtr_rts); + +/** * tty_port_block_til_ready - Waiting logic for tty open * @port: the tty port being opened * @tty: the tty device being bound @@ -167,7 +183,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts); * - port flags and counts * * The passed tty_port must implement the carrier_raised method if it can - * do carrier detect and the raise_dtr_rts method if it supports software + * do carrier detect and the dtr_rts method if it supports software * management of these lines. Note that the dtr/rts raise is done each * iteration as a hangup may have previously dropped them while we wait. */ @@ -182,7 +198,8 @@ int tty_port_block_til_ready(struct tty_port *port, /* block if port is in the process of being closed */ if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) { - interruptible_sleep_on(&port->close_wait); + wait_event_interruptible(port->close_wait, + !(port->flags & ASYNC_CLOSING)); if (port->flags & ASYNC_HUP_NOTIFY) return -EAGAIN; else @@ -205,7 +222,6 @@ int tty_port_block_til_ready(struct tty_port *port, before the next open may complete */ retval = 0; - add_wait_queue(&port->open_wait, &wait); /* The port lock protects the port counts */ spin_lock_irqsave(&port->lock, flags); @@ -219,7 +235,7 @@ int tty_port_block_til_ready(struct tty_port *port, if (tty->termios->c_cflag & CBAUD) tty_port_raise_dtr_rts(port); - set_current_state(TASK_INTERRUPTIBLE); + prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE); /* Check for a hangup or uninitialised port. Return accordingly */ if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) { if (port->flags & ASYNC_HUP_NOTIFY) @@ -240,8 +256,7 @@ int tty_port_block_til_ready(struct tty_port *port, } schedule(); } - set_current_state(TASK_RUNNING); - remove_wait_queue(&port->open_wait, &wait); + finish_wait(&port->open_wait, &wait); /* Update counts. A parallel hangup will have set count to zero and we must not mess that up further */ @@ -292,6 +307,17 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f if (port->flags & ASYNC_INITIALIZED && port->closing_wait != ASYNC_CLOSING_WAIT_NONE) tty_wait_until_sent(tty, port->closing_wait); + if (port->drain_delay) { + unsigned int bps = tty_get_baud_rate(tty); + long timeout; + + if (bps > 1200) + timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps, + HZ / 10); + else + timeout = 2 * HZ; + schedule_timeout_interruptible(timeout); + } return 1; } EXPORT_SYMBOL(tty_port_close_start); @@ -302,6 +328,9 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty) tty_ldisc_flush(tty); + if (tty->termios->c_cflag & HUPCL) + tty_port_lower_dtr_rts(port); + spin_lock_irqsave(&port->lock, flags); tty->closing = 0; diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c index 537da1cde16..e59b6dee9ae 100644 --- a/drivers/ide/alim15x3.c +++ b/drivers/ide/alim15x3.c @@ -402,27 +402,23 @@ static u8 ali_cable_detect(ide_hwif_t *hwif) return cbl; } -#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) +#ifndef CONFIG_SPARC64 /** * init_hwif_ali15x3 - Initialize the ALI IDE x86 stuff * @hwif: interface to configure * * Obtain the IRQ tables for an ALi based IDE solution on the PC * class platforms. This part of the code isn't applicable to the - * Sparc and PowerPC systems. + * Sparc systems. */ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif) { - struct pci_dev *dev = to_pci_dev(hwif->dev); u8 ideic, inmir; s8 irq_routing_table[] = { -1, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 }; int irq = -1; - if (dev->device == PCI_DEVICE_ID_AL_M5229) - hwif->irq = hwif->channel ? 15 : 14; - if (isa_dev) { /* * read IDE interface control @@ -455,7 +451,7 @@ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif) } #else #define init_hwif_ali15x3 NULL -#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */ +#endif /* CONFIG_SPARC64 */ /** * init_dma_ali15x3 - set up DMA on ALi15x3 diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index 7201b176d75..afe5a432387 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -80,34 +80,6 @@ void ide_init_pc(struct ide_atapi_pc *pc) EXPORT_SYMBOL_GPL(ide_init_pc); /* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. - */ -static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, - struct ide_atapi_pc *pc, struct request *rq) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->buffer = (char *)pc; - rq->rq_disk = disk; - - if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; - } - - memcpy(rq->cmd, pc->c, 12); - if (drive->media == ide_tape) - rq->cmd[13] = REQ_IDETAPE_PC1; - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - -/* * Add a special packet command request to the tail of the request queue, * and wait for it to be serviced. */ @@ -119,19 +91,21 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; - rq->buffer = (char *)pc; + rq->special = (char *)pc; if (pc->req_xfer) { - rq->data = pc->buf; - rq->data_len = pc->req_xfer; + error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer, + GFP_NOIO); + if (error) + goto put_req; } memcpy(rq->cmd, pc->c, 12); if (drive->media == ide_tape) rq->cmd[13] = REQ_IDETAPE_PC1; error = blk_execute_rq(drive->queue, disk, rq, 0); +put_req: blk_put_request(rq); - return error; } EXPORT_SYMBOL_GPL(ide_queue_pc_tail); @@ -191,20 +165,103 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) } EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd); +void ide_prep_sense(ide_drive_t *drive, struct request *rq) +{ + struct request_sense *sense = &drive->sense_data; + struct request *sense_rq = &drive->sense_rq; + unsigned int cmd_len, sense_len; + int err; + + debug_log("%s: enter\n", __func__); + + switch (drive->media) { + case ide_floppy: + cmd_len = 255; + sense_len = 18; + break; + case ide_tape: + cmd_len = 20; + sense_len = 20; + break; + default: + cmd_len = 18; + sense_len = 18; + } + + BUG_ON(sense_len > sizeof(*sense)); + + if (blk_sense_request(rq) || drive->sense_rq_armed) + return; + + memset(sense, 0, sizeof(*sense)); + + blk_rq_init(rq->q, sense_rq); + + err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len, + GFP_NOIO); + if (unlikely(err)) { + if (printk_ratelimit()) + printk(KERN_WARNING "%s: failed to map sense buffer\n", + drive->name); + return; + } + + sense_rq->rq_disk = rq->rq_disk; + sense_rq->cmd[0] = GPCMD_REQUEST_SENSE; + sense_rq->cmd[4] = cmd_len; + sense_rq->cmd_type = REQ_TYPE_SENSE; + sense_rq->cmd_flags |= REQ_PREEMPT; + + if (drive->media == ide_tape) + sense_rq->cmd[13] = REQ_IDETAPE_PC1; + + drive->sense_rq_armed = true; +} +EXPORT_SYMBOL_GPL(ide_prep_sense); + +int ide_queue_sense_rq(ide_drive_t *drive, void *special) +{ + /* deferred failure from ide_prep_sense() */ + if (!drive->sense_rq_armed) { + printk(KERN_WARNING "%s: failed queue sense request\n", + drive->name); + return -ENOMEM; + } + + drive->sense_rq.special = special; + drive->sense_rq_armed = false; + + drive->hwif->rq = NULL; + + elv_add_request(drive->queue, &drive->sense_rq, + ELEVATOR_INSERT_FRONT, 0); + return 0; +} +EXPORT_SYMBOL_GPL(ide_queue_sense_rq); + /* * Called when an error was detected during the last packet command. - * We queue a request sense packet command in the head of the request list. + * We queue a request sense packet command at the head of the request + * queue. */ -void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk) +void ide_retry_pc(ide_drive_t *drive) { - struct request *rq = &drive->request_sense_rq; + struct request *sense_rq = &drive->sense_rq; struct ide_atapi_pc *pc = &drive->request_sense_pc; (void)ide_read_error(drive); - ide_create_request_sense_cmd(drive, pc); + + /* init pc from sense_rq */ + ide_init_pc(pc); + memcpy(pc->c, sense_rq->cmd, 12); + pc->buf = bio_data(sense_rq->bio); /* pointer to mapped address */ + pc->req_xfer = sense_rq->data_len; + if (drive->media == ide_tape) set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - ide_queue_pc_head(drive, disk, pc, rq); + + if (ide_queue_sense_rq(drive, pc)) + ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq)); } EXPORT_SYMBOL_GPL(ide_retry_pc); @@ -276,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) struct ide_cmd *cmd = &hwif->cmd; struct request *rq = hwif->rq; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xferfunc; unsigned int timeout, done; u16 bcount; u8 stat, ireason, dsc = 0; @@ -303,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) drive->name, rq_data_dir(pc->rq) ? "write" : "read"); pc->flags |= PC_FLAG_DMA_ERROR; - } else { + } else pc->xferred = pc->req_xfer; - if (drive->pc_update_buffers) - drive->pc_update_buffers(drive, pc); - } debug_log("%s: DMA finished\n", drive->name); } @@ -343,7 +396,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) debug_log("[cmd %x]: check condition\n", rq->cmd[0]); /* Retry operation */ - ide_retry_pc(drive, rq->rq_disk); + ide_retry_pc(drive); /* queued, but not started */ return ide_stopped; @@ -353,6 +406,12 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0) dsc = 1; + /* + * ->pc_callback() might change rq->data_len for + * residual count, cache total length. + */ + done = blk_rq_bytes(rq); + /* Command finished - Call the callback function */ uptodate = drive->pc_callback(drive, dsc); @@ -361,7 +420,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) if (blk_special_request(rq)) { rq->errors = 0; - done = blk_rq_bytes(rq); error = 0; } else { @@ -370,11 +428,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) rq->errors = -EIO; } - if (drive->media == ide_tape) - done = ide_rq_bytes(rq); /* FIXME */ - else - done = blk_rq_bytes(rq); - error = uptodate ? 0 : -EIO; } @@ -407,21 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive) return ide_do_reset(drive); } - xferfunc = write ? tp_ops->output_data : tp_ops->input_data; - - if (drive->media == ide_floppy && pc->buf == NULL) { - done = min_t(unsigned int, bcount, cmd->nleft); - ide_pio_bytes(drive, cmd, write, done); - } else if (drive->media == ide_tape && pc->bh) { - done = drive->pc_io_buffers(drive, pc, bcount, write); - } else { - done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred); - xferfunc(drive, NULL, pc->cur_pos, done); - } + done = min_t(unsigned int, bcount, cmd->nleft); + ide_pio_bytes(drive, cmd, write, done); - /* Update the current position */ + /* Update transferred byte count */ pc->xferred += done; - pc->cur_pos += done; bcount -= done; @@ -599,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd) /* We haven't transferred any data yet */ pc->xferred = 0; - pc->cur_pos = pc->buf; valid_tf = IDE_VALID_DEVICE; bcount = ((drive->media == ide_tape) ? diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 925eb9e245d..a75e4ee1cd1 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -206,54 +206,25 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive, ide_cd_log_error(drive->name, failed_command, sense); } -static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense, - struct request *failed_command) -{ - struct cdrom_info *info = drive->driver_data; - struct request *rq = &drive->request_sense_rq; - - ide_debug_log(IDE_DBG_SENSE, "enter"); - - if (sense == NULL) - sense = &info->sense_data; - - /* stuff the sense request in front of our current request */ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_ATA_PC; - rq->rq_disk = info->disk; - - rq->data = sense; - rq->cmd[0] = GPCMD_REQUEST_SENSE; - rq->cmd[4] = 18; - rq->data_len = 18; - - rq->cmd_type = REQ_TYPE_SENSE; - rq->cmd_flags |= REQ_PREEMPT; - - /* NOTE! Save the failed command in "rq->buffer" */ - rq->buffer = (void *) failed_command; - - if (failed_command) - ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x", - failed_command->cmd[0]); - - drive->hwif->rq = NULL; - - elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0); -} - static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq) { /* - * For REQ_TYPE_SENSE, "rq->buffer" points to the original - * failed request + * For REQ_TYPE_SENSE, "rq->special" points to the original + * failed request. Also, the sense data should be read + * directly from rq which might be different from the original + * sense buffer if it got copied during mapping. */ - struct request *failed = (struct request *)rq->buffer; - struct cdrom_info *info = drive->driver_data; - void *sense = &info->sense_data; + struct request *failed = (struct request *)rq->special; + void *sense = bio_data(rq->bio); if (failed) { if (failed->sense) { + /* + * Sense is always read into drive->sense_data. + * Copy back if the failed request has its + * sense pointer set. + */ + memcpy(failed->sense, sense, 18); sense = failed->sense; failed->sense_len = rq->sense_len; } @@ -428,7 +399,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat) /* if we got a CHECK_CONDITION status, queue a request sense command */ if (stat & ATA_ERR) - cdrom_queue_request_sense(drive, NULL, NULL); + return ide_queue_sense_rq(drive, NULL) ? 2 : 1; return 1; end_request: @@ -442,8 +413,7 @@ end_request: hwif->rq = NULL; - cdrom_queue_request_sense(drive, rq->sense, rq); - return 1; + return ide_queue_sense_rq(drive, rq) ? 2 : 1; } else return 2; } @@ -503,14 +473,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd) * and some drives don't send them. Sigh. */ if (rq->cmd[0] == GPCMD_REQUEST_SENSE && - cmd->nleft > 0 && cmd->nleft <= 5) { - unsigned int ofs = cmd->nbytes - cmd->nleft; - - while (cmd->nleft > 0) { - *((u8 *)rq->data + ofs++) = 0; - cmd->nleft--; - } - } + cmd->nleft > 0 && cmd->nleft <= 5) + cmd->nleft = 0; } int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, @@ -543,8 +507,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd, rq->cmd_flags |= cmd_flags; rq->timeout = timeout; if (buffer) { - rq->data = buffer; - rq->data_len = *bufflen; + error = blk_rq_map_kern(drive->queue, rq, buffer, + *bufflen, GFP_NOIO); + if (error) { + blk_put_request(rq); + return error; + } } error = blk_execute_rq(drive->queue, info->disk, rq, 0); @@ -838,15 +806,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) drive->dma = 0; /* sg request */ - if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { + if (rq->bio) { struct request_queue *q = drive->queue; + char *buf = bio_data(rq->bio); unsigned int alignment; - char *buf; - - if (rq->bio) - buf = bio_data(rq->bio); - else - buf = rq->data; drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA); @@ -896,6 +859,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, goto out_end; } + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index 1d97101099c..93a3cf1b0f3 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -87,10 +87,6 @@ struct cdrom_info { struct atapi_toc *toc; - /* The result of the last successful request sense command - on this device. */ - struct request_sense sense_data; - u8 max_speed; /* Max speed of the drive. */ u8 current_speed; /* Current speed of the drive. */ diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index a9fbe2c3121..c2438804d3c 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) cmd->protocol = ATA_PROT_NODATA; rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = cmd; } diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index a0b8cab1d9a..d9123ecae4a 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error) /* * un-busy drive etc and make sure request is sane */ - rq = hwif->rq; - if (!rq) - goto out; - - hwif->rq = NULL; - - rq->errors = 0; - - if (!rq->bio) - goto out; - - rq->sector = rq->bio->bi_sector; - rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9; - rq->hard_cur_sectors = rq->current_nr_sectors; - rq->buffer = bio_data(rq->bio); -out: + if (rq) { + hwif->rq = NULL; + rq->errors = 0; + } return ret; } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 2b4868d95f8..537b7c55803 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -134,13 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive, drive->pc = pc; if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR)) ide_floppy_report_error(floppy, pc); + /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } @@ -216,15 +220,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy, ide_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - if (rq->data_len && rq_data_dir(rq) == WRITE) - pc->flags |= PC_FLAG_WRITING; - pc->buf = rq->data; - if (rq->bio) + if (rq->data_len) { pc->flags |= PC_FLAG_DMA_OK; - /* - * possibly problematic, doesn't look like ide-floppy correctly - * handled scattered requests if dma fails... - */ + if (rq_data_dir(rq) == WRITE) + pc->flags |= PC_FLAG_WRITING; + } + /* pio will be performed by ide_pio_bytes() which handles sg fine */ + pc->buf = NULL; pc->req_xfer = pc->buf_size = rq->data_len; } @@ -265,8 +267,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, } pc = &floppy->queued_pc; idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block); - } else if (blk_special_request(rq)) { - pc = (struct ide_atapi_pc *) rq->buffer; + } else if (blk_special_request(rq) || blk_sense_request(rq)) { + pc = (struct ide_atapi_pc *)rq->special; } else if (blk_pc_request(rq)) { pc = &floppy->queued_pc; idefloppy_blockpc_cmd(floppy, pc, rq); @@ -275,6 +277,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive, goto out_end; } + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 6415a2e2ba8..41d804065d3 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -248,14 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd) struct scatterlist *sg = hwif->sg_table; struct request *rq = cmd->rq; - if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { - sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE); - cmd->sg_nents = 1; - } else if (!rq->bio) { - sg_init_one(sg, rq->data, rq->data_len); - cmd->sg_nents = 1; - } else - cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); + cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg); } EXPORT_SYMBOL_GPL(ide_map_sg); @@ -371,7 +364,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) return execute_drive_cmd(drive, rq); else if (blk_pm_request(rq)) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk("%s: start_power_step(step: %d)\n", drive->name, pm->pm_step); @@ -484,6 +477,9 @@ void do_ide_request(struct request_queue *q) spin_unlock_irq(q->queue_lock); + /* HLD do_request() callback might sleep, make sure it's okay */ + might_sleep(); + if (ide_lock_host(host, hwif)) goto plug_device_2; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index c1c25ebbaa1..5991b23793f 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive) rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd_len = 1; rq->cmd[0] = REQ_DRIVE_RESET; - rq->cmd_flags |= REQ_SOFTBARRIER; if (blk_execute_rq(drive->queue, NULL, rq, 1)) ret = rq->errors; blk_put_request(rq); diff --git a/drivers/ide/ide-park.c b/drivers/ide/ide-park.c index 310d03f2b5b..a914023d6d0 100644 --- a/drivers/ide/ide-park.c +++ b/drivers/ide/ide-park.c @@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout) start_queue = 1; spin_unlock_irq(&hwif->lock); - if (start_queue) { - spin_lock_irq(q->queue_lock); - blk_start_queueing(q); - spin_unlock_irq(q->queue_lock); - } + if (start_queue) + blk_run_queue(q); return; } spin_unlock_irq(&hwif->lock); diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 0d8a151c0a0..ba1488bd843 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int ret; /* call ACPI _GTM only once */ @@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ide_acpi_get_timing(hwif); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_SUSPEND; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_SUSPEND; if (mesg.event == PM_EVENT_PRETHAW) mesg.event = PM_EVENT_FREEZE; @@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev) ide_hwif_t *hwif = drive->hwif; struct request *rq; struct request_pm_state rqpm; - struct ide_cmd cmd; int err; /* call ACPI _PS0 / _STM only once */ @@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev) ide_acpi_exec_tfs(drive); memset(&rqpm, 0, sizeof(rqpm)); - memset(&cmd, 0, sizeof(cmd)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_PM_RESUME; rq->cmd_flags |= REQ_PREEMPT; - rq->special = &cmd; - rq->data = &rqpm; + rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; @@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev) void ide_complete_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; #ifdef DEBUG_PM printk(KERN_INFO "%s: complete_power_step(step: %d)\n", @@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq) ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; - struct ide_cmd *cmd = rq->special; - - memset(cmd, 0, sizeof(*cmd)); + struct request_pm_state *pm = rq->special; + struct ide_cmd cmd = { }; switch (pm->pm_step) { case IDE_PM_FLUSH_CACHE: /* Suspend step 1 (flush cache) */ @@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; } if (ata_id_flush_ext_enabled(drive->id)) - cmd->tf.command = ATA_CMD_FLUSH_EXT; + cmd.tf.command = ATA_CMD_FLUSH_EXT; else - cmd->tf.command = ATA_CMD_FLUSH; + cmd.tf.command = ATA_CMD_FLUSH; goto out_do_tf; case IDE_PM_STANDBY: /* Suspend step 2 (standby) */ - cmd->tf.command = ATA_CMD_STANDBYNOW1; + cmd.tf.command = ATA_CMD_STANDBYNOW1; goto out_do_tf; case IDE_PM_RESTORE_PIO: /* Resume step 1 (restore PIO) */ ide_set_max_pio(drive); @@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) ide_complete_power_step(drive, rq); return ide_stopped; case IDE_PM_IDLE: /* Resume step 2 (idle) */ - cmd->tf.command = ATA_CMD_IDLEIMMEDIATE; + cmd.tf.command = ATA_CMD_IDLEIMMEDIATE; goto out_do_tf; case IDE_PM_RESTORE_DMA: /* Resume step 3 (restore DMA) */ /* @@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq) return ide_stopped; out_do_tf: - cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; - cmd->valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; - cmd->protocol = ATA_PROT_NODATA; + cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE; + cmd.valid.in.tf = IDE_VALID_IN_TF | IDE_VALID_DEVICE; + cmd.protocol = ATA_PROT_NODATA; - return do_rw_taskfile(drive, cmd); + return do_rw_taskfile(drive, &cmd); } /** @@ -181,7 +173,7 @@ out_do_tf: void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) { struct request_queue *q = drive->queue; - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; unsigned long flags; ide_complete_power_step(drive, rq); @@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq) void ide_check_pm_state(ide_drive_t *drive, struct request *rq) { - struct request_pm_state *pm = rq->data; + struct request_pm_state *pm = rq->special; if (blk_pm_suspend_request(rq) && pm->pm_step == IDE_PM_START_SUSPEND) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 3a53e0834cf..203bbeac182 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -131,13 +131,6 @@ enum { IDETAPE_DIR_WRITE = (1 << 2), }; -struct idetape_bh { - u32 b_size; - atomic_t b_count; - struct idetape_bh *b_reqnext; - char *b_data; -}; - /* Tape door status */ #define DOOR_UNLOCKED 0 #define DOOR_LOCKED 1 @@ -219,18 +212,12 @@ typedef struct ide_tape_obj { /* Data buffer size chosen based on the tape's recommendation */ int buffer_size; - /* merge buffer */ - struct idetape_bh *merge_bh; - /* size of the merge buffer */ - int merge_bh_size; - /* pointer to current buffer head within the merge buffer */ - struct idetape_bh *bh; - char *b_data; - int b_count; - - int pages_per_buffer; - /* Wasted space in each stage */ - int excess_bh_size; + /* Staging buffer of buffer_size bytes */ + void *buf; + /* The read/write cursor */ + void *cur; + /* The number of valid bytes in buf */ + size_t valid; /* Measures average tape speed */ unsigned long avg_time; @@ -297,84 +284,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) return tape; } -static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - while (bcount) { - if (bh == NULL) - break; - count = min( - (unsigned int)(bh->b_size - atomic_read(&bh->b_count)), - bcount); - drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data + - atomic_read(&bh->b_count), count); - bcount -= count; - atomic_add(count, &bh->b_count); - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } - } - - pc->bh = bh; - - return bcount; -} - -static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount) -{ - struct idetape_bh *bh = pc->bh; - int count; - - while (bcount) { - if (bh == NULL) - break; - count = min((unsigned int)pc->b_count, (unsigned int)bcount); - drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count); - bcount -= count; - pc->b_data += count; - pc->b_count -= count; - if (!pc->b_count) { - bh = bh->b_reqnext; - pc->bh = bh; - if (bh) { - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); - } - } - } - - return bcount; -} - -static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - struct idetape_bh *bh = pc->bh; - int count; - unsigned int bcount = pc->xferred; - - if (pc->flags & PC_FLAG_WRITING) - return; - while (bcount) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return; - } - count = min((unsigned int)bh->b_size, (unsigned int)bcount); - atomic_set(&bh->b_count, count); - if (atomic_read(&bh->b_count) == bh->b_size) - bh = bh->b_reqnext; - bcount -= count; - } - pc->bh = bh; -} - /* * called on each failed packet command retry to analyze the request sense. We * currently do not utilize this information. @@ -392,12 +301,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) pc->c[0], tape->sense_key, tape->asc, tape->ascq); /* Correct pc->xferred by asking the tape. */ - if (pc->flags & PC_FLAG_DMA_ERROR) { + if (pc->flags & PC_FLAG_DMA_ERROR) pc->xferred = pc->req_xfer - tape->blk_size * get_unaligned_be32(&sense[3]); - idetape_update_buffers(drive, pc); - } /* * If error was the result of a zero-length read or write command, @@ -436,29 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense) } } -/* Free data buffers completely. */ -static void ide_tape_kfree_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *prev_bh, *bh = tape->merge_bh; - - while (bh) { - u32 size = bh->b_size; - - while (size) { - unsigned int order = fls(size >> PAGE_SHIFT)-1; - - if (bh->b_data) - free_pages((unsigned long)bh->b_data, order); - - size &= (order-1); - bh->b_data += (1 << order) * PAGE_SIZE; - } - prev_bh = bh; - bh = bh->b_reqnext; - kfree(prev_bh); - } -} - static void ide_tape_handle_dsc(ide_drive_t *); static int ide_tape_callback(ide_drive_t *drive, int dsc) @@ -496,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc) } tape->first_frame += blocks; - rq->current_nr_sectors -= blocks; + rq->data_len -= blocks * tape->blk_size; if (pc->error) { uptodate = 0; @@ -558,19 +442,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive) idetape_postpone_request(drive); } -static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - unsigned int bleft; - - if (write) - bleft = idetape_output_buffers(drive, pc, bcount); - else - bleft = idetape_input_buffers(drive, pc, bcount); - - return bcount - bleft; -} - /* * Packet Command Interface * @@ -622,6 +493,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, if (pc->retries > IDETAPE_MAX_PC_RETRIES || (pc->flags & PC_FLAG_ABORT)) { + unsigned int done = blk_rq_bytes(drive->hwif->rq); + /* * We will "abort" retrying a packet command in case legitimate * error code was received (crossing a filemark, or end of the @@ -641,8 +514,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive, /* Giving up */ pc->error = IDE_DRV_ERROR_GENERAL; } + drive->failed_pc = NULL; drive->pc_callback(drive, 0); + ide_complete_rq(drive, -EIO, done); return ide_stopped; } debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]); @@ -695,7 +570,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) printk(KERN_ERR "ide-tape: %s: I/O error, ", tape->name); /* Retry operation */ - ide_retry_pc(drive, tape->disk); + ide_retry_pc(drive); return ide_stopped; } pc->error = 0; @@ -711,27 +586,22 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, struct ide_atapi_pc *pc, struct request *rq, u8 opcode) { - struct idetape_bh *bh = (struct idetape_bh *)rq->special; - unsigned int length = rq->current_nr_sectors; + unsigned int length = rq->nr_sectors; ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; - pc->bh = bh; pc->buf = NULL; pc->buf_size = length * tape->blk_size; pc->req_xfer = pc->buf_size; if (pc->req_xfer == tape->buffer_size) pc->flags |= PC_FLAG_DMA_OK; - if (opcode == READ_6) { + if (opcode == READ_6) pc->c[0] = READ_6; - atomic_set(&bh->b_count, 0); - } else if (opcode == WRITE_6) { + else if (opcode == WRITE_6) { pc->c[0] = WRITE_6; pc->flags |= PC_FLAG_WRITING; - pc->b_data = bh->b_data; - pc->b_count = atomic_read(&bh->b_count); } memcpy(rq->cmd, pc->c, 12); @@ -747,12 +617,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, struct ide_cmd cmd; u8 stat; - debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu," - " current_nr_sectors: %u\n", - (unsigned long long)rq->sector, rq->nr_sectors, - rq->current_nr_sectors); + debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n", + (unsigned long long)rq->sector, rq->nr_sectors); - if (!blk_special_request(rq)) { + if (!(blk_special_request(rq) || blk_sense_request(rq))) { /* We do not support buffer cache originated requests. */ printk(KERN_NOTICE "ide-tape: %s: Unsupported request in " "request queue (%d)\n", drive->name, rq->cmd_type); @@ -828,7 +696,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, goto out; } if (rq->cmd[13] & REQ_IDETAPE_PC1) { - pc = (struct ide_atapi_pc *) rq->buffer; + pc = (struct ide_atapi_pc *)rq->special; rq->cmd[13] &= ~(REQ_IDETAPE_PC1); rq->cmd[13] |= REQ_IDETAPE_PC2; goto out; @@ -840,6 +708,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, BUG(); out: + /* prepare sense request for this command */ + ide_prep_sense(drive, rq); + memset(&cmd, 0, sizeof(cmd)); if (rq_data_dir(rq)) @@ -847,167 +718,10 @@ out: cmd.rq = rq; - return ide_tape_issue_pc(drive, &cmd, pc); -} - -/* - * The function below uses __get_free_pages to allocate a data buffer of size - * tape->buffer_size (or a bit more). We attempt to combine sequential pages as - * much as possible. - * - * It returns a pointer to the newly allocated buffer, or NULL in case of - * failure. - */ -static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape, - int full, int clear) -{ - struct idetape_bh *prev_bh, *bh, *merge_bh; - int pages = tape->pages_per_buffer; - unsigned int order, b_allocd; - char *b_data = NULL; - - merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - bh = merge_bh; - if (bh == NULL) - goto abort; - - order = fls(pages) - 1; - bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!bh->b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - pages &= (order-1); - - if (clear) - memset(bh->b_data, 0, b_allocd); - bh->b_reqnext = NULL; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - - while (pages) { - order = fls(pages) - 1; - b_data = (char *) __get_free_pages(GFP_KERNEL, order); - if (!b_data) - goto abort; - b_allocd = (1 << order) * PAGE_SIZE; - - if (clear) - memset(b_data, 0, b_allocd); - - /* newly allocated page frames below buffer header or ...*/ - if (bh->b_data == b_data + b_allocd) { - bh->b_size += b_allocd; - bh->b_data -= b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - /* they are above the header */ - if (b_data == bh->b_data + bh->b_size) { - bh->b_size += b_allocd; - if (full) - atomic_add(b_allocd, &bh->b_count); - continue; - } - prev_bh = bh; - bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL); - if (!bh) { - free_pages((unsigned long) b_data, order); - goto abort; - } - bh->b_reqnext = NULL; - bh->b_data = b_data; - bh->b_size = b_allocd; - atomic_set(&bh->b_count, full ? bh->b_size : 0); - prev_bh->b_reqnext = bh; - - pages &= (order-1); - } - - bh->b_size -= tape->excess_bh_size; - if (full) - atomic_sub(tape->excess_bh_size, &bh->b_count); - return merge_bh; -abort: - ide_tape_kfree_buffer(tape); - return NULL; -} + ide_init_sg_cmd(&cmd, pc->req_xfer); + ide_map_sg(drive, &cmd); -static int idetape_copy_stage_from_user(idetape_tape_t *tape, - const char __user *buf, int n) -{ - struct idetape_bh *bh = tape->bh; - int count; - int ret = 0; - - while (n) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - count = min((unsigned int) - (bh->b_size - atomic_read(&bh->b_count)), - (unsigned int)n); - if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf, - count)) - ret = 1; - n -= count; - atomic_add(count, &bh->b_count); - buf += count; - if (atomic_read(&bh->b_count) == bh->b_size) { - bh = bh->b_reqnext; - if (bh) - atomic_set(&bh->b_count, 0); - } - } - tape->bh = bh; - return ret; -} - -static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf, - int n) -{ - struct idetape_bh *bh = tape->bh; - int count; - int ret = 0; - - while (n) { - if (bh == NULL) { - printk(KERN_ERR "ide-tape: bh == NULL in %s\n", - __func__); - return 1; - } - count = min(tape->b_count, n); - if (copy_to_user(buf, tape->b_data, count)) - ret = 1; - n -= count; - tape->b_data += count; - tape->b_count -= count; - buf += count; - if (!tape->b_count) { - bh = bh->b_reqnext; - tape->bh = bh; - if (bh) { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } - } - } - return ret; -} - -static void idetape_init_merge_buffer(idetape_tape_t *tape) -{ - struct idetape_bh *bh = tape->merge_bh; - tape->bh = tape->merge_bh; - - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) - atomic_set(&bh->b_count, 0); - else { - tape->b_data = bh->b_data; - tape->b_count = atomic_read(&bh->b_count); - } + return ide_tape_issue_pc(drive, &cmd, pc); } /* @@ -1107,10 +821,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive) return; clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags); - tape->merge_bh_size = 0; - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + tape->valid = 0; + if (tape->buf != NULL) { + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; @@ -1164,36 +878,44 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive, * Generate a read/write request for the block device interface and wait for it * to be serviced. */ -static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, - struct idetape_bh *bh) +static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size) { idetape_tape_t *tape = drive->driver_data; struct request *rq; - int ret, errors; + int ret; debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd); + BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE); + BUG_ON(size < 0 || size % tape->blk_size); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_SPECIAL; rq->cmd[13] = cmd; rq->rq_disk = tape->disk; - rq->special = (void *)bh; rq->sector = tape->first_frame; - rq->nr_sectors = blocks; - rq->current_nr_sectors = blocks; - blk_execute_rq(drive->queue, tape->disk, rq, 0); - errors = rq->errors; - ret = tape->blk_size * (blocks - rq->current_nr_sectors); - blk_put_request(rq); + if (size) { + ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size, + __GFP_WAIT); + if (ret) + goto out_put; + } - if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0) - return 0; + blk_execute_rq(drive->queue, tape->disk, rq, 0); - if (tape->merge_bh) - idetape_init_merge_buffer(tape); - if (errors == IDE_DRV_ERROR_GENERAL) - return -EIO; + /* calculate the number of transferred bytes and update buffer state */ + size -= rq->data_len; + tape->cur = tape->buf; + if (cmd == REQ_IDETAPE_READ) + tape->valid = size; + else + tape->valid = 0; + + ret = size; + if (rq->errors == IDE_DRV_ERROR_GENERAL) + ret = -EIO; +out_put: + blk_put_request(rq); return ret; } @@ -1230,153 +952,87 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) pc->flags |= PC_FLAG_WAIT_FOR_DSC; } -/* Queue up a character device originated write request. */ -static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks) -{ - idetape_tape_t *tape = drive->driver_data; - - debug_log(DBG_CHRDEV, "Enter %s\n", __func__); - - return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, - blocks, tape->merge_bh); -} - static void ide_tape_flush_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - int blocks, min; - struct idetape_bh *bh; if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer" " but we are not writing.\n"); return; } - if (tape->merge_bh_size > tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n"); - tape->merge_bh_size = tape->buffer_size; - } - if (tape->merge_bh_size) { - blocks = tape->merge_bh_size / tape->blk_size; - if (tape->merge_bh_size % tape->blk_size) { - unsigned int i; - - blocks++; - i = tape->blk_size - tape->merge_bh_size % - tape->blk_size; - bh = tape->bh->b_reqnext; - while (bh) { - atomic_set(&bh->b_count, 0); - bh = bh->b_reqnext; - } - bh = tape->bh; - while (i) { - if (bh == NULL) { - printk(KERN_INFO "ide-tape: bug," - " bh NULL\n"); - break; - } - min = min(i, (unsigned int)(bh->b_size - - atomic_read(&bh->b_count))); - memset(bh->b_data + atomic_read(&bh->b_count), - 0, min); - atomic_add(min, &bh->b_count); - i -= min; - bh = bh->b_reqnext; - } - } - (void) idetape_add_chrdev_write_request(drive, blocks); - tape->merge_bh_size = 0; - } - if (tape->merge_bh != NULL) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + if (tape->buf) { + size_t aligned = roundup(tape->valid, tape->blk_size); + + memset(tape->cur, 0, aligned - tape->valid); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned); + kfree(tape->buf); + tape->buf = NULL; } tape->chrdev_dir = IDETAPE_DIR_NONE; } -static int idetape_init_read(ide_drive_t *drive) +static int idetape_init_rw(ide_drive_t *drive, int dir) { idetape_tape_t *tape = drive->driver_data; - int bytes_read; + int rc; - /* Initialize read operation */ - if (tape->chrdev_dir != IDETAPE_DIR_READ) { - if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { - ide_tape_flush_merge_buffer(drive); - idetape_flush_tape_buffers(drive); - } - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size should be" - " 0 now\n"); - tape->merge_bh_size = 0; - } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); - if (!tape->merge_bh) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_READ; + BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE); - /* - * Issue a read 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. - * No point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - bytes_read = idetape_queue_rw_tail(drive, - REQ_IDETAPE_READ, 0, - tape->merge_bh); - if (bytes_read < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return bytes_read; - } - } - } + if (tape->chrdev_dir == dir) + return 0; - return 0; -} + if (tape->chrdev_dir == IDETAPE_DIR_READ) + ide_tape_discard_merge_buffer(drive, 1); + else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) { + ide_tape_flush_merge_buffer(drive); + idetape_flush_tape_buffers(drive); + } -/* called from idetape_chrdev_read() to service a chrdev read request. */ -static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks) -{ - idetape_tape_t *tape = drive->driver_data; + if (tape->buf || tape->valid) { + printk(KERN_ERR "ide-tape: valid should be 0 now\n"); + tape->valid = 0; + } - debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks); + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (!tape->buf) + return -ENOMEM; + tape->chrdev_dir = dir; + tape->cur = tape->buf; - /* If we are at a filemark, return a read length of 0 */ - if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) - return 0; - - idetape_init_read(drive); + /* + * Issue a 0 rw command to ensure that DSC handshake is + * switched from completion mode to buffer available mode. No + * point in issuing this if DSC overlap isn't supported, some + * drives (Seagate STT3401A) will return an error. + */ + if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { + int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ + : REQ_IDETAPE_WRITE; + + rc = idetape_queue_rw_tail(drive, cmd, 0); + if (rc < 0) { + kfree(tape->buf); + tape->buf = NULL; + tape->chrdev_dir = IDETAPE_DIR_NONE; + return rc; + } + } - return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks, - tape->merge_bh); + return 0; } static void idetape_pad_zeros(ide_drive_t *drive, int bcount) { idetape_tape_t *tape = drive->driver_data; - struct idetape_bh *bh; - int blocks; + + memset(tape->buf, 0, tape->buffer_size); while (bcount) { - unsigned int count; + unsigned int count = min(tape->buffer_size, bcount); - bh = tape->merge_bh; - count = min(tape->buffer_size, bcount); + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count); bcount -= count; - blocks = count / tape->blk_size; - while (count) { - atomic_set(&bh->b_count, - min(count, (unsigned int)bh->b_size)); - memset(bh->b_data, 0, atomic_read(&bh->b_count)); - count -= atomic_read(&bh->b_count); - bh = bh->b_reqnext; - } - idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks, - tape->merge_bh); } } @@ -1456,7 +1112,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, } if (tape->chrdev_dir == IDETAPE_DIR_READ) { - tape->merge_bh_size = 0; + tape->valid = 0; if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) ++count; ide_tape_discard_merge_buffer(drive, 0); @@ -1505,9 +1161,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t bytes_read, temp, actually_read = 0, rc; + size_t done = 0; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; + int rc; debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); @@ -1517,49 +1173,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf, (count % tape->blk_size) == 0) tape->user_bs_factor = count / tape->blk_size; } - rc = idetape_init_read(drive); + + rc = idetape_init_rw(drive, IDETAPE_DIR_READ); if (rc < 0) return rc; - if (count == 0) - return (0); - if (tape->merge_bh_size) { - actually_read = min((unsigned int)(tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_to_user(tape, buf, actually_read)) - ret = -EFAULT; - buf += actually_read; - tape->merge_bh_size -= actually_read; - count -= actually_read; - } - while (count >= tape->buffer_size) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); - if (bytes_read <= 0) - goto finish; - if (idetape_copy_stage_to_user(tape, buf, bytes_read)) - ret = -EFAULT; - buf += bytes_read; - count -= bytes_read; - actually_read += bytes_read; - } - if (count) { - bytes_read = idetape_add_chrdev_read_request(drive, ctl); - if (bytes_read <= 0) - goto finish; - temp = min((unsigned long)count, (unsigned long)bytes_read); - if (idetape_copy_stage_to_user(tape, buf, temp)) + + while (done < count) { + size_t todo; + + /* refill if staging buffer is empty */ + if (!tape->valid) { + /* If we are at a filemark, nothing more to read */ + if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) + break; + /* read */ + if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, + tape->buffer_size) <= 0) + break; + } + + /* copy out */ + todo = min_t(size_t, count - done, tape->valid); + if (copy_to_user(buf + done, tape->cur, todo)) ret = -EFAULT; - actually_read += temp; - tape->merge_bh_size = bytes_read-temp; + + tape->cur += todo; + tape->valid -= todo; + done += todo; } -finish: - if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { + + if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) { debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name); idetape_space_over_filemarks(drive, MTFSF, 1); return 0; } - return ret ? ret : actually_read; + return ret ? ret : done; } static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, @@ -1567,9 +1217,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, { struct ide_tape_obj *tape = file->private_data; ide_drive_t *drive = tape->drive; - ssize_t actually_written = 0; + size_t done = 0; ssize_t ret = 0; - u16 ctl = *(u16 *)&tape->caps[12]; + int rc; /* The drive is write protected. */ if (tape->write_prot) @@ -1578,80 +1228,31 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count); /* Initialize write operation */ - if (tape->chrdev_dir != IDETAPE_DIR_WRITE) { - if (tape->chrdev_dir == IDETAPE_DIR_READ) - ide_tape_discard_merge_buffer(drive, 1); - if (tape->merge_bh || tape->merge_bh_size) { - printk(KERN_ERR "ide-tape: merge_bh_size " - "should be 0 now\n"); - tape->merge_bh_size = 0; - } - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0); - if (!tape->merge_bh) - return -ENOMEM; - tape->chrdev_dir = IDETAPE_DIR_WRITE; - idetape_init_merge_buffer(tape); + rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE); + if (rc < 0) + return rc; - /* - * Issue a write 0 command to ensure that DSC handshake is - * switched from completion mode to buffer available mode. No - * point in issuing this if DSC overlap isn't supported, some - * drives (Seagate STT3401A) will return an error. - */ - if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) { - ssize_t retval = idetape_queue_rw_tail(drive, - REQ_IDETAPE_WRITE, 0, - tape->merge_bh); - if (retval < 0) { - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; - tape->chrdev_dir = IDETAPE_DIR_NONE; - return retval; - } - } - } - if (count == 0) - return (0); - if (tape->merge_bh_size) { - if (tape->merge_bh_size >= tape->buffer_size) { - printk(KERN_ERR "ide-tape: bug: merge buf too big\n"); - tape->merge_bh_size = 0; - } - actually_written = min((unsigned int) - (tape->buffer_size - tape->merge_bh_size), - (unsigned int)count); - if (idetape_copy_stage_from_user(tape, buf, actually_written)) - ret = -EFAULT; - buf += actually_written; - tape->merge_bh_size += actually_written; - count -= actually_written; - - if (tape->merge_bh_size == tape->buffer_size) { - ssize_t retval; - tape->merge_bh_size = 0; - retval = idetape_add_chrdev_write_request(drive, ctl); - if (retval <= 0) - return (retval); - } - } - while (count >= tape->buffer_size) { - ssize_t retval; - if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size)) - ret = -EFAULT; - buf += tape->buffer_size; - count -= tape->buffer_size; - retval = idetape_add_chrdev_write_request(drive, ctl); - actually_written += tape->buffer_size; - if (retval <= 0) - return (retval); - } - if (count) { - actually_written += count; - if (idetape_copy_stage_from_user(tape, buf, count)) + while (done < count) { + size_t todo; + + /* flush if staging buffer is full */ + if (tape->valid == tape->buffer_size && + idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, + tape->buffer_size) <= 0) + return rc; + + /* copy in */ + todo = min_t(size_t, count - done, + tape->buffer_size - tape->valid); + if (copy_from_user(tape->cur, buf + done, todo)) ret = -EFAULT; - tape->merge_bh_size += count; + + tape->cur += todo; + tape->valid += todo; + done += todo; } - return ret ? ret : actually_written; + + return ret ? ret : done; } static int idetape_write_filemark(ide_drive_t *drive) @@ -1812,7 +1413,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file, idetape_flush_tape_buffers(drive); } if (cmd == MTIOCGET || cmd == MTIOCPOS) { - block_offset = tape->merge_bh_size / + block_offset = tape->valid / (tape->blk_size * tape->user_bs_factor); position = idetape_read_position(drive); if (position < 0) @@ -1960,12 +1561,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor) idetape_tape_t *tape = drive->driver_data; ide_tape_flush_merge_buffer(drive); - tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0); - if (tape->merge_bh != NULL) { + tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL); + if (tape->buf != NULL) { idetape_pad_zeros(drive, tape->blk_size * (tape->user_bs_factor - 1)); - ide_tape_kfree_buffer(tape); - tape->merge_bh = NULL; + kfree(tape->buf); + tape->buf = NULL; } idetape_write_filemark(drive); idetape_flush_tape_buffers(drive); @@ -2159,8 +1760,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) u16 *ctl = (u16 *)&tape->caps[12]; drive->pc_callback = ide_tape_callback; - drive->pc_update_buffers = idetape_update_buffers; - drive->pc_io_buffers = ide_tape_io_buffers; drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP; @@ -2191,11 +1790,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) tape->buffer_size = *ctl * tape->blk_size; } buffer_size = tape->buffer_size; - tape->pages_per_buffer = buffer_size / PAGE_SIZE; - if (buffer_size % PAGE_SIZE) { - tape->pages_per_buffer++; - tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE; - } /* select the "best" DSC read/write polling freq */ speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]); @@ -2238,7 +1832,7 @@ static void ide_tape_release(struct device *dev) ide_drive_t *drive = tape->drive; struct gendisk *g = tape->disk; - BUG_ON(tape->merge_bh_size); + BUG_ON(tape->valid); drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP; drive->driver_data = NULL; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 4aa6223c11b..f400eb4d4af 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, rq = blk_get_request(drive->queue, READ, __GFP_WAIT); rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - rq->buffer = buf; + + if (cmd->tf_flags & IDE_TFLAG_WRITE) + rq->cmd_flags |= REQ_RW; /* * (ks) We transfer currently only whole sectors. @@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf, * if we would find a solution to transfer any size. * To support special commands like READ LONG. */ - rq->hard_nr_sectors = rq->nr_sectors = nsect; - rq->hard_cur_sectors = rq->current_nr_sectors = nsect; - - if (cmd->tf_flags & IDE_TFLAG_WRITE) - rq->cmd_flags |= REQ_RW; + if (nsect) { + error = blk_rq_map_kern(drive->queue, rq, buf, + nsect * SECTOR_SIZE, __GFP_WAIT); + if (error) + goto put_req; + } rq->special = cmd; cmd->rq = rq; error = blk_execute_rq(drive->queue, NULL, rq, 0); - blk_put_request(rq); +put_req: + blk_put_request(rq); return error; } diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 4e63cc9e277..151bf5bc8af 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1,5 +1,5 @@ /* Low-level parallel-port routines for 8255-based PC-style hardware. - * + * * Authors: Phil Blundell <philb@gnu.org> * Tim Waugh <tim@cyberelk.demon.co.uk> * Jose Renau <renau@acm.org> @@ -11,7 +11,7 @@ * Cleaned up include files - Russell King <linux@arm.uk.linux.org> * DMA support - Bert De Jonghe <bert@sophis.be> * Many ECP bugs fixed. Fred Barnes & Jamie Lokier, 1999 - * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. + * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. * Various hacks, Fred Barnes, 04/2001 * Updated probing logic - Adam Belay <ambx1@neo.rr.com> */ @@ -56,10 +56,10 @@ #include <linux/pnp.h> #include <linux/platform_device.h> #include <linux/sysctl.h> +#include <linux/io.h> +#include <linux/uaccess.h> -#include <asm/io.h> #include <asm/dma.h> -#include <asm/uaccess.h> #include <linux/parport.h> #include <linux/parport_pc.h> @@ -82,7 +82,7 @@ #define ECR_TST 06 #define ECR_CNF 07 #define ECR_MODE_MASK 0xe0 -#define ECR_WRITE(p,v) frob_econtrol((p),0xff,(v)) +#define ECR_WRITE(p, v) frob_econtrol((p), 0xff, (v)) #undef DEBUG @@ -109,27 +109,27 @@ static int pci_registered_parport; static int pnp_registered_parport; /* frob_control, but for ECR */ -static void frob_econtrol (struct parport *pb, unsigned char m, +static void frob_econtrol(struct parport *pb, unsigned char m, unsigned char v) { unsigned char ectr = 0; if (m != 0xff) - ectr = inb (ECONTROL (pb)); + ectr = inb(ECONTROL(pb)); - DPRINTK (KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n", + DPRINTK(KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n", m, v, ectr, (ectr & ~m) ^ v); - outb ((ectr & ~m) ^ v, ECONTROL (pb)); + outb((ectr & ~m) ^ v, ECONTROL(pb)); } -static __inline__ void frob_set_mode (struct parport *p, int mode) +static inline void frob_set_mode(struct parport *p, int mode) { - frob_econtrol (p, ECR_MODE_MASK, mode << 5); + frob_econtrol(p, ECR_MODE_MASK, mode << 5); } #ifdef CONFIG_PARPORT_PC_FIFO -/* Safely change the mode bits in the ECR +/* Safely change the mode bits in the ECR Returns: 0 : Success -EBUSY: Could not drain FIFO in some finite amount of time, @@ -141,17 +141,18 @@ static int change_mode(struct parport *p, int m) unsigned char oecr; int mode; - DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n",m); + DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n", m); if (!priv->ecr) { - printk (KERN_DEBUG "change_mode: but there's no ECR!\n"); + printk(KERN_DEBUG "change_mode: but there's no ECR!\n"); return 0; } /* Bits <7:5> contain the mode. */ - oecr = inb (ECONTROL (p)); + oecr = inb(ECONTROL(p)); mode = (oecr >> 5) & 0x7; - if (mode == m) return 0; + if (mode == m) + return 0; if (mode >= 2 && !(priv->ctr & 0x20)) { /* This mode resets the FIFO, so we may @@ -163,19 +164,21 @@ static int change_mode(struct parport *p, int m) case ECR_ECP: /* ECP Parallel Port mode */ /* Busy wait for 200us */ for (counter = 0; counter < 40; counter++) { - if (inb (ECONTROL (p)) & 0x01) + if (inb(ECONTROL(p)) & 0x01) + break; + if (signal_pending(current)) break; - if (signal_pending (current)) break; - udelay (5); + udelay(5); } /* Poll slowly. */ - while (!(inb (ECONTROL (p)) & 0x01)) { - if (time_after_eq (jiffies, expire)) + while (!(inb(ECONTROL(p)) & 0x01)) { + if (time_after_eq(jiffies, expire)) /* The FIFO is stuck. */ return -EBUSY; - schedule_timeout_interruptible(msecs_to_jiffies(10)); - if (signal_pending (current)) + schedule_timeout_interruptible( + msecs_to_jiffies(10)); + if (signal_pending(current)) break; } } @@ -185,20 +188,20 @@ static int change_mode(struct parport *p, int m) /* We have to go through mode 001 */ oecr &= ~(7 << 5); oecr |= ECR_PS2 << 5; - ECR_WRITE (p, oecr); + ECR_WRITE(p, oecr); } /* Set the mode. */ oecr &= ~(7 << 5); oecr |= m << 5; - ECR_WRITE (p, oecr); + ECR_WRITE(p, oecr); return 0; } #ifdef CONFIG_PARPORT_1284 /* Find FIFO lossage; FIFO is reset */ #if 0 -static int get_fifo_residue (struct parport *p) +static int get_fifo_residue(struct parport *p) { int residue; int cnfga; @@ -206,26 +209,26 @@ static int get_fifo_residue (struct parport *p) /* Adjust for the contents of the FIFO. */ for (residue = priv->fifo_depth; ; residue--) { - if (inb (ECONTROL (p)) & 0x2) + if (inb(ECONTROL(p)) & 0x2) /* Full up. */ break; - outb (0, FIFO (p)); + outb(0, FIFO(p)); } - printk (KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name, + printk(KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name, residue); /* Reset the FIFO. */ - frob_set_mode (p, ECR_PS2); + frob_set_mode(p, ECR_PS2); /* Now change to config mode and clean up. FIXME */ - frob_set_mode (p, ECR_CNF); - cnfga = inb (CONFIGA (p)); - printk (KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga); + frob_set_mode(p, ECR_CNF); + cnfga = inb(CONFIGA(p)); + printk(KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga); if (!(cnfga & (1<<2))) { - printk (KERN_DEBUG "%s: Accounting for extra byte\n", p->name); + printk(KERN_DEBUG "%s: Accounting for extra byte\n", p->name); residue++; } @@ -233,9 +236,11 @@ static int get_fifo_residue (struct parport *p) * PWord != 1 byte. */ /* Back to PS2 mode. */ - frob_set_mode (p, ECR_PS2); + frob_set_mode(p, ECR_PS2); - DPRINTK (KERN_DEBUG "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", inb (ECONTROL (p))); + DPRINTK(KERN_DEBUG + "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", + inb(ECONTROL(p))); return residue; } #endif /* 0 */ @@ -257,8 +262,8 @@ static int clear_epp_timeout(struct parport *pb) /* To clear timeout some chips require double read */ parport_pc_read_status(pb); r = parport_pc_read_status(pb); - outb (r | 0x01, STATUS (pb)); /* Some reset by writing 1 */ - outb (r & 0xfe, STATUS (pb)); /* Others by writing 0 */ + outb(r | 0x01, STATUS(pb)); /* Some reset by writing 1 */ + outb(r & 0xfe, STATUS(pb)); /* Others by writing 0 */ r = parport_pc_read_status(pb); return !(r & 0x01); @@ -272,7 +277,8 @@ static int clear_epp_timeout(struct parport *pb) * of these are in parport_pc.h. */ -static void parport_pc_init_state(struct pardevice *dev, struct parport_state *s) +static void parport_pc_init_state(struct pardevice *dev, + struct parport_state *s) { s->u.pc.ctr = 0xc; if (dev->irq_func && @@ -289,22 +295,23 @@ static void parport_pc_save_state(struct parport *p, struct parport_state *s) const struct parport_pc_private *priv = p->physport->private_data; s->u.pc.ctr = priv->ctr; if (priv->ecr) - s->u.pc.ecr = inb (ECONTROL (p)); + s->u.pc.ecr = inb(ECONTROL(p)); } -static void parport_pc_restore_state(struct parport *p, struct parport_state *s) +static void parport_pc_restore_state(struct parport *p, + struct parport_state *s) { struct parport_pc_private *priv = p->physport->private_data; register unsigned char c = s->u.pc.ctr & priv->ctr_writable; - outb (c, CONTROL (p)); + outb(c, CONTROL(p)); priv->ctr = c; if (priv->ecr) - ECR_WRITE (p, s->u.pc.ecr); + ECR_WRITE(p, s->u.pc.ecr); } #ifdef CONFIG_PARPORT_1284 -static size_t parport_pc_epp_read_data (struct parport *port, void *buf, - size_t length, int flags) +static size_t parport_pc_epp_read_data(struct parport *port, void *buf, + size_t length, int flags) { size_t got = 0; @@ -316,54 +323,52 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf, * nFault is 0 if there is at least 1 byte in the Warp's FIFO * pError is 1 if there are 16 bytes in the Warp's FIFO */ - status = inb (STATUS (port)); + status = inb(STATUS(port)); - while (!(status & 0x08) && (got < length)) { - if ((left >= 16) && (status & 0x20) && !(status & 0x08)) { + while (!(status & 0x08) && got < length) { + if (left >= 16 && (status & 0x20) && !(status & 0x08)) { /* can grab 16 bytes from warp fifo */ - if (!((long)buf & 0x03)) { - insl (EPPDATA (port), buf, 4); - } else { - insb (EPPDATA (port), buf, 16); - } + if (!((long)buf & 0x03)) + insl(EPPDATA(port), buf, 4); + else + insb(EPPDATA(port), buf, 16); buf += 16; got += 16; left -= 16; } else { /* grab single byte from the warp fifo */ - *((char *)buf) = inb (EPPDATA (port)); + *((char *)buf) = inb(EPPDATA(port)); buf++; got++; left--; } - status = inb (STATUS (port)); + status = inb(STATUS(port)); if (status & 0x01) { /* EPP timeout should never occur... */ - printk (KERN_DEBUG "%s: EPP timeout occurred while talking to " - "w91284pic (should not have done)\n", port->name); - clear_epp_timeout (port); + printk(KERN_DEBUG +"%s: EPP timeout occurred while talking to w91284pic (should not have done)\n", port->name); + clear_epp_timeout(port); } } return got; } if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - if (!(((long)buf | length) & 0x03)) { - insl (EPPDATA (port), buf, (length >> 2)); - } else { - insb (EPPDATA (port), buf, length); - } - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (!(((long)buf | length) & 0x03)) + insl(EPPDATA(port), buf, (length >> 2)); + else + insb(EPPDATA(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; got < length; got++) { - *((char*)buf) = inb (EPPDATA(port)); + *((char *)buf) = inb(EPPDATA(port)); buf++; - if (inb (STATUS (port)) & 0x01) { + if (inb(STATUS(port)) & 0x01) { /* EPP timeout */ - clear_epp_timeout (port); + clear_epp_timeout(port); break; } } @@ -371,28 +376,27 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf, return got; } -static size_t parport_pc_epp_write_data (struct parport *port, const void *buf, - size_t length, int flags) +static size_t parport_pc_epp_write_data(struct parport *port, const void *buf, + size_t length, int flags) { size_t written = 0; if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - if (!(((long)buf | length) & 0x03)) { - outsl (EPPDATA (port), buf, (length >> 2)); - } else { - outsb (EPPDATA (port), buf, length); - } - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (!(((long)buf | length) & 0x03)) + outsl(EPPDATA(port), buf, (length >> 2)); + else + outsb(EPPDATA(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; written < length; written++) { - outb (*((char*)buf), EPPDATA(port)); + outb(*((char *)buf), EPPDATA(port)); buf++; - if (inb (STATUS(port)) & 0x01) { - clear_epp_timeout (port); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); break; } } @@ -400,24 +404,24 @@ static size_t parport_pc_epp_write_data (struct parport *port, const void *buf, return written; } -static size_t parport_pc_epp_read_addr (struct parport *port, void *buf, +static size_t parport_pc_epp_read_addr(struct parport *port, void *buf, size_t length, int flags) { size_t got = 0; if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - insb (EPPADDR (port), buf, length); - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + insb(EPPADDR(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; got < length; got++) { - *((char*)buf) = inb (EPPADDR (port)); + *((char *)buf) = inb(EPPADDR(port)); buf++; - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); break; } } @@ -425,25 +429,25 @@ static size_t parport_pc_epp_read_addr (struct parport *port, void *buf, return got; } -static size_t parport_pc_epp_write_addr (struct parport *port, +static size_t parport_pc_epp_write_addr(struct parport *port, const void *buf, size_t length, int flags) { size_t written = 0; if ((flags & PARPORT_EPP_FAST) && (length > 1)) { - outsb (EPPADDR (port), buf, length); - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + outsb(EPPADDR(port), buf, length); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); return -EIO; } return length; } for (; written < length; written++) { - outb (*((char*)buf), EPPADDR (port)); + outb(*((char *)buf), EPPADDR(port)); buf++; - if (inb (STATUS (port)) & 0x01) { - clear_epp_timeout (port); + if (inb(STATUS(port)) & 0x01) { + clear_epp_timeout(port); break; } } @@ -451,74 +455,74 @@ static size_t parport_pc_epp_write_addr (struct parport *port, return written; } -static size_t parport_pc_ecpepp_read_data (struct parport *port, void *buf, - size_t length, int flags) +static size_t parport_pc_ecpepp_read_data(struct parport *port, void *buf, + size_t length, int flags) { size_t got; - frob_set_mode (port, ECR_EPP); - parport_pc_data_reverse (port); - parport_pc_write_control (port, 0x4); - got = parport_pc_epp_read_data (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_data_reverse(port); + parport_pc_write_control(port, 0x4); + got = parport_pc_epp_read_data(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return got; } -static size_t parport_pc_ecpepp_write_data (struct parport *port, - const void *buf, size_t length, - int flags) +static size_t parport_pc_ecpepp_write_data(struct parport *port, + const void *buf, size_t length, + int flags) { size_t written; - frob_set_mode (port, ECR_EPP); - parport_pc_write_control (port, 0x4); - parport_pc_data_forward (port); - written = parport_pc_epp_write_data (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_write_control(port, 0x4); + parport_pc_data_forward(port); + written = parport_pc_epp_write_data(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return written; } -static size_t parport_pc_ecpepp_read_addr (struct parport *port, void *buf, - size_t length, int flags) +static size_t parport_pc_ecpepp_read_addr(struct parport *port, void *buf, + size_t length, int flags) { size_t got; - frob_set_mode (port, ECR_EPP); - parport_pc_data_reverse (port); - parport_pc_write_control (port, 0x4); - got = parport_pc_epp_read_addr (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_data_reverse(port); + parport_pc_write_control(port, 0x4); + got = parport_pc_epp_read_addr(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return got; } -static size_t parport_pc_ecpepp_write_addr (struct parport *port, +static size_t parport_pc_ecpepp_write_addr(struct parport *port, const void *buf, size_t length, int flags) { size_t written; - frob_set_mode (port, ECR_EPP); - parport_pc_write_control (port, 0x4); - parport_pc_data_forward (port); - written = parport_pc_epp_write_addr (port, buf, length, flags); - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_EPP); + parport_pc_write_control(port, 0x4); + parport_pc_data_forward(port); + written = parport_pc_epp_write_addr(port, buf, length, flags); + frob_set_mode(port, ECR_PS2); return written; } #endif /* IEEE 1284 support */ #ifdef CONFIG_PARPORT_PC_FIFO -static size_t parport_pc_fifo_write_block_pio (struct parport *port, +static size_t parport_pc_fifo_write_block_pio(struct parport *port, const void *buf, size_t length) { int ret = 0; const unsigned char *bufp = buf; size_t left = length; unsigned long expire = jiffies + port->physport->cad->timeout; - const int fifo = FIFO (port); + const int fifo = FIFO(port); int poll_for = 8; /* 80 usecs */ const struct parport_pc_private *priv = port->physport->private_data; const int fifo_depth = priv->fifo_depth; @@ -526,25 +530,25 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port, port = port->physport; /* We don't want to be interrupted every character. */ - parport_pc_disable_irq (port); + parport_pc_disable_irq(port); /* set nErrIntrEn and serviceIntr */ - frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2)); + frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2)); /* Forward mode. */ - parport_pc_data_forward (port); /* Must be in PS2 mode */ + parport_pc_data_forward(port); /* Must be in PS2 mode */ while (left) { unsigned char byte; - unsigned char ecrval = inb (ECONTROL (port)); + unsigned char ecrval = inb(ECONTROL(port)); int i = 0; - if (need_resched() && time_before (jiffies, expire)) + if (need_resched() && time_before(jiffies, expire)) /* Can't yield the port. */ - schedule (); + schedule(); /* Anyone else waiting for the port? */ if (port->waithead) { - printk (KERN_DEBUG "Somebody wants the port\n"); + printk(KERN_DEBUG "Somebody wants the port\n"); break; } @@ -552,21 +556,22 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port, /* FIFO is full. Wait for interrupt. */ /* Clear serviceIntr */ - ECR_WRITE (port, ecrval & ~(1<<2)); - false_alarm: - ret = parport_wait_event (port, HZ); - if (ret < 0) break; + ECR_WRITE(port, ecrval & ~(1<<2)); +false_alarm: + ret = parport_wait_event(port, HZ); + if (ret < 0) + break; ret = 0; - if (!time_before (jiffies, expire)) { + if (!time_before(jiffies, expire)) { /* Timed out. */ - printk (KERN_DEBUG "FIFO write timed out\n"); + printk(KERN_DEBUG "FIFO write timed out\n"); break; } - ecrval = inb (ECONTROL (port)); + ecrval = inb(ECONTROL(port)); if (!(ecrval & (1<<2))) { if (need_resched() && - time_before (jiffies, expire)) - schedule (); + time_before(jiffies, expire)) + schedule(); goto false_alarm; } @@ -577,38 +582,38 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port, /* Can't fail now. */ expire = jiffies + port->cad->timeout; - poll: - if (signal_pending (current)) +poll: + if (signal_pending(current)) break; if (ecrval & 0x01) { /* FIFO is empty. Blast it full. */ const int n = left < fifo_depth ? left : fifo_depth; - outsb (fifo, bufp, n); + outsb(fifo, bufp, n); bufp += n; left -= n; /* Adjust the poll time. */ - if (i < (poll_for - 2)) poll_for--; + if (i < (poll_for - 2)) + poll_for--; continue; } else if (i++ < poll_for) { - udelay (10); - ecrval = inb (ECONTROL (port)); + udelay(10); + ecrval = inb(ECONTROL(port)); goto poll; } - /* Half-full (call me an optimist) */ + /* Half-full(call me an optimist) */ byte = *bufp++; - outb (byte, fifo); + outb(byte, fifo); left--; - } - -dump_parport_state ("leave fifo_write_block_pio", port); + } + dump_parport_state("leave fifo_write_block_pio", port); return length - left; } #ifdef HAS_DMA -static size_t parport_pc_fifo_write_block_dma (struct parport *port, +static size_t parport_pc_fifo_write_block_dma(struct parport *port, const void *buf, size_t length) { int ret = 0; @@ -621,7 +626,7 @@ static size_t parport_pc_fifo_write_block_dma (struct parport *port, unsigned long start = (unsigned long) buf; unsigned long end = (unsigned long) buf + length - 1; -dump_parport_state ("enter fifo_write_block_dma", port); + dump_parport_state("enter fifo_write_block_dma", port); if (end < MAX_DMA_ADDRESS) { /* If it would cross a 64k boundary, cap it at the end. */ if ((start ^ end) & ~0xffffUL) @@ -629,8 +634,9 @@ dump_parport_state ("enter fifo_write_block_dma", port); dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length, DMA_TO_DEVICE); - } else { - /* above 16 MB we use a bounce buffer as ISA-DMA is not possible */ + } else { + /* above 16 MB we use a bounce buffer as ISA-DMA + is not possible */ maxlen = PAGE_SIZE; /* sizeof(priv->dma_buf) */ dma_addr = priv->dma_handle; dma_handle = 0; @@ -639,12 +645,12 @@ dump_parport_state ("enter fifo_write_block_dma", port); port = port->physport; /* We don't want to be interrupted every character. */ - parport_pc_disable_irq (port); + parport_pc_disable_irq(port); /* set nErrIntrEn and serviceIntr */ - frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2)); + frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2)); /* Forward mode. */ - parport_pc_data_forward (port); /* Must be in PS2 mode */ + parport_pc_data_forward(port); /* Must be in PS2 mode */ while (left) { unsigned long expire = jiffies + port->physport->cad->timeout; @@ -665,10 +671,10 @@ dump_parport_state ("enter fifo_write_block_dma", port); set_dma_count(port->dma, count); /* Set DMA mode */ - frob_econtrol (port, 1<<3, 1<<3); + frob_econtrol(port, 1<<3, 1<<3); /* Clear serviceIntr */ - frob_econtrol (port, 1<<2, 0); + frob_econtrol(port, 1<<2, 0); enable_dma(port->dma); release_dma_lock(dmaflag); @@ -676,20 +682,22 @@ dump_parport_state ("enter fifo_write_block_dma", port); /* assume DMA will be successful */ left -= count; buf += count; - if (dma_handle) dma_addr += count; + if (dma_handle) + dma_addr += count; /* Wait for interrupt. */ - false_alarm: - ret = parport_wait_event (port, HZ); - if (ret < 0) break; +false_alarm: + ret = parport_wait_event(port, HZ); + if (ret < 0) + break; ret = 0; - if (!time_before (jiffies, expire)) { + if (!time_before(jiffies, expire)) { /* Timed out. */ - printk (KERN_DEBUG "DMA write timed out\n"); + printk(KERN_DEBUG "DMA write timed out\n"); break; } /* Is serviceIntr set? */ - if (!(inb (ECONTROL (port)) & (1<<2))) { + if (!(inb(ECONTROL(port)) & (1<<2))) { cond_resched(); goto false_alarm; @@ -705,14 +713,15 @@ dump_parport_state ("enter fifo_write_block_dma", port); /* Anyone else waiting for the port? */ if (port->waithead) { - printk (KERN_DEBUG "Somebody wants the port\n"); + printk(KERN_DEBUG "Somebody wants the port\n"); break; } /* update for possible DMA residue ! */ buf -= count; left += count; - if (dma_handle) dma_addr -= count; + if (dma_handle) + dma_addr -= count; } /* Maybe got here through break, so adjust for DMA residue! */ @@ -723,12 +732,12 @@ dump_parport_state ("enter fifo_write_block_dma", port); release_dma_lock(dmaflag); /* Turn off DMA mode */ - frob_econtrol (port, 1<<3, 0); + frob_econtrol(port, 1<<3, 0); if (dma_handle) dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE); -dump_parport_state ("leave fifo_write_block_dma", port); + dump_parport_state("leave fifo_write_block_dma", port); return length - left; } #endif @@ -738,13 +747,13 @@ static inline size_t parport_pc_fifo_write_block(struct parport *port, { #ifdef HAS_DMA if (port->dma != PARPORT_DMA_NONE) - return parport_pc_fifo_write_block_dma (port, buf, length); + return parport_pc_fifo_write_block_dma(port, buf, length); #endif - return parport_pc_fifo_write_block_pio (port, buf, length); + return parport_pc_fifo_write_block_pio(port, buf, length); } /* Parallel Port FIFO mode (ECP chipsets) */ -static size_t parport_pc_compat_write_block_pio (struct parport *port, +static size_t parport_pc_compat_write_block_pio(struct parport *port, const void *buf, size_t length, int flags) { @@ -756,14 +765,16 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port, /* Special case: a timeout of zero means we cannot call schedule(). * Also if O_NONBLOCK is set then use the default implementation. */ if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) - return parport_ieee1284_write_compat (port, buf, + return parport_ieee1284_write_compat(port, buf, length, flags); /* Set up parallel port FIFO mode.*/ - parport_pc_data_forward (port); /* Must be in PS2 mode */ - parport_pc_frob_control (port, PARPORT_CONTROL_STROBE, 0); - r = change_mode (port, ECR_PPF); /* Parallel port FIFO */ - if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", port->name); + parport_pc_data_forward(port); /* Must be in PS2 mode */ + parport_pc_frob_control(port, PARPORT_CONTROL_STROBE, 0); + r = change_mode(port, ECR_PPF); /* Parallel port FIFO */ + if (r) + printk(KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", + port->name); port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA; @@ -775,40 +786,39 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port, * the FIFO is empty, so allow 4 seconds for each position * in the fifo. */ - expire = jiffies + (priv->fifo_depth * HZ * 4); + expire = jiffies + (priv->fifo_depth * HZ * 4); do { /* Wait for the FIFO to empty */ - r = change_mode (port, ECR_PS2); - if (r != -EBUSY) { + r = change_mode(port, ECR_PS2); + if (r != -EBUSY) break; - } - } while (time_before (jiffies, expire)); + } while (time_before(jiffies, expire)); if (r == -EBUSY) { - printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name); + printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name); /* Prevent further data transfer. */ - frob_set_mode (port, ECR_TST); + frob_set_mode(port, ECR_TST); /* Adjust for the contents of the FIFO. */ for (written -= priv->fifo_depth; ; written++) { - if (inb (ECONTROL (port)) & 0x2) { + if (inb(ECONTROL(port)) & 0x2) { /* Full up. */ break; } - outb (0, FIFO (port)); + outb(0, FIFO(port)); } /* Reset the FIFO and return to PS2 mode. */ - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_PS2); } - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_BUSY, PARPORT_STATUS_BUSY); if (r) - printk (KERN_DEBUG - "%s: BUSY timeout (%d) in compat_write_block_pio\n", + printk(KERN_DEBUG + "%s: BUSY timeout (%d) in compat_write_block_pio\n", port->name, r); port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE; @@ -818,7 +828,7 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port, /* ECP */ #ifdef CONFIG_PARPORT_1284 -static size_t parport_pc_ecp_write_block_pio (struct parport *port, +static size_t parport_pc_ecp_write_block_pio(struct parport *port, const void *buf, size_t length, int flags) { @@ -830,36 +840,38 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port, /* Special case: a timeout of zero means we cannot call schedule(). * Also if O_NONBLOCK is set then use the default implementation. */ if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) - return parport_ieee1284_ecp_write_data (port, buf, + return parport_ieee1284_ecp_write_data(port, buf, length, flags); /* Switch to forward mode if necessary. */ if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) { /* Event 47: Set nInit high. */ - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_INIT | PARPORT_CONTROL_AUTOFD, PARPORT_CONTROL_INIT | PARPORT_CONTROL_AUTOFD); /* Event 49: PError goes high. */ - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, PARPORT_STATUS_PAPEROUT); if (r) { - printk (KERN_DEBUG "%s: PError timeout (%d) " + printk(KERN_DEBUG "%s: PError timeout (%d) " "in ecp_write_block_pio\n", port->name, r); } } /* Set up ECP parallel port mode.*/ - parport_pc_data_forward (port); /* Must be in PS2 mode */ - parport_pc_frob_control (port, + parport_pc_data_forward(port); /* Must be in PS2 mode */ + parport_pc_frob_control(port, PARPORT_CONTROL_STROBE | PARPORT_CONTROL_AUTOFD, 0); - r = change_mode (port, ECR_ECP); /* ECP FIFO */ - if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name); + r = change_mode(port, ECR_ECP); /* ECP FIFO */ + if (r) + printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", + port->name); port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA; /* Write the data to the FIFO. */ @@ -873,55 +885,54 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port, expire = jiffies + (priv->fifo_depth * (HZ * 4)); do { /* Wait for the FIFO to empty */ - r = change_mode (port, ECR_PS2); - if (r != -EBUSY) { + r = change_mode(port, ECR_PS2); + if (r != -EBUSY) break; - } - } while (time_before (jiffies, expire)); + } while (time_before(jiffies, expire)); if (r == -EBUSY) { - printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name); + printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name); /* Prevent further data transfer. */ - frob_set_mode (port, ECR_TST); + frob_set_mode(port, ECR_TST); /* Adjust for the contents of the FIFO. */ for (written -= priv->fifo_depth; ; written++) { - if (inb (ECONTROL (port)) & 0x2) { + if (inb(ECONTROL(port)) & 0x2) { /* Full up. */ break; } - outb (0, FIFO (port)); + outb(0, FIFO(port)); } /* Reset the FIFO and return to PS2 mode. */ - frob_set_mode (port, ECR_PS2); + frob_set_mode(port, ECR_PS2); /* Host transfer recovery. */ - parport_pc_data_reverse (port); /* Must be in PS2 mode */ - udelay (5); - parport_frob_control (port, PARPORT_CONTROL_INIT, 0); - r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0); + parport_pc_data_reverse(port); /* Must be in PS2 mode */ + udelay(5); + parport_frob_control(port, PARPORT_CONTROL_INIT, 0); + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0); if (r) - printk (KERN_DEBUG "%s: PE,1 timeout (%d) " + printk(KERN_DEBUG "%s: PE,1 timeout (%d) " "in ecp_write_block_pio\n", port->name, r); - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT); - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, PARPORT_STATUS_PAPEROUT); - if (r) - printk (KERN_DEBUG "%s: PE,2 timeout (%d) " + if (r) + printk(KERN_DEBUG "%s: PE,2 timeout (%d) " "in ecp_write_block_pio\n", port->name, r); } - r = parport_wait_peripheral (port, - PARPORT_STATUS_BUSY, + r = parport_wait_peripheral(port, + PARPORT_STATUS_BUSY, PARPORT_STATUS_BUSY); - if(r) - printk (KERN_DEBUG + if (r) + printk(KERN_DEBUG "%s: BUSY timeout (%d) in ecp_write_block_pio\n", port->name, r); @@ -931,7 +942,7 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port, } #if 0 -static size_t parport_pc_ecp_read_block_pio (struct parport *port, +static size_t parport_pc_ecp_read_block_pio(struct parport *port, void *buf, size_t length, int flags) { @@ -944,13 +955,13 @@ static size_t parport_pc_ecp_read_block_pio (struct parport *port, char *bufp = buf; port = port->physport; -DPRINTK (KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n"); -dump_parport_state ("enter fcn", port); + DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n"); + dump_parport_state("enter fcn", port); /* Special case: a timeout of zero means we cannot call schedule(). * Also if O_NONBLOCK is set then use the default implementation. */ if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK) - return parport_ieee1284_ecp_read_data (port, buf, + return parport_ieee1284_ecp_read_data(port, buf, length, flags); if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) { @@ -966,173 +977,178 @@ dump_parport_state ("enter fcn", port); * go through software emulation. Otherwise we may have to throw * away data. */ if (length < fifofull) - return parport_ieee1284_ecp_read_data (port, buf, + return parport_ieee1284_ecp_read_data(port, buf, length, flags); if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) { /* change to reverse-idle phase (must be in forward-idle) */ /* Event 38: Set nAutoFd low (also make sure nStrobe is high) */ - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_AUTOFD | PARPORT_CONTROL_STROBE, PARPORT_CONTROL_AUTOFD); - parport_pc_data_reverse (port); /* Must be in PS2 mode */ - udelay (5); + parport_pc_data_reverse(port); /* Must be in PS2 mode */ + udelay(5); /* Event 39: Set nInit low to initiate bus reversal */ - parport_frob_control (port, + parport_frob_control(port, PARPORT_CONTROL_INIT, 0); /* Event 40: Wait for nAckReverse (PError) to go low */ - r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0); - if (r) { - printk (KERN_DEBUG "%s: PE timeout Event 40 (%d) " + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0); + if (r) { + printk(KERN_DEBUG "%s: PE timeout Event 40 (%d) " "in ecp_read_block_pio\n", port->name, r); return 0; } } /* Set up ECP FIFO mode.*/ -/* parport_pc_frob_control (port, +/* parport_pc_frob_control(port, PARPORT_CONTROL_STROBE | PARPORT_CONTROL_AUTOFD, PARPORT_CONTROL_AUTOFD); */ - r = change_mode (port, ECR_ECP); /* ECP FIFO */ - if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name); + r = change_mode(port, ECR_ECP); /* ECP FIFO */ + if (r) + printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", + port->name); port->ieee1284.phase = IEEE1284_PH_REV_DATA; /* the first byte must be collected manually */ -dump_parport_state ("pre 43", port); + dump_parport_state("pre 43", port); /* Event 43: Wait for nAck to go low */ - r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, 0); + r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, 0); if (r) { /* timed out while reading -- no data */ - printk (KERN_DEBUG "PIO read timed out (initial byte)\n"); + printk(KERN_DEBUG "PIO read timed out (initial byte)\n"); goto out_no_data; } /* read byte */ - *bufp++ = inb (DATA (port)); + *bufp++ = inb(DATA(port)); left--; -dump_parport_state ("43-44", port); + dump_parport_state("43-44", port); /* Event 44: nAutoFd (HostAck) goes high to acknowledge */ - parport_pc_frob_control (port, + parport_pc_frob_control(port, PARPORT_CONTROL_AUTOFD, 0); -dump_parport_state ("pre 45", port); + dump_parport_state("pre 45", port); /* Event 45: Wait for nAck to go high */ -/* r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, PARPORT_STATUS_ACK); */ -dump_parport_state ("post 45", port); -r = 0; + /* r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, + PARPORT_STATUS_ACK); */ + dump_parport_state("post 45", port); + r = 0; if (r) { /* timed out while waiting for peripheral to respond to ack */ - printk (KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n"); + printk(KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n"); /* keep hold of the byte we've got already */ goto out_no_data; } /* Event 46: nAutoFd (HostAck) goes low to accept more data */ - parport_pc_frob_control (port, + parport_pc_frob_control(port, PARPORT_CONTROL_AUTOFD, PARPORT_CONTROL_AUTOFD); -dump_parport_state ("rev idle", port); + dump_parport_state("rev idle", port); /* Do the transfer. */ while (left > fifofull) { int ret; unsigned long expire = jiffies + port->cad->timeout; - unsigned char ecrval = inb (ECONTROL (port)); + unsigned char ecrval = inb(ECONTROL(port)); - if (need_resched() && time_before (jiffies, expire)) + if (need_resched() && time_before(jiffies, expire)) /* Can't yield the port. */ - schedule (); + schedule(); /* At this point, the FIFO may already be full. In - * that case ECP is already holding back the - * peripheral (assuming proper design) with a delayed - * handshake. Work fast to avoid a peripheral - * timeout. */ + * that case ECP is already holding back the + * peripheral (assuming proper design) with a delayed + * handshake. Work fast to avoid a peripheral + * timeout. */ if (ecrval & 0x01) { /* FIFO is empty. Wait for interrupt. */ -dump_parport_state ("FIFO empty", port); + dump_parport_state("FIFO empty", port); /* Anyone else waiting for the port? */ if (port->waithead) { - printk (KERN_DEBUG "Somebody wants the port\n"); + printk(KERN_DEBUG "Somebody wants the port\n"); break; } /* Clear serviceIntr */ - ECR_WRITE (port, ecrval & ~(1<<2)); - false_alarm: -dump_parport_state ("waiting", port); - ret = parport_wait_event (port, HZ); -DPRINTK (KERN_DEBUG "parport_wait_event returned %d\n", ret); + ECR_WRITE(port, ecrval & ~(1<<2)); +false_alarm: + dump_parport_state("waiting", port); + ret = parport_wait_event(port, HZ); + DPRINTK(KERN_DEBUG "parport_wait_event returned %d\n", + ret); if (ret < 0) break; ret = 0; - if (!time_before (jiffies, expire)) { + if (!time_before(jiffies, expire)) { /* Timed out. */ -dump_parport_state ("timeout", port); - printk (KERN_DEBUG "PIO read timed out\n"); + dump_parport_state("timeout", port); + printk(KERN_DEBUG "PIO read timed out\n"); break; } - ecrval = inb (ECONTROL (port)); + ecrval = inb(ECONTROL(port)); if (!(ecrval & (1<<2))) { if (need_resched() && - time_before (jiffies, expire)) { - schedule (); + time_before(jiffies, expire)) { + schedule(); } goto false_alarm; } /* Depending on how the FIFO threshold was - * set, how long interrupt service took, and - * how fast the peripheral is, we might be - * lucky and have a just filled FIFO. */ + * set, how long interrupt service took, and + * how fast the peripheral is, we might be + * lucky and have a just filled FIFO. */ continue; } if (ecrval & 0x02) { /* FIFO is full. */ -dump_parport_state ("FIFO full", port); - insb (fifo, bufp, fifo_depth); + dump_parport_state("FIFO full", port); + insb(fifo, bufp, fifo_depth); bufp += fifo_depth; left -= fifo_depth; continue; } -DPRINTK (KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n"); + DPRINTK(KERN_DEBUG + "*** ecp_read_block_pio: reading one byte from the FIFO\n"); /* FIFO not filled. We will cycle this loop for a while - * and either the peripheral will fill it faster, - * tripping a fast empty with insb, or we empty it. */ - *bufp++ = inb (fifo); + * and either the peripheral will fill it faster, + * tripping a fast empty with insb, or we empty it. */ + *bufp++ = inb(fifo); left--; } /* scoop up anything left in the FIFO */ - while (left && !(inb (ECONTROL (port) & 0x01))) { - *bufp++ = inb (fifo); + while (left && !(inb(ECONTROL(port) & 0x01))) { + *bufp++ = inb(fifo); left--; } port->ieee1284.phase = IEEE1284_PH_REV_IDLE; -dump_parport_state ("rev idle2", port); + dump_parport_state("rev idle2", port); out_no_data: /* Go to forward idle mode to shut the peripheral up (event 47). */ - parport_frob_control (port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT); + parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT); /* event 49: PError goes high */ - r = parport_wait_peripheral (port, + r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, PARPORT_STATUS_PAPEROUT); if (r) { - printk (KERN_DEBUG + printk(KERN_DEBUG "%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n", port->name, r); } @@ -1141,14 +1157,14 @@ out_no_data: /* Finish up. */ { - int lost = get_fifo_residue (port); + int lost = get_fifo_residue(port); if (lost) /* Shouldn't happen with compliant peripherals. */ - printk (KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n", + printk(KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n", port->name, lost); } -dump_parport_state ("fwd idle", port); + dump_parport_state("fwd idle", port); return length - left; } #endif /* 0 */ @@ -1164,8 +1180,7 @@ dump_parport_state ("fwd idle", port); /* GCC is not inlining extern inline function later overwriten to non-inline, so we use outlined_ variants here. */ -static const struct parport_operations parport_pc_ops = -{ +static const struct parport_operations parport_pc_ops = { .write_data = parport_pc_write_data, .read_data = parport_pc_read_data, @@ -1202,88 +1217,107 @@ static const struct parport_operations parport_pc_ops = }; #ifdef CONFIG_PARPORT_PC_SUPERIO + +static struct superio_struct *find_free_superio(void) +{ + int i; + for (i = 0; i < NR_SUPERIOS; i++) + if (superios[i].io == 0) + return &superios[i]; + return NULL; +} + + /* Super-IO chipset detection, Winbond, SMSC */ static void __devinit show_parconfig_smsc37c669(int io, int key) { - int cr1,cr4,cra,cr23,cr26,cr27,i=0; - static const char *const modes[]={ + int cr1, cr4, cra, cr23, cr26, cr27; + struct superio_struct *s; + + static const char *const modes[] = { "SPP and Bidirectional (PS/2)", "EPP and SPP", "ECP", "ECP and EPP" }; - outb(key,io); - outb(key,io); - outb(1,io); - cr1=inb(io+1); - outb(4,io); - cr4=inb(io+1); - outb(0x0a,io); - cra=inb(io+1); - outb(0x23,io); - cr23=inb(io+1); - outb(0x26,io); - cr26=inb(io+1); - outb(0x27,io); - cr27=inb(io+1); - outb(0xaa,io); + outb(key, io); + outb(key, io); + outb(1, io); + cr1 = inb(io + 1); + outb(4, io); + cr4 = inb(io + 1); + outb(0x0a, io); + cra = inb(io + 1); + outb(0x23, io); + cr23 = inb(io + 1); + outb(0x26, io); + cr26 = inb(io + 1); + outb(0x27, io); + cr27 = inb(io + 1); + outb(0xaa, io); if (verbose_probing) { - printk (KERN_INFO "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, " + printk(KERN_INFO + "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, " "A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n", - cr1,cr4,cra,cr23,cr26,cr27); - + cr1, cr4, cra, cr23, cr26, cr27); + /* The documentation calls DMA and IRQ-Lines by letters, so the board maker can/will wire them appropriately/randomly... G=reserved H=IDE-irq, */ - printk (KERN_INFO "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, " - "fifo threshold=%d\n", cr23*4, - (cr27 &0x0f) ? 'A'-1+(cr27 &0x0f): '-', - (cr26 &0x0f) ? 'A'-1+(cr26 &0x0f): '-', cra & 0x0f); + printk(KERN_INFO + "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, fifo threshold=%d\n", + cr23 * 4, + (cr27 & 0x0f) ? 'A' - 1 + (cr27 & 0x0f) : '-', + (cr26 & 0x0f) ? 'A' - 1 + (cr26 & 0x0f) : '-', + cra & 0x0f); printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n", - (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no"); - printk(KERN_INFO "SMSC LPT Config: Port mode=%s, EPP version =%s\n", - (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], - (cr4 & 0x40) ? "1.7" : "1.9"); + (cr23 * 4 >= 0x100) ? "yes" : "no", + (cr1 & 4) ? "yes" : "no"); + printk(KERN_INFO + "SMSC LPT Config: Port mode=%s, EPP version =%s\n", + (cr1 & 0x08) ? "Standard mode only (SPP)" + : modes[cr4 & 0x03], + (cr4 & 0x40) ? "1.7" : "1.9"); } - + /* Heuristics ! BIOS setup for this mainboard device limits the choices to standard settings, i.e. io-address and IRQ are related, however DMA can be 1 or 3, assume DMA_A=DMA1, DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */ - if(cr23*4 >=0x100) { /* if active */ - while((superios[i].io!= 0) && (i<NR_SUPERIOS)) - i++; - if(i==NR_SUPERIOS) + if (cr23 * 4 >= 0x100) { /* if active */ + s = find_free_superio(); + if (s == NULL) printk(KERN_INFO "Super-IO: too many chips!\n"); else { int d; - switch (cr23*4) { - case 0x3bc: - superios[i].io = 0x3bc; - superios[i].irq = 7; - break; - case 0x378: - superios[i].io = 0x378; - superios[i].irq = 7; - break; - case 0x278: - superios[i].io = 0x278; - superios[i].irq = 5; + switch (cr23 * 4) { + case 0x3bc: + s->io = 0x3bc; + s->irq = 7; + break; + case 0x378: + s->io = 0x378; + s->irq = 7; + break; + case 0x278: + s->io = 0x278; + s->irq = 5; } - d=(cr26 &0x0f); - if((d==1) || (d==3)) - superios[i].dma= d; + d = (cr26 & 0x0f); + if (d == 1 || d == 3) + s->dma = d; else - superios[i].dma= PARPORT_DMA_NONE; + s->dma = PARPORT_DMA_NONE; } - } + } } static void __devinit show_parconfig_winbond(int io, int key) { - int cr30,cr60,cr61,cr70,cr74,crf0,i=0; + int cr30, cr60, cr61, cr70, cr74, crf0; + struct superio_struct *s; static const char *const modes[] = { "Standard (SPP) and Bidirectional(PS/2)", /* 0 */ "EPP-1.9 and SPP", @@ -1296,110 +1330,134 @@ static void __devinit show_parconfig_winbond(int io, int key) static char *const irqtypes[] = { "pulsed low, high-Z", "follows nACK" }; - + /* The registers are called compatible-PnP because the - register layout is modelled after ISA-PnP, the access - method is just another ... */ - outb(key,io); - outb(key,io); - outb(0x07,io); /* Register 7: Select Logical Device */ - outb(0x01,io+1); /* LD1 is Parallel Port */ - outb(0x30,io); - cr30=inb(io+1); - outb(0x60,io); - cr60=inb(io+1); - outb(0x61,io); - cr61=inb(io+1); - outb(0x70,io); - cr70=inb(io+1); - outb(0x74,io); - cr74=inb(io+1); - outb(0xf0,io); - crf0=inb(io+1); - outb(0xaa,io); + register layout is modelled after ISA-PnP, the access + method is just another ... */ + outb(key, io); + outb(key, io); + outb(0x07, io); /* Register 7: Select Logical Device */ + outb(0x01, io + 1); /* LD1 is Parallel Port */ + outb(0x30, io); + cr30 = inb(io + 1); + outb(0x60, io); + cr60 = inb(io + 1); + outb(0x61, io); + cr61 = inb(io + 1); + outb(0x70, io); + cr70 = inb(io + 1); + outb(0x74, io); + cr74 = inb(io + 1); + outb(0xf0, io); + crf0 = inb(io + 1); + outb(0xaa, io); if (verbose_probing) { - printk(KERN_INFO "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x " - "70=%02x 74=%02x, f0=%02x\n", cr30,cr60,cr61,cr70,cr74,crf0); - printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", - (cr30 & 0x01) ? "yes":"no", cr60,cr61,cr70&0x0f ); + printk(KERN_INFO + "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x 70=%02x 74=%02x, f0=%02x\n", + cr30, cr60, cr61, cr70, cr74, crf0); + printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", + (cr30 & 0x01) ? "yes" : "no", cr60, cr61, cr70 & 0x0f); if ((cr74 & 0x07) > 3) printk("dma=none\n"); else - printk("dma=%d\n",cr74 & 0x07); - printk(KERN_INFO "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n", - irqtypes[crf0>>7], (crf0>>3)&0x0f); - printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]); + printk("dma=%d\n", cr74 & 0x07); + printk(KERN_INFO + "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n", + irqtypes[crf0>>7], (crf0>>3)&0x0f); + printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", + modes[crf0 & 0x07]); } - if(cr30 & 0x01) { /* the settings can be interrogated later ... */ - while((superios[i].io!= 0) && (i<NR_SUPERIOS)) - i++; - if(i==NR_SUPERIOS) + if (cr30 & 0x01) { /* the settings can be interrogated later ... */ + s = find_free_superio(); + if (s == NULL) printk(KERN_INFO "Super-IO: too many chips!\n"); else { - superios[i].io = (cr60<<8)|cr61; - superios[i].irq = cr70&0x0f; - superios[i].dma = (((cr74 & 0x07) > 3) ? + s->io = (cr60 << 8) | cr61; + s->irq = cr70 & 0x0f; + s->dma = (((cr74 & 0x07) > 3) ? PARPORT_DMA_NONE : (cr74 & 0x07)); } } } -static void __devinit decode_winbond(int efer, int key, int devid, int devrev, int oldid) +static void __devinit decode_winbond(int efer, int key, int devid, + int devrev, int oldid) { const char *type = "unknown"; - int id,progif=2; + int id, progif = 2; if (devid == devrev) /* simple heuristics, we happened to read some - non-winbond register */ + non-winbond register */ return; - id=(devid<<8) | devrev; + id = (devid << 8) | devrev; /* Values are from public data sheets pdf files, I can just - confirm 83977TF is correct :-) */ - if (id == 0x9771) type="83977F/AF"; - else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x"; - else if (id == 0x9774) type="83977ATF"; - else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x"; - else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x"; - else if ((id & ~0x0f) == 0x5210) type="83627"; - else if ((id & ~0x0f) == 0x6010) type="83697HF"; - else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;} - else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;} - else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;} - else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;} - else progif=0; + confirm 83977TF is correct :-) */ + if (id == 0x9771) + type = "83977F/AF"; + else if (id == 0x9773) + type = "83977TF / SMSC 97w33x/97w34x"; + else if (id == 0x9774) + type = "83977ATF"; + else if ((id & ~0x0f) == 0x5270) + type = "83977CTF / SMSC 97w36x"; + else if ((id & ~0x0f) == 0x52f0) + type = "83977EF / SMSC 97w35x"; + else if ((id & ~0x0f) == 0x5210) + type = "83627"; + else if ((id & ~0x0f) == 0x6010) + type = "83697HF"; + else if ((oldid & 0x0f) == 0x0a) { + type = "83877F"; + progif = 1; + } else if ((oldid & 0x0f) == 0x0b) { + type = "83877AF"; + progif = 1; + } else if ((oldid & 0x0f) == 0x0c) { + type = "83877TF"; + progif = 1; + } else if ((oldid & 0x0f) == 0x0d) { + type = "83877ATF"; + progif = 1; + } else + progif = 0; if (verbose_probing) printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x " - "devid=%02x devrev=%02x oldid=%02x type=%s\n", + "devid=%02x devrev=%02x oldid=%02x type=%s\n", efer, key, devid, devrev, oldid, type); if (progif == 2) - show_parconfig_winbond(efer,key); + show_parconfig_winbond(efer, key); } static void __devinit decode_smsc(int efer, int key, int devid, int devrev) { - const char *type = "unknown"; + const char *type = "unknown"; void (*func)(int io, int key); - int id; + int id; - if (devid == devrev) + if (devid == devrev) /* simple heuristics, we happened to read some - non-smsc register */ + non-smsc register */ return; - func=NULL; - id=(devid<<8) | devrev; + func = NULL; + id = (devid << 8) | devrev; - if (id==0x0302) {type="37c669"; func=show_parconfig_smsc37c669;} - else if (id==0x6582) type="37c665IR"; - else if (devid==0x65) type="37c665GT"; - else if (devid==0x66) type="37c666GT"; + if (id == 0x0302) { + type = "37c669"; + func = show_parconfig_smsc37c669; + } else if (id == 0x6582) + type = "37c665IR"; + else if (devid == 0x65) + type = "37c665GT"; + else if (devid == 0x66) + type = "37c666GT"; if (verbose_probing) printk(KERN_INFO "SMSC chip at EFER=0x%x " @@ -1407,138 +1465,138 @@ static void __devinit decode_smsc(int efer, int key, int devid, int devrev) efer, key, devid, devrev, type); if (func) - func(efer,key); + func(efer, key); } static void __devinit winbond_check(int io, int key) { - int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + int devid, devrev, oldid, x_devid, x_devrev, x_oldid; if (!request_region(io, 3, __func__)) return; /* First probe without key */ - outb(0x20,io); - x_devid=inb(io+1); - outb(0x21,io); - x_devrev=inb(io+1); - outb(0x09,io); - x_oldid=inb(io+1); - - outb(key,io); - outb(key,io); /* Write Magic Sequence to EFER, extended - funtion enable register */ - outb(0x20,io); /* Write EFIR, extended function index register */ - devid=inb(io+1); /* Read EFDR, extended function data register */ - outb(0x21,io); - devrev=inb(io+1); - outb(0x09,io); - oldid=inb(io+1); - outb(0xaa,io); /* Magic Seal */ + outb(0x20, io); + x_devid = inb(io + 1); + outb(0x21, io); + x_devrev = inb(io + 1); + outb(0x09, io); + x_oldid = inb(io + 1); + + outb(key, io); + outb(key, io); /* Write Magic Sequence to EFER, extended + funtion enable register */ + outb(0x20, io); /* Write EFIR, extended function index register */ + devid = inb(io + 1); /* Read EFDR, extended function data register */ + outb(0x21, io); + devrev = inb(io + 1); + outb(0x09, io); + oldid = inb(io + 1); + outb(0xaa, io); /* Magic Seal */ if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) goto out; /* protection against false positives */ - decode_winbond(io,key,devid,devrev,oldid); + decode_winbond(io, key, devid, devrev, oldid); out: release_region(io, 3); } -static void __devinit winbond_check2(int io,int key) +static void __devinit winbond_check2(int io, int key) { - int devid,devrev,oldid,x_devid,x_devrev,x_oldid; + int devid, devrev, oldid, x_devid, x_devrev, x_oldid; if (!request_region(io, 3, __func__)) return; /* First probe without the key */ - outb(0x20,io+2); - x_devid=inb(io+2); - outb(0x21,io+1); - x_devrev=inb(io+2); - outb(0x09,io+1); - x_oldid=inb(io+2); - - outb(key,io); /* Write Magic Byte to EFER, extended - funtion enable register */ - outb(0x20,io+2); /* Write EFIR, extended function index register */ - devid=inb(io+2); /* Read EFDR, extended function data register */ - outb(0x21,io+1); - devrev=inb(io+2); - outb(0x09,io+1); - oldid=inb(io+2); - outb(0xaa,io); /* Magic Seal */ - - if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid)) + outb(0x20, io + 2); + x_devid = inb(io + 2); + outb(0x21, io + 1); + x_devrev = inb(io + 2); + outb(0x09, io + 1); + x_oldid = inb(io + 2); + + outb(key, io); /* Write Magic Byte to EFER, extended + funtion enable register */ + outb(0x20, io + 2); /* Write EFIR, extended function index register */ + devid = inb(io + 2); /* Read EFDR, extended function data register */ + outb(0x21, io + 1); + devrev = inb(io + 2); + outb(0x09, io + 1); + oldid = inb(io + 2); + outb(0xaa, io); /* Magic Seal */ + + if (x_devid == devid && x_devrev == devrev && x_oldid == oldid) goto out; /* protection against false positives */ - decode_winbond(io,key,devid,devrev,oldid); + decode_winbond(io, key, devid, devrev, oldid); out: release_region(io, 3); } static void __devinit smsc_check(int io, int key) { - int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev; + int id, rev, oldid, oldrev, x_id, x_rev, x_oldid, x_oldrev; if (!request_region(io, 3, __func__)) return; /* First probe without the key */ - outb(0x0d,io); - x_oldid=inb(io+1); - outb(0x0e,io); - x_oldrev=inb(io+1); - outb(0x20,io); - x_id=inb(io+1); - outb(0x21,io); - x_rev=inb(io+1); - - outb(key,io); - outb(key,io); /* Write Magic Sequence to EFER, extended - funtion enable register */ - outb(0x0d,io); /* Write EFIR, extended function index register */ - oldid=inb(io+1); /* Read EFDR, extended function data register */ - outb(0x0e,io); - oldrev=inb(io+1); - outb(0x20,io); - id=inb(io+1); - outb(0x21,io); - rev=inb(io+1); - outb(0xaa,io); /* Magic Seal */ - - if ((x_id == id) && (x_oldrev == oldrev) && - (x_oldid == oldid) && (x_rev == rev)) + outb(0x0d, io); + x_oldid = inb(io + 1); + outb(0x0e, io); + x_oldrev = inb(io + 1); + outb(0x20, io); + x_id = inb(io + 1); + outb(0x21, io); + x_rev = inb(io + 1); + + outb(key, io); + outb(key, io); /* Write Magic Sequence to EFER, extended + funtion enable register */ + outb(0x0d, io); /* Write EFIR, extended function index register */ + oldid = inb(io + 1); /* Read EFDR, extended function data register */ + outb(0x0e, io); + oldrev = inb(io + 1); + outb(0x20, io); + id = inb(io + 1); + outb(0x21, io); + rev = inb(io + 1); + outb(0xaa, io); /* Magic Seal */ + + if (x_id == id && x_oldrev == oldrev && + x_oldid == oldid && x_rev == rev) goto out; /* protection against false positives */ - decode_smsc(io,key,oldid,oldrev); + decode_smsc(io, key, oldid, oldrev); out: release_region(io, 3); } -static void __devinit detect_and_report_winbond (void) -{ +static void __devinit detect_and_report_winbond(void) +{ if (verbose_probing) printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n"); - winbond_check(0x3f0,0x87); - winbond_check(0x370,0x87); - winbond_check(0x2e ,0x87); - winbond_check(0x4e ,0x87); - winbond_check(0x3f0,0x86); - winbond_check2(0x250,0x88); - winbond_check2(0x250,0x89); + winbond_check(0x3f0, 0x87); + winbond_check(0x370, 0x87); + winbond_check(0x2e , 0x87); + winbond_check(0x4e , 0x87); + winbond_check(0x3f0, 0x86); + winbond_check2(0x250, 0x88); + winbond_check2(0x250, 0x89); } -static void __devinit detect_and_report_smsc (void) +static void __devinit detect_and_report_smsc(void) { if (verbose_probing) printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n"); - smsc_check(0x3f0,0x55); - smsc_check(0x370,0x55); - smsc_check(0x3f0,0x44); - smsc_check(0x370,0x44); + smsc_check(0x3f0, 0x55); + smsc_check(0x370, 0x55); + smsc_check(0x3f0, 0x44); + smsc_check(0x370, 0x44); } static void __devinit detect_and_report_it87(void) @@ -1573,34 +1631,39 @@ static void __devinit detect_and_report_it87(void) } #endif /* CONFIG_PARPORT_PC_SUPERIO */ -static int get_superio_dma (struct parport *p) +static struct superio_struct *find_superio(struct parport *p) { - int i=0; - while( (superios[i].io != p->base) && (i<NR_SUPERIOS)) - i++; - if (i!=NR_SUPERIOS) - return superios[i].dma; + int i; + for (i = 0; i < NR_SUPERIOS; i++) + if (superios[i].io != p->base) + return &superios[i]; + return NULL; +} + +static int get_superio_dma(struct parport *p) +{ + struct superio_struct *s = find_superio(p); + if (s) + return s->dma; return PARPORT_DMA_NONE; } -static int get_superio_irq (struct parport *p) +static int get_superio_irq(struct parport *p) { - int i=0; - while( (superios[i].io != p->base) && (i<NR_SUPERIOS)) - i++; - if (i!=NR_SUPERIOS) - return superios[i].irq; - return PARPORT_IRQ_NONE; + struct superio_struct *s = find_superio(p); + if (s) + return s->irq; + return PARPORT_IRQ_NONE; } - + /* --- Mode detection ------------------------------------- */ /* * Checks for port existence, all ports support SPP MODE - * Returns: + * Returns: * 0 : No parallel port at this address - * PARPORT_MODE_PCSPP : SPP port detected + * PARPORT_MODE_PCSPP : SPP port detected * (if the user specified an ioport himself, * this shall always be the case!) * @@ -1610,7 +1673,7 @@ static int parport_SPP_supported(struct parport *pb) unsigned char r, w; /* - * first clear an eventually pending EPP timeout + * first clear an eventually pending EPP timeout * I (sailer@ife.ee.ethz.ch) have an SMSC chipset * that does not even respond to SPP cycles if an EPP * timeout is pending @@ -1619,19 +1682,19 @@ static int parport_SPP_supported(struct parport *pb) /* Do a simple read-write test to make sure the port exists. */ w = 0xc; - outb (w, CONTROL (pb)); + outb(w, CONTROL(pb)); /* Is there a control register that we can read from? Some * ports don't allow reads, so read_control just returns a * software copy. Some ports _do_ allow reads, so bypass the * software copy here. In addition, some bits aren't * writable. */ - r = inb (CONTROL (pb)); + r = inb(CONTROL(pb)); if ((r & 0xf) == w) { w = 0xe; - outb (w, CONTROL (pb)); - r = inb (CONTROL (pb)); - outb (0xc, CONTROL (pb)); + outb(w, CONTROL(pb)); + r = inb(CONTROL(pb)); + outb(0xc, CONTROL(pb)); if ((r & 0xf) == w) return PARPORT_MODE_PCSPP; } @@ -1639,18 +1702,18 @@ static int parport_SPP_supported(struct parport *pb) if (user_specified) /* That didn't work, but the user thinks there's a * port here. */ - printk (KERN_INFO "parport 0x%lx (WARNING): CTR: " + printk(KERN_INFO "parport 0x%lx (WARNING): CTR: " "wrote 0x%02x, read 0x%02x\n", pb->base, w, r); /* Try the data register. The data lines aren't tri-stated at * this stage, so we expect back what we wrote. */ w = 0xaa; - parport_pc_write_data (pb, w); - r = parport_pc_read_data (pb); + parport_pc_write_data(pb, w); + r = parport_pc_read_data(pb); if (r == w) { w = 0x55; - parport_pc_write_data (pb, w); - r = parport_pc_read_data (pb); + parport_pc_write_data(pb, w); + r = parport_pc_read_data(pb); if (r == w) return PARPORT_MODE_PCSPP; } @@ -1658,9 +1721,9 @@ static int parport_SPP_supported(struct parport *pb) if (user_specified) { /* Didn't work, but the user is convinced this is the * place. */ - printk (KERN_INFO "parport 0x%lx (WARNING): DATA: " + printk(KERN_INFO "parport 0x%lx (WARNING): DATA: " "wrote 0x%02x, read 0x%02x\n", pb->base, w, r); - printk (KERN_INFO "parport 0x%lx: You gave this address, " + printk(KERN_INFO "parport 0x%lx: You gave this address, " "but there is probably no parallel port there!\n", pb->base); } @@ -1691,33 +1754,33 @@ static int parport_ECR_present(struct parport *pb) struct parport_pc_private *priv = pb->private_data; unsigned char r = 0xc; - outb (r, CONTROL (pb)); - if ((inb (ECONTROL (pb)) & 0x3) == (r & 0x3)) { - outb (r ^ 0x2, CONTROL (pb)); /* Toggle bit 1 */ + outb(r, CONTROL(pb)); + if ((inb(ECONTROL(pb)) & 0x3) == (r & 0x3)) { + outb(r ^ 0x2, CONTROL(pb)); /* Toggle bit 1 */ - r = inb (CONTROL (pb)); - if ((inb (ECONTROL (pb)) & 0x2) == (r & 0x2)) + r = inb(CONTROL(pb)); + if ((inb(ECONTROL(pb)) & 0x2) == (r & 0x2)) goto no_reg; /* Sure that no ECR register exists */ } - - if ((inb (ECONTROL (pb)) & 0x3 ) != 0x1) + + if ((inb(ECONTROL(pb)) & 0x3) != 0x1) goto no_reg; - ECR_WRITE (pb, 0x34); - if (inb (ECONTROL (pb)) != 0x35) + ECR_WRITE(pb, 0x34); + if (inb(ECONTROL(pb)) != 0x35) goto no_reg; priv->ecr = 1; - outb (0xc, CONTROL (pb)); - + outb(0xc, CONTROL(pb)); + /* Go to mode 000 */ - frob_set_mode (pb, ECR_SPP); + frob_set_mode(pb, ECR_SPP); return 1; no_reg: - outb (0xc, CONTROL (pb)); - return 0; + outb(0xc, CONTROL(pb)); + return 0; } #ifdef CONFIG_PARPORT_1284 @@ -1727,7 +1790,7 @@ static int parport_ECR_present(struct parport *pb) * allows us to read data from the data lines. In theory we would get back * 0xff but any peripheral attached to the port may drag some or all of the * lines down to zero. So if we get back anything that isn't the contents - * of the data register we deem PS/2 support to be present. + * of the data register we deem PS/2 support to be present. * * Some SPP ports have "half PS/2" ability - you can't turn off the line * drivers, but an external peripheral with sufficiently beefy drivers of @@ -1735,26 +1798,28 @@ static int parport_ECR_present(struct parport *pb) * where they can then be read back as normal. Ports with this property * and the right type of device attached are likely to fail the SPP test, * (as they will appear to have stuck bits) and so the fact that they might - * be misdetected here is rather academic. + * be misdetected here is rather academic. */ static int parport_PS2_supported(struct parport *pb) { int ok = 0; - + clear_epp_timeout(pb); /* try to tri-state the buffer */ - parport_pc_data_reverse (pb); - + parport_pc_data_reverse(pb); + parport_pc_write_data(pb, 0x55); - if (parport_pc_read_data(pb) != 0x55) ok++; + if (parport_pc_read_data(pb) != 0x55) + ok++; parport_pc_write_data(pb, 0xaa); - if (parport_pc_read_data(pb) != 0xaa) ok++; + if (parport_pc_read_data(pb) != 0xaa) + ok++; /* cancel input mode */ - parport_pc_data_forward (pb); + parport_pc_data_forward(pb); if (ok) { pb->modes |= PARPORT_MODE_TRISTATE; @@ -1773,68 +1838,68 @@ static int parport_ECP_supported(struct parport *pb) int config, configb; int pword; struct parport_pc_private *priv = pb->private_data; - /* Translate ECP intrLine to ISA irq value */ - static const int intrline[]= { 0, 7, 9, 10, 11, 14, 15, 5 }; + /* Translate ECP intrLine to ISA irq value */ + static const int intrline[] = { 0, 7, 9, 10, 11, 14, 15, 5 }; /* If there is no ECR, we have no hope of supporting ECP. */ if (!priv->ecr) return 0; /* Find out FIFO depth */ - ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ - ECR_WRITE (pb, ECR_TST << 5); /* TEST FIFO */ - for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02); i++) - outb (0xaa, FIFO (pb)); + ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */ + ECR_WRITE(pb, ECR_TST << 5); /* TEST FIFO */ + for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02); i++) + outb(0xaa, FIFO(pb)); /* * Using LGS chipset it uses ECR register, but * it doesn't support ECP or FIFO MODE */ if (i == 1024) { - ECR_WRITE (pb, ECR_SPP << 5); + ECR_WRITE(pb, ECR_SPP << 5); return 0; } priv->fifo_depth = i; if (verbose_probing) - printk (KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i); + printk(KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i); /* Find out writeIntrThreshold */ - frob_econtrol (pb, 1<<2, 1<<2); - frob_econtrol (pb, 1<<2, 0); + frob_econtrol(pb, 1<<2, 1<<2); + frob_econtrol(pb, 1<<2, 0); for (i = 1; i <= priv->fifo_depth; i++) { - inb (FIFO (pb)); - udelay (50); - if (inb (ECONTROL (pb)) & (1<<2)) + inb(FIFO(pb)); + udelay(50); + if (inb(ECONTROL(pb)) & (1<<2)) break; } if (i <= priv->fifo_depth) { if (verbose_probing) - printk (KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n", + printk(KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n", pb->base, i); } else /* Number of bytes we know we can write if we get an - interrupt. */ + interrupt. */ i = 0; priv->writeIntrThreshold = i; /* Find out readIntrThreshold */ - frob_set_mode (pb, ECR_PS2); /* Reset FIFO and enable PS2 */ - parport_pc_data_reverse (pb); /* Must be in PS2 mode */ - frob_set_mode (pb, ECR_TST); /* Test FIFO */ - frob_econtrol (pb, 1<<2, 1<<2); - frob_econtrol (pb, 1<<2, 0); + frob_set_mode(pb, ECR_PS2); /* Reset FIFO and enable PS2 */ + parport_pc_data_reverse(pb); /* Must be in PS2 mode */ + frob_set_mode(pb, ECR_TST); /* Test FIFO */ + frob_econtrol(pb, 1<<2, 1<<2); + frob_econtrol(pb, 1<<2, 0); for (i = 1; i <= priv->fifo_depth; i++) { - outb (0xaa, FIFO (pb)); - if (inb (ECONTROL (pb)) & (1<<2)) + outb(0xaa, FIFO(pb)); + if (inb(ECONTROL(pb)) & (1<<2)) break; } if (i <= priv->fifo_depth) { if (verbose_probing) - printk (KERN_INFO "0x%lx: readIntrThreshold is %d\n", + printk(KERN_INFO "0x%lx: readIntrThreshold is %d\n", pb->base, i); } else /* Number of bytes we can read if we get an interrupt. */ @@ -1842,23 +1907,23 @@ static int parport_ECP_supported(struct parport *pb) priv->readIntrThreshold = i; - ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ - ECR_WRITE (pb, 0xf4); /* Configuration mode */ - config = inb (CONFIGA (pb)); + ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */ + ECR_WRITE(pb, 0xf4); /* Configuration mode */ + config = inb(CONFIGA(pb)); pword = (config >> 4) & 0x7; switch (pword) { case 0: pword = 2; - printk (KERN_WARNING "0x%lx: Unsupported pword size!\n", + printk(KERN_WARNING "0x%lx: Unsupported pword size!\n", pb->base); break; case 2: pword = 4; - printk (KERN_WARNING "0x%lx: Unsupported pword size!\n", + printk(KERN_WARNING "0x%lx: Unsupported pword size!\n", pb->base); break; default: - printk (KERN_WARNING "0x%lx: Unknown implementation ID\n", + printk(KERN_WARNING "0x%lx: Unknown implementation ID\n", pb->base); /* Assume 1 */ case 1: @@ -1867,28 +1932,29 @@ static int parport_ECP_supported(struct parport *pb) priv->pword = pword; if (verbose_probing) { - printk (KERN_DEBUG "0x%lx: PWord is %d bits\n", pb->base, 8 * pword); - - printk (KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base, + printk(KERN_DEBUG "0x%lx: PWord is %d bits\n", + pb->base, 8 * pword); + + printk(KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base, config & 0x80 ? "Level" : "Pulses"); - configb = inb (CONFIGB (pb)); - printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n", + configb = inb(CONFIGB(pb)); + printk(KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n", pb->base, config, configb); - printk (KERN_DEBUG "0x%lx: ECP settings irq=", pb->base); - if ((configb >>3) & 0x07) - printk("%d",intrline[(configb >>3) & 0x07]); + printk(KERN_DEBUG "0x%lx: ECP settings irq=", pb->base); + if ((configb >> 3) & 0x07) + printk("%d", intrline[(configb >> 3) & 0x07]); else printk("<none or set by other means>"); - printk (" dma="); - if( (configb & 0x03 ) == 0x00) + printk(" dma="); + if ((configb & 0x03) == 0x00) printk("<none or set by other means>\n"); else - printk("%d\n",configb & 0x07); + printk("%d\n", configb & 0x07); } /* Go back to mode 000 */ - frob_set_mode (pb, ECR_SPP); + frob_set_mode(pb, ECR_SPP); return 1; } @@ -1903,10 +1969,10 @@ static int parport_ECPPS2_supported(struct parport *pb) if (!priv->ecr) return 0; - oecr = inb (ECONTROL (pb)); - ECR_WRITE (pb, ECR_PS2 << 5); + oecr = inb(ECONTROL(pb)); + ECR_WRITE(pb, ECR_PS2 << 5); result = parport_PS2_supported(pb); - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); return result; } @@ -1930,16 +1996,15 @@ static int parport_EPP_supported(struct parport *pb) */ /* If EPP timeout bit clear then EPP available */ - if (!clear_epp_timeout(pb)) { + if (!clear_epp_timeout(pb)) return 0; /* No way to clear timeout */ - } /* Check for Intel bug. */ if (priv->ecr) { unsigned char i; for (i = 0x00; i < 0x80; i += 0x20) { - ECR_WRITE (pb, i); - if (clear_epp_timeout (pb)) { + ECR_WRITE(pb, i); + if (clear_epp_timeout(pb)) { /* Phony EPP in ECP. */ return 0; } @@ -1963,17 +2028,16 @@ static int parport_ECPEPP_supported(struct parport *pb) int result; unsigned char oecr; - if (!priv->ecr) { + if (!priv->ecr) return 0; - } - oecr = inb (ECONTROL (pb)); + oecr = inb(ECONTROL(pb)); /* Search for SMC style EPP+ECP mode */ - ECR_WRITE (pb, 0x80); - outb (0x04, CONTROL (pb)); + ECR_WRITE(pb, 0x80); + outb(0x04, CONTROL(pb)); result = parport_EPP_supported(pb); - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); if (result) { /* Set up access functions to use ECP+EPP hardware. */ @@ -1991,11 +2055,25 @@ static int parport_ECPEPP_supported(struct parport *pb) /* Don't bother probing for modes we know we won't use. */ static int __devinit parport_PS2_supported(struct parport *pb) { return 0; } #ifdef CONFIG_PARPORT_PC_FIFO -static int parport_ECP_supported(struct parport *pb) { return 0; } +static int parport_ECP_supported(struct parport *pb) +{ + return 0; +} #endif -static int __devinit parport_EPP_supported(struct parport *pb) { return 0; } -static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;} -static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;} +static int __devinit parport_EPP_supported(struct parport *pb) +{ + return 0; +} + +static int __devinit parport_ECPEPP_supported(struct parport *pb) +{ + return 0; +} + +static int __devinit parport_ECPPS2_supported(struct parport *pb) +{ + return 0; +} #endif /* No IEEE 1284 support */ @@ -2005,17 +2083,17 @@ static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;} static int programmable_irq_support(struct parport *pb) { int irq, intrLine; - unsigned char oecr = inb (ECONTROL (pb)); + unsigned char oecr = inb(ECONTROL(pb)); static const int lookup[8] = { PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5 }; - ECR_WRITE (pb, ECR_CNF << 5); /* Configuration MODE */ + ECR_WRITE(pb, ECR_CNF << 5); /* Configuration MODE */ - intrLine = (inb (CONFIGB (pb)) >> 3) & 0x07; + intrLine = (inb(CONFIGB(pb)) >> 3) & 0x07; irq = lookup[intrLine]; - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); return irq; } @@ -2025,17 +2103,17 @@ static int irq_probe_ECP(struct parport *pb) unsigned long irqs; irqs = probe_irq_on(); - - ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */ - ECR_WRITE (pb, (ECR_TST << 5) | 0x04); - ECR_WRITE (pb, ECR_TST << 5); + + ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */ + ECR_WRITE(pb, (ECR_TST << 5) | 0x04); + ECR_WRITE(pb, ECR_TST << 5); /* If Full FIFO sure that writeIntrThreshold is generated */ - for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02) ; i++) - outb (0xaa, FIFO (pb)); - + for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02) ; i++) + outb(0xaa, FIFO(pb)); + pb->irq = probe_irq_off(irqs); - ECR_WRITE (pb, ECR_SPP << 5); + ECR_WRITE(pb, ECR_SPP << 5); if (pb->irq <= 0) pb->irq = PARPORT_IRQ_NONE; @@ -2045,7 +2123,7 @@ static int irq_probe_ECP(struct parport *pb) /* * This detection seems that only works in National Semiconductors - * This doesn't work in SMC, LGS, and Winbond + * This doesn't work in SMC, LGS, and Winbond */ static int irq_probe_EPP(struct parport *pb) { @@ -2056,16 +2134,16 @@ static int irq_probe_EPP(struct parport *pb) unsigned char oecr; if (pb->modes & PARPORT_MODE_PCECR) - oecr = inb (ECONTROL (pb)); + oecr = inb(ECONTROL(pb)); irqs = probe_irq_on(); if (pb->modes & PARPORT_MODE_PCECR) - frob_econtrol (pb, 0x10, 0x10); - + frob_econtrol(pb, 0x10, 0x10); + clear_epp_timeout(pb); - parport_pc_frob_control (pb, 0x20, 0x20); - parport_pc_frob_control (pb, 0x10, 0x10); + parport_pc_frob_control(pb, 0x20, 0x20); + parport_pc_frob_control(pb, 0x10, 0x10); clear_epp_timeout(pb); /* Device isn't expecting an EPP read @@ -2074,9 +2152,9 @@ static int irq_probe_EPP(struct parport *pb) parport_pc_read_epp(pb); udelay(20); - pb->irq = probe_irq_off (irqs); + pb->irq = probe_irq_off(irqs); if (pb->modes & PARPORT_MODE_PCECR) - ECR_WRITE (pb, oecr); + ECR_WRITE(pb, oecr); parport_pc_write_control(pb, 0xc); if (pb->irq <= 0) @@ -2133,28 +2211,28 @@ static int parport_irq_probe(struct parport *pb) /* --- DMA detection -------------------------------------- */ /* Only if chipset conforms to ECP ISA Interface Standard */ -static int programmable_dma_support (struct parport *p) +static int programmable_dma_support(struct parport *p) { - unsigned char oecr = inb (ECONTROL (p)); + unsigned char oecr = inb(ECONTROL(p)); int dma; - frob_set_mode (p, ECR_CNF); - - dma = inb (CONFIGB(p)) & 0x07; + frob_set_mode(p, ECR_CNF); + + dma = inb(CONFIGB(p)) & 0x07; /* 000: Indicates jumpered 8-bit DMA if read-only. 100: Indicates jumpered 16-bit DMA if read-only. */ if ((dma & 0x03) == 0) dma = PARPORT_DMA_NONE; - ECR_WRITE (p, oecr); + ECR_WRITE(p, oecr); return dma; } -static int parport_dma_probe (struct parport *p) +static int parport_dma_probe(struct parport *p) { const struct parport_pc_private *priv = p->private_data; - if (priv->ecr) - p->dma = programmable_dma_support(p); /* ask ECP chipset first */ + if (priv->ecr) /* ask ECP chipset first */ + p->dma = programmable_dma_support(p); if (p->dma == PARPORT_DMA_NONE) { /* ask known Super-IO chips proper, although these claim ECP compatible, some don't report their DMA @@ -2212,7 +2290,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (!base_res) goto out4; - memcpy(ops, &parport_pc_ops, sizeof (struct parport_operations)); + memcpy(ops, &parport_pc_ops, sizeof(struct parport_operations)); priv->ctr = 0xc; priv->ctr_writable = ~0x10; priv->ecr = 0; @@ -2239,7 +2317,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (!parport_EPP_supported(p)) parport_ECPEPP_supported(p); } - if (!parport_SPP_supported (p)) + if (!parport_SPP_supported(p)) /* No port. */ goto out5; if (priv->ecr) @@ -2247,7 +2325,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, else parport_PS2_supported(p); - p->size = (p->modes & PARPORT_MODE_EPP)?8:3; + p->size = (p->modes & PARPORT_MODE_EPP) ? 8 : 3; printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base); if (p->base_hi && priv->ecr) @@ -2271,7 +2349,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, } } if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq - is mandatory (see above) */ + is mandatory (see above) */ p->dma = PARPORT_DMA_NONE; #ifdef CONFIG_PARPORT_PC_FIFO @@ -2288,16 +2366,23 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (p->dma != PARPORT_DMA_NONE) { printk(", dma %d", p->dma); p->modes |= PARPORT_MODE_DMA; - } - else printk(", using FIFO"); - } - else + } else + printk(", using FIFO"); + } else /* We can't use the DMA channel after all. */ p->dma = PARPORT_DMA_NONE; #endif /* Allowed to use FIFO/DMA */ printk(" ["); -#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}} + +#define printmode(x) \ + {\ + if (p->modes & PARPORT_MODE_##x) {\ + printk("%s%s", f ? "," : "", #x);\ + f++;\ + } \ + } + { int f = 0; printmode(PCSPP); @@ -2309,10 +2394,10 @@ struct parport *parport_pc_probe_port(unsigned long int base, } #undef printmode #ifndef CONFIG_PARPORT_1284 - printk ("(,...)"); + printk("(,...)"); #endif /* CONFIG_PARPORT_1284 */ printk("]\n"); - if (probedirq != PARPORT_IRQ_NONE) + if (probedirq != PARPORT_IRQ_NONE) printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq); /* If No ECP release the ports grabbed above. */ @@ -2328,7 +2413,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, if (p->irq != PARPORT_IRQ_NONE) { if (request_irq(p->irq, parport_irq_handler, irqflags, p->name, p)) { - printk (KERN_WARNING "%s: irq %d in use, " + printk(KERN_WARNING "%s: irq %d in use, " "resorting to polled operation\n", p->name, p->irq); p->irq = PARPORT_IRQ_NONE; @@ -2338,8 +2423,8 @@ struct parport *parport_pc_probe_port(unsigned long int base, #ifdef CONFIG_PARPORT_PC_FIFO #ifdef HAS_DMA if (p->dma != PARPORT_DMA_NONE) { - if (request_dma (p->dma, p->name)) { - printk (KERN_WARNING "%s: dma %d in use, " + if (request_dma(p->dma, p->name)) { + printk(KERN_WARNING "%s: dma %d in use, " "resorting to PIO operation\n", p->name, p->dma); p->dma = PARPORT_DMA_NONE; @@ -2349,8 +2434,8 @@ struct parport *parport_pc_probe_port(unsigned long int base, PAGE_SIZE, &priv->dma_handle, GFP_KERNEL); - if (! priv->dma_buf) { - printk (KERN_WARNING "%s: " + if (!priv->dma_buf) { + printk(KERN_WARNING "%s: " "cannot get buffer for DMA, " "resorting to PIO operation\n", p->name); @@ -2369,10 +2454,10 @@ struct parport *parport_pc_probe_port(unsigned long int base, * Put the ECP detected port in PS2 mode. * Do this also for ports that have ECR but don't do ECP. */ - ECR_WRITE (p, 0x34); + ECR_WRITE(p, 0x34); parport_pc_write_data(p, 0); - parport_pc_data_forward (p); + parport_pc_data_forward(p); /* Now that we've told the sharing engine about the port, and found out its characteristics, let the high-level drivers @@ -2380,7 +2465,7 @@ struct parport *parport_pc_probe_port(unsigned long int base, spin_lock(&ports_lock); list_add(&priv->list, &ports_list); spin_unlock(&ports_lock); - parport_announce_port (p); + parport_announce_port(p); return p; @@ -2393,18 +2478,17 @@ out5: out4: parport_put_port(p); out3: - kfree (priv); + kfree(priv); out2: - kfree (ops); + kfree(ops); out1: if (pdev) platform_device_unregister(pdev); return NULL; } +EXPORT_SYMBOL(parport_pc_probe_port); -EXPORT_SYMBOL (parport_pc_probe_port); - -void parport_pc_unregister_port (struct parport *p) +void parport_pc_unregister_port(struct parport *p) { struct parport_pc_private *priv = p->private_data; struct parport_operations *ops = p->ops; @@ -2430,17 +2514,16 @@ void parport_pc_unregister_port (struct parport *p) priv->dma_buf, priv->dma_handle); #endif - kfree (p->private_data); + kfree(p->private_data); parport_put_port(p); - kfree (ops); /* hope no-one cached it */ + kfree(ops); /* hope no-one cached it */ } - -EXPORT_SYMBOL (parport_pc_unregister_port); +EXPORT_SYMBOL(parport_pc_unregister_port); #ifdef CONFIG_PCI /* ITE support maintained by Rich Liu <richliu@poorman.org> */ -static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, +static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq, int autodma, const struct parport_pc_via_data *via) { @@ -2452,73 +2535,74 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, int irq; int i; - DPRINTK (KERN_DEBUG "sio_ite_8872_probe()\n"); - - // make sure which one chip - for(i = 0; i < 5; i++) { + DPRINTK(KERN_DEBUG "sio_ite_8872_probe()\n"); + + /* make sure which one chip */ + for (i = 0; i < 5; i++) { base_res = request_region(inta_addr[i], 32, "it887x"); if (base_res) { int test; - pci_write_config_dword (pdev, 0x60, + pci_write_config_dword(pdev, 0x60, 0xe5000000 | inta_addr[i]); - pci_write_config_dword (pdev, 0x78, + pci_write_config_dword(pdev, 0x78, 0x00000000 | inta_addr[i]); - test = inb (inta_addr[i]); - if (test != 0xff) break; + test = inb(inta_addr[i]); + if (test != 0xff) + break; release_region(inta_addr[i], 0x8); } } - if(i >= 5) { - printk (KERN_INFO "parport_pc: cannot find ITE8872 INTA\n"); + if (i >= 5) { + printk(KERN_INFO "parport_pc: cannot find ITE8872 INTA\n"); return 0; } - type = inb (inta_addr[i] + 0x18); + type = inb(inta_addr[i] + 0x18); type &= 0x0f; switch (type) { case 0x2: - printk (KERN_INFO "parport_pc: ITE8871 found (1P)\n"); + printk(KERN_INFO "parport_pc: ITE8871 found (1P)\n"); ite8872set = 0x64200000; break; case 0xa: - printk (KERN_INFO "parport_pc: ITE8875 found (1P)\n"); + printk(KERN_INFO "parport_pc: ITE8875 found (1P)\n"); ite8872set = 0x64200000; break; case 0xe: - printk (KERN_INFO "parport_pc: ITE8872 found (2S1P)\n"); + printk(KERN_INFO "parport_pc: ITE8872 found (2S1P)\n"); ite8872set = 0x64e00000; break; case 0x6: - printk (KERN_INFO "parport_pc: ITE8873 found (1S)\n"); + printk(KERN_INFO "parport_pc: ITE8873 found (1S)\n"); return 0; case 0x8: - DPRINTK (KERN_DEBUG "parport_pc: ITE8874 found (2S)\n"); + DPRINTK(KERN_DEBUG "parport_pc: ITE8874 found (2S)\n"); return 0; default: - printk (KERN_INFO "parport_pc: unknown ITE887x\n"); - printk (KERN_INFO "parport_pc: please mail 'lspci -nvv' " + printk(KERN_INFO "parport_pc: unknown ITE887x\n"); + printk(KERN_INFO "parport_pc: please mail 'lspci -nvv' " "output to Rich.Liu@ite.com.tw\n"); return 0; } - pci_read_config_byte (pdev, 0x3c, &ite8872_irq); - pci_read_config_dword (pdev, 0x1c, &ite8872_lpt); + pci_read_config_byte(pdev, 0x3c, &ite8872_irq); + pci_read_config_dword(pdev, 0x1c, &ite8872_lpt); ite8872_lpt &= 0x0000ff00; - pci_read_config_dword (pdev, 0x20, &ite8872_lpthi); + pci_read_config_dword(pdev, 0x20, &ite8872_lpthi); ite8872_lpthi &= 0x0000ff00; - pci_write_config_dword (pdev, 0x6c, 0xe3000000 | ite8872_lpt); - pci_write_config_dword (pdev, 0x70, 0xe3000000 | ite8872_lpthi); - pci_write_config_dword (pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt); - // SET SPP&EPP , Parallel Port NO DMA , Enable All Function - // SET Parallel IRQ - pci_write_config_dword (pdev, 0x9c, + pci_write_config_dword(pdev, 0x6c, 0xe3000000 | ite8872_lpt); + pci_write_config_dword(pdev, 0x70, 0xe3000000 | ite8872_lpthi); + pci_write_config_dword(pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt); + /* SET SPP&EPP , Parallel Port NO DMA , Enable All Function */ + /* SET Parallel IRQ */ + pci_write_config_dword(pdev, 0x9c, ite8872set | (ite8872_irq * 0x11111)); - DPRINTK (KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq); - DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n", + DPRINTK(KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq); + DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n", ite8872_lpt); - DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n", + DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n", ite8872_lpthi); /* Let the user (or defaults) steer us away from interrupts */ @@ -2530,14 +2614,14 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, * Release the resource so that parport_pc_probe_port can get it. */ release_resource(base_res); - if (parport_pc_probe_port (ite8872_lpt, ite8872_lpthi, + if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi, irq, PARPORT_DMA_NONE, &pdev->dev, 0)) { - printk (KERN_INFO + printk(KERN_INFO "parport_pc: ITE 8872 parallel port: io=0x%X", - ite8872_lpt); + ite8872_lpt); if (irq != PARPORT_IRQ_NONE) - printk (", irq=%d", irq); - printk ("\n"); + printk(", irq=%d", irq); + printk("\n"); return 1; } @@ -2546,7 +2630,7 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq, /* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru> based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */ -static int __devinitdata parport_init_mode = 0; +static int __devinitdata parport_init_mode; /* Data for two known VIA chips */ static struct parport_pc_via_data via_686a_data __devinitdata = { @@ -2568,7 +2652,7 @@ static struct parport_pc_via_data via_8231_data __devinitdata = { 0xF6 }; -static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, +static int __devinit sio_via_probe(struct pci_dev *pdev, int autoirq, int autodma, const struct parport_pc_via_data *via) { @@ -2580,38 +2664,38 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n"); - switch(parport_init_mode) - { + switch (parport_init_mode) { case 1: - printk(KERN_DEBUG "parport_pc: setting SPP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_SPP; - break; + printk(KERN_DEBUG "parport_pc: setting SPP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_SPP; + break; case 2: - printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n"); - siofunc = VIA_FUNCTION_PARPORT_SPP; - ppcontrol = VIA_PARPORT_BIDIR; - break; + printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n"); + siofunc = VIA_FUNCTION_PARPORT_SPP; + ppcontrol = VIA_PARPORT_BIDIR; + break; case 3: - printk(KERN_DEBUG "parport_pc: setting EPP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_EPP; - ppcontrol = VIA_PARPORT_BIDIR; - have_epp = 1; - break; + printk(KERN_DEBUG "parport_pc: setting EPP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_EPP; + ppcontrol = VIA_PARPORT_BIDIR; + have_epp = 1; + break; case 4: - printk(KERN_DEBUG "parport_pc: setting ECP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_ECP; - ppcontrol = VIA_PARPORT_BIDIR; - break; + printk(KERN_DEBUG "parport_pc: setting ECP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_ECP; + ppcontrol = VIA_PARPORT_BIDIR; + break; case 5: - printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n"); - siofunc = VIA_FUNCTION_PARPORT_ECP; - ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP; - have_epp = 1; - break; - default: - printk(KERN_DEBUG "parport_pc: probing current configuration\n"); - siofunc = VIA_FUNCTION_PROBE; - break; + printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n"); + siofunc = VIA_FUNCTION_PARPORT_ECP; + ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP; + have_epp = 1; + break; + default: + printk(KERN_DEBUG + "parport_pc: probing current configuration\n"); + siofunc = VIA_FUNCTION_PROBE; + break; } /* * unlock super i/o configuration @@ -2622,38 +2706,36 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, /* Bits 1-0: Parallel Port Mode / Enable */ outb(via->viacfg_function, VIA_CONFIG_INDEX); - tmp = inb (VIA_CONFIG_DATA); + tmp = inb(VIA_CONFIG_DATA); /* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */ outb(via->viacfg_parport_control, VIA_CONFIG_INDEX); - tmp2 = inb (VIA_CONFIG_DATA); - if (siofunc == VIA_FUNCTION_PROBE) - { - siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE; - ppcontrol = tmp2; + tmp2 = inb(VIA_CONFIG_DATA); + if (siofunc == VIA_FUNCTION_PROBE) { + siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE; + ppcontrol = tmp2; + } else { + tmp &= ~VIA_FUNCTION_PARPORT_DISABLE; + tmp |= siofunc; + outb(via->viacfg_function, VIA_CONFIG_INDEX); + outb(tmp, VIA_CONFIG_DATA); + tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP); + tmp2 |= ppcontrol; + outb(via->viacfg_parport_control, VIA_CONFIG_INDEX); + outb(tmp2, VIA_CONFIG_DATA); } - else - { - tmp &= ~VIA_FUNCTION_PARPORT_DISABLE; - tmp |= siofunc; - outb(via->viacfg_function, VIA_CONFIG_INDEX); - outb(tmp, VIA_CONFIG_DATA); - tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP); - tmp2 |= ppcontrol; - outb(via->viacfg_parport_control, VIA_CONFIG_INDEX); - outb(tmp2, VIA_CONFIG_DATA); - } - + /* Parallel Port I/O Base Address, bits 9-2 */ outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); port1 = inb(VIA_CONFIG_DATA) << 2; - - printk (KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",port1); - if ((port1 == 0x3BC) && have_epp) - { - outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); - outb((0x378 >> 2), VIA_CONFIG_DATA); - printk(KERN_DEBUG "parport_pc: Parallel port base changed to 0x378\n"); - port1 = 0x378; + + printk(KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n", + port1); + if (port1 == 0x3BC && have_epp) { + outb(via->viacfg_parport_base, VIA_CONFIG_INDEX); + outb((0x378 >> 2), VIA_CONFIG_DATA); + printk(KERN_DEBUG + "parport_pc: Parallel port base changed to 0x378\n"); + port1 = 0x378; } /* @@ -2667,36 +2749,39 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n"); return 0; } - + /* Bits 7-4: PnP Routing for Parallel Port IRQ */ pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp); irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4); - if (siofunc == VIA_FUNCTION_PARPORT_ECP) - { - /* Bits 3-2: PnP Routing for Parallel Port DMA */ - pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp); - dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2); - } - else - /* if ECP not enabled, DMA is not enabled, assumed bogus 'dma' value */ - dma = PARPORT_DMA_NONE; + if (siofunc == VIA_FUNCTION_PARPORT_ECP) { + /* Bits 3-2: PnP Routing for Parallel Port DMA */ + pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp); + dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2); + } else + /* if ECP not enabled, DMA is not enabled, assumed + bogus 'dma' value */ + dma = PARPORT_DMA_NONE; /* Let the user (or defaults) steer us away from interrupts and DMA */ if (autoirq == PARPORT_IRQ_NONE) { - irq = PARPORT_IRQ_NONE; - dma = PARPORT_DMA_NONE; + irq = PARPORT_IRQ_NONE; + dma = PARPORT_DMA_NONE; } if (autodma == PARPORT_DMA_NONE) - dma = PARPORT_DMA_NONE; + dma = PARPORT_DMA_NONE; switch (port1) { - case 0x3bc: port2 = 0x7bc; break; - case 0x378: port2 = 0x778; break; - case 0x278: port2 = 0x678; break; + case 0x3bc: + port2 = 0x7bc; break; + case 0x378: + port2 = 0x778; break; + case 0x278: + port2 = 0x678; break; default: - printk(KERN_INFO "parport_pc: Weird VIA parport base 0x%X, ignoring\n", - port1); + printk(KERN_INFO + "parport_pc: Weird VIA parport base 0x%X, ignoring\n", + port1); return 0; } @@ -2714,17 +2799,17 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, } /* finally, do the probe with values obtained */ - if (parport_pc_probe_port (port1, port2, irq, dma, &pdev->dev, 0)) { - printk (KERN_INFO + if (parport_pc_probe_port(port1, port2, irq, dma, &pdev->dev, 0)) { + printk(KERN_INFO "parport_pc: VIA parallel port: io=0x%X", port1); if (irq != PARPORT_IRQ_NONE) - printk (", irq=%d", irq); + printk(", irq=%d", irq); if (dma != PARPORT_DMA_NONE) - printk (", dma=%d", dma); - printk ("\n"); + printk(", dma=%d", dma); + printk("\n"); return 1; } - + printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n", port1, irq, dma); return 0; @@ -2732,8 +2817,8 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq, enum parport_pc_sio_types { - sio_via_686a = 0, /* Via VT82C686A motherboard Super I/O */ - sio_via_8231, /* Via VT8231 south bridge integrated Super IO */ + sio_via_686a = 0, /* Via VT82C686A motherboard Super I/O */ + sio_via_8231, /* Via VT8231 south bridge integrated Super IO */ sio_ite_8872, last_sio }; @@ -2804,15 +2889,15 @@ enum parport_pc_pci_cards { }; -/* each element directly indexed from enum list, above +/* each element directly indexed from enum list, above * (but offset by last_sio) */ static struct parport_pc_pci { int numports; struct { /* BAR (base address registers) numbers in the config - space header */ + space header */ int lo; - int hi; /* -1 if not there, >6 for offset-method (max - BAR is 6) */ + int hi; + /* -1 if not there, >6 for offset-method (max BAR is 6) */ } addr[4]; /* If set, this is called immediately after pci_enable_device. @@ -2857,7 +2942,7 @@ static struct parport_pc_pci { /* timedia_4018 */ { 2, { { 0, 1 }, { 2, 3 }, } }, /* timedia_9018a */ { 2, { { 0, 1 }, { 2, 3 }, } }, /* SYBA uses fixed offsets in - a 1K io window */ + a 1K io window */ /* syba_2p_epp AP138B */ { 2, { { 0, 0x078 }, { 0, 0x178 }, } }, /* syba_1p_ecp W83787 */ { 1, { { 0, 0x078 }, } }, /* titan_010l */ { 1, { { 3, -1 }, } }, @@ -2873,11 +2958,14 @@ static struct parport_pc_pci { /* oxsemi_pcie_pport */ { 1, { { 0, 1 }, } }, /* aks_0100 */ { 1, { { 0, -1 }, } }, /* mobility_pp */ { 1, { { 0, 1 }, } }, - /* netmos_9705 */ { 1, { { 0, -1 }, } }, /* untested */ - /* netmos_9715 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */ - /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */ - /* netmos_9805 */ { 1, { { 0, -1 }, } }, /* untested */ - /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */ + + /* The netmos entries below are untested */ + /* netmos_9705 */ { 1, { { 0, -1 }, } }, + /* netmos_9715 */ { 2, { { 0, 1 }, { 2, 3 },} }, + /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, + /* netmos_9805 */ { 1, { { 0, -1 }, } }, + /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, + /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, }; @@ -2906,7 +2994,7 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR, PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar }, { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050, - PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0,0, plx_9050 }, + PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0, 0, plx_9050 }, /* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/ { 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a }, { 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h }, @@ -2940,7 +3028,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { { 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 }, { 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 }, /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/ - { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */ + /* AFAVLAB_TK9902 */ + { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, { 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p}, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP, PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 }, @@ -2983,14 +3072,14 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, { 0, } /* terminate list */ }; -MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl); +MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl); struct pci_parport_data { int num; struct parport *ports[2]; }; -static int parport_pc_pci_probe (struct pci_dev *dev, +static int parport_pc_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) { int err, count, n, i = id->driver_data; @@ -3003,7 +3092,8 @@ static int parport_pc_pci_probe (struct pci_dev *dev, /* This is a PCI card */ i -= last_sio; count = 0; - if ((err = pci_enable_device (dev)) != 0) + err = pci_enable_device(dev); + if (err) return err; data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL); @@ -3011,7 +3101,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev, return -ENOMEM; if (cards[i].preinit_hook && - cards[i].preinit_hook (dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) { + cards[i].preinit_hook(dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) { kfree(data); return -ENODEV; } @@ -3021,25 +3111,25 @@ static int parport_pc_pci_probe (struct pci_dev *dev, int hi = cards[i].addr[n].hi; int irq; unsigned long io_lo, io_hi; - io_lo = pci_resource_start (dev, lo); + io_lo = pci_resource_start(dev, lo); io_hi = 0; if ((hi >= 0) && (hi <= 6)) - io_hi = pci_resource_start (dev, hi); + io_hi = pci_resource_start(dev, hi); else if (hi > 6) io_lo += hi; /* Reinterpret the meaning of - "hi" as an offset (see SYBA - def.) */ + "hi" as an offset (see SYBA + def.) */ /* TODO: test if sharing interrupts works */ irq = dev->irq; if (irq == IRQ_NONE) { - printk (KERN_DEBUG + printk(KERN_DEBUG "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n", parport_pc_pci_tbl[i + last_sio].vendor, parport_pc_pci_tbl[i + last_sio].device, io_lo, io_hi); irq = PARPORT_IRQ_NONE; } else { - printk (KERN_DEBUG + printk(KERN_DEBUG "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n", parport_pc_pci_tbl[i + last_sio].vendor, parport_pc_pci_tbl[i + last_sio].device, @@ -3056,7 +3146,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev, data->num = count; if (cards[i].postinit_hook) - cards[i].postinit_hook (dev, count == 0); + cards[i].postinit_hook(dev, count == 0); if (count) { pci_set_drvdata(dev, data); @@ -3090,7 +3180,7 @@ static struct pci_driver parport_pc_pci_driver = { .remove = __devexit_p(parport_pc_pci_remove), }; -static int __init parport_pc_init_superio (int autoirq, int autodma) +static int __init parport_pc_init_superio(int autoirq, int autodma) { const struct pci_device_id *id; struct pci_dev *pdev = NULL; @@ -3101,8 +3191,9 @@ static int __init parport_pc_init_superio (int autoirq, int autodma) if (id == NULL || id->driver_data >= last_sio) continue; - if (parport_pc_superio_info[id->driver_data].probe - (pdev, autoirq, autodma,parport_pc_superio_info[id->driver_data].via)) { + if (parport_pc_superio_info[id->driver_data].probe( + pdev, autoirq, autodma, + parport_pc_superio_info[id->driver_data].via)) { ret++; } } @@ -3111,7 +3202,10 @@ static int __init parport_pc_init_superio (int autoirq, int autodma) } #else static struct pci_driver parport_pc_pci_driver; -static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;} +static int __init parport_pc_init_superio(int autoirq, int autodma) +{ + return 0; +} #endif /* CONFIG_PCI */ #ifdef CONFIG_PNP @@ -3124,44 +3218,45 @@ static const struct pnp_device_id parport_pc_pnp_tbl[] = { { } }; -MODULE_DEVICE_TABLE(pnp,parport_pc_pnp_tbl); +MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl); -static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id) +static int parport_pc_pnp_probe(struct pnp_dev *dev, + const struct pnp_device_id *id) { struct parport *pdata; unsigned long io_lo, io_hi; int dma, irq; - if (pnp_port_valid(dev,0) && - !(pnp_port_flags(dev,0) & IORESOURCE_DISABLED)) { - io_lo = pnp_port_start(dev,0); + if (pnp_port_valid(dev, 0) && + !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) { + io_lo = pnp_port_start(dev, 0); } else return -EINVAL; - if (pnp_port_valid(dev,1) && - !(pnp_port_flags(dev,1) & IORESOURCE_DISABLED)) { - io_hi = pnp_port_start(dev,1); + if (pnp_port_valid(dev, 1) && + !(pnp_port_flags(dev, 1) & IORESOURCE_DISABLED)) { + io_hi = pnp_port_start(dev, 1); } else io_hi = 0; - if (pnp_irq_valid(dev,0) && - !(pnp_irq_flags(dev,0) & IORESOURCE_DISABLED)) { - irq = pnp_irq(dev,0); + if (pnp_irq_valid(dev, 0) && + !(pnp_irq_flags(dev, 0) & IORESOURCE_DISABLED)) { + irq = pnp_irq(dev, 0); } else irq = PARPORT_IRQ_NONE; - if (pnp_dma_valid(dev,0) && - !(pnp_dma_flags(dev,0) & IORESOURCE_DISABLED)) { - dma = pnp_dma(dev,0); + if (pnp_dma_valid(dev, 0) && + !(pnp_dma_flags(dev, 0) & IORESOURCE_DISABLED)) { + dma = pnp_dma(dev, 0); } else dma = PARPORT_DMA_NONE; dev_info(&dev->dev, "reported by %s\n", dev->protocol->name); - if (!(pdata = parport_pc_probe_port(io_lo, io_hi, - irq, dma, &dev->dev, 0))) + pdata = parport_pc_probe_port(io_lo, io_hi, irq, dma, &dev->dev, 0); + if (pdata == NULL) return -ENODEV; - pnp_set_drvdata(dev,pdata); + pnp_set_drvdata(dev, pdata); return 0; } @@ -3203,7 +3298,7 @@ static struct platform_driver parport_pc_platform_driver = { /* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */ static int __devinit __attribute__((unused)) -parport_pc_find_isa_ports (int autoirq, int autodma) +parport_pc_find_isa_ports(int autoirq, int autodma) { int count = 0; @@ -3227,7 +3322,7 @@ parport_pc_find_isa_ports (int autoirq, int autodma) * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO */ -static void __init parport_pc_find_ports (int autoirq, int autodma) +static void __init parport_pc_find_ports(int autoirq, int autodma) { int count = 0, err; @@ -3261,11 +3356,18 @@ static void __init parport_pc_find_ports (int autoirq, int autodma) * syntax and keep in mind that code below is a cleaned up version. */ -static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { [0 ... PARPORT_PC_MAX_PORTS] = 0 }; -static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = - { [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO }; -static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE }; -static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY }; +static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { + [0 ... PARPORT_PC_MAX_PORTS] = 0 +}; +static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = { + [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO +}; +static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { + [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE +}; +static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { + [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY +}; static int __init parport_parse_param(const char *s, int *val, int automatic, int none, int nofifo) @@ -3306,18 +3408,19 @@ static int __init parport_parse_dma(const char *dmastr, int *val) #ifdef CONFIG_PCI static int __init parport_init_mode_setup(char *str) { - printk(KERN_DEBUG "parport_pc.c: Specified parameter parport_init_mode=%s\n", str); - - if (!strcmp (str, "spp")) - parport_init_mode=1; - if (!strcmp (str, "ps2")) - parport_init_mode=2; - if (!strcmp (str, "epp")) - parport_init_mode=3; - if (!strcmp (str, "ecp")) - parport_init_mode=4; - if (!strcmp (str, "ecpepp")) - parport_init_mode=5; + printk(KERN_DEBUG + "parport_pc.c: Specified parameter parport_init_mode=%s\n", str); + + if (!strcmp(str, "spp")) + parport_init_mode = 1; + if (!strcmp(str, "ps2")) + parport_init_mode = 2; + if (!strcmp(str, "epp")) + parport_init_mode = 3; + if (!strcmp(str, "ecp")) + parport_init_mode = 4; + if (!strcmp(str, "ecpepp")) + parport_init_mode = 5; return 1; } #endif @@ -3341,7 +3444,8 @@ module_param(verbose_probing, int, 0644); #endif #ifdef CONFIG_PCI static char *init_mode; -MODULE_PARM_DESC(init_mode, "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)"); +MODULE_PARM_DESC(init_mode, + "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)"); module_param(init_mode, charp, 0); #endif @@ -3372,7 +3476,7 @@ static int __init parse_parport_params(void) irqval[0] = val; break; default: - printk (KERN_WARNING + printk(KERN_WARNING "parport_pc: irq specified " "without base address. Use 'io=' " "to specify one\n"); @@ -3385,7 +3489,7 @@ static int __init parse_parport_params(void) dmaval[0] = val; break; default: - printk (KERN_WARNING + printk(KERN_WARNING "parport_pc: dma specified " "without base address. Use 'io=' " "to specify one\n"); @@ -3396,7 +3500,7 @@ static int __init parse_parport_params(void) #else -static int parport_setup_ptr __initdata = 0; +static int parport_setup_ptr __initdata; /* * Acceptable parameters: @@ -3407,7 +3511,7 @@ static int parport_setup_ptr __initdata = 0; * * IRQ/DMA may be numeric or 'auto' or 'none' */ -static int __init parport_setup (char *str) +static int __init parport_setup(char *str) { char *endptr; char *sep; @@ -3419,15 +3523,15 @@ static int __init parport_setup (char *str) return 1; } - if (!strncmp (str, "auto", 4)) { + if (!strncmp(str, "auto", 4)) { irqval[0] = PARPORT_IRQ_AUTO; dmaval[0] = PARPORT_DMA_AUTO; return 1; } - val = simple_strtoul (str, &endptr, 0); + val = simple_strtoul(str, &endptr, 0); if (endptr == str) { - printk (KERN_WARNING "parport=%s not understood\n", str); + printk(KERN_WARNING "parport=%s not understood\n", str); return 1; } @@ -3461,7 +3565,7 @@ static int __init parse_parport_params(void) return io[0] == PARPORT_DISABLE; } -__setup ("parport=", parport_setup); +__setup("parport=", parport_setup); /* * Acceptable parameters: @@ -3469,7 +3573,7 @@ __setup ("parport=", parport_setup); * parport_init_mode=[spp|ps2|epp|ecp|ecpepp] */ #ifdef CONFIG_PCI -__setup("parport_init_mode=",parport_init_mode_setup); +__setup("parport_init_mode=", parport_init_mode_setup); #endif #endif @@ -3493,13 +3597,13 @@ static int __init parport_pc_init(void) for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) { if (!io[i]) break; - if ((io_hi[i]) == PARPORT_IOHI_AUTO) - io_hi[i] = 0x400 + io[i]; + if (io_hi[i] == PARPORT_IOHI_AUTO) + io_hi[i] = 0x400 + io[i]; parport_pc_probe_port(io[i], io_hi[i], - irqval[i], dmaval[i], NULL, 0); + irqval[i], dmaval[i], NULL, 0); } } else - parport_pc_find_ports (irqval[0], dmaval[0]); + parport_pc_find_ports(irqval[0], dmaval[0]); return 0; } @@ -3507,9 +3611,9 @@ static int __init parport_pc_init(void) static void __exit parport_pc_exit(void) { if (pci_registered_parport) - pci_unregister_driver (&parport_pc_pci_driver); + pci_unregister_driver(&parport_pc_pci_driver); if (pnp_registered_parport) - pnp_unregister_driver (&parport_pc_pnp_driver); + pnp_unregister_driver(&parport_pc_pnp_driver); platform_driver_unregister(&parport_pc_platform_driver); while (!list_empty(&ports_list)) { diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index a0127e93ade..fb867a9f55e 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -287,6 +287,13 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO, }, + [PORT_AR7] = { + .name = "AR7", + .fifo_size = 16, + .tx_loadsz = 16, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00, + .flags = UART_CAP_FIFO | UART_CAP_AFE, + }, }; #if defined (CONFIG_SERIAL_8250_AU1X00) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 938bc1b6c3f..e371a9c1534 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -2776,6 +2776,9 @@ static struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_OXSEMI, 0x950a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_2_1130000 }, + { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950, + PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0, + pbn_b0_1_921600 }, { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b0_4_115200 }, diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 343e3a35b6a..641e800ed69 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -833,6 +833,7 @@ config SERIAL_IMX bool "IMX serial port support" depends on ARM && (ARCH_IMX || ARCH_MXC) select SERIAL_CORE + select RATIONAL help If you have a machine based on a Motorola IMX CPU you can enable its onboard serial port by enabling this option. @@ -1433,4 +1434,11 @@ config SPORT_BAUD_RATE default 19200 if (SERIAL_SPORT_BAUD_RATE_19200) default 9600 if (SERIAL_SPORT_BAUD_RATE_9600) +config SERIAL_TIMBERDALE + tristate "Support for timberdale UART" + depends on MFD_TIMBERDALE + select SERIAL_CORE + ---help--- + Add support for UART controller on timberdale. + endmenu diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile index d438eb2a73d..45a8658f54d 100644 --- a/drivers/serial/Makefile +++ b/drivers/serial/Makefile @@ -77,3 +77,4 @@ obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o obj-$(CONFIG_SERIAL_QE) += ucc_uart.o +obj-$(CONFIG_SERIAL_TIMBERDALE) += timbuart.o diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c index d86123e0339..e2f6b1bfac9 100644 --- a/drivers/serial/bfin_5xx.c +++ b/drivers/serial/bfin_5xx.c @@ -330,6 +330,11 @@ static void bfin_serial_tx_chars(struct bfin_serial_port *uart) /* Clear TFI bit */ UART_PUT_LSR(uart, TFI); #endif + /* Anomaly notes: + * 05000215 - we always clear ETBEI within last UART TX + * interrupt to end a string. It is always set + * when start a new tx. + */ UART_CLEAR_IER(uart, ETBEI); return; } @@ -415,6 +420,7 @@ static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart) set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail)); set_dma_x_count(uart->tx_dma_channel, uart->tx_count); set_dma_x_modify(uart->tx_dma_channel, 1); + SSYNC(); enable_dma(uart->tx_dma_channel); UART_SET_IER(uart, ETBEI); @@ -473,27 +479,41 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart) void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart) { int x_pos, pos; - unsigned long flags; - - spin_lock_irqsave(&uart->port.lock, flags); + dma_disable_irq(uart->rx_dma_channel); + spin_lock_bh(&uart->port.lock); + + /* 2D DMA RX buffer ring is used. Because curr_y_count and + * curr_x_count can't be read as an atomic operation, + * curr_y_count should be read before curr_x_count. When + * curr_x_count is read, curr_y_count may already indicate + * next buffer line. But, the position calculated here is + * still indicate the old line. The wrong position data may + * be smaller than current buffer tail, which cause garbages + * are received if it is not prohibit. + */ uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel); x_pos = get_dma_curr_xcount(uart->rx_dma_channel); uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows; - if (uart->rx_dma_nrows == DMA_RX_YCOUNT) + if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0) uart->rx_dma_nrows = 0; x_pos = DMA_RX_XCOUNT - x_pos; if (x_pos == DMA_RX_XCOUNT) x_pos = 0; pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos; - if (pos != uart->rx_dma_buf.tail) { + /* Ignore receiving data if new position is in the same line of + * current buffer tail and small. + */ + if (pos > uart->rx_dma_buf.tail || + uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) { uart->rx_dma_buf.head = pos; bfin_serial_dma_rx_chars(uart); uart->rx_dma_buf.tail = uart->rx_dma_buf.head; } - spin_unlock_irqrestore(&uart->port.lock, flags); + spin_unlock_bh(&uart->port.lock); + dma_enable_irq(uart->rx_dma_channel); mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES); } @@ -514,6 +534,11 @@ static irqreturn_t bfin_serial_dma_tx_int(int irq, void *dev_id) if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) { disable_dma(uart->tx_dma_channel); clear_dma_irqstat(uart->tx_dma_channel); + /* Anomaly notes: + * 05000215 - we always clear ETBEI within last UART TX + * interrupt to end a string. It is always set + * when start a new tx. + */ UART_CLEAR_IER(uart, ETBEI); xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1); uart->port.icount.tx += uart->tx_count; @@ -532,11 +557,26 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id) { struct bfin_serial_port *uart = dev_id; unsigned short irqstat; + int x_pos, pos; spin_lock(&uart->port.lock); irqstat = get_dma_curr_irqstat(uart->rx_dma_channel); clear_dma_irqstat(uart->rx_dma_channel); - bfin_serial_dma_rx_chars(uart); + + uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel); + x_pos = get_dma_curr_xcount(uart->rx_dma_channel); + uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows; + if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0) + uart->rx_dma_nrows = 0; + + pos = uart->rx_dma_nrows * DMA_RX_XCOUNT; + if (pos > uart->rx_dma_buf.tail || + uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) { + uart->rx_dma_buf.head = pos; + bfin_serial_dma_rx_chars(uart); + uart->rx_dma_buf.tail = uart->rx_dma_buf.head; + } + spin_unlock(&uart->port.lock); return IRQ_HANDLED; @@ -789,8 +829,16 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios, __func__); } - if (termios->c_cflag & CSTOPB) - lcr |= STB; + /* Anomaly notes: + * 05000231 - STOP bit is always set to 1 whatever the user is set. + */ + if (termios->c_cflag & CSTOPB) { + if (ANOMALY_05000231) + printk(KERN_WARNING "STOP bits other than 1 is not " + "supported in case of anomaly 05000231.\n"); + else + lcr |= STB; + } if (termios->c_cflag & PARENB) lcr |= PEN; if (!(termios->c_cflag & PARODD)) @@ -940,6 +988,10 @@ static void bfin_serial_reset_irda(struct uart_port *port) } #ifdef CONFIG_CONSOLE_POLL +/* Anomaly notes: + * 05000099 - Because we only use THRE in poll_put and DR in poll_get, + * losing other bits of UART_LSR is not a problem here. + */ static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr) { struct bfin_serial_port *uart = (struct bfin_serial_port *)port; @@ -1245,12 +1297,17 @@ static __init void early_serial_write(struct console *con, const char *s, } } +/* + * This should have a .setup or .early_setup in it, but then things get called + * without the command line options, and the baud rate gets messed up - so + * don't let the common infrastructure play with things. (see calls to setup + * & earlysetup in ./kernel/printk.c:register_console() + */ static struct __initdata console bfin_early_serial_console = { .name = "early_BFuart", .write = early_serial_write, .device = uart_console_device, .flags = CON_PRINTBUFFER, - .setup = bfin_serial_console_setup, .index = -1, .data = &bfin_serial_reg, }; diff --git a/drivers/serial/bfin_sport_uart.c b/drivers/serial/bfin_sport_uart.c index 529c0ff7952..34b4ae0fe76 100644 --- a/drivers/serial/bfin_sport_uart.c +++ b/drivers/serial/bfin_sport_uart.c @@ -101,15 +101,16 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value) { pr_debug("%s value:%x\n", __func__, value); /* Place a Start and Stop bit */ - __asm__ volatile ( - "R2 = b#01111111100;\n\t" - "R3 = b#10000000001;\n\t" - "%0 <<= 2;\n\t" - "%0 = %0 & R2;\n\t" - "%0 = %0 | R3;\n\t" - :"=r"(value) - :"0"(value) - :"R2", "R3"); + __asm__ __volatile__ ( + "R2 = b#01111111100;" + "R3 = b#10000000001;" + "%0 <<= 2;" + "%0 = %0 & R2;" + "%0 = %0 | R3;" + : "=d"(value) + : "d"(value) + : "ASTAT", "R2", "R3" + ); pr_debug("%s value:%x\n", __func__, value); SPORT_PUT_TX(up, value); @@ -118,27 +119,30 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value) static inline unsigned int rx_one_byte(struct sport_uart_port *up) { unsigned int value, extract; + u32 tmp_mask1, tmp_mask2, tmp_shift, tmp; value = SPORT_GET_RX32(up); pr_debug("%s value:%x\n", __func__, value); /* Extract 8 bits data */ - __asm__ volatile ( - "R5 = 0;\n\t" - "P0 = 8;\n\t" - "R1 = 0x1801(Z);\n\t" - "R3 = 0x0300(Z);\n\t" - "R4 = 0;\n\t" - "LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t" - "R2 = extract(%1, R1.L)(Z);\n\t" - "R2 <<= R4;\n\t" - "R5 = R5 | R2;\n\t" - "R1 = R1 - R3;\nloop_e:\t" - "R4 += 1;\n\t" - "%0 = R5;\n\t" - :"=r"(extract) - :"r"(value) - :"P0", "R1", "R2","R3","R4", "R5"); + __asm__ __volatile__ ( + "%[extr] = 0;" + "%[mask1] = 0x1801(Z);" + "%[mask2] = 0x0300(Z);" + "%[shift] = 0;" + "LSETUP(.Lloop_s, .Lloop_e) LC0 = %[lc];" + ".Lloop_s:" + "%[tmp] = extract(%[val], %[mask1].L)(Z);" + "%[tmp] <<= %[shift];" + "%[extr] = %[extr] | %[tmp];" + "%[mask1] = %[mask1] - %[mask2];" + ".Lloop_e:" + "%[shift] += 1;" + : [val]"=d"(value), [extr]"=d"(extract), [shift]"=d"(tmp_shift), [tmp]"=d"(tmp), + [mask1]"=d"(tmp_mask1), [mask2]"=d"(tmp_mask2) + : "d"(value), [lc]"a"(8) + : "ASTAT", "LB0", "LC0", "LT0" + ); pr_debug(" extract:%x\n", extract); return extract; @@ -149,7 +153,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate) int tclkdiv, tfsdiv, rclkdiv; /* Set TCR1 and TCR2 */ - SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK)); + SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK)); SPORT_PUT_TCR2(up, 10); pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up)); @@ -419,7 +423,7 @@ static void sport_shutdown(struct uart_port *port) } static void sport_set_termios(struct uart_port *port, - struct termios *termios, struct termios *old) + struct ktermios *termios, struct ktermios *old) { pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag); uart_update_timeout(port, CS8 ,port->uartclk); diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c index a461b3b2c72..9f2891c2c4a 100644 --- a/drivers/serial/icom.c +++ b/drivers/serial/icom.c @@ -137,7 +137,12 @@ static LIST_HEAD(icom_adapter_head); static spinlock_t icom_lock; #ifdef ICOM_TRACE -static inline void trace(struct icom_port *, char *, unsigned long) {}; +static inline void trace(struct icom_port *icom_port, char *trace_pt, + unsigned long trace_data) +{ + dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n", + icom_port->port, trace_pt, trace_data); +} #else static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {}; #endif @@ -408,7 +413,7 @@ static void load_code(struct icom_port *icom_port) release_firmware(fw); /* Set Hardware level */ - if ((icom_port->adapter->version | ADAPTER_V2) == ADAPTER_V2) + if (icom_port->adapter->version == ADAPTER_V2) writeb(V2_HARDWARE, &(icom_port->dram->misc_flags)); /* Start the processor in Adapter */ @@ -861,7 +866,7 @@ static irqreturn_t icom_interrupt(int irq, void *dev_id) /* find icom_port for this interrupt */ icom_adapter = (struct icom_adapter *) dev_id; - if ((icom_adapter->version | ADAPTER_V2) == ADAPTER_V2) { + if (icom_adapter->version == ADAPTER_V2) { int_reg = icom_adapter->base_addr + 0x8024; adapter_interrupts = readl(int_reg); @@ -1647,15 +1652,6 @@ static void __exit icom_exit(void) module_init(icom_init); module_exit(icom_exit); -#ifdef ICOM_TRACE -static inline void trace(struct icom_port *icom_port, char *trace_pt, - unsigned long trace_data) -{ - dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n", - icom_port->port, trace_pt, trace_data); -} -#endif - MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>"); MODULE_DESCRIPTION("IBM iSeries Serial IOA driver"); MODULE_SUPPORTED_DEVICE diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index 5f0be40dfda..7b5d1de9cfe 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -8,6 +8,9 @@ * Author: Sascha Hauer <sascha@saschahauer.de> * Copyright (C) 2004 Pengutronix * + * Copyright (C) 2009 emlix GmbH + * Author: Fabian Godehardt (added IrDA support for iMX) + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -41,6 +44,8 @@ #include <linux/serial_core.h> #include <linux/serial.h> #include <linux/clk.h> +#include <linux/delay.h> +#include <linux/rational.h> #include <asm/io.h> #include <asm/irq.h> @@ -148,6 +153,7 @@ #define UCR4_DREN (1<<0) /* Recv data ready interrupt enable */ #define UFCR_RXTL_SHF 0 /* Receiver trigger level shift */ #define UFCR_RFDIV (7<<7) /* Reference freq divider mask */ +#define UFCR_RFDIV_REG(x) (((x) < 7 ? 6 - (x) : 6) << 7) #define UFCR_TXTL_SHF 10 /* Transmitter trigger level shift */ #define USR1_PARITYERR (1<<15) /* Parity error interrupt flag */ #define USR1_RTSS (1<<14) /* RTS pin status */ @@ -211,10 +217,20 @@ struct imx_port { struct timer_list timer; unsigned int old_status; int txirq,rxirq,rtsirq; - int have_rtscts:1; + unsigned int have_rtscts:1; + unsigned int use_irda:1; + unsigned int irda_inv_rx:1; + unsigned int irda_inv_tx:1; + unsigned short trcv_delay; /* transceiver delay */ struct clk *clk; }; +#ifdef CONFIG_IRDA +#define USE_IRDA(sport) ((sport)->use_irda) +#else +#define USE_IRDA(sport) (0) +#endif + /* * Handle any change of modem status signal since we were last called. */ @@ -268,6 +284,48 @@ static void imx_stop_tx(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long temp; + if (USE_IRDA(sport)) { + /* half duplex - wait for end of transmission */ + int n = 256; + while ((--n > 0) && + !(readl(sport->port.membase + USR2) & USR2_TXDC)) { + udelay(5); + barrier(); + } + /* + * irda transceiver - wait a bit more to avoid + * cutoff, hardware dependent + */ + udelay(sport->trcv_delay); + + /* + * half duplex - reactivate receive mode, + * flush receive pipe echo crap + */ + if (readl(sport->port.membase + USR2) & USR2_TXDC) { + temp = readl(sport->port.membase + UCR1); + temp &= ~(UCR1_TXMPTYEN | UCR1_TRDYEN); + writel(temp, sport->port.membase + UCR1); + + temp = readl(sport->port.membase + UCR4); + temp &= ~(UCR4_TCEN); + writel(temp, sport->port.membase + UCR4); + + while (readl(sport->port.membase + URXD0) & + URXD_CHARRDY) + barrier(); + + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_RRDYEN; + writel(temp, sport->port.membase + UCR1); + + temp = readl(sport->port.membase + UCR4); + temp |= UCR4_DREN; + writel(temp, sport->port.membase + UCR4); + } + return; + } + temp = readl(sport->port.membase + UCR1); writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1); } @@ -302,13 +360,15 @@ static inline void imx_transmit_buffer(struct imx_port *sport) /* send xmit->buf[xmit->tail] * out the port here */ writel(xmit->buf[xmit->tail], sport->port.membase + URTX0); - xmit->tail = (xmit->tail + 1) & - (UART_XMIT_SIZE - 1); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); sport->port.icount.tx++; if (uart_circ_empty(xmit)) break; } + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&sport->port); + if (uart_circ_empty(xmit)) imx_stop_tx(&sport->port); } @@ -321,9 +381,30 @@ static void imx_start_tx(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long temp; + if (USE_IRDA(sport)) { + /* half duplex in IrDA mode; have to disable receive mode */ + temp = readl(sport->port.membase + UCR4); + temp &= ~(UCR4_DREN); + writel(temp, sport->port.membase + UCR4); + + temp = readl(sport->port.membase + UCR1); + temp &= ~(UCR1_RRDYEN); + writel(temp, sport->port.membase + UCR1); + } + temp = readl(sport->port.membase + UCR1); writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1); + if (USE_IRDA(sport)) { + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_TRDYEN; + writel(temp, sport->port.membase + UCR1); + + temp = readl(sport->port.membase + UCR4); + temp |= UCR4_TCEN; + writel(temp, sport->port.membase + UCR4); + } + if (readl(sport->port.membase + UTS) & UTS_TXEMPTY) imx_transmit_buffer(sport); } @@ -395,8 +476,7 @@ static irqreturn_t imx_rxint(int irq, void *dev_id) continue; } - if (uart_handle_sysrq_char - (&sport->port, (unsigned char)rx)) + if (uart_handle_sysrq_char(&sport->port, (unsigned char)rx)) continue; if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) { @@ -471,26 +551,26 @@ static unsigned int imx_tx_empty(struct uart_port *port) */ static unsigned int imx_get_mctrl(struct uart_port *port) { - struct imx_port *sport = (struct imx_port *)port; - unsigned int tmp = TIOCM_DSR | TIOCM_CAR; + struct imx_port *sport = (struct imx_port *)port; + unsigned int tmp = TIOCM_DSR | TIOCM_CAR; - if (readl(sport->port.membase + USR1) & USR1_RTSS) - tmp |= TIOCM_CTS; + if (readl(sport->port.membase + USR1) & USR1_RTSS) + tmp |= TIOCM_CTS; - if (readl(sport->port.membase + UCR2) & UCR2_CTS) - tmp |= TIOCM_RTS; + if (readl(sport->port.membase + UCR2) & UCR2_CTS) + tmp |= TIOCM_RTS; - return tmp; + return tmp; } static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl) { - struct imx_port *sport = (struct imx_port *)port; + struct imx_port *sport = (struct imx_port *)port; unsigned long temp; temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS; - if (mctrl & TIOCM_RTS) + if (mctrl & TIOCM_RTS) temp |= UCR2_CTS; writel(temp, sport->port.membase + UCR2); @@ -534,12 +614,7 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode) if(!ufcr_rfdiv) ufcr_rfdiv = 1; - if(ufcr_rfdiv >= 7) - ufcr_rfdiv = 6; - else - ufcr_rfdiv = 6 - ufcr_rfdiv; - - val |= UFCR_RFDIV & (ufcr_rfdiv << 7); + val |= UFCR_RFDIV_REG(ufcr_rfdiv); writel(val, sport->port.membase + UFCR); @@ -558,8 +633,24 @@ static int imx_startup(struct uart_port *port) * requesting IRQs */ temp = readl(sport->port.membase + UCR4); + + if (USE_IRDA(sport)) + temp |= UCR4_IRSC; + writel(temp & ~UCR4_DREN, sport->port.membase + UCR4); + if (USE_IRDA(sport)) { + /* reset fifo's and state machines */ + int i = 100; + temp = readl(sport->port.membase + UCR2); + temp &= ~UCR2_SRST; + writel(temp, sport->port.membase + UCR2); + while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) && + (--i > 0)) { + udelay(1); + } + } + /* * Allocate the IRQ(s) i.MX1 has three interrupts whereas later * chips only have one interrupt. @@ -575,12 +666,16 @@ static int imx_startup(struct uart_port *port) if (retval) goto error_out2; - retval = request_irq(sport->rtsirq, imx_rtsint, - (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 : - IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING, - DRIVER_NAME, sport); - if (retval) - goto error_out3; + /* do not use RTS IRQ on IrDA */ + if (!USE_IRDA(sport)) { + retval = request_irq(sport->rtsirq, imx_rtsint, + (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 : + IRQF_TRIGGER_FALLING | + IRQF_TRIGGER_RISING, + DRIVER_NAME, sport); + if (retval) + goto error_out3; + } } else { retval = request_irq(sport->port.irq, imx_int, 0, DRIVER_NAME, sport); @@ -597,18 +692,49 @@ static int imx_startup(struct uart_port *port) temp = readl(sport->port.membase + UCR1); temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN; + + if (USE_IRDA(sport)) { + temp |= UCR1_IREN; + temp &= ~(UCR1_RTSDEN); + } + writel(temp, sport->port.membase + UCR1); temp = readl(sport->port.membase + UCR2); temp |= (UCR2_RXEN | UCR2_TXEN); writel(temp, sport->port.membase + UCR2); + if (USE_IRDA(sport)) { + /* clear RX-FIFO */ + int i = 64; + while ((--i > 0) && + (readl(sport->port.membase + URXD0) & URXD_CHARRDY)) { + barrier(); + } + } + #if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3 temp = readl(sport->port.membase + UCR3); temp |= UCR3_RXDMUXSEL; writel(temp, sport->port.membase + UCR3); #endif + if (USE_IRDA(sport)) { + temp = readl(sport->port.membase + UCR4); + if (sport->irda_inv_rx) + temp |= UCR4_INVR; + else + temp &= ~(UCR4_INVR); + writel(temp | UCR4_DREN, sport->port.membase + UCR4); + + temp = readl(sport->port.membase + UCR3); + if (sport->irda_inv_tx) + temp |= UCR3_INVT; + else + temp &= ~(UCR3_INVT); + writel(temp, sport->port.membase + UCR3); + } + /* * Enable modem status interrupts */ @@ -616,6 +742,16 @@ static int imx_startup(struct uart_port *port) imx_enable_ms(&sport->port); spin_unlock_irqrestore(&sport->port.lock,flags); + if (USE_IRDA(sport)) { + struct imxuart_platform_data *pdata; + pdata = sport->port.dev->platform_data; + sport->irda_inv_rx = pdata->irda_inv_rx; + sport->irda_inv_tx = pdata->irda_inv_tx; + sport->trcv_delay = pdata->transceiver_delay; + if (pdata->irda_enable) + pdata->irda_enable(1); + } + return 0; error_out3: @@ -633,6 +769,17 @@ static void imx_shutdown(struct uart_port *port) struct imx_port *sport = (struct imx_port *)port; unsigned long temp; + temp = readl(sport->port.membase + UCR2); + temp &= ~(UCR2_TXEN); + writel(temp, sport->port.membase + UCR2); + + if (USE_IRDA(sport)) { + struct imxuart_platform_data *pdata; + pdata = sport->port.dev->platform_data; + if (pdata->irda_enable) + pdata->irda_enable(0); + } + /* * Stop our timer. */ @@ -642,7 +789,8 @@ static void imx_shutdown(struct uart_port *port) * Free the interrupts */ if (sport->txirq > 0) { - free_irq(sport->rtsirq, sport); + if (!USE_IRDA(sport)) + free_irq(sport->rtsirq, sport); free_irq(sport->txirq, sport); free_irq(sport->rxirq, sport); } else @@ -654,6 +802,9 @@ static void imx_shutdown(struct uart_port *port) temp = readl(sport->port.membase + UCR1); temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); + if (USE_IRDA(sport)) + temp &= ~(UCR1_IREN); + writel(temp, sport->port.membase + UCR1); } @@ -665,7 +816,9 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, unsigned long flags; unsigned int ucr2, old_ucr1, old_txrxen, baud, quot; unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8; - unsigned int div, num, denom, ufcr; + unsigned int div, ufcr; + unsigned long num, denom; + uint64_t tdiv64; /* * If we don't support modem control lines, don't allow @@ -761,38 +914,39 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios, sport->port.membase + UCR2); old_txrxen &= (UCR2_TXEN | UCR2_RXEN); - div = sport->port.uartclk / (baud * 16); - if (div > 7) - div = 7; - if (!div) + if (USE_IRDA(sport)) { + /* + * use maximum available submodule frequency to + * avoid missing short pulses due to low sampling rate + */ div = 1; - - num = baud; - denom = port->uartclk / div / 16; - - /* shift num and denom right until they fit into 16 bits */ - while (num > 0x10000 || denom > 0x10000) { - num >>= 1; - denom >>= 1; + } else { + div = sport->port.uartclk / (baud * 16); + if (div > 7) + div = 7; + if (!div) + div = 1; } - if (num > 0) - num -= 1; - if (denom > 0) - denom -= 1; - writel(num, sport->port.membase + UBIR); - writel(denom, sport->port.membase + UBMR); + rational_best_approximation(16 * div * baud, sport->port.uartclk, + 1 << 16, 1 << 16, &num, &denom); - if (div == 7) - div = 6; /* 6 in RFDIV means divide by 7 */ - else - div = 6 - div; + tdiv64 = sport->port.uartclk; + tdiv64 *= num; + do_div(tdiv64, denom * 16 * div); + tty_encode_baud_rate(sport->port.info->port.tty, + (speed_t)tdiv64, (speed_t)tdiv64); + + num -= 1; + denom -= 1; ufcr = readl(sport->port.membase + UFCR); - ufcr = (ufcr & (~UFCR_RFDIV)) | - (div << 7); + ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div); writel(ufcr, sport->port.membase + UFCR); + writel(num, sport->port.membase + UBIR); + writel(denom, sport->port.membase + UBMR); + #ifdef ONEMS writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS); #endif @@ -1072,22 +1226,22 @@ static struct uart_driver imx_reg = { static int serial_imx_suspend(struct platform_device *dev, pm_message_t state) { - struct imx_port *sport = platform_get_drvdata(dev); + struct imx_port *sport = platform_get_drvdata(dev); - if (sport) - uart_suspend_port(&imx_reg, &sport->port); + if (sport) + uart_suspend_port(&imx_reg, &sport->port); - return 0; + return 0; } static int serial_imx_resume(struct platform_device *dev) { - struct imx_port *sport = platform_get_drvdata(dev); + struct imx_port *sport = platform_get_drvdata(dev); - if (sport) - uart_resume_port(&imx_reg, &sport->port); + if (sport) + uart_resume_port(&imx_reg, &sport->port); - return 0; + return 0; } static int serial_imx_probe(struct platform_device *pdev) @@ -1143,19 +1297,29 @@ static int serial_imx_probe(struct platform_device *pdev) imx_ports[pdev->id] = sport; pdata = pdev->dev.platform_data; - if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS)) + if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS)) sport->have_rtscts = 1; +#ifdef CONFIG_IRDA + if (pdata && (pdata->flags & IMXUART_IRDA)) + sport->use_irda = 1; +#endif + if (pdata->init) { ret = pdata->init(pdev); if (ret) goto clkput; } - uart_add_one_port(&imx_reg, &sport->port); + ret = uart_add_one_port(&imx_reg, &sport->port); + if (ret) + goto deinit; platform_set_drvdata(pdev, &sport->port); return 0; +deinit: + if (pdata->exit) + pdata->exit(pdev); clkput: clk_put(sport->clk); clk_disable(sport->clk); @@ -1193,13 +1357,13 @@ static int serial_imx_remove(struct platform_device *pdev) } static struct platform_driver serial_imx_driver = { - .probe = serial_imx_probe, - .remove = serial_imx_remove, + .probe = serial_imx_probe, + .remove = serial_imx_remove, .suspend = serial_imx_suspend, .resume = serial_imx_resume, .driver = { - .name = "imx-uart", + .name = "imx-uart", .owner = THIS_MODULE, }, }; diff --git a/drivers/serial/jsm/jsm.h b/drivers/serial/jsm/jsm.h index c0a3e2734e2..4e5f3bde046 100644 --- a/drivers/serial/jsm/jsm.h +++ b/drivers/serial/jsm/jsm.h @@ -61,6 +61,7 @@ enum { if ((DBG_##nlevel & jsm_debug)) \ dev_printk(KERN_##klevel, pdev->dev, fmt, ## args) +#define MAXLINES 256 #define MAXPORTS 8 #define MAX_STOPS_SENT 5 diff --git a/drivers/serial/jsm/jsm_tty.c b/drivers/serial/jsm/jsm_tty.c index 31496dc0a0d..107ce2e187b 100644 --- a/drivers/serial/jsm/jsm_tty.c +++ b/drivers/serial/jsm/jsm_tty.c @@ -33,6 +33,8 @@ #include "jsm.h" +static DECLARE_BITMAP(linemap, MAXLINES); + static void jsm_carrier(struct jsm_channel *ch); static inline int jsm_get_mstat(struct jsm_channel *ch) @@ -433,6 +435,7 @@ int __devinit jsm_tty_init(struct jsm_board *brd) int __devinit jsm_uart_port_init(struct jsm_board *brd) { int i; + unsigned int line; struct jsm_channel *ch; if (!brd) @@ -459,9 +462,15 @@ int __devinit jsm_uart_port_init(struct jsm_board *brd) brd->channels[i]->uart_port.membase = brd->re_map_membase; brd->channels[i]->uart_port.fifosize = 16; brd->channels[i]->uart_port.ops = &jsm_ops; - brd->channels[i]->uart_port.line = brd->channels[i]->ch_portnum + brd->boardnum * 2; + line = find_first_zero_bit(linemap, MAXLINES); + if (line >= MAXLINES) { + printk(KERN_INFO "jsm: linemap is full, added device failed\n"); + continue; + } else + set_bit((int)line, linemap); + brd->channels[i]->uart_port.line = line; if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port)) - printk(KERN_INFO "Added device failed\n"); + printk(KERN_INFO "jsm: add device failed\n"); else printk(KERN_INFO "Added device \n"); } @@ -494,6 +503,7 @@ int jsm_remove_uart_port(struct jsm_board *brd) ch = brd->channels[i]; + clear_bit((int)(ch->uart_port.line), linemap); uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port); } diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c new file mode 100644 index 00000000000..ac9e5d5f742 --- /dev/null +++ b/drivers/serial/timbuart.c @@ -0,0 +1,526 @@ +/* + * timbuart.c timberdale FPGA UART driver + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA UART + */ + +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/serial_core.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/ioport.h> + +#include "timbuart.h" + +struct timbuart_port { + struct uart_port port; + struct tasklet_struct tasklet; + int usedma; + u8 last_ier; + struct platform_device *dev; +}; + +static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800, + 921600, 1843200, 3250000}; + +static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier); + +static irqreturn_t timbuart_handleinterrupt(int irq, void *devid); + +static void timbuart_stop_rx(struct uart_port *port) +{ + /* spin lock held by upper layer, disable all RX interrupts */ + u8 ier = ioread8(port->membase + TIMBUART_IER) & ~RXFLAGS; + iowrite8(ier, port->membase + TIMBUART_IER); +} + +static void timbuart_stop_tx(struct uart_port *port) +{ + /* spinlock held by upper layer, disable TX interrupt */ + u8 ier = ioread8(port->membase + TIMBUART_IER) & ~TXBAE; + iowrite8(ier, port->membase + TIMBUART_IER); +} + +static void timbuart_start_tx(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + + /* do not transfer anything here -> fire off the tasklet */ + tasklet_schedule(&uart->tasklet); +} + +static void timbuart_flush_buffer(struct uart_port *port) +{ + u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | TIMBUART_CTRL_FLSHTX; + + iowrite8(ctl, port->membase + TIMBUART_CTRL); + iowrite8(TXBF, port->membase + TIMBUART_ISR); +} + +static void timbuart_rx_chars(struct uart_port *port) +{ + struct tty_struct *tty = port->info->port.tty; + + while (ioread8(port->membase + TIMBUART_ISR) & RXDP) { + u8 ch = ioread8(port->membase + TIMBUART_RXFIFO); + port->icount.rx++; + tty_insert_flip_char(tty, ch, TTY_NORMAL); + } + + spin_unlock(&port->lock); + tty_flip_buffer_push(port->info->port.tty); + spin_lock(&port->lock); + + dev_dbg(port->dev, "%s - total read %d bytes\n", + __func__, port->icount.rx); +} + +static void timbuart_tx_chars(struct uart_port *port) +{ + struct circ_buf *xmit = &port->info->xmit; + + while (!(ioread8(port->membase + TIMBUART_ISR) & TXBF) && + !uart_circ_empty(xmit)) { + iowrite8(xmit->buf[xmit->tail], + port->membase + TIMBUART_TXFIFO); + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + port->icount.tx++; + } + + dev_dbg(port->dev, + "%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n", + __func__, + port->icount.tx, + ioread8(port->membase + TIMBUART_CTRL), + port->mctrl & TIOCM_RTS, + ioread8(port->membase + TIMBUART_BAUDRATE)); +} + +static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + struct circ_buf *xmit = &port->info->xmit; + + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) + return; + + if (port->x_char) + return; + + if (isr & TXFLAGS) { + timbuart_tx_chars(port); + /* clear all TX interrupts */ + iowrite8(TXFLAGS, port->membase + TIMBUART_ISR); + + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + } else + /* Re-enable any tx interrupt */ + *ier |= uart->last_ier & TXFLAGS; + + /* enable interrupts if there are chars in the transmit buffer, + * Or if we delivered some bytes and want the almost empty interrupt + * we wake up the upper layer later when we got the interrupt + * to give it some time to go out... + */ + if (!uart_circ_empty(xmit)) + *ier |= TXBAE; + + dev_dbg(port->dev, "%s - leaving\n", __func__); +} + +void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier) +{ + if (isr & RXFLAGS) { + /* Some RX status is set */ + if (isr & RXBF) { + u8 ctl = ioread8(port->membase + TIMBUART_CTRL) | + TIMBUART_CTRL_FLSHRX; + iowrite8(ctl, port->membase + TIMBUART_CTRL); + port->icount.overrun++; + } else if (isr & (RXDP)) + timbuart_rx_chars(port); + + /* ack all RX interrupts */ + iowrite8(RXFLAGS, port->membase + TIMBUART_ISR); + } + + /* always have the RX interrupts enabled */ + *ier |= RXBAF | RXBF | RXTT; + + dev_dbg(port->dev, "%s - leaving\n", __func__); +} + +void timbuart_tasklet(unsigned long arg) +{ + struct timbuart_port *uart = (struct timbuart_port *)arg; + u8 isr, ier = 0; + + spin_lock(&uart->port.lock); + + isr = ioread8(uart->port.membase + TIMBUART_ISR); + dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr); + + if (!uart->usedma) + timbuart_handle_tx_port(&uart->port, isr, &ier); + + timbuart_mctrl_check(&uart->port, isr, &ier); + + if (!uart->usedma) + timbuart_handle_rx_port(&uart->port, isr, &ier); + + iowrite8(ier, uart->port.membase + TIMBUART_IER); + + spin_unlock(&uart->port.lock); + dev_dbg(uart->port.dev, "%s leaving\n", __func__); +} + +static unsigned int timbuart_tx_empty(struct uart_port *port) +{ + u8 isr = ioread8(port->membase + TIMBUART_ISR); + + return (isr & TXBAE) ? TIOCSER_TEMT : 0; +} + +static unsigned int timbuart_get_mctrl(struct uart_port *port) +{ + u8 cts = ioread8(port->membase + TIMBUART_CTRL); + dev_dbg(port->dev, "%s - cts %x\n", __func__, cts); + + if (cts & TIMBUART_CTRL_CTS) + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; + else + return TIOCM_DSR | TIOCM_CAR; +} + +static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + dev_dbg(port->dev, "%s - %x\n", __func__, mctrl); + + if (mctrl & TIOCM_RTS) + iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL); + else + iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL); +} + +static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier) +{ + unsigned int cts; + + if (isr & CTS_DELTA) { + /* ack */ + iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR); + cts = timbuart_get_mctrl(port); + uart_handle_cts_change(port, cts & TIOCM_CTS); + wake_up_interruptible(&port->info->delta_msr_wait); + } + + *ier |= CTS_DELTA; +} + +static void timbuart_enable_ms(struct uart_port *port) +{ + /* N/A */ +} + +static void timbuart_break_ctl(struct uart_port *port, int ctl) +{ + /* N/A */ +} + +static int timbuart_startup(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + + dev_dbg(port->dev, "%s\n", __func__); + + iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL); + iowrite8(0xff, port->membase + TIMBUART_ISR); + /* Enable all but TX interrupts */ + iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA, + port->membase + TIMBUART_IER); + + return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED, + "timb-uart", uart); +} + +static void timbuart_shutdown(struct uart_port *port) +{ + struct timbuart_port *uart = + container_of(port, struct timbuart_port, port); + dev_dbg(port->dev, "%s\n", __func__); + free_irq(port->irq, uart); + iowrite8(0, port->membase + TIMBUART_IER); +} + +static int get_bindex(int baud) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(baudrates); i++) + if (baud <= baudrates[i]) + return i; + + return -1; +} + +static void timbuart_set_termios(struct uart_port *port, + struct ktermios *termios, + struct ktermios *old) +{ + unsigned int baud; + short bindex; + unsigned long flags; + + baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16); + bindex = get_bindex(baud); + dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex); + + if (bindex < 0) + bindex = 0; + baud = baudrates[bindex]; + + /* The serial layer calls into this once with old = NULL when setting + up initially */ + if (old) + tty_termios_copy_hw(termios, old); + tty_termios_encode_baud_rate(termios, baud, baud); + + spin_lock_irqsave(&port->lock, flags); + iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE); + uart_update_timeout(port, termios->c_cflag, baud); + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *timbuart_type(struct uart_port *port) +{ + return port->type == PORT_UNKNOWN ? "timbuart" : NULL; +} + +/* We do not request/release mappings of the registers here, + * currently it's done in the proble function. + */ +static void timbuart_release_port(struct uart_port *port) +{ + struct platform_device *pdev = to_platform_device(port->dev); + int size = + resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0)); + + if (port->flags & UPF_IOREMAP) { + iounmap(port->membase); + port->membase = NULL; + } + + release_mem_region(port->mapbase, size); +} + +static int timbuart_request_port(struct uart_port *port) +{ + struct platform_device *pdev = to_platform_device(port->dev); + int size = + resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0)); + + if (!request_mem_region(port->mapbase, size, "timb-uart")) + return -EBUSY; + + if (port->flags & UPF_IOREMAP) { + port->membase = ioremap(port->mapbase, size); + if (port->membase == NULL) { + release_mem_region(port->mapbase, size); + return -ENOMEM; + } + } + + return 0; +} + +static irqreturn_t timbuart_handleinterrupt(int irq, void *devid) +{ + struct timbuart_port *uart = (struct timbuart_port *)devid; + + if (ioread8(uart->port.membase + TIMBUART_IPR)) { + uart->last_ier = ioread8(uart->port.membase + TIMBUART_IER); + + /* disable interrupts, the tasklet enables them again */ + iowrite8(0, uart->port.membase + TIMBUART_IER); + + /* fire off bottom half */ + tasklet_schedule(&uart->tasklet); + + return IRQ_HANDLED; + } else + return IRQ_NONE; +} + +/* + * Configure/autoconfigure the port. + */ +static void timbuart_config_port(struct uart_port *port, int flags) +{ + if (flags & UART_CONFIG_TYPE) { + port->type = PORT_TIMBUART; + timbuart_request_port(port); + } +} + +static int timbuart_verify_port(struct uart_port *port, + struct serial_struct *ser) +{ + /* we don't want the core code to modify any port params */ + return -EINVAL; +} + +static struct uart_ops timbuart_ops = { + .tx_empty = timbuart_tx_empty, + .set_mctrl = timbuart_set_mctrl, + .get_mctrl = timbuart_get_mctrl, + .stop_tx = timbuart_stop_tx, + .start_tx = timbuart_start_tx, + .flush_buffer = timbuart_flush_buffer, + .stop_rx = timbuart_stop_rx, + .enable_ms = timbuart_enable_ms, + .break_ctl = timbuart_break_ctl, + .startup = timbuart_startup, + .shutdown = timbuart_shutdown, + .set_termios = timbuart_set_termios, + .type = timbuart_type, + .release_port = timbuart_release_port, + .request_port = timbuart_request_port, + .config_port = timbuart_config_port, + .verify_port = timbuart_verify_port +}; + +static struct uart_driver timbuart_driver = { + .owner = THIS_MODULE, + .driver_name = "timberdale_uart", + .dev_name = "ttyTU", + .major = TIMBUART_MAJOR, + .minor = TIMBUART_MINOR, + .nr = 1 +}; + +static int timbuart_probe(struct platform_device *dev) +{ + int err; + struct timbuart_port *uart; + struct resource *iomem; + + dev_dbg(&dev->dev, "%s\n", __func__); + + uart = kzalloc(sizeof(*uart), GFP_KERNEL); + if (!uart) { + err = -EINVAL; + goto err_mem; + } + + uart->usedma = 0; + + uart->port.uartclk = 3250000 * 16; + uart->port.fifosize = TIMBUART_FIFO_SIZE; + uart->port.regshift = 2; + uart->port.iotype = UPIO_MEM; + uart->port.ops = &timbuart_ops; + uart->port.irq = 0; + uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP; + uart->port.line = 0; + uart->port.dev = &dev->dev; + + iomem = platform_get_resource(dev, IORESOURCE_MEM, 0); + if (!iomem) { + err = -ENOMEM; + goto err_register; + } + uart->port.mapbase = iomem->start; + uart->port.membase = NULL; + + uart->port.irq = platform_get_irq(dev, 0); + if (uart->port.irq < 0) { + err = -EINVAL; + goto err_register; + } + + tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart); + + err = uart_register_driver(&timbuart_driver); + if (err) + goto err_register; + + err = uart_add_one_port(&timbuart_driver, &uart->port); + if (err) + goto err_add_port; + + platform_set_drvdata(dev, uart); + + return 0; + +err_add_port: + uart_unregister_driver(&timbuart_driver); +err_register: + kfree(uart); +err_mem: + printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n", + err); + + return err; +} + +static int timbuart_remove(struct platform_device *dev) +{ + struct timbuart_port *uart = platform_get_drvdata(dev); + + tasklet_kill(&uart->tasklet); + uart_remove_one_port(&timbuart_driver, &uart->port); + uart_unregister_driver(&timbuart_driver); + kfree(uart); + + return 0; +} + +static struct platform_driver timbuart_platform_driver = { + .driver = { + .name = "timb-uart", + .owner = THIS_MODULE, + }, + .probe = timbuart_probe, + .remove = timbuart_remove, +}; + +/*--------------------------------------------------------------------------*/ + +static int __init timbuart_init(void) +{ + return platform_driver_register(&timbuart_platform_driver); +} + +static void __exit timbuart_exit(void) +{ + platform_driver_unregister(&timbuart_platform_driver); +} + +module_init(timbuart_init); +module_exit(timbuart_exit); + +MODULE_DESCRIPTION("Timberdale UART driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:timb-uart"); + diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h new file mode 100644 index 00000000000..7e566766bc4 --- /dev/null +++ b/drivers/serial/timbuart.h @@ -0,0 +1,58 @@ +/* + * timbuart.c timberdale FPGA GPIO driver + * Copyright (c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* Supports: + * Timberdale FPGA UART + */ + +#ifndef _TIMBUART_H +#define _TIMBUART_H + +#define TIMBUART_FIFO_SIZE 2048 + +#define TIMBUART_RXFIFO 0x08 +#define TIMBUART_TXFIFO 0x0c +#define TIMBUART_IER 0x10 +#define TIMBUART_IPR 0x14 +#define TIMBUART_ISR 0x18 +#define TIMBUART_CTRL 0x1c +#define TIMBUART_BAUDRATE 0x20 + +#define TIMBUART_CTRL_RTS 0x01 +#define TIMBUART_CTRL_CTS 0x02 +#define TIMBUART_CTRL_FLSHTX 0x40 +#define TIMBUART_CTRL_FLSHRX 0x80 + +#define TXBF 0x01 +#define TXBAE 0x02 +#define CTS_DELTA 0x04 +#define RXDP 0x08 +#define RXBAF 0x10 +#define RXBF 0x20 +#define RXTT 0x40 +#define RXBNAE 0x80 +#define TXBE 0x100 + +#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE) +#define TXFLAGS (TXBF | TXBAE) + +#define TIMBUART_MAJOR 204 +#define TIMBUART_MINOR 192 + +#endif /* _TIMBUART_H */ + diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7a1164dd1d3..ddeb6919253 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -16,7 +16,8 @@ * v0.9 - thorough cleaning, URBification, almost a rewrite * v0.10 - some more cleanups * v0.11 - fixed flow control, read error doesn't stop reads - * v0.12 - added TIOCM ioctls, added break handling, made struct acm kmalloced + * v0.12 - added TIOCM ioctls, added break handling, made struct acm + * kmalloced * v0.13 - added termios, added hangup * v0.14 - sized down struct acm * v0.15 - fixed flow control again - characters could be lost @@ -62,7 +63,7 @@ #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/mutex.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <asm/byteorder.h> @@ -87,7 +88,10 @@ static struct acm *acm_table[ACM_TTY_MINORS]; static DEFINE_MUTEX(open_mutex); -#define ACM_READY(acm) (acm && acm->dev && acm->used) +#define ACM_READY(acm) (acm && acm->dev && acm->port.count) + +static const struct tty_port_operations acm_port_ops = { +}; #ifdef VERBOSE_DEBUG #define verbose 1 @@ -99,13 +103,15 @@ static DEFINE_MUTEX(open_mutex); * Functions for ACM control messages. */ -static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len) +static int acm_ctrl_msg(struct acm *acm, int request, int value, + void *buf, int len) { int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0), request, USB_RT_ACM, value, acm->control->altsetting[0].desc.bInterfaceNumber, buf, len, 5000); - dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", request, value, len, retval); + dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", + request, value, len, retval); return retval < 0 ? retval : 0; } @@ -150,9 +156,8 @@ static int acm_wb_is_avail(struct acm *acm) n = ACM_NW; spin_lock_irqsave(&acm->write_lock, flags); - for (i = 0; i < ACM_NW; i++) { + for (i = 0; i < ACM_NW; i++) n -= acm->wb[i].use; - } spin_unlock_irqrestore(&acm->write_lock, flags); return n; } @@ -183,7 +188,8 @@ static int acm_start_wb(struct acm *acm, struct acm_wb *wb) wb->urb->transfer_buffer_length = wb->len; wb->urb->dev = acm->dev; - if ((rc = usb_submit_urb(wb->urb, GFP_ATOMIC)) < 0) { + rc = usb_submit_urb(wb->urb, GFP_ATOMIC); + if (rc < 0) { dbg("usb_submit_urb(write bulk) failed: %d", rc); acm_write_done(acm, wb); } @@ -262,6 +268,7 @@ static void acm_ctrl_irq(struct urb *urb) { struct acm *acm = urb->context; struct usb_cdc_notification *dr = urb->transfer_buffer; + struct tty_struct *tty; unsigned char *data; int newctrl; int retval; @@ -287,40 +294,45 @@ static void acm_ctrl_irq(struct urb *urb) data = (unsigned char *)(dr + 1); switch (dr->bNotificationType) { + case USB_CDC_NOTIFY_NETWORK_CONNECTION: + dbg("%s network", dr->wValue ? + "connected to" : "disconnected from"); + break; - case USB_CDC_NOTIFY_NETWORK_CONNECTION: - - dbg("%s network", dr->wValue ? "connected to" : "disconnected from"); - break; - - case USB_CDC_NOTIFY_SERIAL_STATE: - - newctrl = get_unaligned_le16(data); + case USB_CDC_NOTIFY_SERIAL_STATE: + tty = tty_port_tty_get(&acm->port); + newctrl = get_unaligned_le16(data); - if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { + if (tty) { + if (!acm->clocal && + (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) { dbg("calling hangup"); - tty_hangup(acm->tty); + tty_hangup(tty); } + tty_kref_put(tty); + } - acm->ctrlin = newctrl; - - dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c", - acm->ctrlin & ACM_CTRL_DCD ? '+' : '-', acm->ctrlin & ACM_CTRL_DSR ? '+' : '-', - acm->ctrlin & ACM_CTRL_BRK ? '+' : '-', acm->ctrlin & ACM_CTRL_RI ? '+' : '-', - acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-', acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-', - acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-'); + acm->ctrlin = newctrl; + dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c", + acm->ctrlin & ACM_CTRL_DCD ? '+' : '-', + acm->ctrlin & ACM_CTRL_DSR ? '+' : '-', + acm->ctrlin & ACM_CTRL_BRK ? '+' : '-', + acm->ctrlin & ACM_CTRL_RI ? '+' : '-', + acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-', + acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-', + acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-'); break; - default: - dbg("unknown notification %d received: index %d len %d data0 %d data1 %d", - dr->bNotificationType, dr->wIndex, - dr->wLength, data[0], data[1]); - break; + default: + dbg("unknown notification %d received: index %d len %d data0 %d data1 %d", + dr->bNotificationType, dr->wIndex, + dr->wLength, data[0], data[1]); + break; } exit: usb_mark_last_busy(acm->dev); - retval = usb_submit_urb (urb, GFP_ATOMIC); + retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with " "result %d", __func__, retval); @@ -371,15 +383,14 @@ static void acm_rx_tasklet(unsigned long _acm) { struct acm *acm = (void *)_acm; struct acm_rb *buf; - struct tty_struct *tty = acm->tty; + struct tty_struct *tty; struct acm_ru *rcv; unsigned long flags; unsigned char throttled; dbg("Entering acm_rx_tasklet"); - if (!ACM_READY(acm)) - { + if (!ACM_READY(acm)) { dbg("acm_rx_tasklet: ACM not ready"); return; } @@ -387,12 +398,13 @@ static void acm_rx_tasklet(unsigned long _acm) spin_lock_irqsave(&acm->throttle_lock, flags); throttled = acm->throttle; spin_unlock_irqrestore(&acm->throttle_lock, flags); - if (throttled) - { + if (throttled) { dbg("acm_rx_tasklet: throttled"); return; } + tty = tty_port_tty_get(&acm->port); + next_buffer: spin_lock_irqsave(&acm->read_lock, flags); if (list_empty(&acm->filled_read_bufs)) { @@ -406,20 +418,22 @@ next_buffer: dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size); - tty_buffer_request_room(tty, buf->size); - spin_lock_irqsave(&acm->throttle_lock, flags); - throttled = acm->throttle; - spin_unlock_irqrestore(&acm->throttle_lock, flags); - if (!throttled) - tty_insert_flip_string(tty, buf->base, buf->size); - tty_flip_buffer_push(tty); - - if (throttled) { - dbg("Throttling noticed"); - spin_lock_irqsave(&acm->read_lock, flags); - list_add(&buf->list, &acm->filled_read_bufs); - spin_unlock_irqrestore(&acm->read_lock, flags); - return; + if (tty) { + spin_lock_irqsave(&acm->throttle_lock, flags); + throttled = acm->throttle; + spin_unlock_irqrestore(&acm->throttle_lock, flags); + if (!throttled) { + tty_buffer_request_room(tty, buf->size); + tty_insert_flip_string(tty, buf->base, buf->size); + tty_flip_buffer_push(tty); + } else { + tty_kref_put(tty); + dbg("Throttling noticed"); + spin_lock_irqsave(&acm->read_lock, flags); + list_add(&buf->list, &acm->filled_read_bufs); + spin_unlock_irqrestore(&acm->read_lock, flags); + return; + } } spin_lock_irqsave(&acm->read_lock, flags); @@ -428,6 +442,8 @@ next_buffer: goto next_buffer; urbs: + tty_kref_put(tty); + while (!list_empty(&acm->spare_read_bufs)) { spin_lock_irqsave(&acm->read_lock, flags); if (list_empty(&acm->spare_read_urbs)) { @@ -454,10 +470,11 @@ urbs: rcv->urb->transfer_dma = buf->dma; rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; - /* This shouldn't kill the driver as unsuccessful URBs are returned to the - free-urbs-pool and resubmited ASAP */ + /* This shouldn't kill the driver as unsuccessful URBs are + returned to the free-urbs-pool and resubmited ASAP */ spin_lock_irqsave(&acm->read_lock, flags); - if (acm->susp_count || usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) { + if (acm->susp_count || + usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) { list_add(&buf->list, &acm->spare_read_bufs); list_add(&rcv->list, &acm->spare_read_urbs); acm->processing = 0; @@ -499,11 +516,14 @@ static void acm_write_bulk(struct urb *urb) static void acm_softint(struct work_struct *work) { struct acm *acm = container_of(work, struct acm, work); + struct tty_struct *tty; dev_vdbg(&acm->data->dev, "tx work\n"); if (!ACM_READY(acm)) return; - tty_wakeup(acm->tty); + tty = tty_port_tty_get(&acm->port); + tty_wakeup(tty); + tty_kref_put(tty); } static void acm_waker(struct work_struct *waker) @@ -543,8 +563,9 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp) rv = 0; set_bit(TTY_NO_WRITE_SPLIT, &tty->flags); + tty->driver_data = acm; - acm->tty = tty; + tty_port_tty_set(&acm->port, tty); if (usb_autopm_get_interface(acm->control) < 0) goto early_bail; @@ -552,11 +573,10 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp) acm->control->needs_remote_wakeup = 1; mutex_lock(&acm->mutex); - if (acm->used++) { + if (acm->port.count++) { usb_autopm_put_interface(acm->control); goto done; - } - + } acm->ctrlurb->dev = acm->dev; if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) { @@ -567,22 +587,22 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp) if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) && (acm->ctrl_caps & USB_CDC_CAP_LINE)) goto full_bailout; + usb_autopm_put_interface(acm->control); INIT_LIST_HEAD(&acm->spare_read_urbs); INIT_LIST_HEAD(&acm->spare_read_bufs); INIT_LIST_HEAD(&acm->filled_read_bufs); - for (i = 0; i < acm->rx_buflimit; i++) { + + for (i = 0; i < acm->rx_buflimit; i++) list_add(&(acm->ru[i].list), &acm->spare_read_urbs); - } - for (i = 0; i < acm->rx_buflimit; i++) { + for (i = 0; i < acm->rx_buflimit; i++) list_add(&(acm->rb[i].list), &acm->spare_read_bufs); - } acm->throttle = 0; tasklet_schedule(&acm->urb_task); - + rv = tty_port_block_til_ready(&acm->port, tty, filp); done: mutex_unlock(&acm->mutex); err_out: @@ -593,16 +613,17 @@ full_bailout: usb_kill_urb(acm->ctrlurb); bail_out: usb_autopm_put_interface(acm->control); - acm->used--; + acm->port.count--; mutex_unlock(&acm->mutex); early_bail: mutex_unlock(&open_mutex); + tty_port_tty_set(&acm->port, NULL); return -EIO; } static void acm_tty_unregister(struct acm *acm) { - int i,nr; + int i, nr; nr = acm->rx_buflimit; tty_unregister_device(acm_tty_driver, acm->minor); @@ -619,41 +640,56 @@ static void acm_tty_unregister(struct acm *acm) static int acm_tty_chars_in_buffer(struct tty_struct *tty); +static void acm_port_down(struct acm *acm, int drain) +{ + int i, nr = acm->rx_buflimit; + mutex_lock(&open_mutex); + if (acm->dev) { + usb_autopm_get_interface(acm->control); + acm_set_control(acm, acm->ctrlout = 0); + /* try letting the last writes drain naturally */ + if (drain) { + wait_event_interruptible_timeout(acm->drain_wait, + (ACM_NW == acm_wb_is_avail(acm)) || !acm->dev, + ACM_CLOSE_TIMEOUT * HZ); + } + usb_kill_urb(acm->ctrlurb); + for (i = 0; i < ACM_NW; i++) + usb_kill_urb(acm->wb[i].urb); + for (i = 0; i < nr; i++) + usb_kill_urb(acm->ru[i].urb); + acm->control->needs_remote_wakeup = 0; + usb_autopm_put_interface(acm->control); + } + mutex_unlock(&open_mutex); +} + +static void acm_tty_hangup(struct tty_struct *tty) +{ + struct acm *acm = tty->driver_data; + tty_port_hangup(&acm->port); + acm_port_down(acm, 0); +} + static void acm_tty_close(struct tty_struct *tty, struct file *filp) { struct acm *acm = tty->driver_data; - int i,nr; - if (!acm || !acm->used) + /* Perform the closing process and see if we need to do the hardware + shutdown */ + if (tty_port_close_start(&acm->port, tty, filp) == 0) return; - - nr = acm->rx_buflimit; + acm_port_down(acm, 0); + tty_port_close_end(&acm->port, tty); mutex_lock(&open_mutex); - if (!--acm->used) { - if (acm->dev) { - usb_autopm_get_interface(acm->control); - acm_set_control(acm, acm->ctrlout = 0); - - /* try letting the last writes drain naturally */ - wait_event_interruptible_timeout(acm->drain_wait, - (ACM_NW == acm_wb_is_avail(acm)) - || !acm->dev, - ACM_CLOSE_TIMEOUT * HZ); - - usb_kill_urb(acm->ctrlurb); - for (i = 0; i < ACM_NW; i++) - usb_kill_urb(acm->wb[i].urb); - for (i = 0; i < nr; i++) - usb_kill_urb(acm->ru[i].urb); - acm->control->needs_remote_wakeup = 0; - usb_autopm_put_interface(acm->control); - } else - acm_tty_unregister(acm); - } + tty_port_tty_set(&acm->port, NULL); + if (!acm->dev) + acm_tty_unregister(acm); mutex_unlock(&open_mutex); } -static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count) +static int acm_tty_write(struct tty_struct *tty, + const unsigned char *buf, int count) { struct acm *acm = tty->driver_data; int stat; @@ -669,7 +705,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c return 0; spin_lock_irqsave(&acm->write_lock, flags); - if ((wbn = acm_wb_alloc(acm)) < 0) { + wbn = acm_wb_alloc(acm); + if (wbn < 0) { spin_unlock_irqrestore(&acm->write_lock, flags); return 0; } @@ -681,7 +718,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c wb->len = count; spin_unlock_irqrestore(&acm->write_lock, flags); - if ((stat = acm_write_start(acm, wbn)) < 0) + stat = acm_write_start(acm, wbn); + if (stat < 0) return stat; return count; } @@ -767,8 +805,10 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file, return -EINVAL; newctrl = acm->ctrlout; - set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (set & TIOCM_RTS ? ACM_CTRL_RTS : 0); - clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0); + set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | + (set & TIOCM_RTS ? ACM_CTRL_RTS : 0); + clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | + (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0); newctrl = (newctrl & ~clear) | set; @@ -777,7 +817,8 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file, return acm_set_control(acm, acm->ctrlout = newctrl); } -static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) +static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg) { struct acm *acm = tty->driver_data; @@ -799,7 +840,8 @@ static const __u8 acm_tty_size[] = { 5, 6, 7, 8 }; -static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios_old) +static void acm_tty_set_termios(struct tty_struct *tty, + struct ktermios *termios_old) { struct acm *acm = tty->driver_data; struct ktermios *termios = tty->termios; @@ -809,19 +851,23 @@ static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios if (!ACM_READY(acm)) return; + /* FIXME: Needs to support the tty_baud interface */ + /* FIXME: Broken on sparc */ newline.dwDTERate = cpu_to_le32p(acm_tty_speed + (termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0)); newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0; newline.bParityType = termios->c_cflag & PARENB ? - (termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; + (termios->c_cflag & PARODD ? 1 : 2) + + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4]; - + /* FIXME: Needs to clear unsupported bits in the termios */ acm->clocal = ((termios->c_cflag & CLOCAL) != 0); if (!newline.dwDTERate) { newline.dwDTERate = acm->line.dwDTERate; newctrl &= ~ACM_CTRL_DTR; - } else newctrl |= ACM_CTRL_DTR; + } else + newctrl |= ACM_CTRL_DTR; if (newctrl != acm->ctrlout) acm_set_control(acm, acm->ctrlout = newctrl); @@ -846,9 +892,8 @@ static void acm_write_buffers_free(struct acm *acm) struct acm_wb *wb; struct usb_device *usb_dev = interface_to_usbdev(acm->control); - for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) { + for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah); - } } static void acm_read_buffers_free(struct acm *acm) @@ -857,7 +902,8 @@ static void acm_read_buffers_free(struct acm *acm) int i, n = acm->rx_buflimit; for (i = 0; i < n; i++) - usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma); + usb_buffer_free(usb_dev, acm->readsize, + acm->rb[i].base, acm->rb[i].dma); } /* Little helper: write buffers allocate */ @@ -882,8 +928,8 @@ static int acm_write_buffers_alloc(struct acm *acm) return 0; } -static int acm_probe (struct usb_interface *intf, - const struct usb_device_id *id) +static int acm_probe(struct usb_interface *intf, + const struct usb_device_id *id) { struct usb_cdc_union_desc *union_header = NULL; struct usb_cdc_country_functional_desc *cfd = NULL; @@ -897,7 +943,7 @@ static int acm_probe (struct usb_interface *intf, struct usb_device *usb_dev = interface_to_usbdev(intf); struct acm *acm; int minor; - int ctrlsize,readsize; + int ctrlsize, readsize; u8 *buf; u8 ac_management_function = 0; u8 call_management_function = 0; @@ -917,7 +963,7 @@ static int acm_probe (struct usb_interface *intf, control_interface = usb_ifnum_to_if(usb_dev, 0); goto skip_normal_probe; } - + /* normal probing*/ if (!buffer) { dev_err(&intf->dev, "Weird descriptor references\n"); @@ -925,8 +971,10 @@ static int acm_probe (struct usb_interface *intf, } if (!buflen) { - if (intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) { - dev_dbg(&intf->dev,"Seeking extra descriptors on endpoint\n"); + if (intf->cur_altsetting->endpoint->extralen && + intf->cur_altsetting->endpoint->extra) { + dev_dbg(&intf->dev, + "Seeking extra descriptors on endpoint\n"); buflen = intf->cur_altsetting->endpoint->extralen; buffer = intf->cur_altsetting->endpoint->extra; } else { @@ -937,47 +985,43 @@ static int acm_probe (struct usb_interface *intf, } while (buflen > 0) { - if (buffer [1] != USB_DT_CS_INTERFACE) { + if (buffer[1] != USB_DT_CS_INTERFACE) { dev_err(&intf->dev, "skipping garbage\n"); goto next_desc; } - switch (buffer [2]) { - case USB_CDC_UNION_TYPE: /* we've found it */ - if (union_header) { - dev_err(&intf->dev, "More than one " - "union descriptor, " - "skipping ...\n"); - goto next_desc; - } - union_header = (struct usb_cdc_union_desc *) - buffer; - break; - case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/ - cfd = (struct usb_cdc_country_functional_desc *)buffer; - break; - case USB_CDC_HEADER_TYPE: /* maybe check version */ - break; /* for now we ignore it */ - case USB_CDC_ACM_TYPE: - ac_management_function = buffer[3]; - break; - case USB_CDC_CALL_MANAGEMENT_TYPE: - call_management_function = buffer[3]; - call_interface_num = buffer[4]; - if ((call_management_function & 3) != 3) - dev_err(&intf->dev, "This device " - "cannot do calls on its own. " - "It is no modem.\n"); - break; - default: - /* there are LOTS more CDC descriptors that - * could legitimately be found here. - */ - dev_dbg(&intf->dev, "Ignoring descriptor: " - "type %02x, length %d\n", - buffer[2], buffer[0]); - break; + switch (buffer[2]) { + case USB_CDC_UNION_TYPE: /* we've found it */ + if (union_header) { + dev_err(&intf->dev, "More than one " + "union descriptor, skipping ...\n"); + goto next_desc; } + union_header = (struct usb_cdc_union_desc *)buffer; + break; + case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/ + cfd = (struct usb_cdc_country_functional_desc *)buffer; + break; + case USB_CDC_HEADER_TYPE: /* maybe check version */ + break; /* for now we ignore it */ + case USB_CDC_ACM_TYPE: + ac_management_function = buffer[3]; + break; + case USB_CDC_CALL_MANAGEMENT_TYPE: + call_management_function = buffer[3]; + call_interface_num = buffer[4]; + if ((call_management_function & 3) != 3) + dev_err(&intf->dev, "This device cannot do calls on its own. It is not a modem.\n"); + break; + default: + /* there are LOTS more CDC descriptors that + * could legitimately be found here. + */ + dev_dbg(&intf->dev, "Ignoring descriptor: " + "type %02x, length %d\n", + buffer[2], buffer[0]); + break; + } next_desc: buflen -= buffer[0]; buffer += buffer[0]; @@ -985,33 +1029,36 @@ next_desc: if (!union_header) { if (call_interface_num > 0) { - dev_dbg(&intf->dev,"No union descriptor, using call management descriptor\n"); + dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n"); data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num)); control_interface = intf; } else { - dev_dbg(&intf->dev,"No union descriptor, giving up\n"); + dev_dbg(&intf->dev, + "No union descriptor, giving up\n"); return -ENODEV; } } else { control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0); data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0)); if (!control_interface || !data_interface) { - dev_dbg(&intf->dev,"no interfaces\n"); + dev_dbg(&intf->dev, "no interfaces\n"); return -ENODEV; } } - + if (data_interface_num != call_interface_num) - dev_dbg(&intf->dev,"Separate call control interface. That is not fully supported.\n"); + dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n"); skip_normal_probe: /*workaround for switched interfaces */ - if (data_interface->cur_altsetting->desc.bInterfaceClass != CDC_DATA_INTERFACE_TYPE) { - if (control_interface->cur_altsetting->desc.bInterfaceClass == CDC_DATA_INTERFACE_TYPE) { + if (data_interface->cur_altsetting->desc.bInterfaceClass + != CDC_DATA_INTERFACE_TYPE) { + if (control_interface->cur_altsetting->desc.bInterfaceClass + == CDC_DATA_INTERFACE_TYPE) { struct usb_interface *t; - dev_dbg(&intf->dev,"Your device has switched interfaces.\n"); - + dev_dbg(&intf->dev, + "Your device has switched interfaces.\n"); t = control_interface; control_interface = data_interface; data_interface = t; @@ -1023,9 +1070,9 @@ skip_normal_probe: /* Accept probe requests only for the control interface */ if (intf != control_interface) return -ENODEV; - + if (usb_interface_claimed(data_interface)) { /* valid in this context */ - dev_dbg(&intf->dev,"The data interface isn't available\n"); + dev_dbg(&intf->dev, "The data interface isn't available\n"); return -EBUSY; } @@ -1042,8 +1089,8 @@ skip_normal_probe: if (!usb_endpoint_dir_in(epread)) { /* descriptors are swapped */ struct usb_endpoint_descriptor *t; - dev_dbg(&intf->dev,"The data interface has switched endpoints\n"); - + dev_dbg(&intf->dev, + "The data interface has switched endpoints\n"); t = epread; epread = epwrite; epwrite = t; @@ -1056,13 +1103,15 @@ skip_normal_probe: return -ENODEV; } - if (!(acm = kzalloc(sizeof(struct acm), GFP_KERNEL))) { + acm = kzalloc(sizeof(struct acm), GFP_KERNEL); + if (acm == NULL) { dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n"); goto alloc_fail; } ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize); - readsize = le16_to_cpu(epread->wMaxPacketSize)* ( quirks == SINGLE_RX_URB ? 1 : 2); + readsize = le16_to_cpu(epread->wMaxPacketSize) * + (quirks == SINGLE_RX_URB ? 1 : 2); acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20; acm->control = control_interface; acm->data = data_interface; @@ -1082,6 +1131,8 @@ skip_normal_probe: spin_lock_init(&acm->read_lock); mutex_init(&acm->mutex); acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); + tty_port_init(&acm->port); + acm->port.ops = &acm_port_ops; buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma); if (!buf) { @@ -1103,8 +1154,10 @@ skip_normal_probe: for (i = 0; i < num_rx_buf; i++) { struct acm_ru *rcv = &(acm->ru[i]); - if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) { - dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n"); + rcv->urb = usb_alloc_urb(0, GFP_KERNEL); + if (rcv->urb == NULL) { + dev_dbg(&intf->dev, + "out of memory (read urbs usb_alloc_urb)\n"); goto alloc_fail7; } @@ -1117,26 +1170,29 @@ skip_normal_probe: rb->base = usb_buffer_alloc(acm->dev, readsize, GFP_KERNEL, &rb->dma); if (!rb->base) { - dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n"); + dev_dbg(&intf->dev, + "out of memory (read bufs usb_buffer_alloc)\n"); goto alloc_fail7; } } - for(i = 0; i < ACM_NW; i++) - { + for (i = 0; i < ACM_NW; i++) { struct acm_wb *snd = &(acm->wb[i]); - if (!(snd->urb = usb_alloc_urb(0, GFP_KERNEL))) { - dev_dbg(&intf->dev, "out of memory (write urbs usb_alloc_urb)"); + snd->urb = usb_alloc_urb(0, GFP_KERNEL); + if (snd->urb == NULL) { + dev_dbg(&intf->dev, + "out of memory (write urbs usb_alloc_urb)"); goto alloc_fail7; } - usb_fill_bulk_urb(snd->urb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), - NULL, acm->writesize, acm_write_bulk, snd); + usb_fill_bulk_urb(snd->urb, usb_dev, + usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress), + NULL, acm->writesize, acm_write_bulk, snd); snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; snd->instance = acm; } - usb_set_intfdata (intf, acm); + usb_set_intfdata(intf, acm); i = device_create_file(&intf->dev, &dev_attr_bmCapabilities); if (i < 0) @@ -1147,7 +1203,8 @@ skip_normal_probe: if (!acm->country_codes) goto skip_countries; acm->country_code_size = cfd->bLength - 4; - memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4); + memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, + cfd->bLength - 4); acm->country_rel_date = cfd->iCountryCodeRelDate; i = device_create_file(&intf->dev, &dev_attr_wCountryCodes); @@ -1156,7 +1213,8 @@ skip_normal_probe: goto skip_countries; } - i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate); + i = device_create_file(&intf->dev, + &dev_attr_iCountryCodeRelDate); if (i < 0) { kfree(acm->country_codes); goto skip_countries; @@ -1164,8 +1222,10 @@ skip_normal_probe: } skip_countries: - usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress), - acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, epctrl->bInterval); + usb_fill_int_urb(acm->ctrlurb, usb_dev, + usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress), + acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, + epctrl->bInterval); acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; acm->ctrlurb->transfer_dma = acm->ctrl_dma; @@ -1212,7 +1272,7 @@ static void stop_data_traffic(struct acm *acm) tasklet_disable(&acm->urb_task); usb_kill_urb(acm->ctrlurb); - for(i = 0; i < ACM_NW; i++) + for (i = 0; i < ACM_NW; i++) usb_kill_urb(acm->wb[i].urb); for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->ru[i].urb); @@ -1227,13 +1287,14 @@ static void acm_disconnect(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct usb_device *usb_dev = interface_to_usbdev(intf); + struct tty_struct *tty; /* sibling interface is already cleaning up */ if (!acm) return; mutex_lock(&open_mutex); - if (acm->country_codes){ + if (acm->country_codes) { device_remove_file(&acm->control->dev, &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, @@ -1247,22 +1308,25 @@ static void acm_disconnect(struct usb_interface *intf) stop_data_traffic(acm); acm_write_buffers_free(acm); - usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); + usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, + acm->ctrl_dma); acm_read_buffers_free(acm); usb_driver_release_interface(&acm_driver, intf == acm->control ? acm->data : acm->control); - if (!acm->used) { + if (acm->port.count == 0) { acm_tty_unregister(acm); mutex_unlock(&open_mutex); return; } mutex_unlock(&open_mutex); - - if (acm->tty) - tty_hangup(acm->tty); + tty = tty_port_tty_get(&acm->port); + if (tty) { + tty_hangup(tty); + tty_kref_put(tty); + } } #ifdef CONFIG_PM @@ -1297,7 +1361,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message) */ mutex_lock(&acm->mutex); - if (acm->used) + if (acm->port.count) stop_data_traffic(acm); mutex_unlock(&acm->mutex); @@ -1319,7 +1383,7 @@ static int acm_resume(struct usb_interface *intf) return 0; mutex_lock(&acm->mutex); - if (acm->used) { + if (acm->port.count) { rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO); if (rv < 0) goto err_out; @@ -1398,7 +1462,7 @@ static struct usb_device_id acm_ids[] = { { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_GSM) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, - USB_CDC_ACM_PROTO_AT_3G ) }, + USB_CDC_ACM_PROTO_AT_3G) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_CDMA) }, @@ -1406,7 +1470,7 @@ static struct usb_device_id acm_ids[] = { { } }; -MODULE_DEVICE_TABLE (usb, acm_ids); +MODULE_DEVICE_TABLE(usb, acm_ids); static struct usb_driver acm_driver = { .name = "cdc_acm", @@ -1429,6 +1493,7 @@ static struct usb_driver acm_driver = { static const struct tty_operations acm_ops = { .open = acm_tty_open, .close = acm_tty_close, + .hangup = acm_tty_hangup, .write = acm_tty_write, .write_room = acm_tty_write_room, .ioctl = acm_tty_ioctl, @@ -1460,7 +1525,8 @@ static int __init acm_init(void) acm_tty_driver->subtype = SERIAL_TYPE_NORMAL, acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV; acm_tty_driver->init_termios = tty_std_termios; - acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; + acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | + HUPCL | CLOCAL; tty_set_operations(acm_tty_driver, &acm_ops); retval = tty_register_driver(acm_tty_driver); @@ -1492,7 +1558,7 @@ static void __exit acm_exit(void) module_init(acm_init); module_exit(acm_exit); -MODULE_AUTHOR( DRIVER_AUTHOR ); -MODULE_DESCRIPTION( DRIVER_DESC ); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR); diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 1f95e7aa1b6..4c3856420ad 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -89,8 +89,8 @@ struct acm { struct usb_device *dev; /* the corresponding usb device */ struct usb_interface *control; /* control interface */ struct usb_interface *data; /* data interface */ - struct tty_struct *tty; /* the corresponding tty */ - struct urb *ctrlurb; /* urbs */ + struct tty_port port; /* our tty port data */ + struct urb *ctrlurb; /* urbs */ u8 *ctrl_buffer; /* buffers of urbs */ dma_addr_t ctrl_dma; /* dma handles of buffers */ u8 *country_codes; /* country codes from device */ @@ -120,7 +120,6 @@ struct acm { unsigned int ctrlout; /* output control lines (DTR, RTS) */ unsigned int writesize; /* max packet size for the output bulk endpoint */ unsigned int readsize,ctrlsize; /* buffer sizes for freeing */ - unsigned int used; /* someone has this acm's device open */ unsigned int minor; /* acm minor number */ unsigned char throttle; /* throttled by tty layer */ unsigned char clocal; /* termios CLOCAL */ diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c index b7eacad4d48..2bfd6dd85b5 100644 --- a/drivers/usb/serial/belkin_sa.c +++ b/drivers/usb/serial/belkin_sa.c @@ -93,8 +93,7 @@ static int belkin_sa_startup(struct usb_serial *serial); static void belkin_sa_shutdown(struct usb_serial *serial); static int belkin_sa_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void belkin_sa_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void belkin_sa_close(struct usb_serial_port *port); static void belkin_sa_read_int_callback(struct urb *urb); static void belkin_sa_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios * old); @@ -244,8 +243,7 @@ exit: } /* belkin_sa_open */ -static void belkin_sa_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void belkin_sa_close(struct usb_serial_port *port) { dbg("%s port %d", __func__, port->number); diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index ab4cc277aa6..2830766f5b3 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -262,32 +262,40 @@ error: kfree(priv); return r; } -static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static int ch341_carrier_raised(struct usb_serial_port *port) +{ + struct ch341_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & CH341_BIT_DCD) + return 1; + return 0; +} + +static void ch341_dtr_rts(struct usb_serial_port *port, int on) { struct ch341_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; dbg("%s - port %d", __func__, port->number); + /* drop DTR and RTS */ + spin_lock_irqsave(&priv->lock, flags); + if (on) + priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR; + else + priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR); + spin_unlock_irqrestore(&priv->lock, flags); + ch341_set_handshake(port->serial->dev, priv->line_control); + wake_up_interruptible(&priv->delta_msr_wait); +} + +static void ch341_close(struct usb_serial_port *port) +{ + dbg("%s - port %d", __func__, port->number); /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - ch341_set_handshake(port->serial->dev, 0); - } - } - wake_up_interruptible(&priv->delta_msr_wait); } @@ -302,7 +310,6 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port, dbg("ch341_open()"); priv->baud_rate = DEFAULT_BAUD_RATE; - priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR; r = ch341_configure(serial->dev, priv); if (r) @@ -322,7 +329,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port, if (r) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, r); - ch341_close(tty, port, NULL); + ch341_close(port); return -EPROTO; } @@ -343,9 +350,6 @@ static void ch341_set_termios(struct tty_struct *tty, dbg("ch341_set_termios()"); - if (!tty || !tty->termios) - return; - baud_rate = tty_get_baud_rate(tty); priv->baud_rate = baud_rate; @@ -568,6 +572,8 @@ static struct usb_serial_driver ch341_device = { .usb_driver = &ch341_driver, .num_ports = 1, .open = ch341_open, + .dtr_rts = ch341_dtr_rts, + .carrier_raised = ch341_carrier_raised, .close = ch341_close, .ioctl = ch341_ioctl, .set_termios = ch341_set_termios, diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c index 19e24045b13..247b61bfb7f 100644 --- a/drivers/usb/serial/console.c +++ b/drivers/usb/serial/console.c @@ -169,7 +169,9 @@ static int usb_console_setup(struct console *co, char *options) kfree(tty); } } - + /* So we know not to kill the hardware on a hangup on this + port. We have also bumped the use count by one so it won't go + idle */ port->console = 1; retval = 0; @@ -182,7 +184,7 @@ free_tty: kfree(tty); reset_open_count: port->port.count = 0; -goto out; + goto out; } static void usb_console_write(struct console *co, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index e8d5133ce9c..16a154d3b2f 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -1,5 +1,5 @@ /* - * Silicon Laboratories CP2101/CP2102 USB to RS232 serial adaptor driver + * Silicon Laboratories CP210x USB to RS232 serial adaptor driver * * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk) * @@ -27,44 +27,46 @@ /* * Version Information */ -#define DRIVER_VERSION "v0.08" -#define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver" +#define DRIVER_VERSION "v0.09" +#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver" /* * Function Prototypes */ -static int cp2101_open(struct tty_struct *, struct usb_serial_port *, +static int cp210x_open(struct tty_struct *, struct usb_serial_port *, struct file *); -static void cp2101_cleanup(struct usb_serial_port *); -static void cp2101_close(struct tty_struct *, struct usb_serial_port *, - struct file*); -static void cp2101_get_termios(struct tty_struct *, +static void cp210x_cleanup(struct usb_serial_port *); +static void cp210x_close(struct usb_serial_port *); +static void cp210x_get_termios(struct tty_struct *, struct usb_serial_port *port); -static void cp2101_get_termios_port(struct usb_serial_port *port, +static void cp210x_get_termios_port(struct usb_serial_port *port, unsigned int *cflagp, unsigned int *baudp); -static void cp2101_set_termios(struct tty_struct *, struct usb_serial_port *, +static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *, struct ktermios*); -static int cp2101_tiocmget(struct tty_struct *, struct file *); -static int cp2101_tiocmset(struct tty_struct *, struct file *, +static int cp210x_tiocmget(struct tty_struct *, struct file *); +static int cp210x_tiocmset(struct tty_struct *, struct file *, unsigned int, unsigned int); -static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *, +static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *, unsigned int, unsigned int); -static void cp2101_break_ctl(struct tty_struct *, int); -static int cp2101_startup(struct usb_serial *); -static void cp2101_shutdown(struct usb_serial *); +static void cp210x_break_ctl(struct tty_struct *, int); +static int cp210x_startup(struct usb_serial *); +static void cp210x_shutdown(struct usb_serial *); static int debug; static struct usb_device_id id_table [] = { { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ + { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ + { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ + { USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */ { USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */ { USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */ { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ @@ -85,10 +87,12 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */ { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */ { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */ + { USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */ { USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */ { USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */ { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */ { USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */ + { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */ { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */ @@ -99,7 +103,9 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */ { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */ { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */ + { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */ { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */ + { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ @@ -108,53 +114,70 @@ static struct usb_device_id id_table [] = { MODULE_DEVICE_TABLE(usb, id_table); -static struct usb_driver cp2101_driver = { - .name = "cp2101", +static struct usb_driver cp210x_driver = { + .name = "cp210x", .probe = usb_serial_probe, .disconnect = usb_serial_disconnect, .id_table = id_table, .no_dynamic_id = 1, }; -static struct usb_serial_driver cp2101_device = { +static struct usb_serial_driver cp210x_device = { .driver = { .owner = THIS_MODULE, - .name = "cp2101", + .name = "cp210x", }, - .usb_driver = &cp2101_driver, + .usb_driver = &cp210x_driver, .id_table = id_table, .num_ports = 1, - .open = cp2101_open, - .close = cp2101_close, - .break_ctl = cp2101_break_ctl, - .set_termios = cp2101_set_termios, - .tiocmget = cp2101_tiocmget, - .tiocmset = cp2101_tiocmset, - .attach = cp2101_startup, - .shutdown = cp2101_shutdown, + .open = cp210x_open, + .close = cp210x_close, + .break_ctl = cp210x_break_ctl, + .set_termios = cp210x_set_termios, + .tiocmget = cp210x_tiocmget, + .tiocmset = cp210x_tiocmset, + .attach = cp210x_startup, + .shutdown = cp210x_shutdown, }; /* Config request types */ #define REQTYPE_HOST_TO_DEVICE 0x41 #define REQTYPE_DEVICE_TO_HOST 0xc1 -/* Config SET requests. To GET, add 1 to the request number */ -#define CP2101_UART 0x00 /* Enable / Disable */ -#define CP2101_BAUDRATE 0x01 /* (BAUD_RATE_GEN_FREQ / baudrate) */ -#define CP2101_BITS 0x03 /* 0x(0)(databits)(parity)(stopbits) */ -#define CP2101_BREAK 0x05 /* On / Off */ -#define CP2101_CONTROL 0x07 /* Flow control line states */ -#define CP2101_MODEMCTL 0x13 /* Modem controls */ -#define CP2101_CONFIG_6 0x19 /* 6 bytes of config data ??? */ - -/* CP2101_UART */ +/* Config request codes */ +#define CP210X_IFC_ENABLE 0x00 +#define CP210X_SET_BAUDDIV 0x01 +#define CP210X_GET_BAUDDIV 0x02 +#define CP210X_SET_LINE_CTL 0x03 +#define CP210X_GET_LINE_CTL 0x04 +#define CP210X_SET_BREAK 0x05 +#define CP210X_IMM_CHAR 0x06 +#define CP210X_SET_MHS 0x07 +#define CP210X_GET_MDMSTS 0x08 +#define CP210X_SET_XON 0x09 +#define CP210X_SET_XOFF 0x0A +#define CP210X_SET_EVENTMASK 0x0B +#define CP210X_GET_EVENTMASK 0x0C +#define CP210X_SET_CHAR 0x0D +#define CP210X_GET_CHARS 0x0E +#define CP210X_GET_PROPS 0x0F +#define CP210X_GET_COMM_STATUS 0x10 +#define CP210X_RESET 0x11 +#define CP210X_PURGE 0x12 +#define CP210X_SET_FLOW 0x13 +#define CP210X_GET_FLOW 0x14 +#define CP210X_EMBED_EVENTS 0x15 +#define CP210X_GET_EVENTSTATE 0x16 +#define CP210X_SET_CHARS 0x19 + +/* CP210X_IFC_ENABLE */ #define UART_ENABLE 0x0001 #define UART_DISABLE 0x0000 -/* CP2101_BAUDRATE */ +/* CP210X_(SET|GET)_BAUDDIV */ #define BAUD_RATE_GEN_FREQ 0x384000 -/* CP2101_BITS */ +/* CP210X_(SET|GET)_LINE_CTL */ #define BITS_DATA_MASK 0X0f00 #define BITS_DATA_5 0X0500 #define BITS_DATA_6 0X0600 @@ -174,11 +197,11 @@ static struct usb_serial_driver cp2101_device = { #define BITS_STOP_1_5 0x0001 #define BITS_STOP_2 0x0002 -/* CP2101_BREAK */ +/* CP210X_SET_BREAK */ #define BREAK_ON 0x0000 #define BREAK_OFF 0x0001 -/* CP2101_CONTROL */ +/* CP210X_(SET_MHS|GET_MDMSTS) */ #define CONTROL_DTR 0x0001 #define CONTROL_RTS 0x0002 #define CONTROL_CTS 0x0010 @@ -189,13 +212,13 @@ static struct usb_serial_driver cp2101_device = { #define CONTROL_WRITE_RTS 0x0200 /* - * cp2101_get_config - * Reads from the CP2101 configuration registers + * cp210x_get_config + * Reads from the CP210x configuration registers * 'size' is specified in bytes. * 'data' is a pointer to a pre-allocated array of integers large * enough to hold 'size' bytes (with 4 bytes to each integer) */ -static int cp2101_get_config(struct usb_serial_port *port, u8 request, +static int cp210x_get_config(struct usb_serial_port *port, u8 request, unsigned int *data, int size) { struct usb_serial *serial = port->serial; @@ -211,9 +234,6 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request, return -ENOMEM; } - /* For get requests, the request number must be incremented */ - request++; - /* Issue the request, attempting to read 'size' bytes */ result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), request, REQTYPE_DEVICE_TO_HOST, 0x0000, @@ -236,12 +256,12 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request, } /* - * cp2101_set_config - * Writes to the CP2101 configuration registers + * cp210x_set_config + * Writes to the CP210x configuration registers * Values less than 16 bits wide are sent directly * 'size' is specified in bytes. */ -static int cp2101_set_config(struct usb_serial_port *port, u8 request, +static int cp210x_set_config(struct usb_serial_port *port, u8 request, unsigned int *data, int size) { struct usb_serial *serial = port->serial; @@ -292,21 +312,21 @@ static int cp2101_set_config(struct usb_serial_port *port, u8 request, } /* - * cp2101_set_config_single - * Convenience function for calling cp2101_set_config on single data values + * cp210x_set_config_single + * Convenience function for calling cp210x_set_config on single data values * without requiring an integer pointer */ -static inline int cp2101_set_config_single(struct usb_serial_port *port, +static inline int cp210x_set_config_single(struct usb_serial_port *port, u8 request, unsigned int data) { - return cp2101_set_config(port, request, &data, 2); + return cp210x_set_config(port, request, &data, 2); } /* - * cp2101_quantise_baudrate + * cp210x_quantise_baudrate * Quantises the baud rate as per AN205 Table 1 */ -static unsigned int cp2101_quantise_baudrate(unsigned int baud) { +static unsigned int cp210x_quantise_baudrate(unsigned int baud) { if (baud <= 56) baud = 0; else if (baud <= 300) baud = 300; else if (baud <= 600) baud = 600; @@ -343,7 +363,7 @@ static unsigned int cp2101_quantise_baudrate(unsigned int baud) { return baud; } -static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port, +static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) { struct usb_serial *serial = port->serial; @@ -351,7 +371,7 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port, dbg("%s - port %d", __func__, port->number); - if (cp2101_set_config_single(port, CP2101_UART, UART_ENABLE)) { + if (cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) { dev_err(&port->dev, "%s - Unable to enable UART\n", __func__); return -EPROTO; @@ -373,17 +393,17 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port, } /* Configure the termios structure */ - cp2101_get_termios(tty, port); + cp210x_get_termios(tty, port); /* Set the DTR and RTS pins low */ - cp2101_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data + cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data : port, NULL, TIOCM_DTR | TIOCM_RTS, 0); return 0; } -static void cp2101_cleanup(struct usb_serial_port *port) +static void cp210x_cleanup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; @@ -398,8 +418,7 @@ static void cp2101_cleanup(struct usb_serial_port *port) } } -static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void cp210x_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); @@ -410,23 +429,23 @@ static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port, mutex_lock(&port->serial->disc_mutex); if (!port->serial->disconnected) - cp2101_set_config_single(port, CP2101_UART, UART_DISABLE); + cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE); mutex_unlock(&port->serial->disc_mutex); } /* - * cp2101_get_termios + * cp210x_get_termios * Reads the baud rate, data bits, parity, stop bits and flow control mode * from the device, corrects any unsupported values, and configures the * termios structure to reflect the state of the device */ -static void cp2101_get_termios(struct tty_struct *tty, +static void cp210x_get_termios(struct tty_struct *tty, struct usb_serial_port *port) { unsigned int baud; if (tty) { - cp2101_get_termios_port(tty->driver_data, + cp210x_get_termios_port(tty->driver_data, &tty->termios->c_cflag, &baud); tty_encode_baud_rate(tty, baud, baud); } @@ -434,15 +453,15 @@ static void cp2101_get_termios(struct tty_struct *tty, else { unsigned int cflag; cflag = 0; - cp2101_get_termios_port(port, &cflag, &baud); + cp210x_get_termios_port(port, &cflag, &baud); } } /* - * cp2101_get_termios_port - * This is the heart of cp2101_get_termios which always uses a &usb_serial_port. + * cp210x_get_termios_port + * This is the heart of cp210x_get_termios which always uses a &usb_serial_port. */ -static void cp2101_get_termios_port(struct usb_serial_port *port, +static void cp210x_get_termios_port(struct usb_serial_port *port, unsigned int *cflagp, unsigned int *baudp) { unsigned int cflag, modem_ctl[4]; @@ -451,17 +470,17 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, dbg("%s - port %d", __func__, port->number); - cp2101_get_config(port, CP2101_BAUDRATE, &baud, 2); + cp210x_get_config(port, CP210X_GET_BAUDDIV, &baud, 2); /* Convert to baudrate */ if (baud) - baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud); + baud = cp210x_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud); dbg("%s - baud rate = %d", __func__, baud); *baudp = baud; cflag = *cflagp; - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); cflag &= ~CSIZE; switch (bits & BITS_DATA_MASK) { case BITS_DATA_5: @@ -486,14 +505,14 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, cflag |= CS8; bits &= ~BITS_DATA_MASK; bits |= BITS_DATA_8; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; default: dbg("%s - Unknown number of data bits, using 8", __func__); cflag |= CS8; bits &= ~BITS_DATA_MASK; bits |= BITS_DATA_8; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; } @@ -516,20 +535,20 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, __func__); cflag &= ~PARENB; bits &= ~BITS_PARITY_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; case BITS_PARITY_SPACE: dbg("%s - parity = SPACE (not supported, disabling parity)", __func__); cflag &= ~PARENB; bits &= ~BITS_PARITY_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; default: dbg("%s - Unknown parity mode, disabling parity", __func__); cflag &= ~PARENB; bits &= ~BITS_PARITY_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; } @@ -542,7 +561,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)", __func__); bits &= ~BITS_STOP_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; case BITS_STOP_2: dbg("%s - stop bits = 2", __func__); @@ -552,11 +571,11 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, dbg("%s - Unknown number of stop bits, using 1 stop bit", __func__); bits &= ~BITS_STOP_MASK; - cp2101_set_config(port, CP2101_BITS, &bits, 2); + cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2); break; } - cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16); + cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16); if (modem_ctl[0] & 0x0008) { dbg("%s - flow control = CRTSCTS", __func__); cflag |= CRTSCTS; @@ -568,7 +587,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port, *cflagp = cflag; } -static void cp2101_set_termios(struct tty_struct *tty, +static void cp210x_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios) { unsigned int cflag, old_cflag; @@ -583,13 +602,13 @@ static void cp2101_set_termios(struct tty_struct *tty, tty->termios->c_cflag &= ~CMSPAR; cflag = tty->termios->c_cflag; old_cflag = old_termios->c_cflag; - baud = cp2101_quantise_baudrate(tty_get_baud_rate(tty)); + baud = cp210x_quantise_baudrate(tty_get_baud_rate(tty)); /* If the baud rate is to be updated*/ if (baud != tty_termios_baud_rate(old_termios) && baud != 0) { dbg("%s - Setting baud rate to %d baud", __func__, baud); - if (cp2101_set_config_single(port, CP2101_BAUDRATE, + if (cp210x_set_config_single(port, CP210X_SET_BAUDDIV, ((BAUD_RATE_GEN_FREQ + baud/2) / baud))) { dbg("Baud rate requested not supported by device\n"); baud = tty_termios_baud_rate(old_termios); @@ -600,7 +619,7 @@ static void cp2101_set_termios(struct tty_struct *tty, /* If the number of data bits is to be updated */ if ((cflag & CSIZE) != (old_cflag & CSIZE)) { - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); bits &= ~BITS_DATA_MASK; switch (cflag & CSIZE) { case CS5: @@ -624,19 +643,19 @@ static void cp2101_set_termios(struct tty_struct *tty, dbg("%s - data bits = 9", __func__); break;*/ default: - dbg("cp2101 driver does not " + dbg("cp210x driver does not " "support the number of bits requested," " using 8 bit mode\n"); bits |= BITS_DATA_8; break; } - if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) + if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2)) dbg("Number of data bits requested " "not supported by device\n"); } if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) { - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); bits &= ~BITS_PARITY_MASK; if (cflag & PARENB) { if (cflag & PARODD) { @@ -647,13 +666,13 @@ static void cp2101_set_termios(struct tty_struct *tty, dbg("%s - parity = EVEN", __func__); } } - if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) + if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2)) dbg("Parity mode not supported " "by device\n"); } if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) { - cp2101_get_config(port, CP2101_BITS, &bits, 2); + cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2); bits &= ~BITS_STOP_MASK; if (cflag & CSTOPB) { bits |= BITS_STOP_2; @@ -662,13 +681,13 @@ static void cp2101_set_termios(struct tty_struct *tty, bits |= BITS_STOP_1; dbg("%s - stop bits = 1", __func__); } - if (cp2101_set_config(port, CP2101_BITS, &bits, 2)) + if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2)) dbg("Number of stop bits requested " "not supported by device\n"); } if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) { - cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16); + cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16); dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x", __func__, modem_ctl[0], modem_ctl[1], modem_ctl[2], modem_ctl[3]); @@ -688,19 +707,19 @@ static void cp2101_set_termios(struct tty_struct *tty, dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x", __func__, modem_ctl[0], modem_ctl[1], modem_ctl[2], modem_ctl[3]); - cp2101_set_config(port, CP2101_MODEMCTL, modem_ctl, 16); + cp210x_set_config(port, CP210X_SET_FLOW, modem_ctl, 16); } } -static int cp2101_tiocmset (struct tty_struct *tty, struct file *file, +static int cp210x_tiocmset (struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; - return cp2101_tiocmset_port(port, file, set, clear); + return cp210x_tiocmset_port(port, file, set, clear); } -static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file, +static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *file, unsigned int set, unsigned int clear) { unsigned int control = 0; @@ -726,10 +745,10 @@ static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file, dbg("%s - control = 0x%.4x", __func__, control); - return cp2101_set_config(port, CP2101_CONTROL, &control, 2); + return cp210x_set_config(port, CP210X_SET_MHS, &control, 2); } -static int cp2101_tiocmget (struct tty_struct *tty, struct file *file) +static int cp210x_tiocmget (struct tty_struct *tty, struct file *file) { struct usb_serial_port *port = tty->driver_data; unsigned int control; @@ -737,7 +756,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file) dbg("%s - port %d", __func__, port->number); - cp2101_get_config(port, CP2101_CONTROL, &control, 1); + cp210x_get_config(port, CP210X_GET_MDMSTS, &control, 1); result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0) |((control & CONTROL_RTS) ? TIOCM_RTS : 0) @@ -751,7 +770,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file) return result; } -static void cp2101_break_ctl (struct tty_struct *tty, int break_state) +static void cp210x_break_ctl (struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; unsigned int state; @@ -763,17 +782,17 @@ static void cp2101_break_ctl (struct tty_struct *tty, int break_state) state = BREAK_ON; dbg("%s - turning break %s", __func__, state == BREAK_OFF ? "off" : "on"); - cp2101_set_config(port, CP2101_BREAK, &state, 2); + cp210x_set_config(port, CP210X_SET_BREAK, &state, 2); } -static int cp2101_startup(struct usb_serial *serial) +static int cp210x_startup(struct usb_serial *serial) { - /* CP2101 buffers behave strangely unless device is reset */ + /* cp210x buffers behave strangely unless device is reset */ usb_reset_device(serial->dev); return 0; } -static void cp2101_shutdown(struct usb_serial *serial) +static void cp210x_shutdown(struct usb_serial *serial) { int i; @@ -781,21 +800,21 @@ static void cp2101_shutdown(struct usb_serial *serial) /* Stop reads and writes on all ports */ for (i = 0; i < serial->num_ports; ++i) - cp2101_cleanup(serial->port[i]); + cp210x_cleanup(serial->port[i]); } -static int __init cp2101_init(void) +static int __init cp210x_init(void) { int retval; - retval = usb_serial_register(&cp2101_device); + retval = usb_serial_register(&cp210x_device); if (retval) return retval; /* Failed to register */ - retval = usb_register(&cp2101_driver); + retval = usb_register(&cp210x_driver); if (retval) { /* Failed to register */ - usb_serial_deregister(&cp2101_device); + usb_serial_deregister(&cp210x_device); return retval; } @@ -805,14 +824,14 @@ static int __init cp2101_init(void) return 0; } -static void __exit cp2101_exit(void) +static void __exit cp210x_exit(void) { - usb_deregister(&cp2101_driver); - usb_serial_deregister(&cp2101_device); + usb_deregister(&cp210x_driver); + usb_serial_deregister(&cp210x_device); } -module_init(cp2101_init); -module_exit(cp2101_exit); +module_init(cp210x_init); +module_exit(cp210x_exit); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_VERSION(DRIVER_VERSION); diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c index dd501bb63ed..933ba913e66 100644 --- a/drivers/usb/serial/cyberjack.c +++ b/drivers/usb/serial/cyberjack.c @@ -61,8 +61,7 @@ static int cyberjack_startup(struct usb_serial *serial); static void cyberjack_shutdown(struct usb_serial *serial); static int cyberjack_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void cyberjack_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void cyberjack_close(struct usb_serial_port *port); static int cyberjack_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int cyberjack_write_room(struct tty_struct *tty); @@ -185,8 +184,7 @@ static int cyberjack_open(struct tty_struct *tty, return result; } -static void cyberjack_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void cyberjack_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index e568710b263..669f9384853 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -174,8 +174,8 @@ static int cypress_ca42v2_startup(struct usb_serial *serial); static void cypress_shutdown(struct usb_serial *serial); static int cypress_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void cypress_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void cypress_close(struct usb_serial_port *port); +static void cypress_dtr_rts(struct usb_serial_port *port, int on); static int cypress_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void cypress_send(struct usb_serial_port *port); @@ -218,6 +218,7 @@ static struct usb_serial_driver cypress_earthmate_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -244,6 +245,7 @@ static struct usb_serial_driver cypress_hidcom_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -270,6 +272,7 @@ static struct usb_serial_driver cypress_ca42v2_device = { .shutdown = cypress_shutdown, .open = cypress_open, .close = cypress_close, + .dtr_rts = cypress_dtr_rts, .write = cypress_write, .write_room = cypress_write_room, .ioctl = cypress_ioctl, @@ -656,11 +659,7 @@ static int cypress_open(struct tty_struct *tty, priv->rx_flags = 0; spin_unlock_irqrestore(&priv->lock, flags); - /* raise both lines and set termios */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = CONTROL_DTR | CONTROL_RTS; - priv->cmd_ctrl = 1; - spin_unlock_irqrestore(&priv->lock, flags); + /* Set termios */ result = cypress_write(tty, port, NULL, 0); if (result) { @@ -694,76 +693,42 @@ static int cypress_open(struct tty_struct *tty, __func__, result); cypress_set_dead(port); } - + port->port.drain_delay = 256; return result; } /* cypress_open */ +static void cypress_dtr_rts(struct usb_serial_port *port, int on) +{ + struct cypress_private *priv = usb_get_serial_port_data(port); + /* drop dtr and rts */ + priv = usb_get_serial_port_data(port); + spin_lock_irq(&priv->lock); + if (on == 0) + priv->line_control = 0; + else + priv->line_control = CONTROL_DTR | CONTROL_RTS; + priv->cmd_ctrl = 1; + spin_unlock_irq(&priv->lock); + cypress_write(NULL, port, NULL, 0); +} -static void cypress_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void cypress_close(struct usb_serial_port *port) { struct cypress_private *priv = usb_get_serial_port_data(port); - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from buffer */ - spin_lock_irq(&priv->lock); - timeout = CYPRESS_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (cypress_buf_data_avail(priv->buf) == 0 - || timeout == 0 || signal_pending(current) - /* without mutex, allowed due to harmless failure mode */ - || port->serial->disconnected) - break; - spin_unlock_irq(&priv->lock); - timeout = schedule_timeout(timeout); - spin_lock_irq(&priv->lock); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - /* clear out any remaining data in the buffer */ - cypress_buf_clear(priv->buf); - spin_unlock_irq(&priv->lock); - /* writing is potentially harmful, lock must be taken */ mutex_lock(&port->serial->disc_mutex); if (port->serial->disconnected) { mutex_unlock(&port->serial->disc_mutex); return; } - /* wait for characters to drain from device */ - if (tty) { - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ * 2560) / bps, HZ / 10); - else - timeout = 2 * HZ; - schedule_timeout_interruptible(timeout); - } - + cypress_buf_clear(priv->buf); dbg("%s - stopping urbs", __func__); usb_kill_urb(port->interrupt_in_urb); usb_kill_urb(port->interrupt_out_urb); - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop dtr and rts */ - priv = usb_get_serial_port_data(port); - spin_lock_irq(&priv->lock); - priv->line_control = 0; - priv->cmd_ctrl = 1; - spin_unlock_irq(&priv->lock); - cypress_write(tty, port, NULL, 0); - } - } if (stats) dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n", diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index 38ba4ea8b6b..30f5140eff0 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -422,7 +422,6 @@ struct digi_port { int dp_throttled; int dp_throttle_restart; wait_queue_head_t dp_flush_wait; - int dp_in_close; /* close in progress */ wait_queue_head_t dp_close_wait; /* wait queue for close */ struct work_struct dp_wakeup_work; struct usb_serial_port *dp_port; @@ -456,8 +455,9 @@ static int digi_write_room(struct tty_struct *tty); static int digi_chars_in_buffer(struct tty_struct *tty); static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void digi_close(struct usb_serial_port *port); +static int digi_carrier_raised(struct usb_serial_port *port); +static void digi_dtr_rts(struct usb_serial_port *port, int on); static int digi_startup_device(struct usb_serial *serial); static int digi_startup(struct usb_serial *serial); static void digi_shutdown(struct usb_serial *serial); @@ -510,6 +510,8 @@ static struct usb_serial_driver digi_acceleport_2_device = { .num_ports = 3, .open = digi_open, .close = digi_close, + .dtr_rts = digi_dtr_rts, + .carrier_raised = digi_carrier_raised, .write = digi_write, .write_room = digi_write_room, .write_bulk_callback = digi_write_bulk_callback, @@ -1328,6 +1330,19 @@ static int digi_chars_in_buffer(struct tty_struct *tty) } +static void digi_dtr_rts(struct usb_serial_port *port, int on) +{ + /* Adjust DTR and RTS */ + digi_set_modem_signals(port, on * (TIOCM_DTR|TIOCM_RTS), 1); +} + +static int digi_carrier_raised(struct usb_serial_port *port) +{ + struct digi_port *priv = usb_get_serial_port_data(port); + if (priv->dp_modem_signals & TIOCM_CD) + return 1; + return 0; +} static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) @@ -1336,7 +1351,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, unsigned char buf[32]; struct digi_port *priv = usb_get_serial_port_data(port); struct ktermios not_termios; - unsigned long flags = 0; dbg("digi_open: TOP: port=%d, open_count=%d", priv->dp_port_num, port->port.count); @@ -1345,26 +1359,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, if (digi_startup_device(port->serial) != 0) return -ENXIO; - spin_lock_irqsave(&priv->dp_port_lock, flags); - - /* don't wait on a close in progress for non-blocking opens */ - if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) { - spin_unlock_irqrestore(&priv->dp_port_lock, flags); - return -EAGAIN; - } - - /* wait for a close in progress to finish */ - while (priv->dp_in_close) { - cond_wait_interruptible_timeout_irqrestore( - &priv->dp_close_wait, DIGI_RETRY_TIMEOUT, - &priv->dp_port_lock, flags); - if (signal_pending(current)) - return -EINTR; - spin_lock_irqsave(&priv->dp_port_lock, flags); - } - - spin_unlock_irqrestore(&priv->dp_port_lock, flags); - /* read modem signals automatically whenever they change */ buf[0] = DIGI_CMD_READ_INPUT_SIGNALS; buf[1] = priv->dp_port_num; @@ -1387,16 +1381,11 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port, not_termios.c_iflag = ~tty->termios->c_iflag; digi_set_termios(tty, port, ¬_termios); } - - /* set DTR and RTS */ - digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1); - return 0; } -static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void digi_close(struct usb_serial_port *port) { DEFINE_WAIT(wait); int ret; @@ -1411,28 +1400,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, if (port->serial->disconnected) goto exit; - /* do cleanup only after final close on this port */ - spin_lock_irq(&priv->dp_port_lock); - priv->dp_in_close = 1; - spin_unlock_irq(&priv->dp_port_lock); - - /* tell line discipline to process only XON/XOFF */ - tty->closing = 1; - - /* wait for output to drain */ - if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) - tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT); - - /* flush driver and line discipline buffers */ - tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - if (port->serial->dev) { - /* wait for transmit idle */ - if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) - digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT); - /* drop DTR and RTS */ - digi_set_modem_signals(port, 0, 0); + /* FIXME: Transmit idle belongs in the wait_unti_sent path */ + digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT); /* disable input flow control */ buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL; @@ -1477,11 +1447,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port, /* shutdown any outstanding bulk writes */ usb_kill_urb(port->write_urb); } - tty->closing = 0; exit: spin_lock_irq(&priv->dp_port_lock); priv->dp_write_urb_in_use = 0; - priv->dp_in_close = 0; wake_up_interruptible(&priv->dp_close_wait); spin_unlock_irq(&priv->dp_port_lock); mutex_unlock(&port->serial->disc_mutex); @@ -1560,7 +1528,6 @@ static int digi_startup(struct usb_serial *serial) priv->dp_throttled = 0; priv->dp_throttle_restart = 0; init_waitqueue_head(&priv->dp_flush_wait); - priv->dp_in_close = 0; init_waitqueue_head(&priv->dp_close_wait); INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock); priv->dp_port = serial->port[i]; diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c index c709ec474a8..2b141ccb0cd 100644 --- a/drivers/usb/serial/empeg.c +++ b/drivers/usb/serial/empeg.c @@ -81,8 +81,7 @@ static int debug; /* function prototypes for an empeg-car player */ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void empeg_close(struct usb_serial_port *port); static int empeg_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); @@ -181,8 +180,7 @@ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port, } -static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void empeg_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index d9fcdaedf38..683304d6061 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -89,6 +89,7 @@ struct ftdi_private { int force_rtscts; /* if non-zero, force RTS-CTS to always be enabled */ + unsigned int latency; /* latency setting in use */ spinlock_t tx_lock; /* spinlock for transmit state */ unsigned long tx_bytes; unsigned long tx_outstanding_bytes; @@ -719,8 +720,8 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port); static int ftdi_sio_port_remove(struct usb_serial_port *port); static int ftdi_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void ftdi_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void ftdi_close(struct usb_serial_port *port); +static void ftdi_dtr_rts(struct usb_serial_port *port, int on); static int ftdi_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int ftdi_write_room(struct tty_struct *tty); @@ -758,6 +759,7 @@ static struct usb_serial_driver ftdi_sio_device = { .port_remove = ftdi_sio_port_remove, .open = ftdi_open, .close = ftdi_close, + .dtr_rts = ftdi_dtr_rts, .throttle = ftdi_throttle, .unthrottle = ftdi_unthrottle, .write = ftdi_write, @@ -1037,7 +1039,54 @@ static int change_speed(struct tty_struct *tty, struct usb_serial_port *port) return rv; } +static int write_latency_timer(struct usb_serial_port *port) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + struct usb_device *udev = port->serial->dev; + char buf[1]; + int rv = 0; + int l = priv->latency; + + if (priv->flags & ASYNC_LOW_LATENCY) + l = 1; + + dbg("%s: setting latency timer = %i", __func__, l); + + rv = usb_control_msg(udev, + usb_sndctrlpipe(udev, 0), + FTDI_SIO_SET_LATENCY_TIMER_REQUEST, + FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, + l, priv->interface, + buf, 0, WDR_TIMEOUT); + + if (rv < 0) + dev_err(&port->dev, "Unable to write latency timer: %i\n", rv); + return rv; +} + +static int read_latency_timer(struct usb_serial_port *port) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + struct usb_device *udev = port->serial->dev; + unsigned short latency = 0; + int rv = 0; + + dbg("%s", __func__); + + rv = usb_control_msg(udev, + usb_rcvctrlpipe(udev, 0), + FTDI_SIO_GET_LATENCY_TIMER_REQUEST, + FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE, + 0, priv->interface, + (char *) &latency, 1, WDR_TIMEOUT); + + if (rv < 0) { + dev_err(&port->dev, "Unable to read latency timer: %i\n", rv); + return -EIO; + } + return latency; +} static int get_serial_info(struct usb_serial_port *port, struct serial_struct __user *retinfo) @@ -1097,6 +1146,7 @@ static int set_serial_info(struct tty_struct *tty, priv->custom_divisor = new_serial.custom_divisor; tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + write_latency_timer(port); check_and_exit: if ((old_priv.flags & ASYNC_SPD_MASK) != @@ -1192,27 +1242,13 @@ static ssize_t show_latency_timer(struct device *dev, { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); - struct usb_device *udev = port->serial->dev; - unsigned short latency = 0; - int rv = 0; - - - dbg("%s", __func__); - - rv = usb_control_msg(udev, - usb_rcvctrlpipe(udev, 0), - FTDI_SIO_GET_LATENCY_TIMER_REQUEST, - FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE, - 0, priv->interface, - (char *) &latency, 1, WDR_TIMEOUT); - - if (rv < 0) { - dev_err(dev, "Unable to read latency timer: %i\n", rv); - return -EIO; - } - return sprintf(buf, "%i\n", latency); + if (priv->flags & ASYNC_LOW_LATENCY) + return sprintf(buf, "1\n"); + else + return sprintf(buf, "%i\n", priv->latency); } + /* Write a new value of the latency timer, in units of milliseconds. */ static ssize_t store_latency_timer(struct device *dev, struct device_attribute *attr, const char *valbuf, @@ -1220,25 +1256,13 @@ static ssize_t store_latency_timer(struct device *dev, { struct usb_serial_port *port = to_usb_serial_port(dev); struct ftdi_private *priv = usb_get_serial_port_data(port); - struct usb_device *udev = port->serial->dev; - char buf[1]; int v = simple_strtoul(valbuf, NULL, 10); int rv = 0; - dbg("%s: setting latency timer = %i", __func__, v); - - rv = usb_control_msg(udev, - usb_sndctrlpipe(udev, 0), - FTDI_SIO_SET_LATENCY_TIMER_REQUEST, - FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE, - v, priv->interface, - buf, 0, WDR_TIMEOUT); - - if (rv < 0) { - dev_err(dev, "Unable to write latency timer: %i\n", rv); + priv->latency = v; + rv = write_latency_timer(port); + if (rv < 0) return -EIO; - } - return count; } @@ -1392,6 +1416,7 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port) usb_set_serial_port_data(port, priv); ftdi_determine_type(port); + read_latency_timer(port); create_sysfs_attrs(port); return 0; } @@ -1514,6 +1539,8 @@ static int ftdi_open(struct tty_struct *tty, if (tty) tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0; + write_latency_timer(port); + /* No error checking for this (will get errors later anyway) */ /* See ftdi_sio.h for description of what is reset */ usb_control_msg(dev, usb_sndctrlpipe(dev, 0), @@ -1529,11 +1556,6 @@ static int ftdi_open(struct tty_struct *tty, if (tty) ftdi_set_termios(tty, port, tty->termios); - /* FIXME: Flow control might be enabled, so it should be checked - - we have no control of defaults! */ - /* Turn on RTS and DTR since we are not flow controlling by default */ - set_mctrl(port, TIOCM_DTR | TIOCM_RTS); - /* Not throttled */ spin_lock_irqsave(&priv->rx_lock, flags); priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED); @@ -1558,6 +1580,30 @@ static int ftdi_open(struct tty_struct *tty, } /* ftdi_open */ +static void ftdi_dtr_rts(struct usb_serial_port *port, int on) +{ + struct ftdi_private *priv = usb_get_serial_port_data(port); + char buf[1]; + + mutex_lock(&port->serial->disc_mutex); + if (!port->serial->disconnected) { + /* Disable flow control */ + if (!on && usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + FTDI_SIO_SET_FLOW_CTRL_REQUEST, + FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, + 0, priv->interface, buf, 0, + WDR_TIMEOUT) < 0) { + dev_err(&port->dev, "error from flowcontrol urb\n"); + } + /* drop RTS and DTR */ + if (on) + set_mctrl(port, TIOCM_DTR | TIOCM_RTS); + else + clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); + } + mutex_unlock(&port->serial->disc_mutex); +} /* * usbserial:__serial_close only calls ftdi_close if the point is open @@ -1567,31 +1613,12 @@ static int ftdi_open(struct tty_struct *tty, * */ -static void ftdi_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ftdi_close(struct usb_serial_port *port) { /* ftdi_close */ - unsigned int c_cflag = tty->termios->c_cflag; struct ftdi_private *priv = usb_get_serial_port_data(port); - char buf[1]; dbg("%s", __func__); - mutex_lock(&port->serial->disc_mutex); - if (c_cflag & HUPCL && !port->serial->disconnected) { - /* Disable flow control */ - if (usb_control_msg(port->serial->dev, - usb_sndctrlpipe(port->serial->dev, 0), - FTDI_SIO_SET_FLOW_CTRL_REQUEST, - FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE, - 0, priv->interface, buf, 0, - WDR_TIMEOUT) < 0) { - dev_err(&port->dev, "error from flowcontrol urb\n"); - } - - /* drop RTS and DTR */ - clear_mctrl(port, TIOCM_DTR | TIOCM_RTS); - } /* Note change no line if hupcl is off */ - mutex_unlock(&port->serial->disc_mutex); /* cancel any scheduled reading */ cancel_delayed_work_sync(&priv->rx_work); diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 586d30ff450..ee25a3fe3b0 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -993,8 +993,7 @@ static int garmin_open(struct tty_struct *tty, } -static void garmin_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void garmin_close(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct garmin_data *garmin_data_p = usb_get_serial_port_data(port); diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 4cec9906ccf..be82ea95672 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -184,8 +184,7 @@ int usb_serial_generic_resume(struct usb_serial *serial) } EXPORT_SYMBOL_GPL(usb_serial_generic_resume); -void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +void usb_serial_generic_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); generic_cleanup(port); diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index fb4a73d090f..53ef5996e33 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -207,8 +207,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb); /* function prototypes for the usbserial callbacks */ static int edge_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void edge_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void edge_close(struct usb_serial_port *port); static int edge_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int edge_write_room(struct tty_struct *tty); @@ -965,7 +964,7 @@ static int edge_open(struct tty_struct *tty, if (!edge_port->txfifo.fifo) { dbg("%s - no memory", __func__); - edge_close(tty, port, filp); + edge_close(port); return -ENOMEM; } @@ -975,7 +974,7 @@ static int edge_open(struct tty_struct *tty, if (!edge_port->write_urb) { dbg("%s - no memory", __func__); - edge_close(tty, port, filp); + edge_close(port); return -ENOMEM; } @@ -1099,8 +1098,7 @@ static void block_until_tx_empty(struct edgeport_port *edge_port) * edge_close * this function is called by the tty driver when a port is closed *****************************************************************************/ -static void edge_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void edge_close(struct usb_serial_port *port) { struct edgeport_serial *edge_serial; struct edgeport_port *edge_port; diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c index 513b25e044c..eabf20eeb37 100644 --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c @@ -2009,8 +2009,7 @@ release_es_lock: return status; } -static void edge_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void edge_close(struct usb_serial_port *port) { struct edgeport_serial *edge_serial; struct edgeport_port *edge_port; diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c index cd62825a9ac..c610a99fa47 100644 --- a/drivers/usb/serial/ipaq.c +++ b/drivers/usb/serial/ipaq.c @@ -76,8 +76,7 @@ static int initial_wait; /* Function prototypes for an ipaq */ static int ipaq_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void ipaq_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void ipaq_close(struct usb_serial_port *port); static int ipaq_calc_num_ports(struct usb_serial *serial); static int ipaq_startup(struct usb_serial *serial); static void ipaq_shutdown(struct usb_serial *serial); @@ -714,8 +713,7 @@ error: } -static void ipaq_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ipaq_close(struct usb_serial_port *port) { struct ipaq_private *priv = usb_get_serial_port_data(port); diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c index da2a2b46644..29ad038b9c8 100644 --- a/drivers/usb/serial/ipw.c +++ b/drivers/usb/serial/ipw.c @@ -302,23 +302,17 @@ static int ipw_open(struct tty_struct *tty, return 0; } -static void ipw_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void ipw_dtr_rts(struct usb_serial_port *port, int on) { struct usb_device *dev = port->serial->dev; int result; - if (tty_hung_up_p(filp)) { - dbg("%s: tty_hung_up_p ...", __func__); - return; - } - /*--1: drop the dtr */ dbg("%s:dropping dtr", __func__); result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, - IPW_PIN_CLRDTR, + on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR, 0, NULL, 0, @@ -332,7 +326,7 @@ static void ipw_close(struct tty_struct *tty, result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), IPW_SIO_SET_PIN, USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT, - IPW_PIN_CLRRTS, + on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS, 0, NULL, 0, @@ -340,7 +334,12 @@ static void ipw_close(struct tty_struct *tty, if (result < 0) dev_err(&port->dev, "dropping rts failed (error = %d)\n", result); +} +static void ipw_close(struct usb_serial_port *port) +{ + struct usb_device *dev = port->serial->dev; + int result; /*--3: purge */ dbg("%s:sending purge", __func__); @@ -461,6 +460,7 @@ static struct usb_serial_driver ipw_device = { .num_ports = 1, .open = ipw_open, .close = ipw_close, + .dtr_rts = ipw_dtr_rts, .port_probe = ipw_probe, .port_remove = ipw_disconnect, .write = ipw_write, diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index 4e2cda93da5..66009b6b763 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -88,8 +88,7 @@ static int xbof = -1; static int ir_startup (struct usb_serial *serial); static int ir_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filep); -static void ir_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filep); +static void ir_close(struct usb_serial_port *port); static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void ir_write_bulk_callback (struct urb *urb); @@ -346,8 +345,7 @@ static int ir_open(struct tty_struct *tty, return result; } -static void ir_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file * filp) +static void ir_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c index 4473d442b2a..76a3cc327bb 100644 --- a/drivers/usb/serial/iuu_phoenix.c +++ b/drivers/usb/serial/iuu_phoenix.c @@ -40,7 +40,7 @@ static int debug; /* * Version Information */ -#define DRIVER_VERSION "v0.5" +#define DRIVER_VERSION "v0.10" #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver" static struct usb_device_id id_table[] = { @@ -70,7 +70,6 @@ static void read_rxcmd_callback(struct urb *urb); struct iuu_private { spinlock_t lock; /* store irq state */ wait_queue_head_t delta_msr_wait; - u8 line_control; u8 line_status; u8 termios_initialized; int tiostatus; /* store IUART SIGNAL for tiocmget call */ @@ -651,32 +650,33 @@ static int iuu_bulk_write(struct usb_serial_port *port) unsigned long flags; int result; int i; + int buf_len; char *buf_ptr = port->write_urb->transfer_buffer; dbg("%s - enter", __func__); + spin_lock_irqsave(&priv->lock, flags); *buf_ptr++ = IUU_UART_ESC; *buf_ptr++ = IUU_UART_TX; *buf_ptr++ = priv->writelen; - memcpy(buf_ptr, priv->writebuf, - priv->writelen); + memcpy(buf_ptr, priv->writebuf, priv->writelen); + buf_len = priv->writelen; + priv->writelen = 0; + spin_unlock_irqrestore(&priv->lock, flags); if (debug == 1) { - for (i = 0; i < priv->writelen; i++) + for (i = 0; i < buf_len; i++) sprintf(priv->dbgbuf + i*2 , "%02X", priv->writebuf[i]); - priv->dbgbuf[priv->writelen+i*2] = 0; + priv->dbgbuf[buf_len+i*2] = 0; dbg("%s - writing %i chars : %s", __func__, - priv->writelen, priv->dbgbuf); + buf_len, priv->dbgbuf); } usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), - port->write_urb->transfer_buffer, priv->writelen + 3, + port->write_urb->transfer_buffer, buf_len + 3, iuu_rxcmd, port); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); - spin_lock_irqsave(&priv->lock, flags); - priv->writelen = 0; - spin_unlock_irqrestore(&priv->lock, flags); usb_serial_port_softint(port); return result; } @@ -770,14 +770,10 @@ static int iuu_uart_write(struct tty_struct *tty, struct usb_serial_port *port, return -ENOMEM; spin_lock_irqsave(&priv->lock, flags); - if (priv->writelen > 0) { - /* buffer already filled but not commited */ - spin_unlock_irqrestore(&priv->lock, flags); - return 0; - } + /* fill the buffer */ - memcpy(priv->writebuf, buf, count); - priv->writelen = count; + memcpy(priv->writebuf + priv->writelen, buf, count); + priv->writelen += count; spin_unlock_irqrestore(&priv->lock, flags); return count; @@ -819,7 +815,7 @@ static int iuu_uart_on(struct usb_serial_port *port) buf[0] = IUU_UART_ENABLE; buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF); buf[2] = (u8) (0x00FF & IUU_BAUD_9600); - buf[3] = (u8) (0x0F0 & IUU_TWO_STOP_BITS) | (0x07 & IUU_PARITY_EVEN); + buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN); status = bulk_immediate(port, buf, 4); if (status != IUU_OPERATION_OK) { @@ -946,19 +942,59 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud, return status; } -static int set_control_lines(struct usb_device *dev, u8 value) +static void iuu_set_termios(struct tty_struct *tty, + struct usb_serial_port *port, struct ktermios *old_termios) { - return 0; + const u32 supported_mask = CMSPAR|PARENB|PARODD; + + unsigned int cflag = tty->termios->c_cflag; + int status; + u32 actual; + u32 parity; + int csize = CS7; + int baud = 9600; /* Fixed for the moment */ + u32 newval = cflag & supported_mask; + + /* compute the parity parameter */ + parity = 0; + if (cflag & CMSPAR) { /* Using mark space */ + if (cflag & PARODD) + parity |= IUU_PARITY_SPACE; + else + parity |= IUU_PARITY_MARK; + } else if (!(cflag & PARENB)) { + parity |= IUU_PARITY_NONE; + csize = CS8; + } else if (cflag & PARODD) + parity |= IUU_PARITY_ODD; + else + parity |= IUU_PARITY_EVEN; + + parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT); + + /* set it */ + status = iuu_uart_baud(port, + (clockmode == 2) ? 16457 : 9600 * boost / 100, + &actual, parity); + + /* set the termios value to the real one, so the user now what has + * changed. We support few fields so its easies to copy the old hw + * settings back over and then adjust them + */ + if (old_termios) + tty_termios_copy_hw(tty->termios, old_termios); + if (status != 0) /* Set failed - return old bits */ + return; + /* Re-encode speed, parity and csize */ + tty_encode_baud_rate(tty, baud, baud); + tty->termios->c_cflag &= ~(supported_mask|CSIZE); + tty->termios->c_cflag |= newval | csize; } -static void iuu_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void iuu_close(struct usb_serial_port *port) { /* iuu_led (port,255,0,0,0); */ struct usb_serial *serial; - struct iuu_private *priv = usb_get_serial_port_data(port); - unsigned long flags; - unsigned int c_cflag; serial = port->serial; if (!serial) @@ -968,17 +1004,6 @@ static void iuu_close(struct tty_struct *tty, iuu_uart_off(port); if (serial->dev) { - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - priv = usb_get_serial_port_data(port); - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - set_control_lines(port->serial->dev, 0); - } - } /* free writebuf */ /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); @@ -1154,7 +1179,7 @@ static int iuu_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting read urb," " error %d\n", __func__, result); - iuu_close(tty, port, NULL); + iuu_close(port); return -EPROTO; } else { dbg("%s - rxcmd OK", __func__); @@ -1175,6 +1200,7 @@ static struct usb_serial_driver iuu_device = { .read_bulk_callback = iuu_uart_read_callback, .tiocmget = iuu_tiocmget, .tiocmset = iuu_tiocmset, + .set_termios = iuu_set_termios, .attach = iuu_startup, .shutdown = iuu_shutdown, }; diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 00daa8f7759..f1195a98f31 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1298,8 +1298,16 @@ static inline void stop_urb(struct urb *urb) usb_kill_urb(urb); } -static void keyspan_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void keyspan_dtr_rts(struct usb_serial_port *port, int on) +{ + struct keyspan_port_private *p_priv = usb_get_serial_port_data(port); + + p_priv->rts_state = on; + p_priv->dtr_state = on; + keyspan_send_setup(port, 0); +} + +static void keyspan_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; @@ -1336,7 +1344,6 @@ static void keyspan_close(struct tty_struct *tty, stop_urb(p_priv->out_urbs[i]); } } - tty_port_tty_set(&port->port, NULL); } /* download the firmware to a pre-renumeration device */ diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h index 38b4582e073..0d4569b6076 100644 --- a/drivers/usb/serial/keyspan.h +++ b/drivers/usb/serial/keyspan.h @@ -38,9 +38,8 @@ static int keyspan_open (struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void keyspan_close (struct tty_struct *tty, - struct usb_serial_port *port, - struct file *filp); +static void keyspan_close (struct usb_serial_port *port); +static void keyspan_dtr_rts (struct usb_serial_port *port, int on); static int keyspan_startup (struct usb_serial *serial); static void keyspan_shutdown (struct usb_serial *serial); static int keyspan_write_room (struct tty_struct *tty); @@ -562,6 +561,7 @@ static struct usb_serial_driver keyspan_1port_device = { .num_ports = 1, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, @@ -582,6 +582,7 @@ static struct usb_serial_driver keyspan_2port_device = { .num_ports = 2, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, @@ -602,6 +603,7 @@ static struct usb_serial_driver keyspan_4port_device = { .num_ports = 4, .open = keyspan_open, .close = keyspan_close, + .dtr_rts = keyspan_dtr_rts, .write = keyspan_write, .write_room = keyspan_write_room, .set_termios = keyspan_set_termios, diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c index bf1ae247da6..ab769dbea1b 100644 --- a/drivers/usb/serial/keyspan_pda.c +++ b/drivers/usb/serial/keyspan_pda.c @@ -651,6 +651,35 @@ static int keyspan_pda_chars_in_buffer(struct tty_struct *tty) } +static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on) +{ + struct usb_serial *serial = port->serial; + + if (serial->dev) { + if (on) + keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2)); + else + keyspan_pda_set_modem_info(serial, 0); + } +} + +static int keyspan_pda_carrier_raised(struct usb_serial_port *port) +{ + struct usb_serial *serial = port->serial; + unsigned char modembits; + + /* If we can read the modem status and the DCD is low then + carrier is not raised yet */ + if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) { + if (!(modembits & (1>>6))) + return 0; + } + /* Carrier raised, or we failed (eg disconnected) so + progress accordingly */ + return 1; +} + + static int keyspan_pda_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp) { @@ -682,13 +711,6 @@ static int keyspan_pda_open(struct tty_struct *tty, priv->tx_room = room; priv->tx_throttled = room ? 0 : 1; - /* the normal serial device seems to always turn on DTR and RTS here, - so do the same */ - if (tty && (tty->termios->c_cflag & CBAUD)) - keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2)); - else - keyspan_pda_set_modem_info(serial, 0); - /*Start reading from the device*/ port->interrupt_in_urb->dev = serial->dev; rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); @@ -700,19 +722,11 @@ static int keyspan_pda_open(struct tty_struct *tty, error: return rc; } - - -static void keyspan_pda_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void keyspan_pda_close(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; if (serial->dev) { - /* the normal serial device seems to always shut - off DTR and RTS now */ - if (tty->termios->c_cflag & HUPCL) - keyspan_pda_set_modem_info(serial, 0); - /* shutdown our bulk reads and writes */ usb_kill_urb(port->write_urb); usb_kill_urb(port->interrupt_in_urb); @@ -839,6 +853,8 @@ static struct usb_serial_driver keyspan_pda_device = { .usb_driver = &keyspan_pda_driver, .id_table = id_table_std, .num_ports = 1, + .dtr_rts = keyspan_pda_dtr_rts, + .carrier_raised = keyspan_pda_carrier_raised, .open = keyspan_pda_open, .close = keyspan_pda_close, .write = keyspan_pda_write, diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c index fcd9082f3e7..fa817c66b3e 100644 --- a/drivers/usb/serial/kl5kusb105.c +++ b/drivers/usb/serial/kl5kusb105.c @@ -76,8 +76,7 @@ static int klsi_105_startup(struct usb_serial *serial); static void klsi_105_shutdown(struct usb_serial *serial); static int klsi_105_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void klsi_105_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void klsi_105_close(struct usb_serial_port *port); static int klsi_105_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static void klsi_105_write_bulk_callback(struct urb *urb); @@ -447,8 +446,7 @@ exit: } /* klsi_105_open */ -static void klsi_105_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void klsi_105_close(struct usb_serial_port *port) { struct klsi_105_private *priv = usb_get_serial_port_data(port); int rc; diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c index c148544953b..6b570498287 100644 --- a/drivers/usb/serial/kobil_sct.c +++ b/drivers/usb/serial/kobil_sct.c @@ -72,8 +72,7 @@ static int kobil_startup(struct usb_serial *serial); static void kobil_shutdown(struct usb_serial *serial); static int kobil_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void kobil_close(struct usb_serial_port *port); static int kobil_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int kobil_write_room(struct tty_struct *tty); @@ -209,7 +208,7 @@ static void kobil_shutdown(struct usb_serial *serial) for (i = 0; i < serial->num_ports; ++i) { while (serial->port[i]->port.count > 0) - kobil_close(NULL, serial->port[i], NULL); + kobil_close(serial->port[i]); kfree(usb_get_serial_port_data(serial->port[i])); usb_set_serial_port_data(serial->port[i], NULL); } @@ -346,11 +345,11 @@ static int kobil_open(struct tty_struct *tty, } -static void kobil_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void kobil_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); + /* FIXME: Add rts/dtr methods */ if (port->write_urb) { usb_kill_urb(port->write_urb); usb_free_urb(port->write_urb); diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c index 82930a7d509..873795548fc 100644 --- a/drivers/usb/serial/mct_u232.c +++ b/drivers/usb/serial/mct_u232.c @@ -95,8 +95,8 @@ static int mct_u232_startup(struct usb_serial *serial); static void mct_u232_shutdown(struct usb_serial *serial); static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void mct_u232_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void mct_u232_close(struct usb_serial_port *port); +static void mct_u232_dtr_rts(struct usb_serial_port *port, int on); static void mct_u232_read_int_callback(struct urb *urb); static void mct_u232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); @@ -140,6 +140,7 @@ static struct usb_serial_driver mct_u232_device = { .num_ports = 1, .open = mct_u232_open, .close = mct_u232_close, + .dtr_rts = mct_u232_dtr_rts, .throttle = mct_u232_throttle, .unthrottle = mct_u232_unthrottle, .read_int_callback = mct_u232_read_int_callback, @@ -496,29 +497,29 @@ error: return retval; } /* mct_u232_open */ - -static void mct_u232_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mct_u232_dtr_rts(struct usb_serial_port *port, int on) { - unsigned int c_cflag; unsigned int control_state; struct mct_u232_private *priv = usb_get_serial_port_data(port); - dbg("%s port %d", __func__, port->number); - if (tty) { - c_cflag = tty->termios->c_cflag; - mutex_lock(&port->serial->disc_mutex); - if (c_cflag & HUPCL && !port->serial->disconnected) { - /* drop DTR and RTS */ - spin_lock_irq(&priv->lock); + mutex_lock(&port->serial->disc_mutex); + if (!port->serial->disconnected) { + /* drop DTR and RTS */ + spin_lock_irq(&priv->lock); + if (on) + priv->control_state |= TIOCM_DTR | TIOCM_RTS; + else priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS); - control_state = priv->control_state; - spin_unlock_irq(&priv->lock); - mct_u232_set_modem_ctrl(port->serial, control_state); - } - mutex_unlock(&port->serial->disc_mutex); + control_state = priv->control_state; + spin_unlock_irq(&priv->lock); + mct_u232_set_modem_ctrl(port->serial, control_state); } + mutex_unlock(&port->serial->disc_mutex); +} +static void mct_u232_close(struct usb_serial_port *port) +{ + dbg("%s port %d", __func__, port->number); if (port->serial->dev) { /* shutdown our urbs */ diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 24e3b5d4b4d..9e1a013ee7f 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -533,8 +533,7 @@ static int mos7720_chars_in_buffer(struct tty_struct *tty) return chars; } -static void mos7720_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mos7720_close(struct usb_serial_port *port) { struct usb_serial *serial; struct moschip_port *mos7720_port; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 84fb1dcd30d..10b78a37214 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -1135,54 +1135,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty) } -/************************************************************************ - * - * mos7840_block_until_tx_empty - * - * This function will block the close until one of the following: - * 1. TX count are 0 - * 2. The mos7840 has stopped - * 3. A timeout of 3 seconds without activity has expired - * - ************************************************************************/ -static void mos7840_block_until_tx_empty(struct tty_struct *tty, - struct moschip_port *mos7840_port) -{ - int timeout = HZ / 10; - int wait = 30; - int count; - - while (1) { - - count = mos7840_chars_in_buffer(tty); - - /* Check for Buffer status */ - if (count <= 0) - return; - - /* Block the thread for a while */ - interruptible_sleep_on_timeout(&mos7840_port->wait_chase, - timeout); - - /* No activity.. count down section */ - wait--; - if (wait == 0) { - dbg("%s - TIMEOUT", __func__); - return; - } else { - /* Reset timeout value back to seconds */ - wait = 30; - } - } -} - /***************************************************************************** * mos7840_close * this function is called by the tty driver when a port is closed *****************************************************************************/ -static void mos7840_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void mos7840_close(struct usb_serial_port *port) { struct usb_serial *serial; struct moschip_port *mos7840_port; @@ -1223,10 +1181,6 @@ static void mos7840_close(struct tty_struct *tty, } } - if (serial->dev) - /* flush and block until tx is empty */ - mos7840_block_until_tx_empty(tty, mos7840_port); - /* While closing port, shutdown all bulk read, write * * and interrupt read if they exists */ if (serial->dev) { diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index bcdcbb82270..f5f3751a888 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -98,8 +98,7 @@ static int navman_open(struct tty_struct *tty, return result; } -static void navman_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void navman_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c index df653971272..1104617334f 100644 --- a/drivers/usb/serial/omninet.c +++ b/drivers/usb/serial/omninet.c @@ -66,8 +66,7 @@ static int debug; /* function prototypes */ static int omninet_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void omninet_close(struct usb_serial_port *port); static void omninet_read_bulk_callback(struct urb *urb); static void omninet_write_bulk_callback(struct urb *urb); static int omninet_write(struct tty_struct *tty, struct usb_serial_port *port, @@ -189,8 +188,7 @@ static int omninet_open(struct tty_struct *tty, return result; } -static void omninet_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void omninet_close(struct usb_serial_port *port) { dbg("%s - port %d", __func__, port->number); usb_kill_urb(port->read_urb); diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index b500ad10b75..c20480aa975 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -173,8 +173,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port, return result; } -static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void opticon_close(struct usb_serial_port *port) { struct opticon_private *priv = usb_get_serial_data(port->serial); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7817b82889c..a16d69fadba 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -45,8 +45,9 @@ /* Function prototypes */ static int option_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void option_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void option_close(struct usb_serial_port *port); +static void option_dtr_rts(struct usb_serial_port *port, int on); + static int option_startup(struct usb_serial *serial); static void option_shutdown(struct usb_serial *serial); static int option_write_room(struct tty_struct *tty); @@ -61,7 +62,7 @@ static void option_set_termios(struct tty_struct *tty, static int option_tiocmget(struct tty_struct *tty, struct file *file); static int option_tiocmset(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); -static int option_send_setup(struct tty_struct *tty, struct usb_serial_port *port); +static int option_send_setup(struct usb_serial_port *port); static int option_suspend(struct usb_serial *serial, pm_message_t message); static int option_resume(struct usb_serial *serial); @@ -551,6 +552,7 @@ static struct usb_serial_driver option_1port_device = { .num_ports = 1, .open = option_open, .close = option_close, + .dtr_rts = option_dtr_rts, .write = option_write, .write_room = option_write_room, .chars_in_buffer = option_chars_in_buffer, @@ -630,7 +632,7 @@ static void option_set_termios(struct tty_struct *tty, dbg("%s", __func__); /* Doesn't support option setting */ tty_termios_copy_hw(tty->termios, old_termios); - option_send_setup(tty, port); + option_send_setup(port); } static int option_tiocmget(struct tty_struct *tty, struct file *file) @@ -669,7 +671,7 @@ static int option_tiocmset(struct tty_struct *tty, struct file *file, portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; - return option_send_setup(tty, port); + return option_send_setup(port); } /* Write */ @@ -897,10 +899,6 @@ static int option_open(struct tty_struct *tty, dbg("%s", __func__); - /* Set some sane defaults */ - portdata->rts_state = 1; - portdata->dtr_state = 1; - /* Reset low level data toggle and start reading from endpoints */ for (i = 0; i < N_IN_URB; i++) { urb = portdata->in_urbs[i]; @@ -936,37 +934,43 @@ static int option_open(struct tty_struct *tty, usb_pipeout(urb->pipe), 0); */ } - option_send_setup(tty, port); + option_send_setup(port); return 0; } -static void option_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void option_dtr_rts(struct usb_serial_port *port, int on) { - int i; struct usb_serial *serial = port->serial; struct option_port_private *portdata; dbg("%s", __func__); portdata = usb_get_serial_port_data(port); + mutex_lock(&serial->disc_mutex); + portdata->rts_state = on; + portdata->dtr_state = on; + if (serial->dev) + option_send_setup(port); + mutex_unlock(&serial->disc_mutex); +} - portdata->rts_state = 0; - portdata->dtr_state = 0; - if (serial->dev) { - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - option_send_setup(tty, port); - mutex_unlock(&serial->disc_mutex); +static void option_close(struct usb_serial_port *port) +{ + int i; + struct usb_serial *serial = port->serial; + struct option_port_private *portdata; + + dbg("%s", __func__); + portdata = usb_get_serial_port_data(port); + if (serial->dev) { /* Stop reading/writing urbs */ for (i = 0; i < N_IN_URB; i++) usb_kill_urb(portdata->in_urbs[i]); for (i = 0; i < N_OUT_URB; i++) usb_kill_urb(portdata->out_urbs[i]); } - tty_port_tty_set(&port->port, NULL); } /* Helper functions used by option_setup_urbs */ @@ -1032,28 +1036,24 @@ static void option_setup_urbs(struct usb_serial *serial) * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN * CDC. */ -static int option_send_setup(struct tty_struct *tty, - struct usb_serial_port *port) +static int option_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct option_port_private *portdata; int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber; + int val = 0; dbg("%s", __func__); portdata = usb_get_serial_port_data(port); - if (tty) { - int val = 0; - if (portdata->dtr_state) - val |= 0x01; - if (portdata->rts_state) - val |= 0x02; + if (portdata->dtr_state) + val |= 0x01; + if (portdata->rts_state) + val |= 0x02; - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT); - } - return 0; + return usb_control_msg(serial->dev, + usb_rcvctrlpipe(serial->dev, 0), + 0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT); } static int option_startup(struct usb_serial *serial) diff --git a/drivers/usb/serial/oti6858.c b/drivers/usb/serial/oti6858.c index ba551f00f16..7de54781fe6 100644 --- a/drivers/usb/serial/oti6858.c +++ b/drivers/usb/serial/oti6858.c @@ -143,8 +143,7 @@ struct oti6858_control_pkt { /* function prototypes */ static int oti6858_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void oti6858_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void oti6858_close(struct usb_serial_port *port); static void oti6858_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old); static int oti6858_ioctl(struct tty_struct *tty, struct file *file, @@ -622,67 +621,30 @@ static int oti6858_open(struct tty_struct *tty, if (result != 0) { dev_err(&port->dev, "%s(): usb_submit_urb() failed" " with error %d\n", __func__, result); - oti6858_close(tty, port, NULL); + oti6858_close(port); return -EPROTO; } /* setup termios */ if (tty) oti6858_set_termios(tty, port, &tmp_termios); - + port->port.drain_delay = 256; /* FIXME: check the FIFO length */ return 0; } -static void oti6858_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void oti6858_close(struct usb_serial_port *port) { struct oti6858_private *priv = usb_get_serial_port_data(port); unsigned long flags; - long timeout; - wait_queue_t wait; dbg("%s(port = %d)", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = 30 * HZ; /* PL2303_CLOSING_WAIT */ - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - dbg("%s(): entering wait loop", __func__); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (oti6858_buf_data_avail(priv->buf) == 0 - || timeout == 0 || signal_pending(current) - || port->serial->disconnected) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - dbg("%s(): after wait loop", __func__); - /* clear out any remaining data in the buffer */ oti6858_buf_clear(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device */ - /* (this is long enough for the entire 256 byte */ - /* pl2303 hardware buffer to drain with no flow */ - /* control for data rates of 1200 bps or more, */ - /* for lower rates we should really know how much */ - /* data is in the buffer to compute a delay */ - /* that is not unnecessarily long) */ - /* FIXME - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560)/bps,HZ/10); - else - */ - timeout = 2*HZ; - schedule_timeout_interruptible(timeout); - dbg("%s(): after schedule_timeout_interruptible()", __func__); + dbg("%s(): after buf_clear()", __func__); /* cancel scheduled setup */ cancel_delayed_work(&priv->delayed_setup_work); @@ -694,15 +656,6 @@ static void oti6858_close(struct tty_struct *tty, usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - - /* - if (tty && (tty->termios->c_cflag) & HUPCL) { - // drop DTR and RTS - spin_lock_irqsave(&priv->lock, flags); - priv->pending_setup.control &= ~CONTROL_MASK; - spin_unlock_irqrestore(&priv->lock, flags); - } - */ } static int oti6858_tiocmset(struct tty_struct *tty, struct file *file, diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index 751a533a434..e02dc3d643c 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -652,69 +652,41 @@ static void pl2303_set_termios(struct tty_struct *tty, kfree(buf); } -static void pl2303_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void pl2303_dtr_rts(struct usb_serial_port *port, int on) +{ + struct pl2303_private *priv = usb_get_serial_port_data(port); + unsigned long flags; + u8 control; + + spin_lock_irqsave(&priv->lock, flags); + /* Change DTR and RTS */ + if (on) + priv->line_control |= (CONTROL_DTR | CONTROL_RTS); + else + priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS); + control = priv->line_control; + spin_unlock_irqrestore(&priv->lock, flags); + set_control_lines(port->serial->dev, control); +} + +static void pl2303_close(struct usb_serial_port *port) { struct pl2303_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = PL2303_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (pl2303_buf_data_avail(priv->buf) == 0 || - timeout == 0 || signal_pending(current) || - port->serial->disconnected) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); /* clear out any remaining data in the buffer */ pl2303_buf_clear(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device */ - /* (this is long enough for the entire 256 byte */ - /* pl2303 hardware buffer to drain with no flow */ - /* control for data rates of 1200 bps or more, */ - /* for lower rates we should really know how much */ - /* data is in the buffer to compute a delay */ - /* that is not unnecessarily long) */ - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560)/bps, HZ/10); - else - timeout = 2*HZ; - schedule_timeout_interruptible(timeout); - /* shutdown our urbs */ dbg("%s - shutting down urbs", __func__); usb_kill_urb(port->write_urb); usb_kill_urb(port->read_urb); usb_kill_urb(port->interrupt_in_urb); - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - /* drop DTR and RTS */ - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - set_control_lines(port->serial->dev, 0); - } - } } static int pl2303_open(struct tty_struct *tty, @@ -748,7 +720,7 @@ static int pl2303_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting read urb," " error %d\n", __func__, result); - pl2303_close(tty, port, NULL); + pl2303_close(port); return -EPROTO; } @@ -758,9 +730,10 @@ static int pl2303_open(struct tty_struct *tty, if (result) { dev_err(&port->dev, "%s - failed submitting interrupt urb," " error %d\n", __func__, result); - pl2303_close(tty, port, NULL); + pl2303_close(port); return -EPROTO; } + port->port.drain_delay = 256; return 0; } @@ -821,6 +794,14 @@ static int pl2303_tiocmget(struct tty_struct *tty, struct file *file) return result; } +static int pl2303_carrier_raised(struct usb_serial_port *port) +{ + struct pl2303_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & UART_DCD) + return 1; + return 0; +} + static int wait_modem_info(struct usb_serial_port *port, unsigned int arg) { struct pl2303_private *priv = usb_get_serial_port_data(port); @@ -1125,6 +1106,8 @@ static struct usb_serial_driver pl2303_device = { .num_ports = 1, .open = pl2303_open, .close = pl2303_close, + .dtr_rts = pl2303_dtr_rts, + .carrier_raised = pl2303_carrier_raised, .write = pl2303_write, .ioctl = pl2303_ioctl, .break_ctl = pl2303_break_ctl, diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 913225c6161..17ac34f4d66 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -26,12 +26,10 @@ #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> -#include <linux/usb/ch9.h> #define SWIMS_USB_REQUEST_SetPower 0x00 #define SWIMS_USB_REQUEST_SetNmea 0x07 -/* per port private data */ #define N_IN_URB 4 #define N_OUT_URB 4 #define IN_BUFLEN 4096 @@ -39,6 +37,12 @@ static int debug; static int nmea; +/* Used in interface blacklisting */ +struct sierra_iface_info { + const u32 infolen; /* number of interface numbers on blacklist */ + const u8 *ifaceinfo; /* pointer to the array holding the numbers */ +}; + static int sierra_set_power_state(struct usb_device *udev, __u16 swiState) { int result; @@ -85,6 +89,23 @@ static int sierra_calc_num_ports(struct usb_serial *serial) return result; } +static int is_blacklisted(const u8 ifnum, + const struct sierra_iface_info *blacklist) +{ + const u8 *info; + int i; + + if (blacklist) { + info = blacklist->ifaceinfo; + + for (i = 0; i < blacklist->infolen; i++) { + if (info[i] == ifnum) + return 1; + } + } + return 0; +} + static int sierra_calc_interface(struct usb_serial *serial) { int interface; @@ -153,9 +174,25 @@ static int sierra_probe(struct usb_serial *serial, */ usb_set_serial_data(serial, (void *)num_ports); + /* ifnum could have changed - by calling usb_set_interface */ + ifnum = sierra_calc_interface(serial); + + if (is_blacklisted(ifnum, + (struct sierra_iface_info *)id->driver_info)) { + dev_dbg(&serial->dev->dev, + "Ignoring blacklisted interface #%d\n", ifnum); + return -ENODEV; + } + return result; } +static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11 }; +static const struct sierra_iface_info direct_ip_interface_blacklist = { + .infolen = ARRAY_SIZE(direct_ip_non_serial_ifaces), + .ifaceinfo = direct_ip_non_serial_ifaces, +}; + static struct usb_device_id id_table [] = { { USB_DEVICE(0x1199, 0x0017) }, /* Sierra Wireless EM5625 */ { USB_DEVICE(0x1199, 0x0018) }, /* Sierra Wireless MC5720 */ @@ -188,9 +225,11 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x1199, 0x6833) }, /* Sierra Wireless MC8781 */ { USB_DEVICE(0x1199, 0x683A) }, /* Sierra Wireless MC8785 */ { USB_DEVICE(0x1199, 0x683B) }, /* Sierra Wireless MC8785 Composite */ - { USB_DEVICE(0x1199, 0x683C) }, /* Sierra Wireless MC8790 */ - { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8790 */ - { USB_DEVICE(0x1199, 0x683E) }, /* Sierra Wireless MC8790 */ + /* Sierra Wireless MC8790, MC8791, MC8792 Composite */ + { USB_DEVICE(0x1199, 0x683C) }, + { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8791 Composite */ + /* Sierra Wireless MC8790, MC8791, MC8792 */ + { USB_DEVICE(0x1199, 0x683E) }, { USB_DEVICE(0x1199, 0x6850) }, /* Sierra Wireless AirCard 880 */ { USB_DEVICE(0x1199, 0x6851) }, /* Sierra Wireless AirCard 881 */ { USB_DEVICE(0x1199, 0x6852) }, /* Sierra Wireless AirCard 880 E */ @@ -211,6 +250,10 @@ static struct usb_device_id id_table [] = { { USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */ { USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */ + { USB_DEVICE(0x1199, 0x68A3), /* Sierra Wireless Direct IP modems */ + .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist + }, + { } }; MODULE_DEVICE_TABLE(usb, id_table); @@ -229,7 +272,6 @@ struct sierra_port_private { /* Input endpoints and buffers for this port */ struct urb *in_urbs[N_IN_URB]; - char *in_buffer[N_IN_URB]; /* Settings for the port */ int rts_state; /* Handshaking pins (outputs) */ @@ -240,57 +282,50 @@ struct sierra_port_private { int ri_state; }; -static int sierra_send_setup(struct tty_struct *tty, - struct usb_serial_port *port) +static int sierra_send_setup(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; __u16 interface = 0; + int val = 0; dev_dbg(&port->dev, "%s", __func__); portdata = usb_get_serial_port_data(port); - if (tty) { - int val = 0; - if (portdata->dtr_state) - val |= 0x01; - if (portdata->rts_state) - val |= 0x02; - - /* If composite device then properly report interface */ - if (serial->num_ports == 1) { - interface = sierra_calc_interface(serial); - - /* Control message is sent only to interfaces with - * interrupt_in endpoints - */ - if (port->interrupt_in_urb) { - /* send control message */ - return usb_control_msg(serial->dev, - usb_rcvctrlpipe(serial->dev, 0), - 0x22, 0x21, val, interface, - NULL, 0, USB_CTRL_SET_TIMEOUT); - } - } - - /* Otherwise the need to do non-composite mapping */ - else { - if (port->bulk_out_endpointAddress == 2) - interface = 0; - else if (port->bulk_out_endpointAddress == 4) - interface = 1; - else if (port->bulk_out_endpointAddress == 5) - interface = 2; + if (portdata->dtr_state) + val |= 0x01; + if (portdata->rts_state) + val |= 0x02; + /* If composite device then properly report interface */ + if (serial->num_ports == 1) { + interface = sierra_calc_interface(serial); + /* Control message is sent only to interfaces with + * interrupt_in endpoints + */ + if (port->interrupt_in_urb) { + /* send control message */ return usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), 0x22, 0x21, val, interface, NULL, 0, USB_CTRL_SET_TIMEOUT); - } } + /* Otherwise the need to do non-composite mapping */ + else { + if (port->bulk_out_endpointAddress == 2) + interface = 0; + else if (port->bulk_out_endpointAddress == 4) + interface = 1; + else if (port->bulk_out_endpointAddress == 5) + interface = 2; + return usb_control_msg(serial->dev, + usb_rcvctrlpipe(serial->dev, 0), + 0x22, 0x21, val, interface, + NULL, 0, USB_CTRL_SET_TIMEOUT); + } return 0; } @@ -299,7 +334,7 @@ static void sierra_set_termios(struct tty_struct *tty, { dev_dbg(&port->dev, "%s", __func__); tty_termios_copy_hw(tty->termios, old_termios); - sierra_send_setup(tty, port); + sierra_send_setup(port); } static int sierra_tiocmget(struct tty_struct *tty, struct file *file) @@ -338,7 +373,18 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file, portdata->rts_state = 0; if (clear & TIOCM_DTR) portdata->dtr_state = 0; - return sierra_send_setup(tty, port); + return sierra_send_setup(port); +} + +static void sierra_release_urb(struct urb *urb) +{ + struct usb_serial_port *port; + if (urb) { + port = urb->context; + dev_dbg(&port->dev, "%s: %p\n", __func__, urb); + kfree(urb->transfer_buffer); + usb_free_urb(urb); + } } static void sierra_outdat_callback(struct urb *urb) @@ -465,7 +511,7 @@ static void sierra_indat_callback(struct urb *urb) " received", __func__); /* Resubmit urb so we continue receiving */ - if (port->port.count && status != -ESHUTDOWN) { + if (port->port.count && status != -ESHUTDOWN && status != -EPERM) { err = usb_submit_urb(urb, GFP_ATOMIC); if (err) dev_err(&port->dev, "resubmit read urb failed." @@ -557,67 +603,99 @@ static int sierra_write_room(struct tty_struct *tty) return 2048; } -static int sierra_open(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void sierra_stop_rx_urbs(struct usb_serial_port *port) { - struct sierra_port_private *portdata; - struct usb_serial *serial = port->serial; int i; - struct urb *urb; - int result; + struct sierra_port_private *portdata = usb_get_serial_port_data(port); - portdata = usb_get_serial_port_data(port); + for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) + usb_kill_urb(portdata->in_urbs[i]); - dev_dbg(&port->dev, "%s", __func__); + usb_kill_urb(port->interrupt_in_urb); +} - /* Set some sane defaults */ - portdata->rts_state = 1; - portdata->dtr_state = 1; +static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags) +{ + int ok_cnt; + int err = -EINVAL; + int i; + struct urb *urb; + struct sierra_port_private *portdata = usb_get_serial_port_data(port); - /* Reset low level data toggle and start reading from endpoints */ - for (i = 0; i < N_IN_URB; i++) { + ok_cnt = 0; + for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) { urb = portdata->in_urbs[i]; if (!urb) continue; - if (urb->dev != serial->dev) { - dev_dbg(&port->dev, "%s: dev %p != %p", - __func__, urb->dev, serial->dev); - continue; + err = usb_submit_urb(urb, mem_flags); + if (err) { + dev_err(&port->dev, "%s: submit urb failed: %d\n", + __func__, err); + } else { + ok_cnt++; } + } - /* - * make sure endpoint data toggle is synchronized with the - * device - */ - usb_clear_halt(urb->dev, urb->pipe); - - result = usb_submit_urb(urb, GFP_KERNEL); - if (result) { - dev_err(&port->dev, "submit urb %d failed (%d) %d\n", - i, result, urb->transfer_buffer_length); + if (ok_cnt && port->interrupt_in_urb) { + err = usb_submit_urb(port->interrupt_in_urb, mem_flags); + if (err) { + dev_err(&port->dev, "%s: submit intr urb failed: %d\n", + __func__, err); } } - sierra_send_setup(tty, port); + if (ok_cnt > 0) /* at least one rx urb submitted */ + return 0; + else + return err; +} + +static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint, + int dir, void *ctx, int len, + gfp_t mem_flags, + usb_complete_t callback) +{ + struct urb *urb; + u8 *buf; + + if (endpoint == -1) + return NULL; - /* start up the interrupt endpoint if we have one */ - if (port->interrupt_in_urb) { - result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); - if (result) - dev_err(&port->dev, "submit irq_in urb failed %d\n", - result); + urb = usb_alloc_urb(0, mem_flags); + if (urb == NULL) { + dev_dbg(&serial->dev->dev, "%s: alloc for endpoint %d failed\n", + __func__, endpoint); + return NULL; } - return 0; + + buf = kmalloc(len, mem_flags); + if (buf) { + /* Fill URB using supplied data */ + usb_fill_bulk_urb(urb, serial->dev, + usb_sndbulkpipe(serial->dev, endpoint) | dir, + buf, len, callback, ctx); + + /* debug */ + dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__, + dir == USB_DIR_IN ? 'i' : 'o', urb, buf); + } else { + dev_dbg(&serial->dev->dev, "%s %c u:%p d:%p\n", __func__, + dir == USB_DIR_IN ? 'i' : 'o', urb, buf); + + sierra_release_urb(urb); + urb = NULL; + } + + return urb; } -static void sierra_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void sierra_close(struct usb_serial_port *port) { int i; struct usb_serial *serial = port->serial; struct sierra_port_private *portdata; - dev_dbg(&port->dev, "%s", __func__); + dev_dbg(&port->dev, "%s\n", __func__); portdata = usb_get_serial_port_data(port); portdata->rts_state = 0; @@ -626,25 +704,83 @@ static void sierra_close(struct tty_struct *tty, if (serial->dev) { mutex_lock(&serial->disc_mutex); if (!serial->disconnected) - sierra_send_setup(tty, port); + sierra_send_setup(port); mutex_unlock(&serial->disc_mutex); - /* Stop reading/writing urbs */ - for (i = 0; i < N_IN_URB; i++) - usb_kill_urb(portdata->in_urbs[i]); + /* Stop reading urbs */ + sierra_stop_rx_urbs(port); + /* .. and release them */ + for (i = 0; i < N_IN_URB; i++) { + sierra_release_urb(portdata->in_urbs[i]); + portdata->in_urbs[i] = NULL; + } } +} - usb_kill_urb(port->interrupt_in_urb); - tty_port_tty_set(&port->port, NULL); +static int sierra_open(struct tty_struct *tty, + struct usb_serial_port *port, struct file *filp) +{ + struct sierra_port_private *portdata; + struct usb_serial *serial = port->serial; + int i; + int err; + int endpoint; + struct urb *urb; + + portdata = usb_get_serial_port_data(port); + + dev_dbg(&port->dev, "%s", __func__); + + /* Set some sane defaults */ + portdata->rts_state = 1; + portdata->dtr_state = 1; + + + endpoint = port->bulk_in_endpointAddress; + for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) { + urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port, + IN_BUFLEN, GFP_KERNEL, + sierra_indat_callback); + portdata->in_urbs[i] = urb; + } + /* clear halt condition */ + usb_clear_halt(serial->dev, + usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN); + + err = sierra_submit_rx_urbs(port, GFP_KERNEL); + if (err) { + /* get rid of everything as in close */ + sierra_close(port); + return err; + } + sierra_send_setup(port); + + return 0; +} + + +static void sierra_dtr_rts(struct usb_serial_port *port, int on) +{ + struct usb_serial *serial = port->serial; + struct sierra_port_private *portdata; + + portdata = usb_get_serial_port_data(port); + portdata->rts_state = on; + portdata->dtr_state = on; + + if (serial->dev) { + mutex_lock(&serial->disc_mutex); + if (!serial->disconnected) + sierra_send_setup(port); + mutex_unlock(&serial->disc_mutex); + } } static int sierra_startup(struct usb_serial *serial) { struct usb_serial_port *port; struct sierra_port_private *portdata; - struct urb *urb; int i; - int j; dev_dbg(&serial->dev->dev, "%s", __func__); @@ -666,34 +802,8 @@ static int sierra_startup(struct usb_serial *serial) return -ENOMEM; } spin_lock_init(&portdata->lock); - for (j = 0; j < N_IN_URB; j++) { - portdata->in_buffer[j] = kmalloc(IN_BUFLEN, GFP_KERNEL); - if (!portdata->in_buffer[j]) { - for (--j; j >= 0; j--) - kfree(portdata->in_buffer[j]); - kfree(portdata); - return -ENOMEM; - } - } - + /* Set the port private data pointer */ usb_set_serial_port_data(port, portdata); - - /* initialize the in urbs */ - for (j = 0; j < N_IN_URB; ++j) { - urb = usb_alloc_urb(0, GFP_KERNEL); - if (urb == NULL) { - dev_dbg(&port->dev, "%s: alloc for in " - "port failed.", __func__); - continue; - } - /* Fill URB using supplied data. */ - usb_fill_bulk_urb(urb, serial->dev, - usb_rcvbulkpipe(serial->dev, - port->bulk_in_endpointAddress), - portdata->in_buffer[j], IN_BUFLEN, - sierra_indat_callback, port); - portdata->in_urbs[j] = urb; - } } return 0; @@ -701,7 +811,7 @@ static int sierra_startup(struct usb_serial *serial) static void sierra_shutdown(struct usb_serial *serial) { - int i, j; + int i; struct usb_serial_port *port; struct sierra_port_private *portdata; @@ -714,12 +824,6 @@ static void sierra_shutdown(struct usb_serial *serial) portdata = usb_get_serial_port_data(port); if (!portdata) continue; - - for (j = 0; j < N_IN_URB; j++) { - usb_kill_urb(portdata->in_urbs[j]); - usb_free_urb(portdata->in_urbs[j]); - kfree(portdata->in_buffer[j]); - } kfree(portdata); usb_set_serial_port_data(port, NULL); } @@ -737,6 +841,7 @@ static struct usb_serial_driver sierra_device = { .probe = sierra_probe, .open = sierra_open, .close = sierra_close, + .dtr_rts = sierra_dtr_rts, .write = sierra_write, .write_room = sierra_write_room, .set_termios = sierra_set_termios, diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 5e7528cc81a..8f7ed8f1399 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -446,66 +446,47 @@ static void spcp8x5_set_workMode(struct usb_device *dev, u16 value, "RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret); } +static int spcp8x5_carrier_raised(struct usb_serial_port *port) +{ + struct spcp8x5_private *priv = usb_get_serial_port_data(port); + if (priv->line_status & MSR_STATUS_LINE_DCD) + return 1; + return 0; +} + +static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on) +{ + struct spcp8x5_private *priv = usb_get_serial_port_data(port); + unsigned long flags; + u8 control; + + spin_lock_irqsave(&priv->lock, flags); + if (on) + priv->line_control = MCR_CONTROL_LINE_DTR + | MCR_CONTROL_LINE_RTS; + else + priv->line_control &= ~ (MCR_CONTROL_LINE_DTR + | MCR_CONTROL_LINE_RTS); + control = priv->line_control; + spin_unlock_irqrestore(&priv->lock, flags); + spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type); +} + /* close the serial port. We should wait for data sending to device 1st and * then kill all urb. */ -static void spcp8x5_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void spcp8x5_close(struct usb_serial_port *port) { struct spcp8x5_private *priv = usb_get_serial_port_data(port); unsigned long flags; - unsigned int c_cflag; - int bps; - long timeout; - wait_queue_t wait; int result; dbg("%s - port %d", __func__, port->number); - /* wait for data to drain from the buffer */ spin_lock_irqsave(&priv->lock, flags); - timeout = SPCP8x5_CLOSING_WAIT; - init_waitqueue_entry(&wait, current); - add_wait_queue(&tty->write_wait, &wait); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (ringbuf_avail_data(priv->buf) == 0 || - timeout == 0 || signal_pending(current)) - break; - spin_unlock_irqrestore(&priv->lock, flags); - timeout = schedule_timeout(timeout); - spin_lock_irqsave(&priv->lock, flags); - } - set_current_state(TASK_RUNNING); - remove_wait_queue(&tty->write_wait, &wait); - /* clear out any remaining data in the buffer */ clear_ringbuf(priv->buf); spin_unlock_irqrestore(&priv->lock, flags); - /* wait for characters to drain from the device (this is long enough - * for the entire all byte spcp8x5 hardware buffer to drain with no - * flow control for data rates of 1200 bps or more, for lower rates we - * should really know how much data is in the buffer to compute a delay - * that is not unnecessarily long) */ - bps = tty_get_baud_rate(tty); - if (bps > 1200) - timeout = max((HZ*2560) / bps, HZ/10); - else - timeout = 2*HZ; - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(timeout); - - /* clear control lines */ - if (tty) { - c_cflag = tty->termios->c_cflag; - if (c_cflag & HUPCL) { - spin_lock_irqsave(&priv->lock, flags); - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type); - } - } - /* kill urb */ if (port->write_urb != NULL) { result = usb_unlink_urb(port->write_urb); @@ -665,13 +646,6 @@ static int spcp8x5_open(struct tty_struct *tty, if (ret) return ret; - spin_lock_irqsave(&priv->lock, flags); - if (tty && (tty->termios->c_cflag & CBAUD)) - priv->line_control = MCR_DTR | MCR_RTS; - else - priv->line_control = 0; - spin_unlock_irqrestore(&priv->lock, flags); - spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type); /* Setup termios */ @@ -691,9 +665,10 @@ static int spcp8x5_open(struct tty_struct *tty, port->read_urb->dev = serial->dev; ret = usb_submit_urb(port->read_urb, GFP_KERNEL); if (ret) { - spcp8x5_close(tty, port, NULL); + spcp8x5_close(port); return -EPROTO; } + port->port.drain_delay = 256; return 0; } @@ -1033,6 +1008,8 @@ static struct usb_serial_driver spcp8x5_device = { .num_ports = 1, .open = spcp8x5_open, .close = spcp8x5_close, + .dtr_rts = spcp8x5_dtr_rts, + .carrier_raised = spcp8x5_carrier_raised, .write = spcp8x5_write, .set_termios = spcp8x5_set_termios, .ioctl = spcp8x5_ioctl, diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index 69879e43794..8b07ebc6bae 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -152,8 +152,7 @@ static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port, return result; } -static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp) +static void symbol_close(struct usb_serial_port *port) { struct symbol_private *priv = usb_get_serial_data(port->serial); diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 0a64bac306e..42cb04c403b 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -100,8 +100,7 @@ static int ti_startup(struct usb_serial *serial); static void ti_shutdown(struct usb_serial *serial); static int ti_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *file); -static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *file); +static void ti_close(struct usb_serial_port *port); static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *data, int count); static int ti_write_room(struct tty_struct *tty); @@ -647,8 +646,7 @@ release_lock: } -static void ti_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *file) +static void ti_close(struct usb_serial_port *port) { struct ti_device *tdev; struct ti_port *tport; diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index f331e2bde88..1967a7edc10 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -238,9 +238,11 @@ static int serial_open (struct tty_struct *tty, struct file *filp) goto bailout_interface_put; mutex_unlock(&serial->disc_mutex); } - mutex_unlock(&port->mutex); - return 0; + /* Now do the correct tty layer semantics */ + retval = tty_port_block_til_ready(&port->port, tty, filp); + if (retval == 0) + return 0; bailout_interface_put: usb_autopm_put_interface(serial->interface); @@ -259,64 +261,89 @@ bailout_serial_put: return retval; } -static void serial_close(struct tty_struct *tty, struct file *filp) +/** + * serial_do_down - shut down hardware + * @port: port to shut down + * + * Shut down a USB port unless it is the console. We never shut down the + * console hardware as it will always be in use. + * + * Don't free any resources at this point + */ +static void serial_do_down(struct usb_serial_port *port) { - struct usb_serial_port *port = tty->driver_data; + struct usb_serial_driver *drv = port->serial->type; struct usb_serial *serial; struct module *owner; - int count; - if (!port) + /* The console is magical, do not hang up the console hardware + or there will be tears */ + if (port->console) return; - dbg("%s - port %d", __func__, port->number); - mutex_lock(&port->mutex); serial = port->serial; owner = serial->type->driver.owner; - if (port->port.count == 0) { - mutex_unlock(&port->mutex); - return; - } - - if (port->port.count == 1) - /* only call the device specific close if this - * port is being closed by the last owner. Ensure we do - * this before we drop the port count. The call is protected - * by the port mutex - */ - serial->type->close(tty, port, filp); - - if (port->port.count == (port->console ? 2 : 1)) { - struct tty_struct *tty = tty_port_tty_get(&port->port); - if (tty) { - /* We must do this before we drop the port count to - zero. */ - if (tty->driver_data) - tty->driver_data = NULL; - tty_port_tty_set(&port->port, NULL); - tty_kref_put(tty); - } - } + if (drv->close) + drv->close(port); - --port->port.count; - count = port->port.count; mutex_unlock(&port->mutex); - put_device(&port->dev); +} + +/** + * serial_do_free - free resources post close/hangup + * @port: port to free up + * + * Do the resource freeing and refcount dropping for the port. We must + * be careful about ordering and we must avoid freeing up the console. + */ +static void serial_do_free(struct usb_serial_port *port) +{ + struct usb_serial *serial; + struct module *owner; + + /* The console is magical, do not hang up the console hardware + or there will be tears */ + if (port->console) + return; + + serial = port->serial; + owner = serial->type->driver.owner; + put_device(&port->dev); /* Mustn't dereference port any more */ - if (count == 0) { - mutex_lock(&serial->disc_mutex); - if (!serial->disconnected) - usb_autopm_put_interface(serial->interface); - mutex_unlock(&serial->disc_mutex); - } + mutex_lock(&serial->disc_mutex); + if (!serial->disconnected) + usb_autopm_put_interface(serial->interface); + mutex_unlock(&serial->disc_mutex); usb_serial_put(serial); - /* Mustn't dereference serial any more */ - if (count == 0) - module_put(owner); + module_put(owner); +} + +static void serial_close(struct tty_struct *tty, struct file *filp) +{ + struct usb_serial_port *port = tty->driver_data; + + dbg("%s - port %d", __func__, port->number); + + + if (tty_port_close_start(&port->port, tty, filp) == 0) + return; + + serial_do_down(port); + tty_port_close_end(&port->port, tty); + tty_port_tty_set(&port->port, NULL); + serial_do_free(port); +} + +static void serial_hangup(struct tty_struct *tty) +{ + struct usb_serial_port *port = tty->driver_data; + serial_do_down(port); + tty_port_hangup(&port->port); + serial_do_free(port); } static int serial_write(struct tty_struct *tty, const unsigned char *buf, @@ -648,6 +675,29 @@ static struct usb_serial_driver *search_serial_device( return NULL; } +static int serial_carrier_raised(struct tty_port *port) +{ + struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); + struct usb_serial_driver *drv = p->serial->type; + if (drv->carrier_raised) + return drv->carrier_raised(p); + /* No carrier control - don't block */ + return 1; +} + +static void serial_dtr_rts(struct tty_port *port, int on) +{ + struct usb_serial_port *p = container_of(port, struct usb_serial_port, port); + struct usb_serial_driver *drv = p->serial->type; + if (drv->dtr_rts) + drv->dtr_rts(p, on); +} + +static const struct tty_port_operations serial_port_ops = { + .carrier_raised = serial_carrier_raised, + .dtr_rts = serial_dtr_rts, +}; + int usb_serial_probe(struct usb_interface *interface, const struct usb_device_id *id) { @@ -841,6 +891,7 @@ int usb_serial_probe(struct usb_interface *interface, if (!port) goto probe_error; tty_port_init(&port->port); + port->port.ops = &serial_port_ops; port->serial = serial; spin_lock_init(&port->lock); mutex_init(&port->mutex); @@ -1071,6 +1122,9 @@ void usb_serial_disconnect(struct usb_interface *interface) if (port) { struct tty_struct *tty = tty_port_tty_get(&port->port); if (tty) { + /* The hangup will occur asynchronously but + the object refcounts will sort out all the + cleanup */ tty_hangup(tty); tty_kref_put(tty); } @@ -1135,6 +1189,7 @@ static const struct tty_operations serial_ops = { .open = serial_open, .close = serial_close, .write = serial_write, + .hangup = serial_hangup, .write_room = serial_write_room, .ioctl = serial_ioctl, .set_termios = serial_set_termios, @@ -1147,6 +1202,7 @@ static const struct tty_operations serial_ops = { .proc_fops = &serial_proc_fops, }; + struct tty_driver *usb_serial_tty_driver; static int __init usb_serial_init(void) diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c index 5ac414bda71..b15f1c0e1d4 100644 --- a/drivers/usb/serial/visor.c +++ b/drivers/usb/serial/visor.c @@ -38,8 +38,7 @@ /* function prototypes for a handspring visor */ static int visor_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void visor_close(struct tty_struct *tty, struct usb_serial_port *port, - struct file *filp); +static void visor_close(struct usb_serial_port *port); static int visor_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static int visor_write_room(struct tty_struct *tty); @@ -324,8 +323,7 @@ exit: } -static void visor_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void visor_close(struct usb_serial_port *port) { struct visor_private *priv = usb_get_serial_port_data(port); unsigned char *transfer_buffer; diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 5335d3211c0..7c7295d09f3 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -147,8 +147,7 @@ static int whiteheat_attach(struct usb_serial *serial); static void whiteheat_shutdown(struct usb_serial *serial); static int whiteheat_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); -static void whiteheat_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +static void whiteheat_close(struct usb_serial_port *port); static int whiteheat_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); @@ -712,8 +711,7 @@ exit: } -static void whiteheat_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp) +static void whiteheat_close(struct usb_serial_port *port) { struct whiteheat_private *info = usb_get_serial_port_data(port); struct whiteheat_urb_wrap *wrap; @@ -723,31 +721,7 @@ static void whiteheat_close(struct tty_struct *tty, dbg("%s - port %d", __func__, port->number); - mutex_lock(&port->serial->disc_mutex); - /* filp is NULL when called from usb_serial_disconnect */ - if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) { - mutex_unlock(&port->serial->disc_mutex); - return; - } - mutex_unlock(&port->serial->disc_mutex); - - tty->closing = 1; - -/* - * Not currently in use; tty_wait_until_sent() calls - * serial_chars_in_buffer() which deadlocks on the second semaphore - * acquisition. This should be fixed at some point. Greg's been - * notified. - if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) { - tty_wait_until_sent(tty, CLOSING_DELAY); - } -*/ - - tty_driver_flush_buffer(tty); - tty_ldisc_flush(tty); - firm_report_tx_done(port); - firm_close(port); /* shutdown our bulk reads and writes */ @@ -775,10 +749,7 @@ static void whiteheat_close(struct tty_struct *tty, } spin_unlock_irq(&info->lock); mutex_unlock(&info->deathwarrant); - stop_command_port(port->serial); - - tty->closing = 0; } diff --git a/fs/buffer.c b/fs/buffer.c index 49106127a4a..1864d0b6308 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2935,6 +2935,8 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(!buffer_locked(bh)); BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); + BUG_ON(buffer_delay(bh)); + BUG_ON(buffer_unwritten(bh)); /* * Mask in barrier bit for a write (could be either a WRITE or a diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f20c4069c22..b4868983942 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,12 @@ +Version 1.59 +------------ +Client uses server inode numbers (which are persistent) rather than +client generated ones by default (mount option "serverino" turned +on by default if server supports it). Add forceuid and forcegid +mount options (so that when negotiating unix extensions specifying +which uid mounted does not immediately force the server's reported +uids to be overridden). + Version 1.58 ------------ Guard against buffer overruns in various UCS-2 to UTF-8 string conversions @@ -10,6 +19,8 @@ we converted from). Fix endianness of the vcnum field used during session setup to distinguish multiple mounts to same server from different userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental flag to be set to 2, and mount must enable krb5 to turn on extended security). +Performance of file create to Samba improved (posix create on lookup +removes 1 of 2 network requests sent on file create) Version 1.57 ------------ diff --git a/fs/cifs/README b/fs/cifs/README index db208ddb989..ad92921dbde 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -262,7 +262,8 @@ A partial list of the supported mount options follows: mount. domain Set the SMB/CIFS workgroup name prepended to the username during CIFS session establishment - uid Set the default uid for inodes. For mounts to servers + forceuid Set the default uid for inodes based on the uid + passed in. For mounts to servers which do support the CIFS Unix extensions, such as a properly configured Samba server, the server provides the uid, gid and mode so this parameter should not be @@ -292,6 +293,12 @@ A partial list of the supported mount options follows: the client. Note that the mount.cifs helper must be at version 1.10 or higher to support specifying the uid (or gid) in non-numeric form. + forcegid (similar to above but for the groupid instead of uid) + uid Set the default uid for inodes, and indicate to the + cifs kernel driver which local user mounted . If the server + supports the unix extensions the default uid is + not used to fill in the owner fields of inodes (files) + unless the "forceuid" parameter is specified. gid Set the default gid for inodes (similar to above). file_mode If CIFS Unix extensions are not supported by the server this overrides the default mode for file inodes. @@ -388,8 +395,13 @@ A partial list of the supported mount options follows: or the CIFS Unix Extensions equivalent and for those this mount option will have no effect. Exporting cifs mounts under nfsd requires this mount option on the cifs mount. + This is now the default if server supports the + required network operation. noserverino Client generates inode numbers (rather than using the actual one - from the server) by default. + from the server). These inode numbers will vary after + unmount or reboot which can confuse some applications, + but not all server filesystems support unique inode + numbers. setuids If the CIFS Unix extensions are negotiated with the server the client will attempt to set the effective uid and gid of the local process on newly created files, directories, and diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 67bf93a40d2..4a4581cb2b5 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -23,6 +23,7 @@ #include <linux/string.h> #include <keys/user-type.h> #include <linux/key-type.h> +#include <linux/inet.h> #include "cifsglob.h" #include "cifs_spnego.h" #include "cifs_debug.h" @@ -73,9 +74,6 @@ struct key_type cifs_spnego_key_type = { * strlen(";sec=ntlmsspi") */ #define MAX_MECH_STR_LEN 13 -/* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/128 */ -#define MAX_IPV6_ADDR_LEN 43 - /* strlen of "host=" */ #define HOST_KEY_LEN 5 @@ -102,7 +100,7 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) host=hostname sec=mechanism uid=0xFF user=username */ desc_len = MAX_VER_STR_LEN + HOST_KEY_LEN + strlen(hostname) + - IP_KEY_LEN + MAX_IPV6_ADDR_LEN + + IP_KEY_LEN + INET6_ADDRSTRLEN + MAX_MECH_STR_LEN + UID_KEY_LEN + (sizeof(uid_t) * 2) + USER_KEY_LEN + strlen(sesInfo->userName) + 1; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 57ecdc83c26..1403b5d86a7 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -552,130 +552,138 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, return rc; } - -/* Retrieve an ACL from the server */ -static struct cifs_ntsd *get_cifs_acl(u32 *pacllen, struct inode *inode, - const char *path, const __u16 *pfid) +static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, + __u16 fid, u32 *pacllen) { - struct cifsFileInfo *open_file = NULL; - bool unlock_file = false; - int xid; - int rc = -EIO; - __u16 fid; - struct super_block *sb; - struct cifs_sb_info *cifs_sb; struct cifs_ntsd *pntsd = NULL; + int xid, rc; + + xid = GetXid(); + rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); + FreeXid(xid); - cFYI(1, ("get mode from ACL for %s", path)); - if (inode == NULL) - return NULL; + cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); + return pntsd; +} + +static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, + const char *path, u32 *pacllen) +{ + struct cifs_ntsd *pntsd = NULL; + int oplock = 0; + int xid, rc; + __u16 fid; xid = GetXid(); - if (pfid == NULL) - open_file = find_readable_file(CIFS_I(inode)); - else - fid = *pfid; - sb = inode->i_sb; - if (sb == NULL) { - FreeXid(xid); - return NULL; - } - cifs_sb = CIFS_SB(sb); - - if (open_file) { - unlock_file = true; - fid = open_file->netfid; - } else if (pfid == NULL) { - int oplock = 0; - /* open file */ - rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, - READ_CONTROL, 0, &fid, &oplock, NULL, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc != 0) { - cERROR(1, ("Unable to open file to get ACL")); - FreeXid(xid); - return NULL; - } + rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0, + &fid, &oplock, NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc) { + cERROR(1, ("Unable to open file to get ACL")); + goto out; } rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); cFYI(1, ("GetCIFSACL rc = %d ACL len %d", rc, *pacllen)); - if (unlock_file == true) /* find_readable_file increments ref count */ - atomic_dec(&open_file->wrtPending); - else if (pfid == NULL) /* if opened above we have to close the handle */ - CIFSSMBClose(xid, cifs_sb->tcon, fid); - /* else handle was passed in by caller */ + CIFSSMBClose(xid, cifs_sb->tcon, fid); + out: FreeXid(xid); return pntsd; } -/* Set an ACL on the server */ -static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, - struct inode *inode, const char *path) +/* Retrieve an ACL from the server */ +static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, + struct inode *inode, const char *path, + u32 *pacllen) { - struct cifsFileInfo *open_file; - bool unlock_file = false; - int xid; - int rc = -EIO; - __u16 fid; - struct super_block *sb; - struct cifs_sb_info *cifs_sb; + struct cifs_ntsd *pntsd = NULL; + struct cifsFileInfo *open_file = NULL; - cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); + if (inode) + open_file = find_readable_file(CIFS_I(inode)); + if (!open_file) + return get_cifs_acl_by_path(cifs_sb, path, pacllen); - if (!inode) - return rc; + pntsd = get_cifs_acl_by_fid(cifs_sb, open_file->netfid, pacllen); + atomic_dec(&open_file->wrtPending); + return pntsd; +} - sb = inode->i_sb; - if (sb == NULL) - return rc; +static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, + struct cifs_ntsd *pnntsd, u32 acllen) +{ + int xid, rc; - cifs_sb = CIFS_SB(sb); xid = GetXid(); + rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); + FreeXid(xid); - open_file = find_readable_file(CIFS_I(inode)); - if (open_file) { - unlock_file = true; - fid = open_file->netfid; - } else { - int oplock = 0; - /* open file */ - rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, - WRITE_DAC, 0, &fid, &oplock, NULL, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc != 0) { - cERROR(1, ("Unable to open file to set ACL")); - FreeXid(xid); - return rc; - } + cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); + return rc; +} + +static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, + struct cifs_ntsd *pnntsd, u32 acllen) +{ + int oplock = 0; + int xid, rc; + __u16 fid; + + xid = GetXid(); + + rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0, + &fid, &oplock, NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc) { + cERROR(1, ("Unable to open file to set ACL")); + goto out; } rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); cFYI(DBG2, ("SetCIFSACL rc = %d", rc)); - if (unlock_file) - atomic_dec(&open_file->wrtPending); - else - CIFSSMBClose(xid, cifs_sb->tcon, fid); + CIFSSMBClose(xid, cifs_sb->tcon, fid); + out: FreeXid(xid); + return rc; +} +/* Set an ACL on the server */ +static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, + struct inode *inode, const char *path) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *open_file; + int rc; + + cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); + + open_file = find_readable_file(CIFS_I(inode)); + if (!open_file) + return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); + + rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); + atomic_dec(&open_file->wrtPending); return rc; } /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ -void acl_to_uid_mode(struct inode *inode, const char *path, const __u16 *pfid) +void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, + const char *path, const __u16 *pfid) { struct cifs_ntsd *pntsd = NULL; u32 acllen = 0; int rc = 0; cFYI(DBG2, ("converting ACL to mode for %s", path)); - pntsd = get_cifs_acl(&acllen, inode, path, pfid); + + if (pfid) + pntsd = get_cifs_acl_by_fid(cifs_sb, *pfid, &acllen); + else + pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen); /* if we can retrieve the ACL, now parse Access Control Entries, ACEs */ if (pntsd) @@ -698,7 +706,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) cFYI(DBG2, ("set ACL from mode for %s", path)); /* Get the security descriptor */ - pntsd = get_cifs_acl(&secdesclen, inode, path, NULL); + pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); /* Add three ACEs for owner, group, everyone getting rid of other ACEs as chmod disables ACEs and set the security descriptor */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 5e6d35804d7..0a10a59b639 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -146,7 +146,7 @@ cifs_read_super(struct super_block *sb, void *data, #endif sb->s_blocksize = CIFS_MAX_MSGSIZE; sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ - inode = cifs_iget(sb, ROOT_I); + inode = cifs_root_iget(sb, ROOT_I); if (IS_ERR(inode)) { rc = PTR_ERR(inode); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 051b71cfdea..9570a0e8023 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -36,7 +36,7 @@ extern void cifs_read_inode(struct inode *); /* Functions related to inodes */ extern const struct inode_operations cifs_dir_inode_ops; -extern struct inode *cifs_iget(struct super_block *, unsigned long); +extern struct inode *cifs_root_iget(struct super_block *, unsigned long); extern int cifs_create(struct inode *, struct dentry *, int, struct nameidata *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, @@ -100,5 +100,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.58" +#define CIFS_VERSION "1.59" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index fae083930ee..f9452329bcc 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -90,10 +90,10 @@ extern struct oplock_q_entry *AllocOplockQEntry(struct inode *, u16, struct cifsTconInfo *); extern void DeleteOplockQEntry(struct oplock_q_entry *); extern void DeleteTconOplockQEntries(struct cifsTconInfo *); -extern struct timespec cifs_NTtimeToUnix(u64 utc_nanoseconds_since_1601); +extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec); -extern __le64 cnvrtDosCifsTm(__u16 date, __u16 time); -extern struct timespec cnvrtDosUnixTm(__u16 date, __u16 time); +extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, + int offset); extern int cifs_posix_open(char *full_path, struct inode **pinode, struct super_block *sb, int mode, int oflags, @@ -108,8 +108,8 @@ extern int cifs_get_inode_info(struct inode **pinode, extern int cifs_get_inode_info_unix(struct inode **pinode, const unsigned char *search_path, struct super_block *sb, int xid); -extern void acl_to_uid_mode(struct inode *inode, const char *path, - const __u16 *pfid); +extern void acl_to_uid_mode(struct cifs_sb_info *cifs_sb, struct inode *inode, + const char *path, const __u16 *pfid); extern int mode_to_acl(struct inode *inode, const char *path, __u64); extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index d06260251c3..b84c61d5bca 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -524,8 +524,8 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) int val, seconds, remain, result; struct timespec ts, utc; utc = CURRENT_TIME; - ts = cnvrtDosUnixTm(le16_to_cpu(rsp->SrvTime.Date), - le16_to_cpu(rsp->SrvTime.Time)); + ts = cnvrtDosUnixTm(rsp->SrvTime.Date, + rsp->SrvTime.Time, 0); cFYI(1, ("SrvTime %d sec since 1970 (utc: %d) diff: %d", (int)ts.tv_sec, (int)utc.tv_sec, (int)(utc.tv_sec - ts.tv_sec))); @@ -2427,8 +2427,7 @@ querySymLinkRetry: params = 2 /* level */ + 4 /* rsrvd */ + name_len /* incl null */ ; pSMB->TotalDataCount = 0; pSMB->MaxParameterCount = cpu_to_le16(2); - /* BB find exact max data count below from sess structure BB */ - pSMB->MaxDataCount = cpu_to_le16(4000); + pSMB->MaxDataCount = cpu_to_le16(CIFSMaxBufSize); pSMB->MaxSetupCount = 0; pSMB->Reserved = 0; pSMB->Flags = 0; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4aa81a507b7..97f4311b9a8 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -35,6 +35,7 @@ #include <linux/namei.h> #include <asm/uaccess.h> #include <asm/processor.h> +#include <linux/inet.h> #include <net/ipv6.h> #include "cifspdu.h" #include "cifsglob.h" @@ -61,7 +62,6 @@ struct smb_vol { char *domainname; char *UNC; char *UNCip; - char *in6_addr; /* ipv6 address as human readable form of in6_addr */ char *iocharset; /* local code page for mapping to and from Unicode */ char source_rfc1001_name[16]; /* netbios name of client */ char target_rfc1001_name[16]; /* netbios name of server for Win9x/ME */ @@ -827,14 +827,16 @@ cifs_parse_mount_options(char *options, const char *devname, vol->target_rfc1001_name[0] = 0; vol->linux_uid = current_uid(); /* use current_euid() instead? */ vol->linux_gid = current_gid(); - vol->dir_mode = S_IRWXUGO; - /* 2767 perms indicate mandatory locking support */ - vol->file_mode = (S_IRWXUGO | S_ISGID) & (~S_IXGRP); + + /* default to only allowing write access to owner of the mount */ + vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR; /* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */ vol->rw = true; /* default is always to request posix paths. */ vol->posix_paths = 1; + /* default to using server inode numbers where available */ + vol->server_ino = 1; if (!options) return 1; @@ -955,10 +957,12 @@ cifs_parse_mount_options(char *options, const char *devname, } strcpy(vol->password, value); } - } else if (strnicmp(data, "ip", 2) == 0) { + } else if (!strnicmp(data, "ip", 2) || + !strnicmp(data, "addr", 4)) { if (!value || !*value) { vol->UNCip = NULL; - } else if (strnlen(value, 35) < 35) { + } else if (strnlen(value, INET6_ADDRSTRLEN) < + INET6_ADDRSTRLEN) { vol->UNCip = value; } else { printk(KERN_WARNING "CIFS: ip address " @@ -1092,17 +1096,17 @@ cifs_parse_mount_options(char *options, const char *devname, return 1; } } else if (strnicmp(data, "uid", 3) == 0) { - if (value && *value) { + if (value && *value) vol->linux_uid = simple_strtoul(value, &value, 0); + } else if (strnicmp(data, "forceuid", 8) == 0) { vol->override_uid = 1; - } } else if (strnicmp(data, "gid", 3) == 0) { - if (value && *value) { + if (value && *value) vol->linux_gid = simple_strtoul(value, &value, 0); + } else if (strnicmp(data, "forcegid", 8) == 0) { vol->override_gid = 1; - } } else if (strnicmp(data, "file_mode", 4) == 0) { if (value && *value) { vol->file_mode = @@ -1315,16 +1319,6 @@ cifs_parse_mount_options(char *options, const char *devname, vol->direct_io = 1; } else if (strnicmp(data, "forcedirectio", 13) == 0) { vol->direct_io = 1; - } else if (strnicmp(data, "in6_addr", 8) == 0) { - if (!value || !*value) { - vol->in6_addr = NULL; - } else if (strnlen(value, 49) == 48) { - vol->in6_addr = value; - } else { - printk(KERN_WARNING "CIFS: ip v6 address not " - "48 characters long\n"); - return 1; - } } else if (strnicmp(data, "noac", 4) == 0) { printk(KERN_WARNING "CIFS: Mount option noac not " "supported. Instead set " diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 302ea15f02e..06866841b97 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -241,7 +241,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, /* BB need same check in cifs_create too? */ /* if not oplocked, invalidate inode pages if mtime or file size changed */ - temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime)); + temp = cifs_NTtimeToUnix(buf->LastWriteTime); if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && (file->f_path.dentry->d_inode->i_size == (loff_t)le64_to_cpu(buf->EndOfFile))) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9c869a6dcba..fad882b075b 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -85,10 +85,10 @@ static void cifs_unix_info_to_inode(struct inode *inode, __u64 num_of_bytes = le64_to_cpu(info->NumOfBytes); __u64 end_of_file = le64_to_cpu(info->EndOfFile); - inode->i_atime = cifs_NTtimeToUnix(le64_to_cpu(info->LastAccessTime)); + inode->i_atime = cifs_NTtimeToUnix(info->LastAccessTime); inode->i_mtime = - cifs_NTtimeToUnix(le64_to_cpu(info->LastModificationTime)); - inode->i_ctime = cifs_NTtimeToUnix(le64_to_cpu(info->LastStatusChange)); + cifs_NTtimeToUnix(info->LastModificationTime); + inode->i_ctime = cifs_NTtimeToUnix(info->LastStatusChange); inode->i_mode = le64_to_cpu(info->Permissions); /* @@ -554,14 +554,11 @@ int cifs_get_inode_info(struct inode **pinode, /* Linux can not store file creation time so ignore it */ if (pfindData->LastAccessTime) - inode->i_atime = cifs_NTtimeToUnix - (le64_to_cpu(pfindData->LastAccessTime)); + inode->i_atime = cifs_NTtimeToUnix(pfindData->LastAccessTime); else /* do not need to use current_fs_time - time not stored */ inode->i_atime = CURRENT_TIME; - inode->i_mtime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); - inode->i_ctime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); + inode->i_mtime = cifs_NTtimeToUnix(pfindData->LastWriteTime); + inode->i_ctime = cifs_NTtimeToUnix(pfindData->ChangeTime); cFYI(DBG2, ("Attributes came in as 0x%x", attr)); if (adjustTZ && (pTcon->ses) && (pTcon->ses->server)) { inode->i_ctime.tv_sec += pTcon->ses->server->timeAdj; @@ -629,7 +626,7 @@ int cifs_get_inode_info(struct inode **pinode, /* fill in 0777 bits from ACL */ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) { cFYI(1, ("Getting mode bits from ACL")); - acl_to_uid_mode(inode, full_path, pfid); + acl_to_uid_mode(cifs_sb, inode, full_path, pfid); } #endif if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) { @@ -699,7 +696,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) } /* gets root inode */ -struct inode *cifs_iget(struct super_block *sb, unsigned long ino) +struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) { int xid; struct cifs_sb_info *cifs_sb; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index e2fe998989a..32d6baa0a54 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -853,12 +853,12 @@ smbCalcSize_LE(struct smb_hdr *ptr) #define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000) - /* - * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units) - * into Unix UTC (based 1970-01-01, in seconds). - */ +/* + * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units) + * into Unix UTC (based 1970-01-01, in seconds). + */ struct timespec -cifs_NTtimeToUnix(u64 ntutc) +cifs_NTtimeToUnix(__le64 ntutc) { struct timespec ts; /* BB what about the timezone? BB */ @@ -866,7 +866,7 @@ cifs_NTtimeToUnix(u64 ntutc) /* Subtract the NTFS time offset, then convert to 1s intervals. */ u64 t; - t = ntutc - NTFS_TIME_OFFSET; + t = le64_to_cpu(ntutc) - NTFS_TIME_OFFSET; ts.tv_nsec = do_div(t, 10000000) * 100; ts.tv_sec = t; return ts; @@ -883,16 +883,12 @@ cifs_UnixTimeToNT(struct timespec t) static int total_days_of_prev_months[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334}; - -__le64 cnvrtDosCifsTm(__u16 date, __u16 time) -{ - return cpu_to_le64(cifs_UnixTimeToNT(cnvrtDosUnixTm(date, time))); -} - -struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) +struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) { struct timespec ts; int sec, min, days, month, year; + u16 date = le16_to_cpu(le_date); + u16 time = le16_to_cpu(le_time); SMB_TIME *st = (SMB_TIME *)&time; SMB_DATE *sd = (SMB_DATE *)&date; @@ -933,7 +929,7 @@ struct timespec cnvrtDosUnixTm(__u16 date, __u16 time) days -= ((year & 0x03) == 0) && (month < 2 ? 1 : 0); sec += 24 * 60 * 60 * days; - ts.tv_sec = sec; + ts.tv_sec = sec + offset; /* cFYI(1,("sec after cnvrt dos to unix time %d",sec)); */ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 964e097c820..86d0055dc52 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -115,17 +115,6 @@ construct_dentry(struct qstr *qstring, struct file *file, return rc; } -static void AdjustForTZ(struct cifsTconInfo *tcon, struct inode *inode) -{ - if ((tcon) && (tcon->ses) && (tcon->ses->server)) { - inode->i_ctime.tv_sec += tcon->ses->server->timeAdj; - inode->i_mtime.tv_sec += tcon->ses->server->timeAdj; - inode->i_atime.tv_sec += tcon->ses->server->timeAdj; - } - return; -} - - static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, char *buf, unsigned int *pobject_type, int isNewInode) { @@ -150,26 +139,25 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type, allocation_size = le64_to_cpu(pfindData->AllocationSize); end_of_file = le64_to_cpu(pfindData->EndOfFile); tmp_inode->i_atime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); + cifs_NTtimeToUnix(pfindData->LastAccessTime); tmp_inode->i_mtime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastWriteTime)); + cifs_NTtimeToUnix(pfindData->LastWriteTime); tmp_inode->i_ctime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->ChangeTime)); + cifs_NTtimeToUnix(pfindData->ChangeTime); } else { /* legacy, OS2 and DOS style */ -/* struct timespec ts;*/ + int offset = cifs_sb->tcon->ses->server->timeAdj; FIND_FILE_STANDARD_INFO *pfindData = (FIND_FILE_STANDARD_INFO *)buf; - tmp_inode->i_mtime = cnvrtDosUnixTm( - le16_to_cpu(pfindData->LastWriteDate), - le16_to_cpu(pfindData->LastWriteTime)); - tmp_inode->i_atime = cnvrtDosUnixTm( - le16_to_cpu(pfindData->LastAccessDate), - le16_to_cpu(pfindData->LastAccessTime)); - tmp_inode->i_ctime = cnvrtDosUnixTm( - le16_to_cpu(pfindData->LastWriteDate), - le16_to_cpu(pfindData->LastWriteTime)); - AdjustForTZ(cifs_sb->tcon, tmp_inode); + tmp_inode->i_mtime = cnvrtDosUnixTm(pfindData->LastWriteDate, + pfindData->LastWriteTime, + offset); + tmp_inode->i_atime = cnvrtDosUnixTm(pfindData->LastAccessDate, + pfindData->LastAccessTime, + offset); + tmp_inode->i_ctime = cnvrtDosUnixTm(pfindData->LastWriteDate, + pfindData->LastWriteTime, + offset); attr = le16_to_cpu(pfindData->Attributes); allocation_size = le32_to_cpu(pfindData->AllocationSize); end_of_file = le32_to_cpu(pfindData->DataSize); @@ -331,11 +319,11 @@ static void unix_fill_in_inode(struct inode *tmp_inode, local_size = tmp_inode->i_size; tmp_inode->i_atime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastAccessTime)); + cifs_NTtimeToUnix(pfindData->LastAccessTime); tmp_inode->i_mtime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastModificationTime)); + cifs_NTtimeToUnix(pfindData->LastModificationTime); tmp_inode->i_ctime = - cifs_NTtimeToUnix(le64_to_cpu(pfindData->LastStatusChange)); + cifs_NTtimeToUnix(pfindData->LastStatusChange); tmp_inode->i_mode = le64_to_cpu(pfindData->Permissions); /* since we set the inode type below we need to mask off type diff --git a/fs/compat.c b/fs/compat.c index 681ed81e6be..bb2a9b2e817 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1488,7 +1488,7 @@ int compat_do_execve(char * filename, if (!bprm) goto out_files; - retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + retval = mutex_lock_interruptible(¤t->cred_guard_mutex); if (retval < 0) goto out_free; current->in_execve = 1; @@ -1550,7 +1550,7 @@ int compat_do_execve(char * filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); acct_update_integrals(current); free_bprm(bprm); if (displaced) @@ -1573,7 +1573,7 @@ out_unmark: out_unlock: current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); out_free: free_bprm(bprm); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index c68edb96944..9b1d285f9fe 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -557,8 +557,10 @@ static int __init init_devpts_fs(void) int err = register_filesystem(&devpts_fs_type); if (!err) { devpts_mnt = kern_mount(&devpts_fs_type); - if (IS_ERR(devpts_mnt)) + if (IS_ERR(devpts_mnt)) { err = PTR_ERR(devpts_mnt); + unregister_filesystem(&devpts_fs_type); + } } return err; } diff --git a/fs/exec.c b/fs/exec.c index 895823d0149..a7fcd975c6b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1016,7 +1016,7 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; - /* cred_exec_mutex must be held at least to this point to prevent + /* cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's * credentials; any time after this it may be unlocked */ @@ -1026,7 +1026,7 @@ EXPORT_SYMBOL(install_exec_creds); /* * determine how safe it is to execute the proposed program - * - the caller must hold current->cred_exec_mutex to protect against + * - the caller must hold current->cred_guard_mutex to protect against * PTRACE_ATTACH */ int check_unsafe_exec(struct linux_binprm *bprm) @@ -1268,7 +1268,7 @@ int do_execve(char * filename, if (!bprm) goto out_files; - retval = mutex_lock_interruptible(¤t->cred_exec_mutex); + retval = mutex_lock_interruptible(¤t->cred_guard_mutex); if (retval < 0) goto out_free; current->in_execve = 1; @@ -1331,7 +1331,7 @@ int do_execve(char * filename, /* execve succeeded */ current->fs->in_exec = 0; current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); acct_update_integrals(current); free_bprm(bprm); if (displaced) @@ -1354,7 +1354,7 @@ out_unmark: out_unlock: current->in_execve = 0; - mutex_unlock(¤t->cred_exec_mutex); + mutex_unlock(¤t->cred_guard_mutex); out_free: free_bprm(bprm); diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 5c4afe65224..e3c748faf2d 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1093,6 +1093,7 @@ failed_mount: brelse(bh); failed_sbi: sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); return ret; } diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 599dbfe504c..d8b73d4abe3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2021,6 +2021,7 @@ failed_mount: brelse(bh); out_fail: sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); lock_kernel(); return ret; diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index a8ff003a00f..8a34710ecf4 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -5,8 +5,8 @@ obj-$(CONFIG_EXT4_FS) += ext4.o ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ - ext4_jbd2.o migrate.o mballoc.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ + ext4_jbd2.o migrate.o mballoc.o block_validity.o ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 53c72ad8587..e2126d70dff 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -19,7 +19,6 @@ #include <linux/buffer_head.h> #include "ext4.h" #include "ext4_jbd2.h" -#include "group.h" #include "mballoc.h" /* @@ -88,6 +87,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, ext4_group_t block_group, struct ext4_group_desc *gdp) { int bit, bit_max; + ext4_group_t ngroups = ext4_get_groups_count(sb); unsigned free_blocks, group_blocks; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -123,7 +123,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, bit_max += ext4_bg_num_gdb(sb, block_group); } - if (block_group == sbi->s_groups_count - 1) { + if (block_group == ngroups - 1) { /* * Even though mke2fs always initialize first and last group * if some other tool enabled the EXT4_BG_BLOCK_UNINIT we need @@ -131,7 +131,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ group_blocks = ext4_blocks_count(sbi->s_es) - le32_to_cpu(sbi->s_es->s_first_data_block) - - (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); + (EXT4_BLOCKS_PER_GROUP(sb) * (ngroups - 1)); } else { group_blocks = EXT4_BLOCKS_PER_GROUP(sb); } @@ -205,18 +205,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, { unsigned int group_desc; unsigned int offset; + ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (block_group >= sbi->s_groups_count) { + if (block_group >= ngroups) { ext4_error(sb, "ext4_get_group_desc", "block_group >= groups_count - " "block_group = %u, groups_count = %u", - block_group, sbi->s_groups_count); + block_group, ngroups); return NULL; } - smp_rmb(); group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); @@ -326,16 +326,16 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) unlock_buffer(bh); return bh; } - spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); unlock_buffer(bh); return bh; } - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); if (buffer_uptodate(bh)) { /* * if not uninit if bh is uptodate, @@ -451,7 +451,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, down_write(&grp->alloc_sem); for (i = 0, blocks_freed = 0; i < count; i++) { BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + if (!ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), bit + i, bitmap_bh->b_data)) { ext4_error(sb, __func__, "bit already cleared for block %llu", @@ -461,11 +461,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, blocks_freed++; } } - spin_lock(sb_bgl_lock(sbi, block_group)); + ext4_lock_group(sb, block_group); blk_free_count = blocks_freed + ext4_free_blks_count(sb, desc); ext4_free_blks_set(sb, desc, blk_free_count); desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc); - spin_unlock(sb_bgl_lock(sbi, block_group)); + ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeblocks_counter, blocks_freed); if (sbi->s_log_groups_per_flex) { @@ -665,7 +665,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) ext4_fsblk_t desc_count; struct ext4_group_desc *gdp; ext4_group_t i; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); #ifdef EXT4FS_DEBUG struct ext4_super_block *es; ext4_fsblk_t bitmap_count; @@ -677,7 +677,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) bitmap_count = 0; gdp = NULL; - smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) @@ -700,7 +699,6 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) return bitmap_count; #else desc_count = 0; - smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c new file mode 100644 index 00000000000..50784ef0756 --- /dev/null +++ b/fs/ext4/block_validity.c @@ -0,0 +1,244 @@ +/* + * linux/fs/ext4/block_validity.c + * + * Copyright (C) 2009 + * Theodore Ts'o (tytso@mit.edu) + * + * Track which blocks in the filesystem are metadata blocks that + * should never be used as data blocks by files or directories. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> +#include <linux/module.h> +#include <linux/swap.h> +#include <linux/pagemap.h> +#include <linux/version.h> +#include <linux/blkdev.h> +#include <linux/mutex.h> +#include "ext4.h" + +struct ext4_system_zone { + struct rb_node node; + ext4_fsblk_t start_blk; + unsigned int count; +}; + +static struct kmem_cache *ext4_system_zone_cachep; + +int __init init_ext4_system_zone(void) +{ + ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, + SLAB_RECLAIM_ACCOUNT); + if (ext4_system_zone_cachep == NULL) + return -ENOMEM; + return 0; +} + +void exit_ext4_system_zone(void) +{ + kmem_cache_destroy(ext4_system_zone_cachep); +} + +static inline int can_merge(struct ext4_system_zone *entry1, + struct ext4_system_zone *entry2) +{ + if ((entry1->start_blk + entry1->count) == entry2->start_blk) + return 1; + return 0; +} + +/* + * Mark a range of blocks as belonging to the "system zone" --- that + * is, filesystem metadata blocks which should never be used by + * inodes. + */ +static int add_system_zone(struct ext4_sb_info *sbi, + ext4_fsblk_t start_blk, + unsigned int count) +{ + struct ext4_system_zone *new_entry = NULL, *entry; + struct rb_node **n = &sbi->system_blks.rb_node, *node; + struct rb_node *parent = NULL, *new_node = NULL; + + while (*n) { + parent = *n; + entry = rb_entry(parent, struct ext4_system_zone, node); + if (start_blk < entry->start_blk) + n = &(*n)->rb_left; + else if (start_blk >= (entry->start_blk + entry->count)) + n = &(*n)->rb_right; + else { + if (start_blk + count > (entry->start_blk + + entry->count)) + entry->count = (start_blk + count - + entry->start_blk); + new_node = *n; + new_entry = rb_entry(new_node, struct ext4_system_zone, + node); + break; + } + } + + if (!new_entry) { + new_entry = kmem_cache_alloc(ext4_system_zone_cachep, + GFP_KERNEL); + if (!new_entry) + return -ENOMEM; + new_entry->start_blk = start_blk; + new_entry->count = count; + new_node = &new_entry->node; + + rb_link_node(new_node, parent, n); + rb_insert_color(new_node, &sbi->system_blks); + } + + /* Can we merge to the left? */ + node = rb_prev(new_node); + if (node) { + entry = rb_entry(node, struct ext4_system_zone, node); + if (can_merge(entry, new_entry)) { + new_entry->start_blk = entry->start_blk; + new_entry->count += entry->count; + rb_erase(node, &sbi->system_blks); + kmem_cache_free(ext4_system_zone_cachep, entry); + } + } + + /* Can we merge to the right? */ + node = rb_next(new_node); + if (node) { + entry = rb_entry(node, struct ext4_system_zone, node); + if (can_merge(new_entry, entry)) { + new_entry->count += entry->count; + rb_erase(node, &sbi->system_blks); + kmem_cache_free(ext4_system_zone_cachep, entry); + } + } + return 0; +} + +static void debug_print_tree(struct ext4_sb_info *sbi) +{ + struct rb_node *node; + struct ext4_system_zone *entry; + int first = 1; + + printk(KERN_INFO "System zones: "); + node = rb_first(&sbi->system_blks); + while (node) { + entry = rb_entry(node, struct ext4_system_zone, node); + printk("%s%llu-%llu", first ? "" : ", ", + entry->start_blk, entry->start_blk + entry->count - 1); + first = 0; + node = rb_next(node); + } + printk("\n"); +} + +int ext4_setup_system_zone(struct super_block *sb) +{ + ext4_group_t ngroups = ext4_get_groups_count(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_group_desc *gdp; + ext4_group_t i; + int flex_size = ext4_flex_bg_size(sbi); + int ret; + + if (!test_opt(sb, BLOCK_VALIDITY)) { + if (EXT4_SB(sb)->system_blks.rb_node) + ext4_release_system_zone(sb); + return 0; + } + if (EXT4_SB(sb)->system_blks.rb_node) + return 0; + + for (i=0; i < ngroups; i++) { + if (ext4_bg_has_super(sb, i) && + ((i < 5) || ((i % flex_size) == 0))) + add_system_zone(sbi, ext4_group_first_block_no(sb, i), + sbi->s_gdb_count + 1); + gdp = ext4_get_group_desc(sb, i, NULL); + ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); + if (ret) + return ret; + ret = add_system_zone(sbi, ext4_inode_bitmap(sb, gdp), 1); + if (ret) + return ret; + ret = add_system_zone(sbi, ext4_inode_table(sb, gdp), + sbi->s_itb_per_group); + if (ret) + return ret; + } + + if (test_opt(sb, DEBUG)) + debug_print_tree(EXT4_SB(sb)); + return 0; +} + +/* Called when the filesystem is unmounted */ +void ext4_release_system_zone(struct super_block *sb) +{ + struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node; + struct rb_node *parent; + struct ext4_system_zone *entry; + + while (n) { + /* Do the node's children first */ + if (n->rb_left) { + n = n->rb_left; + continue; + } + if (n->rb_right) { + n = n->rb_right; + continue; + } + /* + * The node has no children; free it, and then zero + * out parent's link to it. Finally go to the + * beginning of the loop and try to free the parent + * node. + */ + parent = rb_parent(n); + entry = rb_entry(n, struct ext4_system_zone, node); + kmem_cache_free(ext4_system_zone_cachep, entry); + if (!parent) + EXT4_SB(sb)->system_blks.rb_node = NULL; + else if (parent->rb_left == n) + parent->rb_left = NULL; + else if (parent->rb_right == n) + parent->rb_right = NULL; + n = parent; + } + EXT4_SB(sb)->system_blks.rb_node = NULL; +} + +/* + * Returns 1 if the passed-in block region (start_blk, + * start_blk+count) is valid; 0 if some part of the block region + * overlaps with filesystem metadata blocks. + */ +int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, + unsigned int count) +{ + struct ext4_system_zone *entry; + struct rb_node *n = sbi->system_blks.rb_node; + + if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (start_blk + count > ext4_blocks_count(sbi->s_es))) + return 0; + while (n) { + entry = rb_entry(n, struct ext4_system_zone, node); + if (start_blk + count - 1 < entry->start_blk) + n = n->rb_left; + else if (start_blk >= (entry->start_blk + entry->count)) + n = n->rb_right; + else + return 0; + } + return 1; +} + diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index b64789929a6..9dc93168e26 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -131,8 +131,7 @@ static int ext4_readdir(struct file *filp, struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, - 0, 0, 0); + err = ext4_get_blocks(NULL, inode, blk, 1, &map_bh, 0); if (err > 0) { pgoff_t index = map_bh.b_blocknr >> (PAGE_CACHE_SHIFT - inode->i_blkbits); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d0f15ef56de..cc7d5edc38c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -21,7 +21,14 @@ #include <linux/magic.h> #include <linux/jbd2.h> #include <linux/quota.h> -#include "ext4_i.h" +#include <linux/rwsem.h> +#include <linux/rbtree.h> +#include <linux/seqlock.h> +#include <linux/mutex.h> +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/blockgroup_lock.h> +#include <linux/percpu_counter.h> /* * The fourth extended filesystem constants/structures @@ -46,6 +53,19 @@ #define ext4_debug(f, a...) do {} while (0) #endif +/* data type for block offset of block group */ +typedef int ext4_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long long ext4_fsblk_t; + +/* data type for file logical block number */ +typedef __u32 ext4_lblk_t; + +/* data type for block group number */ +typedef unsigned int ext4_group_t; + + /* prefer goal again. length */ #define EXT4_MB_HINT_MERGE 1 /* blocks already reserved */ @@ -179,9 +199,6 @@ struct flex_groups { #define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ #define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */ -#ifdef __KERNEL__ -#include "ext4_sb.h" -#endif /* * Macro-instructions used to manage group descriptors */ @@ -297,10 +314,23 @@ struct ext4_new_group_data { }; /* - * Following is used by preallocation code to tell get_blocks() that we - * want uninitialzed extents. + * Flags used by ext4_get_blocks() */ -#define EXT4_CREATE_UNINITIALIZED_EXT 2 + /* Allocate any needed blocks and/or convert an unitialized + extent to be an initialized ext4 */ +#define EXT4_GET_BLOCKS_CREATE 0x0001 + /* Request the creation of an unitialized extent */ +#define EXT4_GET_BLOCKS_UNINIT_EXT 0x0002 +#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT (EXT4_GET_BLOCKS_UNINIT_EXT|\ + EXT4_GET_BLOCKS_CREATE) + /* Caller is from the delayed allocation writeout path, + so set the magic i_delalloc_reserve_flag after taking the + inode allocation semaphore for */ +#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004 + /* Call ext4_da_update_reserve_space() after successfully + allocating the blocks */ +#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008 + /* * ioctl commands @@ -516,6 +546,110 @@ do { \ #endif /* defined(__KERNEL__) || defined(__linux__) */ /* + * storage for cached extent + */ +struct ext4_ext_cache { + ext4_fsblk_t ec_start; + ext4_lblk_t ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; +}; + +/* + * fourth extended file system inode data in memory + */ +struct ext4_inode_info { + __le32 i_data[15]; /* unconverted */ + __u32 i_flags; + ext4_fsblk_t i_file_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is ued for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + ext4_group_t i_block_group; + __u32 i_state; /* Dynamic state flags for ext4 */ + + ext4_lblk_t i_dir_start_lookup; +#ifdef CONFIG_EXT4_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_mutex even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT4_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + + /* + * i_disksize keeps track of what the inode size is ON DISK, not + * in memory. During truncate, i_size is set to the new size by + * the VFS prior to calling ext4_truncate(), but the filesystem won't + * set i_disksize to 0 until the truncate is actually under way. + * + * The intent is that i_disksize always represents the blocks which + * are used by this file. This allows recovery to restart truncate + * on orphans if we crash during truncate. We actually write i_disksize + * into the on-disk inode when writing inodes out, instead of i_size. + * + * The only time when i_disksize and i_size may be different is when + * a truncate is in progress. The only things which change i_disksize + * are ext4_get_block (growth) and ext4_truncate (shrinkth). + */ + loff_t i_disksize; + + /* + * i_data_sem is for serialising ext4_truncate() against + * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's + * data tree are chopped off during truncate. We can't do that in + * ext4 because whenever we perform intermediate commits during + * truncate, the inode and all the metadata blocks *must* be in a + * consistent state which allows truncation of the orphans to restart + * during recovery. Hence we must fix the get_block-vs-truncate race + * by other means, so we have i_data_sem. + */ + struct rw_semaphore i_data_sem; + struct inode vfs_inode; + struct jbd2_inode jinode; + + struct ext4_ext_cache i_cached_extent; + /* + * File creation time. Its function is same as that of + * struct timespec i_{a,c,m}time in the generic inode. + */ + struct timespec i_crtime; + + /* mballoc */ + struct list_head i_prealloc_list; + spinlock_t i_prealloc_lock; + + /* ialloc */ + ext4_group_t i_last_alloc_group; + + /* allocation reservation info for delalloc */ + unsigned int i_reserved_data_blocks; + unsigned int i_reserved_meta_blocks; + unsigned int i_allocated_meta_blocks; + unsigned short i_delalloc_reserved_flag; + + /* on-disk additional length */ + __u16 i_extra_isize; + + spinlock_t i_block_reservation_lock; +}; + +/* * File system states */ #define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ @@ -560,6 +694,7 @@ do { \ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ +#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -689,6 +824,137 @@ struct ext4_super_block { }; #ifdef __KERNEL__ +/* + * fourth extended-fs super-block data in memory + */ +struct ext4_sb_info { + unsigned long s_desc_size; /* Size of a group descriptor in bytes */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + ext4_group_t s_groups_count; /* Number of groups in the fs */ + unsigned long s_overhead_last; /* Last calculated overhead */ + unsigned long s_blocks_last; /* Last seen block count */ + loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ + struct buffer_head **s_group_desc; + unsigned long s_mount_opt; + ext4_fsblk_t s_sb_block; + uid_t s_resuid; + gid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + unsigned int s_inode_readahead_blks; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; + int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct percpu_counter s_dirtyblocks_counter; + struct blockgroup_lock *s_blockgroup_lock; + struct proc_dir_entry *s_proc; + struct kobject s_kobj; + struct completion s_kobj_unregister; + + /* Journaling */ + struct inode *s_journal_inode; + struct journal_s *s_journal; + struct list_head s_orphan; + struct mutex s_orphan_lock; + struct mutex s_resize_lock; + unsigned long s_commit_interval; + u32 s_max_batch_time; + u32 s_min_batch_time; + struct block_device *journal_bdev; +#ifdef CONFIG_JBD2_DEBUG + struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ +#endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif + unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ + struct rb_root system_blks; + +#ifdef EXTENTS_STATS + /* ext4 extents stats */ + unsigned long s_ext_min; + unsigned long s_ext_max; + unsigned long s_depth_max; + spinlock_t s_ext_stats_lock; + unsigned long s_ext_blocks; + unsigned long s_ext_extents; +#endif + + /* for buddy allocator */ + struct ext4_group_info ***s_group_info; + struct inode *s_buddy_cache; + long s_blocks_reserved; + spinlock_t s_reserve_lock; + spinlock_t s_md_lock; + tid_t s_last_transaction; + unsigned short *s_mb_offsets; + unsigned int *s_mb_maxs; + + /* tunables */ + unsigned long s_stripe; + unsigned int s_mb_stream_request; + unsigned int s_mb_max_to_scan; + unsigned int s_mb_min_to_scan; + unsigned int s_mb_stats; + unsigned int s_mb_order2_reqs; + unsigned int s_mb_group_prealloc; + /* where last allocation was done - for stream allocation */ + unsigned long s_mb_last_group; + unsigned long s_mb_last_start; + + /* history to debug policy */ + struct ext4_mb_history *s_mb_history; + int s_mb_history_cur; + int s_mb_history_max; + int s_mb_history_num; + spinlock_t s_mb_history_lock; + int s_mb_history_filter; + + /* stats for buddy allocator */ + spinlock_t s_mb_pa_lock; + atomic_t s_bal_reqs; /* number of reqs with len > 1 */ + atomic_t s_bal_success; /* we found long enough chunks */ + atomic_t s_bal_allocated; /* in blocks */ + atomic_t s_bal_ex_scanned; /* total extents scanned */ + atomic_t s_bal_goals; /* goal hits */ + atomic_t s_bal_breaks; /* too long searches */ + atomic_t s_bal_2orders; /* 2^order hits */ + spinlock_t s_bal_lock; + unsigned long s_mb_buddies_generated; + unsigned long long s_mb_generation_time; + atomic_t s_mb_lost_chunks; + atomic_t s_mb_preallocated; + atomic_t s_mb_discarded; + + /* locality groups */ + struct ext4_locality_group *s_locality_groups; + + /* for write statistics */ + unsigned long s_sectors_written_start; + u64 s_kbytes_written; + + unsigned int s_log_groups_per_flex; + struct flex_groups *s_flex_groups; +}; + static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) { return sb->s_fs_info; @@ -704,7 +970,6 @@ static inline struct timespec ext4_current_time(struct inode *inode) current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; } - static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { return ino == EXT4_ROOT_INO || @@ -1014,6 +1279,14 @@ extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, struct buffer_head ** bh); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, + ext4_group_t block_group); +extern unsigned ext4_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, + ext4_group_t group, + struct ext4_group_desc *desc); +#define ext4_free_blocks_after_init(sb, group, desc) \ + ext4_init_block_bitmap(sb, NULL, group, desc) /* dir.c */ extern int ext4_check_dir_entry(const char *, struct inode *, @@ -1038,6 +1311,11 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); extern unsigned long ext4_count_free_inodes(struct super_block *); extern unsigned long ext4_count_dirs(struct super_block *); extern void ext4_check_inodes_bitmap(struct super_block *); +extern unsigned ext4_init_inode_bitmap(struct super_block *sb, + struct buffer_head *bh, + ext4_group_t group, + struct ext4_group_desc *desc); +extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); /* mballoc.c */ extern long ext4_mb_stats; @@ -1123,6 +1401,8 @@ extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void ext4_warning(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); +extern void ext4_msg(struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); extern void ext4_grp_locked_error(struct super_block *, ext4_group_t, const char *, const char *, ...) __attribute__ ((format (printf, 4, 5))); @@ -1161,6 +1441,10 @@ extern void ext4_used_dirs_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); extern void ext4_itable_unused_set(struct super_block *sb, struct ext4_group_desc *bg, __u32 count); +extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); +extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, + struct ext4_group_desc *gdp); static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { @@ -1228,6 +1512,18 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, return grp_info[indexv][indexh]; } +/* + * Reading s_groups_count requires using smp_rmb() afterwards. See + * the locking protocol documented in the comments of ext4_group_add() + * in resize.c + */ +static inline ext4_group_t ext4_get_groups_count(struct super_block *sb) +{ + ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + + smp_rmb(); + return ngroups; +} static inline ext4_group_t ext4_flex_group(struct ext4_sb_info *sbi, ext4_group_t block_group) @@ -1283,33 +1579,25 @@ struct ext4_group_info { }; #define EXT4_GROUP_INFO_NEED_INIT_BIT 0 -#define EXT4_GROUP_INFO_LOCKED_BIT 1 #define EXT4_MB_GRP_NEED_INIT(grp) \ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state))) -static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) +static inline spinlock_t *ext4_group_lock_ptr(struct super_block *sb, + ext4_group_t group) { - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); + return bgl_lock_ptr(EXT4_SB(sb)->s_blockgroup_lock, group); } -static inline void ext4_unlock_group(struct super_block *sb, - ext4_group_t group) +static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state)); + spin_lock(ext4_group_lock_ptr(sb, group)); } -static inline int ext4_is_group_locked(struct super_block *sb, +static inline void ext4_unlock_group(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info *grinfo = ext4_get_group_info(sb, group); - - return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT, - &(grinfo->bb_state)); + spin_unlock(ext4_group_lock_ptr(sb, group)); } /* @@ -1326,11 +1614,21 @@ extern const struct file_operations ext4_file_operations; /* namei.c */ extern const struct inode_operations ext4_dir_inode_operations; extern const struct inode_operations ext4_special_inode_operations; +extern struct dentry *ext4_get_parent(struct dentry *child); /* symlink.c */ extern const struct inode_operations ext4_symlink_inode_operations; extern const struct inode_operations ext4_fast_symlink_inode_operations; +/* block_validity */ +extern void ext4_release_system_zone(struct super_block *sb); +extern int ext4_setup_system_zone(struct super_block *sb); +extern int __init init_ext4_system_zone(void); +extern void exit_ext4_system_zone(void); +extern int ext4_data_block_valid(struct ext4_sb_info *sbi, + ext4_fsblk_t start_blk, + unsigned int count); + /* extents.c */ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); @@ -1338,17 +1636,15 @@ extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk); extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, - struct buffer_head *bh_result, - int create, int extend_disksize); + struct buffer_head *bh_result, int flags); extern void ext4_ext_truncate(struct inode *); extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len); -extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, - sector_t block, unsigned int max_blocks, - struct buffer_head *bh, int create, - int extend_disksize, int flag); +extern int ext4_get_blocks(handle_t *handle, struct inode *inode, + sector_t block, unsigned int max_blocks, + struct buffer_head *bh, int flags); extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h deleted file mode 100644 index 4ce2187123a..00000000000 --- a/fs/ext4/ext4_i.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * ext4_i.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs_i.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _EXT4_I -#define _EXT4_I - -#include <linux/rwsem.h> -#include <linux/rbtree.h> -#include <linux/seqlock.h> -#include <linux/mutex.h> - -/* data type for block offset of block group */ -typedef int ext4_grpblk_t; - -/* data type for filesystem-wide blocks number */ -typedef unsigned long long ext4_fsblk_t; - -/* data type for file logical block number */ -typedef __u32 ext4_lblk_t; - -/* data type for block group number */ -typedef unsigned int ext4_group_t; - -/* - * storage for cached extent - */ -struct ext4_ext_cache { - ext4_fsblk_t ec_start; - ext4_lblk_t ec_block; - __u32 ec_len; /* must be 32bit to return holes */ - __u32 ec_type; -}; - -/* - * fourth extended file system inode data in memory - */ -struct ext4_inode_info { - __le32 i_data[15]; /* unconverted */ - __u32 i_flags; - ext4_fsblk_t i_file_acl; - __u32 i_dtime; - - /* - * i_block_group is the number of the block group which contains - * this file's inode. Constant across the lifetime of the inode, - * it is ued for making block allocation decisions - we try to - * place a file's data blocks near its inode block, and new inodes - * near to their parent directory's inode. - */ - ext4_group_t i_block_group; - __u32 i_state; /* Dynamic state flags for ext4 */ - - ext4_lblk_t i_dir_start_lookup; -#ifdef CONFIG_EXT4_FS_XATTR - /* - * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention - * between readers of EAs and writers of regular file data, so - * instead we synchronize on xattr_sem when reading or changing - * EAs. - */ - struct rw_semaphore xattr_sem; -#endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL - struct posix_acl *i_acl; - struct posix_acl *i_default_acl; -#endif - - struct list_head i_orphan; /* unlinked but open inodes */ - - /* - * i_disksize keeps track of what the inode size is ON DISK, not - * in memory. During truncate, i_size is set to the new size by - * the VFS prior to calling ext4_truncate(), but the filesystem won't - * set i_disksize to 0 until the truncate is actually under way. - * - * The intent is that i_disksize always represents the blocks which - * are used by this file. This allows recovery to restart truncate - * on orphans if we crash during truncate. We actually write i_disksize - * into the on-disk inode when writing inodes out, instead of i_size. - * - * The only time when i_disksize and i_size may be different is when - * a truncate is in progress. The only things which change i_disksize - * are ext4_get_block (growth) and ext4_truncate (shrinkth). - */ - loff_t i_disksize; - - /* - * i_data_sem is for serialising ext4_truncate() against - * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's - * data tree are chopped off during truncate. We can't do that in - * ext4 because whenever we perform intermediate commits during - * truncate, the inode and all the metadata blocks *must* be in a - * consistent state which allows truncation of the orphans to restart - * during recovery. Hence we must fix the get_block-vs-truncate race - * by other means, so we have i_data_sem. - */ - struct rw_semaphore i_data_sem; - struct inode vfs_inode; - struct jbd2_inode jinode; - - struct ext4_ext_cache i_cached_extent; - /* - * File creation time. Its function is same as that of - * struct timespec i_{a,c,m}time in the generic inode. - */ - struct timespec i_crtime; - - /* mballoc */ - struct list_head i_prealloc_list; - spinlock_t i_prealloc_lock; - - /* ialloc */ - ext4_group_t i_last_alloc_group; - - /* allocation reservation info for delalloc */ - unsigned int i_reserved_data_blocks; - unsigned int i_reserved_meta_blocks; - unsigned int i_allocated_meta_blocks; - unsigned short i_delalloc_reserved_flag; - - /* on-disk additional length */ - __u16 i_extra_isize; - - spinlock_t i_block_reservation_lock; -}; - -#endif /* _EXT4_I */ diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h deleted file mode 100644 index 57b71fefbcc..00000000000 --- a/fs/ext4/ext4_sb.h +++ /dev/null @@ -1,161 +0,0 @@ -/* - * ext4_sb.h - * - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * - * from - * - * linux/include/linux/minix_fs_sb.h - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ - -#ifndef _EXT4_SB -#define _EXT4_SB - -#ifdef __KERNEL__ -#include <linux/timer.h> -#include <linux/wait.h> -#include <linux/blockgroup_lock.h> -#include <linux/percpu_counter.h> -#endif -#include <linux/rbtree.h> - -/* - * fourth extended-fs super-block data in memory - */ -struct ext4_sb_info { - unsigned long s_desc_size; /* Size of a group descriptor in bytes */ - unsigned long s_inodes_per_block;/* Number of inodes per block */ - unsigned long s_blocks_per_group;/* Number of blocks in a group */ - unsigned long s_inodes_per_group;/* Number of inodes in a group */ - unsigned long s_itb_per_group; /* Number of inode table blocks per group */ - unsigned long s_gdb_count; /* Number of group descriptor blocks */ - unsigned long s_desc_per_block; /* Number of group descriptors per block */ - ext4_group_t s_groups_count; /* Number of groups in the fs */ - unsigned long s_overhead_last; /* Last calculated overhead */ - unsigned long s_blocks_last; /* Last seen block count */ - loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ - struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; - unsigned long s_mount_opt; - ext4_fsblk_t s_sb_block; - uid_t s_resuid; - gid_t s_resgid; - unsigned short s_mount_state; - unsigned short s_pad; - int s_addr_per_block_bits; - int s_desc_per_block_bits; - int s_inode_size; - int s_first_ino; - unsigned int s_inode_readahead_blks; - spinlock_t s_next_gen_lock; - u32 s_next_generation; - u32 s_hash_seed[4]; - int s_def_hash_version; - int s_hash_unsigned; /* 3 if hash should be signed, 0 if not */ - struct percpu_counter s_freeblocks_counter; - struct percpu_counter s_freeinodes_counter; - struct percpu_counter s_dirs_counter; - struct percpu_counter s_dirtyblocks_counter; - struct blockgroup_lock *s_blockgroup_lock; - struct proc_dir_entry *s_proc; - struct kobject s_kobj; - struct completion s_kobj_unregister; - - /* Journaling */ - struct inode *s_journal_inode; - struct journal_s *s_journal; - struct list_head s_orphan; - unsigned long s_commit_interval; - u32 s_max_batch_time; - u32 s_min_batch_time; - struct block_device *journal_bdev; -#ifdef CONFIG_JBD2_DEBUG - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ -#endif -#ifdef CONFIG_QUOTA - char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ - int s_jquota_fmt; /* Format of quota to use */ -#endif - unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ - -#ifdef EXTENTS_STATS - /* ext4 extents stats */ - unsigned long s_ext_min; - unsigned long s_ext_max; - unsigned long s_depth_max; - spinlock_t s_ext_stats_lock; - unsigned long s_ext_blocks; - unsigned long s_ext_extents; -#endif - - /* for buddy allocator */ - struct ext4_group_info ***s_group_info; - struct inode *s_buddy_cache; - long s_blocks_reserved; - spinlock_t s_reserve_lock; - spinlock_t s_md_lock; - tid_t s_last_transaction; - unsigned short *s_mb_offsets; - unsigned int *s_mb_maxs; - - /* tunables */ - unsigned long s_stripe; - unsigned int s_mb_stream_request; - unsigned int s_mb_max_to_scan; - unsigned int s_mb_min_to_scan; - unsigned int s_mb_stats; - unsigned int s_mb_order2_reqs; - unsigned int s_mb_group_prealloc; - /* where last allocation was done - for stream allocation */ - unsigned long s_mb_last_group; - unsigned long s_mb_last_start; - - /* history to debug policy */ - struct ext4_mb_history *s_mb_history; - int s_mb_history_cur; - int s_mb_history_max; - int s_mb_history_num; - spinlock_t s_mb_history_lock; - int s_mb_history_filter; - - /* stats for buddy allocator */ - spinlock_t s_mb_pa_lock; - atomic_t s_bal_reqs; /* number of reqs with len > 1 */ - atomic_t s_bal_success; /* we found long enough chunks */ - atomic_t s_bal_allocated; /* in blocks */ - atomic_t s_bal_ex_scanned; /* total extents scanned */ - atomic_t s_bal_goals; /* goal hits */ - atomic_t s_bal_breaks; /* too long searches */ - atomic_t s_bal_2orders; /* 2^order hits */ - spinlock_t s_bal_lock; - unsigned long s_mb_buddies_generated; - unsigned long long s_mb_generation_time; - atomic_t s_mb_lost_chunks; - atomic_t s_mb_preallocated; - atomic_t s_mb_discarded; - - /* locality groups */ - struct ext4_locality_group *s_locality_groups; - - /* for write statistics */ - unsigned long s_sectors_written_start; - u64 s_kbytes_written; - - unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; -}; - -static inline spinlock_t * -sb_bgl_lock(struct ext4_sb_info *sbi, unsigned int block_group) -{ - return bgl_lock_ptr(sbi->s_blockgroup_lock, block_group); -} - -#endif /* _EXT4_SB */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e3a55eb8b26..2593f748c3a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -326,32 +326,18 @@ ext4_ext_max_entries(struct inode *inode, int depth) static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) { - ext4_fsblk_t block = ext_pblock(ext), valid_block; + ext4_fsblk_t block = ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; - valid_block = le32_to_cpu(es->s_first_data_block) + - EXT4_SB(inode->i_sb)->s_gdb_count; - if (unlikely(block <= valid_block || - ((block + len) > ext4_blocks_count(es)))) - return 0; - else - return 1; + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } static int ext4_valid_extent_idx(struct inode *inode, struct ext4_extent_idx *ext_idx) { - ext4_fsblk_t block = idx_pblock(ext_idx), valid_block; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + ext4_fsblk_t block = idx_pblock(ext_idx); - valid_block = le32_to_cpu(es->s_first_data_block) + - EXT4_SB(inode->i_sb)->s_gdb_count; - if (unlikely(block <= valid_block || - (block >= ext4_blocks_count(es)))) - return 0; - else - return 1; + return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); } static int ext4_valid_extent_entries(struct inode *inode, @@ -2097,12 +2083,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ex = EXT_LAST_EXTENT(eh); ex_ee_block = le32_to_cpu(ex->ee_block); - if (ext4_ext_is_uninitialized(ex)) - uninitialized = 1; ex_ee_len = ext4_ext_get_actual_len(ex); while (ex >= EXT_FIRST_EXTENT(eh) && ex_ee_block + ex_ee_len > start) { + + if (ext4_ext_is_uninitialized(ex)) + uninitialized = 1; + else + uninitialized = 0; + ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); path[depth].p_ext = ex; @@ -2784,7 +2774,7 @@ fix_extent_len: int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, struct buffer_head *bh_result, - int create, int extend_disksize) + int flags) { struct ext4_ext_path *path = NULL; struct ext4_extent_header *eh; @@ -2793,7 +2783,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, int err = 0, depth, ret, cache_type; unsigned int allocated = 0; struct ext4_allocation_request ar; - loff_t disksize; __clear_bit(BH_New, &bh_result->b_state); ext_debug("blocks %u/%u requested for inode %u\n", @@ -2803,7 +2792,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, cache_type = ext4_ext_in_cache(inode, iblock, &newex); if (cache_type) { if (cache_type == EXT4_EXT_CACHE_GAP) { - if (!create) { + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* * block isn't allocated yet and * user doesn't want to allocate it @@ -2869,9 +2858,11 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, EXT4_EXT_CACHE_EXTENT); goto out; } - if (create == EXT4_CREATE_UNINITIALIZED_EXT) + if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) goto out; - if (!create) { + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { + if (allocated > max_blocks) + allocated = max_blocks; /* * We have blocks reserved already. We * return allocated blocks so that delalloc @@ -2879,8 +2870,6 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * the buffer head will be unmapped so that * a read from the block returns 0s. */ - if (allocated > max_blocks) - allocated = max_blocks; set_buffer_unwritten(bh_result); bh_result->b_bdev = inode->i_sb->s_bdev; bh_result->b_blocknr = newblock; @@ -2903,7 +2892,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * requested block isn't allocated yet; * we couldn't try to create block if create flag is zero */ - if (!create) { + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) { /* * put just found gap into cache to speed up * subsequent requests @@ -2932,10 +2921,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, * EXT_UNINIT_MAX_LEN. */ if (max_blocks > EXT_INIT_MAX_LEN && - create != EXT4_CREATE_UNINITIALIZED_EXT) + !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) max_blocks = EXT_INIT_MAX_LEN; else if (max_blocks > EXT_UNINIT_MAX_LEN && - create == EXT4_CREATE_UNINITIALIZED_EXT) + (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) max_blocks = EXT_UNINIT_MAX_LEN; /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */ @@ -2966,7 +2955,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* try to insert new extent into found leaf and return */ ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(ar.len); - if (create == EXT4_CREATE_UNINITIALIZED_EXT) /* Mark uninitialized */ + if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */ ext4_ext_mark_uninitialized(&newex); err = ext4_ext_insert_extent(handle, inode, path, &newex); if (err) { @@ -2983,18 +2972,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); outnew: - if (extend_disksize) { - disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits; - if (disksize > i_size_read(inode)) - disksize = i_size_read(inode); - if (disksize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = disksize; - } - set_buffer_new(bh_result); /* Cache only when it is _not_ an uninitialized extent */ - if (create != EXT4_CREATE_UNINITIALIZED_EXT) + if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) ext4_ext_put_in_cache(inode, iblock, allocated, newblock, EXT4_EXT_CACHE_EXTENT); out: @@ -3150,9 +3131,10 @@ retry: ret = PTR_ERR(handle); break; } - ret = ext4_get_blocks_wrap(handle, inode, block, - max_blocks, &map_bh, - EXT4_CREATE_UNINITIALIZED_EXT, 0, 0); + map_bh.b_state = 0; + ret = ext4_get_blocks(handle, inode, block, + max_blocks, &map_bh, + EXT4_GET_BLOCKS_CREATE_UNINIT_EXT); if (ret <= 0) { #ifdef EXT4FS_DEBUG WARN_ON(ret <= 0); @@ -3195,7 +3177,7 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, void *data) { struct fiemap_extent_info *fieinfo = data; - unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; + unsigned char blksize_bits = inode->i_sb->s_blocksize_bits; __u64 logical; __u64 physical; __u64 length; @@ -3242,9 +3224,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, * * XXX this might miss a single-block extent at EXT_MAX_BLOCK */ - if (logical + length - 1 == EXT_MAX_BLOCK || - ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) + if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK || + newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) { + loff_t size = i_size_read(inode); + loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb); + flags |= FIEMAP_EXTENT_LAST; + if ((flags & FIEMAP_EXTENT_DELALLOC) && + logical+length > size) + length = (size - logical + bs - 1) & ~(bs-1); + } error = fiemap_fill_next_extent(fieinfo, logical, physical, length, flags); @@ -3318,10 +3307,10 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, * Walk the extent tree gathering extent information. * ext4_ext_fiemap_cb will push extents back to user. */ - down_write(&EXT4_I(inode)->i_data_sem); + down_read(&EXT4_I(inode)->i_data_sem); error = ext4_ext_walk_space(inode, start_blk, len_blks, ext4_ext_fiemap_cb, fieinfo); - up_write(&EXT4_I(inode)->i_data_sem); + up_read(&EXT4_I(inode)->i_data_sem); } return error; diff --git a/fs/ext4/group.h b/fs/ext4/group.h deleted file mode 100644 index c2c0a8d06d0..00000000000 --- a/fs/ext4/group.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * linux/fs/ext4/group.h - * - * Copyright (C) 2007 Cluster File Systems, Inc - * - * Author: Andreas Dilger <adilger@clusterfs.com> - */ - -#ifndef _LINUX_EXT4_GROUP_H -#define _LINUX_EXT4_GROUP_H - -extern __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 group, - struct ext4_group_desc *gdp); -extern int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 group, - struct ext4_group_desc *gdp); -struct buffer_head *ext4_read_block_bitmap(struct super_block *sb, - ext4_group_t block_group); -extern unsigned ext4_init_block_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, - struct ext4_group_desc *desc); -#define ext4_free_blocks_after_init(sb, group, desc) \ - ext4_init_block_bitmap(sb, NULL, group, desc) -extern unsigned ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t group, - struct ext4_group_desc *desc); -extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap); -#endif /* _LINUX_EXT4_GROUP_H */ diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f18e0a08a6b..3743bd849bc 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -27,7 +27,6 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" -#include "group.h" /* * ialloc.c contains the inodes allocation and deallocation routines @@ -123,16 +122,16 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) unlock_buffer(bh); return bh; } - spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { ext4_init_inode_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); unlock_buffer(bh); return bh; } - spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + ext4_unlock_group(sb, block_group); if (buffer_uptodate(bh)) { /* * if not uninit if bh is uptodate, @@ -247,9 +246,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) goto error_return; /* Ok, now we can actually update the inode bitmaps.. */ - spin_lock(sb_bgl_lock(sbi, block_group)); - cleared = ext4_clear_bit(bit, bitmap_bh->b_data); - spin_unlock(sb_bgl_lock(sbi, block_group)); + cleared = ext4_clear_bit_atomic(ext4_group_lock_ptr(sb, block_group), + bit, bitmap_bh->b_data); if (!cleared) ext4_error(sb, "ext4_free_inode", "bit already cleared for inode %lu", ino); @@ -261,7 +259,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) if (fatal) goto error_return; if (gdp) { - spin_lock(sb_bgl_lock(sbi, block_group)); + ext4_lock_group(sb, block_group); count = ext4_free_inodes_count(sb, gdp) + 1; ext4_free_inodes_set(sb, gdp, count); if (is_directory) { @@ -277,7 +275,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) } gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); + ext4_unlock_group(sb, block_group); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) percpu_counter_dec(&sbi->s_dirs_counter); @@ -316,7 +314,7 @@ error_return: static int find_group_dir(struct super_block *sb, struct inode *parent, ext4_group_t *best_group) { - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); unsigned int freei, avefreei; struct ext4_group_desc *desc, *best_desc = NULL; ext4_group_t group; @@ -349,11 +347,10 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *desc; - struct buffer_head *bh; struct flex_groups *flex_group = sbi->s_flex_groups; ext4_group_t parent_group = EXT4_I(parent)->i_block_group; ext4_group_t parent_fbg_group = ext4_flex_group(sbi, parent_group); - ext4_group_t ngroups = sbi->s_groups_count; + ext4_group_t ngroups = ext4_get_groups_count(sb); int flex_size = ext4_flex_bg_size(sbi); ext4_group_t best_flex = parent_fbg_group; int blocks_per_flex = sbi->s_blocks_per_group * flex_size; @@ -362,7 +359,7 @@ static int find_group_flex(struct super_block *sb, struct inode *parent, ext4_group_t n_fbg_groups; ext4_group_t i; - n_fbg_groups = (sbi->s_groups_count + flex_size - 1) >> + n_fbg_groups = (ngroups + flex_size - 1) >> sbi->s_log_groups_per_flex; find_close_to_parent: @@ -404,7 +401,7 @@ find_close_to_parent: found_flexbg: for (i = best_flex * flex_size; i < ngroups && i < (best_flex + 1) * flex_size; i++) { - desc = ext4_get_group_desc(sb, i, &bh); + desc = ext4_get_group_desc(sb, i, NULL); if (ext4_free_inodes_count(sb, desc)) { *best_group = i; goto out; @@ -478,20 +475,21 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_group_t ngroups = sbi->s_groups_count; + ext4_group_t real_ngroups = ext4_get_groups_count(sb); int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; ext4_fsblk_t freeb, avefreeb; unsigned int ndirs; int max_dirs, min_inodes; ext4_grpblk_t min_blocks; - ext4_group_t i, grp, g; + ext4_group_t i, grp, g, ngroups; struct ext4_group_desc *desc; struct orlov_stats stats; int flex_size = ext4_flex_bg_size(sbi); + ngroups = real_ngroups; if (flex_size > 1) { - ngroups = (ngroups + flex_size - 1) >> + ngroups = (real_ngroups + flex_size - 1) >> sbi->s_log_groups_per_flex; parent_group >>= sbi->s_log_groups_per_flex; } @@ -543,7 +541,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, */ grp *= flex_size; for (i = 0; i < flex_size; i++) { - if (grp+i >= sbi->s_groups_count) + if (grp+i >= real_ngroups) break; desc = ext4_get_group_desc(sb, grp+i, NULL); if (desc && ext4_free_inodes_count(sb, desc)) { @@ -583,7 +581,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, } fallback: - ngroups = sbi->s_groups_count; + ngroups = real_ngroups; avefreei = freei / ngroups; fallback_retry: parent_group = EXT4_I(parent)->i_block_group; @@ -613,9 +611,8 @@ static int find_group_other(struct super_block *sb, struct inode *parent, ext4_group_t *group, int mode) { ext4_group_t parent_group = EXT4_I(parent)->i_block_group; - ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; + ext4_group_t i, last, ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; - ext4_group_t i, last; int flex_size = ext4_flex_bg_size(EXT4_SB(sb)); /* @@ -708,10 +705,10 @@ static int find_group_other(struct super_block *sb, struct inode *parent, /* * claim the inode from the inode bitmap. If the group - * is uninit we need to take the groups's sb_bgl_lock + * is uninit we need to take the groups's ext4_group_lock * and clear the uninit flag. The inode bitmap update * and group desc uninit flag clear should be done - * after holding sb_bgl_lock so that ext4_read_inode_bitmap + * after holding ext4_group_lock so that ext4_read_inode_bitmap * doesn't race with the ext4_claim_inode */ static int ext4_claim_inode(struct super_block *sb, @@ -722,7 +719,7 @@ static int ext4_claim_inode(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL); - spin_lock(sb_bgl_lock(sbi, group)); + ext4_lock_group(sb, group); if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { /* not a free inode */ retval = 1; @@ -731,7 +728,7 @@ static int ext4_claim_inode(struct super_block *sb, ino++; if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || ino > EXT4_INODES_PER_GROUP(sb)) { - spin_unlock(sb_bgl_lock(sbi, group)); + ext4_unlock_group(sb, group); ext4_error(sb, __func__, "reserved inode or inode > inodes count - " "block_group = %u, inode=%lu", group, @@ -780,7 +777,7 @@ static int ext4_claim_inode(struct super_block *sb, } gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); err_ret: - spin_unlock(sb_bgl_lock(sbi, group)); + ext4_unlock_group(sb, group); return retval; } @@ -799,11 +796,10 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) struct super_block *sb; struct buffer_head *inode_bitmap_bh = NULL; struct buffer_head *group_desc_bh; - ext4_group_t group = 0; + ext4_group_t ngroups, group = 0; unsigned long ino = 0; struct inode *inode; struct ext4_group_desc *gdp = NULL; - struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; int ret2, err = 0; @@ -818,15 +814,14 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) return ERR_PTR(-EPERM); sb = dir->i_sb; + ngroups = ext4_get_groups_count(sb); trace_mark(ext4_request_inode, "dev %s dir %lu mode %d", sb->s_id, dir->i_ino, mode); inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); ei = EXT4_I(inode); - sbi = EXT4_SB(sb); - es = sbi->s_es; if (sbi->s_log_groups_per_flex && test_opt(sb, OLDALLOC)) { ret2 = find_group_flex(sb, dir, &group); @@ -856,7 +851,7 @@ got_group: if (ret2 == -1) goto out; - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { err = -EIO; gdp = ext4_get_group_desc(sb, group, &group_desc_bh); @@ -917,7 +912,7 @@ repeat_in_this_group: * group descriptor metadata has not yet been updated. * So we just go onto the next blockgroup. */ - if (++group == sbi->s_groups_count) + if (++group == ngroups) group = 0; } err = -ENOSPC; @@ -938,7 +933,7 @@ got: } free = 0; - spin_lock(sb_bgl_lock(sbi, group)); + ext4_lock_group(sb, group); /* recheck and clear flag under lock if we still need to */ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { free = ext4_free_blocks_after_init(sb, group, gdp); @@ -947,7 +942,7 @@ got: gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); } - spin_unlock(sb_bgl_lock(sbi, group)); + ext4_unlock_group(sb, group); /* Don't need to dirty bitmap block if we didn't change it */ if (free) { @@ -1158,7 +1153,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) { unsigned long desc_count; struct ext4_group_desc *gdp; - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); #ifdef EXT4FS_DEBUG struct ext4_super_block *es; unsigned long bitmap_count, x; @@ -1168,7 +1163,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1190,7 +1185,7 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) return desc_count; #else desc_count = 0; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1205,9 +1200,9 @@ unsigned long ext4_count_free_inodes(struct super_block *sb) unsigned long ext4_count_dirs(struct super_block * sb) { unsigned long count = 0; - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a9ffd528dd..875db944b22 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -372,20 +372,21 @@ static int ext4_block_to_path(struct inode *inode, } static int __ext4_check_blockref(const char *function, struct inode *inode, - __le32 *p, unsigned int max) { - - unsigned int maxblocks = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es); + __le32 *p, unsigned int max) +{ __le32 *bref = p; + unsigned int blk; + while (bref < p+max) { - if (unlikely(le32_to_cpu(*bref) >= maxblocks)) { + blk = le32_to_cpu(*bref++); + if (blk && + unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), + blk, 1))) { ext4_error(inode->i_sb, function, - "block reference %u >= max (%u) " - "in inode #%lu, offset=%d", - le32_to_cpu(*bref), maxblocks, - inode->i_ino, (int)(bref-p)); + "invalid block reference %u " + "in inode #%lu", blk, inode->i_ino); return -EIO; } - bref++; } return 0; } @@ -892,6 +893,10 @@ err_out: } /* + * The ext4_ind_get_blocks() function handles non-extents inodes + * (i.e., using the traditional indirect/double-indirect i_blocks + * scheme) for ext4_get_blocks(). + * * Allocation strategy is simple: if we have to allocate something, we will * have to go the whole way to leaf. So let's do it before attaching anything * to tree, set linkage between the newborn blocks, write them if sync is @@ -909,15 +914,16 @@ err_out: * return = 0, if plain lookup failed. * return < 0, error case. * - * - * Need to be called with - * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system block - * (ie, create is zero). Otherwise down_write(&EXT4_I(inode)->i_data_sem) + * The ext4_ind_get_blocks() function should be called with + * down_write(&EXT4_I(inode)->i_data_sem) if allocating filesystem + * blocks (i.e., flags has EXT4_GET_BLOCKS_CREATE set) or + * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system + * blocks. */ -static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, +static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int maxblocks, struct buffer_head *bh_result, - int create, int extend_disksize) + int flags) { int err = -EIO; ext4_lblk_t offsets[4]; @@ -927,14 +933,11 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, int indirect_blks; int blocks_to_boundary = 0; int depth; - struct ext4_inode_info *ei = EXT4_I(inode); int count = 0; ext4_fsblk_t first_block = 0; - loff_t disksize; - J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); - J_ASSERT(handle != NULL || create == 0); + J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); depth = ext4_block_to_path(inode, iblock, offsets, &blocks_to_boundary); @@ -963,7 +966,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, } /* Next simple case - plain lookup or failed read of indirect block */ - if (!create || err == -EIO) + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO) goto cleanup; /* @@ -997,19 +1000,7 @@ static int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, if (!err) err = ext4_splice_branch(handle, inode, iblock, partial, indirect_blks, count); - /* - * i_disksize growing is protected by i_data_sem. Don't forget to - * protect it if you're about to implement concurrent - * ext4_get_block() -bzzz - */ - if (!err && extend_disksize) { - disksize = ((loff_t) iblock + count) << inode->i_blkbits; - if (disksize > i_size_read(inode)) - disksize = i_size_read(inode); - if (disksize > ei->i_disksize) - ei->i_disksize = disksize; - } - if (err) + else goto cleanup; set_buffer_new(bh_result); @@ -1120,8 +1111,23 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) ext4_discard_preallocations(inode); } +static int check_block_validity(struct inode *inode, sector_t logical, + sector_t phys, int len) +{ + if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), phys, len)) { + ext4_error(inode->i_sb, "check_block_validity", + "inode #%lu logical block %llu mapped to %llu " + "(size %d)", inode->i_ino, + (unsigned long long) logical, + (unsigned long long) phys, len); + WARN_ON(1); + return -EIO; + } + return 0; +} + /* - * The ext4_get_blocks_wrap() function try to look up the requested blocks, + * The ext4_get_blocks() function tries to look up the requested blocks, * and returns if the blocks are already mapped. * * Otherwise it takes the write lock of the i_data_sem and allocate blocks @@ -1129,7 +1135,7 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * mapped. * * If file type is extents based, it will call ext4_ext_get_blocks(), - * Otherwise, call with ext4_get_blocks_handle() to handle indirect mapping + * Otherwise, call with ext4_ind_get_blocks() to handle indirect mapping * based files * * On success, it returns the number of blocks being mapped or allocate. @@ -1142,9 +1148,9 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) * * It returns the error in case of allocation failure. */ -int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, - unsigned int max_blocks, struct buffer_head *bh, - int create, int extend_disksize, int flag) +int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, + unsigned int max_blocks, struct buffer_head *bh, + int flags) { int retval; @@ -1152,21 +1158,28 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, clear_buffer_unwritten(bh); /* - * Try to see if we can get the block without requesting - * for new file system block. + * Try to see if we can get the block without requesting a new + * file system block. */ down_read((&EXT4_I(inode)->i_data_sem)); if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, 0, 0); + bh, 0); } else { - retval = ext4_get_blocks_handle(handle, - inode, block, max_blocks, bh, 0, 0); + retval = ext4_ind_get_blocks(handle, inode, block, max_blocks, + bh, 0); } up_read((&EXT4_I(inode)->i_data_sem)); + if (retval > 0 && buffer_mapped(bh)) { + int ret = check_block_validity(inode, block, + bh->b_blocknr, retval); + if (ret != 0) + return ret; + } + /* If it is only a block(s) look up */ - if (!create) + if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) return retval; /* @@ -1205,7 +1218,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, * let the underlying get_block() function know to * avoid double accounting */ - if (flag) + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 1; /* * We need to check for EXT4 here because migrate @@ -1213,10 +1226,10 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, */ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { retval = ext4_ext_get_blocks(handle, inode, block, max_blocks, - bh, create, extend_disksize); + bh, flags); } else { - retval = ext4_get_blocks_handle(handle, inode, block, - max_blocks, bh, create, extend_disksize); + retval = ext4_ind_get_blocks(handle, inode, block, + max_blocks, bh, flags); if (retval > 0 && buffer_new(bh)) { /* @@ -1229,18 +1242,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, } } - if (flag) { + if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 0; - /* - * Update reserved blocks/metadata blocks - * after successful block allocation - * which were deferred till now - */ - if ((retval > 0) && buffer_delay(bh)) - ext4_da_update_reserve_space(inode, retval); - } + + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) + ext4_da_update_reserve_space(inode, retval); up_write((&EXT4_I(inode)->i_data_sem)); + if (retval > 0 && buffer_mapped(bh)) { + int ret = check_block_validity(inode, block, + bh->b_blocknr, retval); + if (ret != 0) + return ret; + } return retval; } @@ -1268,8 +1286,8 @@ int ext4_get_block(struct inode *inode, sector_t iblock, started = 1; } - ret = ext4_get_blocks_wrap(handle, inode, iblock, - max_blocks, bh_result, create, 0, 0); + ret = ext4_get_blocks(handle, inode, iblock, max_blocks, bh_result, + create ? EXT4_GET_BLOCKS_CREATE : 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -1288,17 +1306,19 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, { struct buffer_head dummy; int fatal = 0, err; + int flags = 0; J_ASSERT(handle != NULL || create == 0); dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); - err = ext4_get_blocks_wrap(handle, inode, block, 1, - &dummy, create, 1, 0); + if (create) + flags |= EXT4_GET_BLOCKS_CREATE; + err = ext4_get_blocks(handle, inode, block, 1, &dummy, flags); /* - * ext4_get_blocks_handle() returns number of blocks - * mapped. 0 in case of a HOLE. + * ext4_get_blocks() returns number of blocks mapped. 0 in + * case of a HOLE. */ if (err > 0) { if (err > 1) @@ -1439,7 +1459,7 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct inode *inode = mapping->host; - int ret, needed_blocks = ext4_writepage_trans_blocks(inode); + int ret, needed_blocks; handle_t *handle; int retries = 0; struct page *page; @@ -1450,6 +1470,11 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, "dev %s ino %lu pos %llu len %u flags %u", inode->i_sb->s_id, inode->i_ino, (unsigned long long) pos, len, flags); + /* + * Reserve one block more for addition to orphan list in case + * we allocate blocks but write fails for some reason + */ + needed_blocks = ext4_writepage_trans_blocks(inode) + 1; index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1483,15 +1508,30 @@ retry: if (ret) { unlock_page(page); - ext4_journal_stop(handle); page_cache_release(page); /* * block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold i_mutex. + * + * Add inode to orphan list in case we crash before + * truncate finishes */ if (pos + len > inode->i_size) + ext4_orphan_add(handle, inode); + + ext4_journal_stop(handle); + if (pos + len > inode->i_size) { vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might + * still be on the orphan list; we need to + * make sure the inode is removed from the + * orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -1509,6 +1549,52 @@ static int write_end_fn(handle_t *handle, struct buffer_head *bh) return ext4_handle_dirty_metadata(handle, NULL, bh); } +static int ext4_generic_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + int i_size_changed = 0; + struct inode *inode = mapping->host; + handle_t *handle = ext4_journal_current_handle(); + + copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); + + /* + * No need to use i_size_read() here, the i_size + * cannot change under us because we hold i_mutex. + * + * But it's important to update i_size while still holding page lock: + * page writeout could otherwise come in and zero beyond i_size. + */ + if (pos + copied > inode->i_size) { + i_size_write(inode, pos + copied); + i_size_changed = 1; + } + + if (pos + copied > EXT4_I(inode)->i_disksize) { + /* We need to mark inode dirty even if + * new_i_size is less that inode->i_size + * bu greater than i_disksize.(hint delalloc) + */ + ext4_update_i_disksize(inode, (pos + copied)); + i_size_changed = 1; + } + unlock_page(page); + page_cache_release(page); + + /* + * Don't mark the inode dirty under page lock. First, it unnecessarily + * makes the holding time of page lock longer. Second, it forces lock + * ordering of page lock and transaction start for journaling + * filesystems. + */ + if (i_size_changed) + ext4_mark_inode_dirty(handle, inode); + + return copied; +} + /* * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from page_symlink(). @@ -1532,21 +1618,15 @@ static int ext4_ordered_write_end(struct file *file, ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { - loff_t new_i_size; - - new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } - - ret2 = generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; + if (pos + len > inode->i_size) + /* if we have allocated more blocks and copied + * less. We will have blocks allocated outside + * inode->i_size. So truncate them + */ + ext4_orphan_add(handle, inode); if (ret2 < 0) ret = ret2; } @@ -1554,6 +1634,18 @@ static int ext4_ordered_write_end(struct file *file, if (!ret) ret = ret2; + if (pos + len > inode->i_size) { + vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + + return ret ? ret : copied; } @@ -1565,25 +1657,21 @@ static int ext4_writeback_write_end(struct file *file, handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; int ret = 0, ret2; - loff_t new_i_size; trace_mark(ext4_writeback_write_end, "dev %s ino %lu pos %llu len %u copied %u", inode->i_sb->s_id, inode->i_ino, (unsigned long long) pos, len, copied); - new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ext4_mark_inode_dirty(handle, inode); - } - - ret2 = generic_write_end(file, mapping, pos, len, copied, + ret2 = ext4_generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; + if (pos + len > inode->i_size) + /* if we have allocated more blocks and copied + * less. We will have blocks allocated outside + * inode->i_size. So truncate them + */ + ext4_orphan_add(handle, inode); + if (ret2 < 0) ret = ret2; @@ -1591,6 +1679,17 @@ static int ext4_writeback_write_end(struct file *file, if (!ret) ret = ret2; + if (pos + len > inode->i_size) { + vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } + return ret ? ret : copied; } @@ -1635,10 +1734,27 @@ static int ext4_journalled_write_end(struct file *file, } unlock_page(page); + page_cache_release(page); + if (pos + len > inode->i_size) + /* if we have allocated more blocks and copied + * less. We will have blocks allocated outside + * inode->i_size. So truncate them + */ + ext4_orphan_add(handle, inode); + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; - page_cache_release(page); + if (pos + len > inode->i_size) { + vmtruncate(inode, inode->i_size); + /* + * If vmtruncate failed early the inode might still be + * on the orphan list; we need to make sure the inode + * is removed from the orphan list in that case. + */ + if (inode->i_nlink) + ext4_orphan_del(NULL, inode); + } return ret ? ret : copied; } @@ -1852,7 +1968,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) * @logical - first logical block to start assignment with * * the function goes through all passed space and put actual disk - * block numbers into buffer heads, dropping BH_Delay + * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten */ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, struct buffer_head *exbh) @@ -1902,16 +2018,24 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, do { if (cur_logical >= logical + blocks) break; - if (buffer_delay(bh)) { - bh->b_blocknr = pblock; - clear_buffer_delay(bh); - bh->b_bdev = inode->i_sb->s_bdev; - } else if (buffer_unwritten(bh)) { - bh->b_blocknr = pblock; - clear_buffer_unwritten(bh); - set_buffer_mapped(bh); - set_buffer_new(bh); - bh->b_bdev = inode->i_sb->s_bdev; + + if (buffer_delay(bh) || + buffer_unwritten(bh)) { + + BUG_ON(bh->b_bdev != inode->i_sb->s_bdev); + + if (buffer_delay(bh)) { + clear_buffer_delay(bh); + bh->b_blocknr = pblock; + } else { + /* + * unwritten already should have + * blocknr assigned. Verify that + */ + clear_buffer_unwritten(bh); + BUG_ON(bh->b_blocknr != pblock); + } + } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); @@ -1990,51 +2114,6 @@ static void ext4_print_free_blocks(struct inode *inode) return; } -#define EXT4_DELALLOC_RSVED 1 -static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - int ret; - unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; - loff_t disksize = EXT4_I(inode)->i_disksize; - handle_t *handle = NULL; - - handle = ext4_journal_current_handle(); - BUG_ON(!handle); - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0, EXT4_DELALLOC_RSVED); - if (ret <= 0) - return ret; - - bh_result->b_size = (ret << inode->i_blkbits); - - if (ext4_should_order_data(inode)) { - int retval; - retval = ext4_jbd2_file_inode(handle, inode); - if (retval) - /* - * Failed to add inode for ordered mode. Don't - * update file size - */ - return retval; - } - - /* - * Update on-disk size along with block allocation we don't - * use 'extend_disksize' as size may change within already - * allocated block -bzzz - */ - disksize = ((loff_t) iblock + ret) << inode->i_blkbits; - if (disksize > i_size_read(inode)) - disksize = i_size_read(inode); - if (disksize > EXT4_I(inode)->i_disksize) { - ext4_update_i_disksize(inode, disksize); - ret = ext4_mark_inode_dirty(handle, inode); - return ret; - } - return 0; -} - /* * mpage_da_map_blocks - go through given space * @@ -2045,29 +2124,57 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, */ static int mpage_da_map_blocks(struct mpage_da_data *mpd) { - int err = 0; + int err, blks, get_blocks_flags; struct buffer_head new; - sector_t next; + sector_t next = mpd->b_blocknr; + unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; + loff_t disksize = EXT4_I(mpd->inode)->i_disksize; + handle_t *handle = NULL; /* * We consider only non-mapped and non-allocated blocks */ if ((mpd->b_state & (1 << BH_Mapped)) && - !(mpd->b_state & (1 << BH_Delay))) + !(mpd->b_state & (1 << BH_Delay)) && + !(mpd->b_state & (1 << BH_Unwritten))) return 0; - new.b_state = mpd->b_state; - new.b_blocknr = 0; - new.b_size = mpd->b_size; - next = mpd->b_blocknr; + /* - * If we didn't accumulate anything - * to write simply return + * If we didn't accumulate anything to write simply return */ - if (!new.b_size) + if (!mpd->b_size) return 0; - err = ext4_da_get_block_write(mpd->inode, next, &new, 1); - if (err) { + handle = ext4_journal_current_handle(); + BUG_ON(!handle); + + /* + * Call ext4_get_blocks() to allocate any delayed allocation + * blocks, or to convert an uninitialized extent to be + * initialized (in the case where we have written into + * one or more preallocated blocks). + * + * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to + * indicate that we are on the delayed allocation path. This + * affects functions in many different parts of the allocation + * call path. This flag exists primarily because we don't + * want to change *many* call functions, so ext4_get_blocks() + * will set the magic i_delalloc_reserved_flag once the + * inode's allocation semaphore is taken. + * + * If the blocks in questions were delalloc blocks, set + * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting + * variables are updated after the blocks have been allocated. + */ + new.b_state = 0; + get_blocks_flags = (EXT4_GET_BLOCKS_CREATE | + EXT4_GET_BLOCKS_DELALLOC_RESERVE); + if (mpd->b_state & (1 << BH_Delay)) + get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE; + blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks, + &new, get_blocks_flags); + if (blks < 0) { + err = blks; /* * If get block returns with error we simply * return. Later writepage will redirty the page and @@ -2100,12 +2207,14 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) if (err == -ENOSPC) { ext4_print_free_blocks(mpd->inode); } - /* invlaidate all the pages */ + /* invalidate all the pages */ ext4_da_block_invalidatepages(mpd, next, mpd->b_size >> mpd->inode->i_blkbits); return err; } - BUG_ON(new.b_size == 0); + BUG_ON(blks == 0); + + new.b_size = (blks << mpd->inode->i_blkbits); if (buffer_new(&new)) __unmap_underlying_blocks(mpd->inode, &new); @@ -2118,6 +2227,23 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd) (mpd->b_state & (1 << BH_Unwritten))) mpage_put_bnr_to_bhs(mpd, next, &new); + if (ext4_should_order_data(mpd->inode)) { + err = ext4_jbd2_file_inode(handle, mpd->inode); + if (err) + return err; + } + + /* + * Update on-disk size along with block allocation. + */ + disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; + if (disksize > i_size_read(mpd->inode)) + disksize = i_size_read(mpd->inode); + if (disksize > EXT4_I(mpd->inode)->i_disksize) { + ext4_update_i_disksize(mpd->inode, disksize); + return ext4_mark_inode_dirty(handle, mpd->inode); + } + return 0; } @@ -2192,6 +2318,17 @@ flush_it: return; } +static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) +{ + /* + * unmapped buffer is possible for holes. + * delay buffer is possible with delayed allocation. + * We also need to consider unwritten buffer as unmapped. + */ + return (!buffer_mapped(bh) || buffer_delay(bh) || + buffer_unwritten(bh)) && buffer_dirty(bh); +} + /* * __mpage_da_writepage - finds extent of pages and blocks * @@ -2276,8 +2413,7 @@ static int __mpage_da_writepage(struct page *page, * Otherwise we won't make progress * with the page in ext4_da_writepage */ - if (buffer_dirty(bh) && - (!buffer_mapped(bh) || buffer_delay(bh))) { + if (ext4_bh_unmapped_or_delay(NULL, bh)) { mpage_add_bh_to_extent(mpd, logical, bh->b_size, bh->b_state); @@ -2303,8 +2439,16 @@ static int __mpage_da_writepage(struct page *page, } /* - * this is a special callback for ->write_begin() only - * it's intention is to return mapped block or reserve space + * This is a special get_blocks_t callback which is used by + * ext4_da_write_begin(). It will either return mapped block or + * reserve space for a single block. + * + * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. + * We also have b_blocknr = -1 and b_bdev initialized properly + * + * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. + * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev + * initialized properly. */ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) @@ -2323,7 +2467,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, * preallocated blocks are unmapped but should treated * the same as allocated blocks. */ - ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0); + ret = ext4_get_blocks(NULL, inode, iblock, 1, bh_result, 0); if ((ret == 0) && !buffer_delay(bh_result)) { /* the block isn't (pre)allocated yet, let's reserve space */ /* @@ -2340,40 +2484,53 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, set_buffer_delay(bh_result); } else if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); - /* - * With sub-block writes into unwritten extents - * we also need to mark the buffer as new so that - * the unwritten parts of the buffer gets correctly zeroed. - */ - if (buffer_unwritten(bh_result)) + if (buffer_unwritten(bh_result)) { + /* A delayed write to unwritten bh should + * be marked new and mapped. Mapped ensures + * that we don't do get_block multiple times + * when we write to the same offset and new + * ensures that we do proper zero out for + * partial write. + */ set_buffer_new(bh_result); + set_buffer_mapped(bh_result); + } ret = 0; } return ret; } -static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) -{ - /* - * unmapped buffer is possible for holes. - * delay buffer is possible with delayed allocation - */ - return ((!buffer_mapped(bh) || buffer_delay(bh)) && buffer_dirty(bh)); -} - -static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, +/* + * This function is used as a standard get_block_t calback function + * when there is no desire to allocate any blocks. It is used as a + * callback function for block_prepare_write(), nobh_writepage(), and + * block_write_full_page(). These functions should only try to map a + * single block at a time. + * + * Since this function doesn't do block allocations even if the caller + * requests it by passing in create=1, it is critically important that + * any caller checks to make sure that any buffer heads are returned + * by this function are either all already mapped or marked for + * delayed allocation before calling nobh_writepage() or + * block_write_full_page(). Otherwise, b_blocknr could be left + * unitialized, and the page write functions will be taken by + * surprise. + */ +static int noalloc_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { int ret = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize); + /* * we don't want to do block allocation in writepage * so call get_block_wrap with create = 0 */ - ret = ext4_get_blocks_wrap(NULL, inode, iblock, max_blocks, - bh_result, 0, 0, 0); + ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0); + BUG_ON(create && ret == 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -2382,10 +2539,11 @@ static int ext4_normal_get_block_write(struct inode *inode, sector_t iblock, } /* - * get called vi ext4_da_writepages after taking page lock (have journal handle) - * get called via journal_submit_inode_data_buffers (no journal handle) - * get called via shrink_page_list via pdflush (no journal handle) - * or grab_page_cache when doing write_begin (have journal handle) + * This function can get called via... + * - ext4_da_writepages after taking page lock (have journal handle) + * - journal_submit_inode_data_buffers (no journal handle) + * - shrink_page_list via pdflush (no journal handle) + * - grab_page_cache when doing write_begin (have journal handle) */ static int ext4_da_writepage(struct page *page, struct writeback_control *wbc) @@ -2436,7 +2594,7 @@ static int ext4_da_writepage(struct page *page, * do block allocation here. */ ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - ext4_normal_get_block_write); + noalloc_get_block_write); if (!ret) { page_bufs = page_buffers(page); /* check whether all are mapped and non delay */ @@ -2461,11 +2619,10 @@ static int ext4_da_writepage(struct page *page, } if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) - ret = nobh_writepage(page, ext4_normal_get_block_write, wbc); + ret = nobh_writepage(page, noalloc_get_block_write, wbc); else - ret = block_write_full_page(page, - ext4_normal_get_block_write, - wbc); + ret = block_write_full_page(page, noalloc_get_block_write, + wbc); return ret; } @@ -2777,7 +2934,7 @@ retry: *pagep = page; ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_da_get_block_prep); + ext4_da_get_block_prep); if (ret < 0) { unlock_page(page); ext4_journal_stop(handle); @@ -2815,7 +2972,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, for (i = 0; i < idx; i++) bh = bh->b_this_page; - if (!buffer_mapped(bh) || (buffer_delay(bh))) + if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) return 0; return 1; } @@ -3085,12 +3242,10 @@ static int __ext4_normal_writepage(struct page *page, struct inode *inode = page->mapping->host; if (test_opt(inode->i_sb, NOBH)) - return nobh_writepage(page, - ext4_normal_get_block_write, wbc); + return nobh_writepage(page, noalloc_get_block_write, wbc); else - return block_write_full_page(page, - ext4_normal_get_block_write, - wbc); + return block_write_full_page(page, noalloc_get_block_write, + wbc); } static int ext4_normal_writepage(struct page *page, @@ -3142,7 +3297,7 @@ static int __ext4_journalled_writepage(struct page *page, int err; ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - ext4_normal_get_block_write); + noalloc_get_block_write); if (ret != 0) goto out_unlock; @@ -3227,9 +3382,8 @@ static int ext4_journalled_writepage(struct page *page, * really know unless we go poke around in the buffer_heads. * But block_write_full_page will do the right thing. */ - return block_write_full_page(page, - ext4_normal_get_block_write, - wbc); + return block_write_full_page(page, noalloc_get_block_write, + wbc); } no_write: redirty_page_for_writepage(wbc, page); @@ -3973,7 +4127,8 @@ void ext4_truncate(struct inode *inode) if (!ext4_can_truncate(inode)) return; - if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) + if (ei->i_disksize && inode->i_size == 0 && + !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { @@ -4715,25 +4870,6 @@ int ext4_write_inode(struct inode *inode, int wait) return ext4_force_commit(inode->i_sb); } -int __ext4_write_dirty_metadata(struct inode *inode, struct buffer_head *bh) -{ - int err = 0; - - mark_buffer_dirty(bh); - if (inode && inode_needs_sync(inode)) { - sync_dirty_buffer(bh); - if (buffer_req(bh) && !buffer_uptodate(bh)) { - ext4_error(inode->i_sb, __func__, - "IO error syncing inode, " - "inode=%lu, block=%llu", - inode->i_ino, - (unsigned long long)bh->b_blocknr); - err = -EIO; - } - } - return err; -} - /* * ext4_setattr() * @@ -4930,7 +5066,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) */ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) { - int groups, gdpblocks; + ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); + int gdpblocks; int idxblocks; int ret = 0; @@ -4957,8 +5094,8 @@ int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) groups += nrblocks; gdpblocks = groups; - if (groups > EXT4_SB(inode->i_sb)->s_groups_count) - groups = EXT4_SB(inode->i_sb)->s_groups_count; + if (groups > ngroups) + groups = ngroups; if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; @@ -4998,7 +5135,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) * Calculate the journal credits for a chunk of data modification. * * This is called from DIO, fallocate or whoever calling - * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. + * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. * * journal buffers for data blocks are not included here, as DIO * and fallocate do no need to journal data buffers. diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f871677a798..ed8482e22c0 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -372,24 +372,12 @@ static inline void mb_set_bit(int bit, void *addr) ext4_set_bit(bit, addr); } -static inline void mb_set_bit_atomic(spinlock_t *lock, int bit, void *addr) -{ - addr = mb_correct_addr_and_bit(&bit, addr); - ext4_set_bit_atomic(lock, bit, addr); -} - static inline void mb_clear_bit(int bit, void *addr) { addr = mb_correct_addr_and_bit(&bit, addr); ext4_clear_bit(bit, addr); } -static inline void mb_clear_bit_atomic(spinlock_t *lock, int bit, void *addr) -{ - addr = mb_correct_addr_and_bit(&bit, addr); - ext4_clear_bit_atomic(lock, bit, addr); -} - static inline int mb_find_next_zero_bit(void *addr, int max, int start) { int fix = 0, ret, tmpmax; @@ -448,7 +436,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b, if (unlikely(e4b->bd_info->bb_bitmap == NULL)) return; - BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); for (i = 0; i < count; i++) { if (!mb_test_bit(first + i, e4b->bd_info->bb_bitmap)) { ext4_fsblk_t blocknr; @@ -472,7 +460,7 @@ static void mb_mark_used_double(struct ext4_buddy *e4b, int first, int count) if (unlikely(e4b->bd_info->bb_bitmap == NULL)) return; - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); for (i = 0; i < count; i++) { BUG_ON(mb_test_bit(first + i, e4b->bd_info->bb_bitmap)); mb_set_bit(first + i, e4b->bd_info->bb_bitmap); @@ -739,6 +727,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb, static int ext4_mb_init_cache(struct page *page, char *incore) { + ext4_group_t ngroups; int blocksize; int blocks_per_page; int groups_per_page; @@ -757,6 +746,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) inode = page->mapping->host; sb = inode->i_sb; + ngroups = ext4_get_groups_count(sb); blocksize = 1 << inode->i_blkbits; blocks_per_page = PAGE_CACHE_SIZE / blocksize; @@ -780,7 +770,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) for (i = 0; i < groups_per_page; i++) { struct ext4_group_desc *desc; - if (first_group + i >= EXT4_SB(sb)->s_groups_count) + if (first_group + i >= ngroups) break; err = -EIO; @@ -801,17 +791,17 @@ static int ext4_mb_init_cache(struct page *page, char *incore) unlock_buffer(bh[i]); continue; } - spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + ext4_lock_group(sb, first_group + i); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh[i], first_group + i, desc); set_bitmap_uptodate(bh[i]); set_buffer_uptodate(bh[i]); - spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + ext4_unlock_group(sb, first_group + i); unlock_buffer(bh[i]); continue; } - spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); + ext4_unlock_group(sb, first_group + i); if (buffer_uptodate(bh[i])) { /* * if not uninit if bh is uptodate, @@ -852,7 +842,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore) struct ext4_group_info *grinfo; group = (first_block + i) >> 1; - if (group >= EXT4_SB(sb)->s_groups_count) + if (group >= ngroups) break; /* @@ -1078,7 +1068,7 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block) return 0; } -static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) +static void mb_clear_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1091,15 +1081,12 @@ static void mb_clear_bits(spinlock_t *lock, void *bm, int cur, int len) cur += 32; continue; } - if (lock) - mb_clear_bit_atomic(lock, cur, bm); - else - mb_clear_bit(cur, bm); + mb_clear_bit(cur, bm); cur++; } } -static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) +static void mb_set_bits(void *bm, int cur, int len) { __u32 *addr; @@ -1112,10 +1099,7 @@ static void mb_set_bits(spinlock_t *lock, void *bm, int cur, int len) cur += 32; continue; } - if (lock) - mb_set_bit_atomic(lock, cur, bm); - else - mb_set_bit(cur, bm); + mb_set_bit(cur, bm); cur++; } } @@ -1131,7 +1115,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, struct super_block *sb = e4b->bd_sb; BUG_ON(first + count > (sb->s_blocksize << 3)); - BUG_ON(!ext4_is_group_locked(sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); mb_check_buddy(e4b); mb_free_blocks_double(inode, e4b, first, count); @@ -1212,7 +1196,7 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, int ord; void *buddy; - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); BUG_ON(ex == NULL); buddy = mb_find_buddy(e4b, order, &max); @@ -1276,7 +1260,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) BUG_ON(start + len > (e4b->bd_sb->s_blocksize << 3)); BUG_ON(e4b->bd_group != ex->fe_group); - BUG_ON(!ext4_is_group_locked(e4b->bd_sb, e4b->bd_group)); + assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); mb_check_buddy(e4b); mb_mark_used_double(e4b, start, len); @@ -1330,8 +1314,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) e4b->bd_info->bb_counters[ord]++; } - mb_set_bits(sb_bgl_lock(EXT4_SB(e4b->bd_sb), ex->fe_group), - EXT4_MB_BITMAP(e4b), ex->fe_start, len0); + mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); mb_check_buddy(e4b); return ret; @@ -1726,7 +1709,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, unsigned free, fragments; unsigned i, bits; int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); - struct ext4_group_desc *desc; struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); BUG_ON(cr < 0 || cr >= 4); @@ -1742,10 +1724,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, switch (cr) { case 0: BUG_ON(ac->ac_2order == 0); - /* If this group is uninitialized, skip it initially */ - desc = ext4_get_group_desc(ac->ac_sb, group, NULL); - if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) - return 0; /* Avoid using the first bg of a flexgroup for data files */ if ((ac->ac_flags & EXT4_MB_HINT_DATA) && @@ -1788,6 +1766,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) int block, pnum; int blocks_per_page; int groups_per_page; + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t first_group; struct ext4_group_info *grp; @@ -1807,7 +1786,7 @@ int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) /* read all groups the page covers into the cache */ for (i = 0; i < groups_per_page; i++) { - if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) + if ((first_group + i) >= ngroups) break; grp = ext4_get_group_info(sb, first_group + i); /* take all groups write allocation @@ -1945,8 +1924,7 @@ err: static noinline_for_stack int ext4_mb_regular_allocator(struct ext4_allocation_context *ac) { - ext4_group_t group; - ext4_group_t i; + ext4_group_t ngroups, group, i; int cr; int err = 0; int bsbits; @@ -1957,6 +1935,7 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac) sb = ac->ac_sb; sbi = EXT4_SB(sb); + ngroups = ext4_get_groups_count(sb); BUG_ON(ac->ac_status == AC_STATUS_FOUND); /* first, try the goal */ @@ -2017,11 +1996,11 @@ repeat: */ group = ac->ac_g_ex.fe_group; - for (i = 0; i < EXT4_SB(sb)->s_groups_count; group++, i++) { + for (i = 0; i < ngroups; group++, i++) { struct ext4_group_info *grp; struct ext4_group_desc *desc; - if (group == EXT4_SB(sb)->s_groups_count) + if (group == ngroups) group = 0; /* quick check to skip empty groups */ @@ -2064,9 +2043,7 @@ repeat: ac->ac_groups_scanned++; desc = ext4_get_group_desc(sb, group, NULL); - if (cr == 0 || (desc->bg_flags & - cpu_to_le16(EXT4_BG_BLOCK_UNINIT) && - ac->ac_2order != 0)) + if (cr == 0) ext4_mb_simple_scan_group(ac, &e4b); else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe) @@ -2315,12 +2292,10 @@ static struct file_operations ext4_mb_seq_history_fops = { static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; - if (*pos < 0 || *pos >= sbi->s_groups_count) + if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; - group = *pos + 1; return (void *) ((unsigned long) group); } @@ -2328,11 +2303,10 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { struct super_block *sb = seq->private; - struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t group; ++*pos; - if (*pos < 0 || *pos >= sbi->s_groups_count) + if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) return NULL; group = *pos + 1; return (void *) ((unsigned long) group); @@ -2420,7 +2394,8 @@ static void ext4_mb_history_release(struct super_block *sb) if (sbi->s_proc != NULL) { remove_proc_entry("mb_groups", sbi->s_proc); - remove_proc_entry("mb_history", sbi->s_proc); + if (sbi->s_mb_history_max) + remove_proc_entry("mb_history", sbi->s_proc); } kfree(sbi->s_mb_history); } @@ -2431,17 +2406,17 @@ static void ext4_mb_history_init(struct super_block *sb) int i; if (sbi->s_proc != NULL) { - proc_create_data("mb_history", S_IRUGO, sbi->s_proc, - &ext4_mb_seq_history_fops, sb); + if (sbi->s_mb_history_max) + proc_create_data("mb_history", S_IRUGO, sbi->s_proc, + &ext4_mb_seq_history_fops, sb); proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); } - sbi->s_mb_history_max = 1000; sbi->s_mb_history_cur = 0; spin_lock_init(&sbi->s_mb_history_lock); i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); - sbi->s_mb_history = kzalloc(i, GFP_KERNEL); + sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; /* if we can't allocate history, then we simple won't use it */ } @@ -2451,7 +2426,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); struct ext4_mb_history h; - if (unlikely(sbi->s_mb_history == NULL)) + if (sbi->s_mb_history == NULL) return; if (!(ac->ac_op & sbi->s_mb_history_filter)) @@ -2587,6 +2562,7 @@ void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add) static int ext4_mb_init_backend(struct super_block *sb) { + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int metalen; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -2598,7 +2574,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_group_desc *desc; /* This is the number of blocks used by GDT */ - num_meta_group_infos = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - + num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); /* @@ -2644,7 +2620,7 @@ static int ext4_mb_init_backend(struct super_block *sb) for (i = 0; i < num_meta_group_infos; i++) { if ((i + 1) == num_meta_group_infos) metalen = sizeof(*meta_group_info) * - (sbi->s_groups_count - + (ngroups - (i << EXT4_DESC_PER_BLOCK_BITS(sb))); meta_group_info = kmalloc(metalen, GFP_KERNEL); if (meta_group_info == NULL) { @@ -2655,7 +2631,7 @@ static int ext4_mb_init_backend(struct super_block *sb) sbi->s_group_info[i] = meta_group_info; } - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); if (desc == NULL) { printk(KERN_ERR @@ -2761,7 +2737,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) return 0; } -/* need to called with ext4 group lock (ext4_lock_group) */ +/* need to called with the ext4 group lock held */ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) { struct ext4_prealloc_space *pa; @@ -2781,13 +2757,14 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) int ext4_mb_release(struct super_block *sb) { + ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; struct ext4_group_info *grinfo; struct ext4_sb_info *sbi = EXT4_SB(sb); if (sbi->s_group_info) { - for (i = 0; i < sbi->s_groups_count; i++) { + for (i = 0; i < ngroups; i++) { grinfo = ext4_get_group_info(sb, i); #ifdef DOUBLE_CHECK kfree(grinfo->bb_bitmap); @@ -2797,7 +2774,7 @@ int ext4_mb_release(struct super_block *sb) ext4_unlock_group(sb, i); kfree(grinfo); } - num_meta_group_infos = (sbi->s_groups_count + + num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); for (i = 0; i < num_meta_group_infos; i++) @@ -2984,27 +2961,25 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + le32_to_cpu(es->s_first_data_block); len = ac->ac_b_ex.fe_len; - if (in_range(ext4_block_bitmap(sb, gdp), block, len) || - in_range(ext4_inode_bitmap(sb, gdp), block, len) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group) || - in_range(block + len - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { + if (!ext4_data_block_valid(sbi, block, len)) { ext4_error(sb, __func__, - "Allocating block %llu in system zone of %d group\n", - block, ac->ac_b_ex.fe_group); + "Allocating blocks %llu-%llu which overlap " + "fs metadata\n", block, block+len); /* File system mounted not to panic on error * Fix the bitmap and repeat the block allocation * We leak some of the blocks here. */ - mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), - bitmap_bh->b_data, ac->ac_b_ex.fe_start, - ac->ac_b_ex.fe_len); + ext4_lock_group(sb, ac->ac_b_ex.fe_group); + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); + ext4_unlock_group(sb, ac->ac_b_ex.fe_group); err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); if (!err) err = -EAGAIN; goto out_err; } + + ext4_lock_group(sb, ac->ac_b_ex.fe_group); #ifdef AGGRESSIVE_CHECK { int i; @@ -3014,9 +2989,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, } } #endif - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - mb_set_bits(NULL, bitmap_bh->b_data, - ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); + mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start,ac->ac_b_ex.fe_len); if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); ext4_free_blks_set(sb, gdp, @@ -3026,7 +2999,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, len = ext4_free_blks_count(sb, gdp) - ac->ac_b_ex.fe_len; ext4_free_blks_set(sb, gdp, len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); - spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + + ext4_unlock_group(sb, ac->ac_b_ex.fe_group); percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); /* * Now reduce the dirty block count also. Should not go negative @@ -3459,7 +3433,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) * the function goes through all block freed in the group * but not yet committed and marks them used in in-core bitmap. * buddy must be generated from this bitmap - * Need to be called with ext4 group lock (ext4_lock_group) + * Need to be called with the ext4 group lock held */ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group) @@ -3473,9 +3447,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, while (n) { entry = rb_entry(n, struct ext4_free_data, node); - mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), - bitmap, entry->start_blk, - entry->count); + mb_set_bits(bitmap, entry->start_blk, entry->count); n = rb_next(n); } return; @@ -3484,7 +3456,7 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, /* * the function goes through all preallocation in this group and marks them * used in in-core bitmap. buddy must be generated from this bitmap - * Need to be called with ext4 group lock (ext4_lock_group) + * Need to be called with ext4 group lock held */ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group) @@ -3516,8 +3488,7 @@ static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, if (unlikely(len == 0)) continue; BUG_ON(groupnr != group); - mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), - bitmap, start, len); + mb_set_bits(bitmap, start, len); preallocated += len; count++; } @@ -4121,7 +4092,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode, static void ext4_mb_show_ac(struct ext4_allocation_context *ac) { struct super_block *sb = ac->ac_sb; - ext4_group_t i; + ext4_group_t ngroups, i; printk(KERN_ERR "EXT4-fs: Can't allocate:" " Allocation context details:\n"); @@ -4145,7 +4116,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) printk(KERN_ERR "EXT4-fs: %lu scanned, %d found\n", ac->ac_ex_scanned, ac->ac_found); printk(KERN_ERR "EXT4-fs: groups: \n"); - for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + ngroups = ext4_get_groups_count(sb); + for (i = 0; i < ngroups; i++) { struct ext4_group_info *grp = ext4_get_group_info(sb, i); struct ext4_prealloc_space *pa; ext4_grpblk_t start; @@ -4469,13 +4441,13 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac) static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { - ext4_group_t i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; int freed = 0; trace_mark(ext4_mb_discard_preallocations, "dev %s needed %d", sb->s_id, needed); - for (i = 0; i < EXT4_SB(sb)->s_groups_count && needed > 0; i++) { + for (i = 0; i < ngroups && needed > 0; i++) { ret = ext4_mb_discard_group_preallocations(sb, i, needed); freed += ret; needed -= ret; @@ -4859,29 +4831,25 @@ do_more: new_entry->group = block_group; new_entry->count = count; new_entry->t_tid = handle->h_transaction->t_tid; + ext4_lock_group(sb, block_group); - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, - bit, count); + mb_clear_bits(bitmap_bh->b_data, bit, count); ext4_mb_free_metadata(handle, &e4b, new_entry); - ext4_unlock_group(sb, block_group); } else { - ext4_lock_group(sb, block_group); /* need to update group_info->bb_free and bitmap * with group lock held. generate_buddy look at * them with group lock_held */ - mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, - bit, count); + ext4_lock_group(sb, block_group); + mb_clear_bits(bitmap_bh->b_data, bit, count); mb_free_blocks(inode, &e4b, bit, count); ext4_mb_return_to_preallocation(inode, &e4b, block, count); - ext4_unlock_group(sb, block_group); } - spin_lock(sb_bgl_lock(sbi, block_group)); ret = ext4_free_blks_count(sb, gdp) + count; ext4_free_blks_set(sb, gdp, ret); gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp); - spin_unlock(sb_bgl_lock(sbi, block_group)); + ext4_unlock_group(sb, block_group); percpu_counter_add(&sbi->s_freeblocks_counter, count); if (sbi->s_log_groups_per_flex) { diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index dd9e6cd5f6c..75e34f69215 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -23,7 +23,6 @@ #include <linux/mutex.h> #include "ext4_jbd2.h" #include "ext4.h" -#include "group.h" /* * with AGGRESSIVE_CHECK allocator runs consistency checks over diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 22098e1cd08..07eb6649e4f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -37,7 +37,6 @@ #include "ext4.h" #include "ext4_jbd2.h" -#include "namei.h" #include "xattr.h" #include "acl.h" @@ -750,7 +749,7 @@ static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, ext4fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u16) ((char *) de - base); + map_tail->offs = ((char *) de - base)>>2; map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); @@ -1148,7 +1147,8 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, unsigned rec_len = 0; while (count--) { - struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) + (from + (map->offs<<2)); rec_len = EXT4_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = @@ -1997,7 +1997,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) if (!ext4_handle_valid(handle)) return 0; - lock_super(sb); + mutex_lock(&EXT4_SB(sb)->s_orphan_lock); if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; @@ -2006,9 +2006,13 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* @@@ FIXME: Observation from aviro: * I think I can trigger J_ASSERT in ext4_orphan_add(). We block - * here (on lock_super()), so race with ext4_link() which might bump + * here (on s_orphan_lock), so race with ext4_link() which might bump * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. + * + * tytso, 4/25/2009: I'm not sure how that could happen; + * shouldn't the fs core protect us from these sort of + * unlink()/link() races? */ J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); @@ -2045,7 +2049,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); out_unlock: - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); ext4_std_error(inode->i_sb, err); return err; } @@ -2066,11 +2070,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) if (!ext4_handle_valid(handle)) return 0; - lock_super(inode->i_sb); - if (list_empty(&ei->i_orphan)) { - unlock_super(inode->i_sb); - return 0; - } + mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); + if (list_empty(&ei->i_orphan)) + goto out; ino_next = NEXT_ORPHAN(inode); prev = ei->i_orphan.prev; @@ -2120,7 +2122,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) out_err: ext4_std_error(inode->i_sb, err); out: - unlock_super(inode->i_sb); + mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); return err; out_brelse: @@ -2533,6 +2535,7 @@ const struct inode_operations ext4_dir_inode_operations = { .removexattr = generic_removexattr, #endif .permission = ext4_permission, + .fiemap = ext4_fiemap, }; const struct inode_operations ext4_special_inode_operations = { diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h deleted file mode 100644 index 5e4dfff36a0..00000000000 --- a/fs/ext4/namei.h +++ /dev/null @@ -1,8 +0,0 @@ -/* linux/fs/ext4/namei.h - * - * Copyright (C) 2005 Simtec Electronics - * Ben Dooks <ben@simtec.co.uk> - * -*/ - -extern struct dentry *ext4_get_parent(struct dentry *child); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 546c7dd869e..27eb289eea3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -15,7 +15,6 @@ #include <linux/slab.h> #include "ext4_jbd2.h" -#include "group.h" #define outside(b, first, last) ((b) < (first) || (b) >= (last)) #define inside(b, first, last) ((b) >= (first) && (b) < (last)) @@ -193,7 +192,7 @@ static int setup_new_group_blocks(struct super_block *sb, if (IS_ERR(handle)) return PTR_ERR(handle); - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { err = -EBUSY; goto exit_journal; @@ -302,7 +301,7 @@ exit_bh: brelse(bh); exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; @@ -643,11 +642,12 @@ exit_free: * important part is that the new block and inode counts are in the backup * superblocks, and the location of the new group metadata in the GDT backups. * - * We do not need lock_super() for this, because these blocks are not - * otherwise touched by the filesystem code when it is mounted. We don't - * need to worry about last changing from sbi->s_groups_count, because the - * worst that can happen is that we do not copy the full number of backups - * at this time. The resize which changed s_groups_count will backup again. + * We do not need take the s_resize_lock for this, because these + * blocks are not otherwise touched by the filesystem code when it is + * mounted. We don't need to worry about last changing from + * sbi->s_groups_count, because the worst that can happen is that we + * do not copy the full number of backups at this time. The resize + * which changed s_groups_count will backup again. */ static void update_backups(struct super_block *sb, int blk_off, char *data, int size) @@ -809,7 +809,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) goto exit_put; } - lock_super(sb); + mutex_lock(&sbi->s_resize_lock); if (input->group != sbi->s_groups_count) { ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); @@ -840,7 +840,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* * OK, now we've set up the new group. Time to make it active. * - * Current kernels don't lock all allocations via lock_super(), + * We do not lock all allocations via s_resize_lock * so we have to be safe wrt. concurrent accesses the group * data. So we need to be careful to set all of the relevant * group descriptor data etc. *before* we enable the group. @@ -900,12 +900,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * * The precise rules we use are: * - * * Writers of s_groups_count *must* hold lock_super + * * Writers of s_groups_count *must* hold s_resize_lock * AND * * Writers must perform a smp_wmb() after updating all dependent * data and before modifying the groups count * - * * Readers must hold lock_super() over the access + * * Readers must hold s_resize_lock over the access * OR * * Readers must perform an smp_rmb() after reading the groups count * and before reading any dependent data. @@ -948,7 +948,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) sb->s_dirt = 1; exit_journal: - unlock_super(sb); + mutex_unlock(&sbi->s_resize_lock); if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; if (!err) { @@ -986,7 +986,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after - * taking lock_super() below. */ + * taking the s_resize_lock below. */ o_blocks_count = ext4_blocks_count(es); o_groups_count = EXT4_SB(sb)->s_groups_count; @@ -1056,11 +1056,11 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, goto exit_put; } - lock_super(sb); + mutex_lock(&EXT4_SB(sb)->s_resize_lock); if (o_blocks_count != ext4_blocks_count(es)) { ext4_warning(sb, __func__, "multiple resizers run on filesystem!"); - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); err = -EBUSY; goto exit_put; @@ -1070,14 +1070,14 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, EXT4_SB(sb)->s_sbh))) { ext4_warning(sb, __func__, "error %d on journal write access", err); - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_journal_stop(handle); goto exit_put; } ext4_blocks_count_set(es, o_blocks_count + add); ext4_handle_dirty_metadata(handle, NULL, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; - unlock_super(sb); + mutex_unlock(&EXT4_SB(sb)->s_resize_lock); ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); /* We add the blocks to the bitmap and set the group need init bit */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 2958f4e6f22..c191d0f65fe 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -20,6 +20,7 @@ #include <linux/string.h> #include <linux/fs.h> #include <linux/time.h> +#include <linux/vmalloc.h> #include <linux/jbd2.h> #include <linux/slab.h> #include <linux/init.h> @@ -45,16 +46,20 @@ #include "ext4_jbd2.h" #include "xattr.h" #include "acl.h" -#include "namei.h" -#include "group.h" + +static int default_mb_history_length = 1000; + +module_param_named(default_mb_history_length, default_mb_history_length, + int, 0644); +MODULE_PARM_DESC(default_mb_history_length, + "Default number of entries saved for mb_history"); struct proc_dir_entry *ext4_proc_root; static struct kset *ext4_kset; static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); -static int ext4_commit_super(struct super_block *sb, - struct ext4_super_block *es, int sync); +static int ext4_commit_super(struct super_block *sb, int sync); static void ext4_mark_recovery_complete(struct super_block *sb, struct ext4_super_block *es); static void ext4_clear_journal_err(struct super_block *sb, @@ -74,7 +79,7 @@ ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, { return le32_to_cpu(bg->bg_block_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, @@ -82,7 +87,7 @@ ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, { return le32_to_cpu(bg->bg_inode_bitmap_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); } ext4_fsblk_t ext4_inode_table(struct super_block *sb, @@ -90,7 +95,7 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb, { return le32_to_cpu(bg->bg_inode_table_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); } __u32 ext4_free_blks_count(struct super_block *sb, @@ -98,7 +103,7 @@ __u32 ext4_free_blks_count(struct super_block *sb, { return le16_to_cpu(bg->bg_free_blocks_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0); } __u32 ext4_free_inodes_count(struct super_block *sb, @@ -106,7 +111,7 @@ __u32 ext4_free_inodes_count(struct super_block *sb, { return le16_to_cpu(bg->bg_free_inodes_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0); } __u32 ext4_used_dirs_count(struct super_block *sb, @@ -114,7 +119,7 @@ __u32 ext4_used_dirs_count(struct super_block *sb, { return le16_to_cpu(bg->bg_used_dirs_count_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0); } __u32 ext4_itable_unused_count(struct super_block *sb, @@ -122,7 +127,7 @@ __u32 ext4_itable_unused_count(struct super_block *sb, { return le16_to_cpu(bg->bg_itable_unused_lo) | (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? - (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); + (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0); } void ext4_block_bitmap_set(struct super_block *sb, @@ -202,8 +207,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) journal = EXT4_SB(sb)->s_journal; if (journal) { if (is_journal_aborted(journal)) { - ext4_abort(sb, __func__, - "Detected aborted journal"); + ext4_abort(sb, __func__, "Detected aborted journal"); return ERR_PTR(-EROFS); } return jbd2_journal_start(journal, nblocks); @@ -302,10 +306,10 @@ static void ext4_handle_error(struct super_block *sb) jbd2_journal_abort(journal, -EIO); } if (test_opt(sb, ERRORS_RO)) { - printk(KERN_CRIT "Remounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); sb->s_flags |= MS_RDONLY; } - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); if (test_opt(sb, ERRORS_PANIC)) panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); @@ -395,8 +399,6 @@ void ext4_abort(struct super_block *sb, const char *function, { va_list args; - printk(KERN_CRIT "ext4_abort called.\n"); - va_start(args, fmt); printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); @@ -409,7 +411,7 @@ void ext4_abort(struct super_block *sb, const char *function, if (sb->s_flags & MS_RDONLY) return; - printk(KERN_CRIT "Remounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; @@ -417,6 +419,18 @@ void ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } +void ext4_msg (struct super_block * sb, const char *prefix, + const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk("%sEXT4-fs (%s): ", prefix, sb->s_id); + vprintk(fmt, args); + printk("\n"); + va_end(args); +} + void ext4_warning(struct super_block *sb, const char *function, const char *fmt, ...) { @@ -431,7 +445,7 @@ void ext4_warning(struct super_block *sb, const char *function, } void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp, - const char *function, const char *fmt, ...) + const char *function, const char *fmt, ...) __releases(bitlock) __acquires(bitlock) { @@ -447,7 +461,7 @@ __acquires(bitlock) if (test_opt(sb, ERRORS_CONT)) { EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 0); + ext4_commit_super(sb, 0); return; } ext4_unlock_group(sb, grp); @@ -467,7 +481,6 @@ __acquires(bitlock) return; } - void ext4_update_dynamic_rev(struct super_block *sb) { struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -496,7 +509,7 @@ void ext4_update_dynamic_rev(struct super_block *sb) /* * Open the external journal device */ -static struct block_device *ext4_blkdev_get(dev_t dev) +static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb) { struct block_device *bdev; char b[BDEVNAME_SIZE]; @@ -507,7 +520,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev) return bdev; fail: - printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n", + ext4_msg(sb, KERN_ERR, "failed to open journal device %s: %ld", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; } @@ -543,8 +556,8 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) { struct list_head *l; - printk(KERN_ERR "sb orphan head is %d\n", - le32_to_cpu(sbi->s_es->s_last_orphan)); + ext4_msg(sb, KERN_ERR, "sb orphan head is %d", + le32_to_cpu(sbi->s_es->s_last_orphan)); printk(KERN_ERR "sb_info orphan list:\n"); list_for_each(l, &sbi->s_orphan) { @@ -563,6 +576,7 @@ static void ext4_put_super(struct super_block *sb) struct ext4_super_block *es = sbi->s_es; int i, err; + ext4_release_system_zone(sb); ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); @@ -576,7 +590,7 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } if (sbi->s_proc) { remove_proc_entry(sb->s_id, ext4_proc_root); @@ -586,7 +600,10 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); - kfree(sbi->s_flex_groups); + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -629,7 +646,6 @@ static void ext4_put_super(struct super_block *sb) lock_kernel(); kfree(sbi->s_blockgroup_lock); kfree(sbi); - return; } static struct kmem_cache *ext4_inode_cachep; @@ -644,6 +660,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; + #ifdef CONFIG_EXT4_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; @@ -664,14 +681,16 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei->i_allocated_meta_blocks = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); + return &ei->vfs_inode; } static void ext4_destroy_inode(struct inode *inode) { if (!list_empty(&(EXT4_I(inode)->i_orphan))) { - printk("EXT4 Inode %p: orphan list check failed!\n", - EXT4_I(inode)); + ext4_msg(inode->i_sb, KERN_ERR, + "Inode %lu (%p): orphan list check failed!", + inode->i_ino, EXT4_I(inode)); print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4, EXT4_I(inode), sizeof(struct ext4_inode_info), true); @@ -870,12 +889,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",noauto_da_alloc"); ext4_show_quota_options(seq, sb); + return 0; } - static struct inode *ext4_nfs_get_inode(struct super_block *sb, - u64 ino, u32 generation) + u64 ino, u32 generation) { struct inode *inode; @@ -904,14 +923,14 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb, } static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) + int fh_len, int fh_type) { return generic_fh_to_dentry(sb, fid, fh_len, fh_type, ext4_nfs_get_inode); } static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fh_type) + int fh_len, int fh_type) { return generic_fh_to_parent(sb, fid, fh_len, fh_type, ext4_nfs_get_inode); @@ -923,7 +942,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, * which would prevent try_to_free_buffers() from freeing them, we must use * jbd2 layer's try_to_free_buffers() function to release them. */ -static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait) +static int bdev_try_to_free_page(struct super_block *sb, struct page *page, + gfp_t wait) { journal_t *journal = EXT4_SB(sb)->s_journal; @@ -992,7 +1012,6 @@ static const struct super_operations ext4_sops = { .dirty_inode = ext4_dirty_inode, .delete_inode = ext4_delete_inode, .put_super = ext4_put_super, - .write_super = ext4_write_super, .sync_fs = ext4_sync_fs, .freeze_fs = ext4_freeze, .unfreeze_fs = ext4_unfreeze, @@ -1007,6 +1026,25 @@ static const struct super_operations ext4_sops = { .bdev_try_to_free_page = bdev_try_to_free_page, }; +static const struct super_operations ext4_nojournal_sops = { + .alloc_inode = ext4_alloc_inode, + .destroy_inode = ext4_destroy_inode, + .write_inode = ext4_write_inode, + .dirty_inode = ext4_dirty_inode, + .delete_inode = ext4_delete_inode, + .write_super = ext4_write_super, + .put_super = ext4_put_super, + .statfs = ext4_statfs, + .remount_fs = ext4_remount, + .clear_inode = ext4_clear_inode, + .show_options = ext4_show_options, +#ifdef CONFIG_QUOTA + .quota_read = ext4_quota_read, + .quota_write = ext4_quota_write, +#endif + .bdev_try_to_free_page = bdev_try_to_free_page, +}; + static const struct export_operations ext4_export_ops = { .fh_to_dentry = ext4_fh_to_dentry, .fh_to_parent = ext4_fh_to_parent, @@ -1023,12 +1061,13 @@ enum { Opt_journal_update, Opt_journal_dev, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, - Opt_data_err_abort, Opt_data_err_ignore, + Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_stripe, Opt_delalloc, Opt_nodelalloc, + Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio }; @@ -1069,6 +1108,7 @@ static const match_table_t tokens = { {Opt_data_writeback, "data=writeback"}, {Opt_data_err_abort, "data_err=abort"}, {Opt_data_err_ignore, "data_err=ignore"}, + {Opt_mb_history_length, "mb_history_length=%u"}, {Opt_offusrjquota, "usrjquota="}, {Opt_usrjquota, "usrjquota=%s"}, {Opt_offgrpjquota, "grpjquota="}, @@ -1087,6 +1127,8 @@ static const match_table_t tokens = { {Opt_resize, "resize"}, {Opt_delalloc, "delalloc"}, {Opt_nodelalloc, "nodelalloc"}, + {Opt_block_validity, "block_validity"}, + {Opt_noblock_validity, "noblock_validity"}, {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, {Opt_journal_ioprio, "journal_ioprio=%u"}, {Opt_auto_da_alloc, "auto_da_alloc=%u"}, @@ -1102,8 +1144,9 @@ static ext4_fsblk_t get_sb_block(void **data) if (!options || strncmp(options, "sb=", 3) != 0) return 1; /* Default location */ + options += 3; - /*todo: use simple_strtoll with >32bit ext4 */ + /* TODO: use simple_strtoll with >32bit ext4 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", @@ -1113,6 +1156,7 @@ static ext4_fsblk_t get_sb_block(void **data) if (*options == ',') options++; *data = (void *) options; + return sb_block; } @@ -1206,8 +1250,7 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_user_xattr: case Opt_nouser_xattr: - printk(KERN_ERR "EXT4 (no)user_xattr options " - "not supported\n"); + ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); break; #endif #ifdef CONFIG_EXT4_FS_POSIX_ACL @@ -1220,8 +1263,7 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_acl: case Opt_noacl: - printk(KERN_ERR "EXT4 (no)acl options " - "not supported\n"); + ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); break; #endif case Opt_journal_update: @@ -1231,16 +1273,16 @@ static int parse_options(char *options, struct super_block *sb, user to specify an existing inode to be the journal file. */ if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); return 0; } set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); break; case Opt_journal_dev: if (is_remount) { - printk(KERN_ERR "EXT4-fs: cannot specify " - "journal on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot specify journal on remount"); return 0; } if (match_int(&args[0], &option)) @@ -1294,9 +1336,8 @@ static int parse_options(char *options, struct super_block *sb, if (is_remount) { if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) != data_opt) { - printk(KERN_ERR - "EXT4-fs: cannot change data " - "mode on remount\n"); + ext4_msg(sb, KERN_ERR, + "Cannot change data mode on remount"); return 0; } } else { @@ -1310,6 +1351,13 @@ static int parse_options(char *options, struct super_block *sb, case Opt_data_err_ignore: clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); break; + case Opt_mb_history_length: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + sbi->s_mb_history_max = option; + break; #ifdef CONFIG_QUOTA case Opt_usrjquota: qtype = USRQUOTA; @@ -1319,31 +1367,31 @@ static int parse_options(char *options, struct super_block *sb, set_qf_name: if (sb_any_quota_loaded(sb) && !sbi->s_qf_names[qtype]) { - printk(KERN_ERR - "EXT4-fs: Cannot change journaled " - "quota options when quota turned on.\n"); + ext4_msg(sb, KERN_ERR, + "Cannot change journaled " + "quota options when quota turned on"); return 0; } qname = match_strdup(&args[0]); if (!qname) { - printk(KERN_ERR - "EXT4-fs: not enough memory for " - "storing quotafile name.\n"); + ext4_msg(sb, KERN_ERR, + "Not enough memory for " + "storing quotafile name"); return 0; } if (sbi->s_qf_names[qtype] && strcmp(sbi->s_qf_names[qtype], qname)) { - printk(KERN_ERR - "EXT4-fs: %s quota file already " - "specified.\n", QTYPE2NAME(qtype)); + ext4_msg(sb, KERN_ERR, + "%s quota file already " + "specified", QTYPE2NAME(qtype)); kfree(qname); return 0; } sbi->s_qf_names[qtype] = qname; if (strchr(sbi->s_qf_names[qtype], '/')) { - printk(KERN_ERR - "EXT4-fs: quotafile must be on " - "filesystem root.\n"); + ext4_msg(sb, KERN_ERR, + "quotafile must be on " + "filesystem root"); kfree(sbi->s_qf_names[qtype]); sbi->s_qf_names[qtype] = NULL; return 0; @@ -1358,9 +1406,9 @@ set_qf_name: clear_qf_name: if (sb_any_quota_loaded(sb) && sbi->s_qf_names[qtype]) { - printk(KERN_ERR "EXT4-fs: Cannot change " + ext4_msg(sb, KERN_ERR, "Cannot change " "journaled quota options when " - "quota turned on.\n"); + "quota turned on"); return 0; } /* @@ -1377,9 +1425,9 @@ clear_qf_name: set_qf_format: if (sb_any_quota_loaded(sb) && sbi->s_jquota_fmt != qfmt) { - printk(KERN_ERR "EXT4-fs: Cannot change " + ext4_msg(sb, KERN_ERR, "Cannot change " "journaled quota options when " - "quota turned on.\n"); + "quota turned on"); return 0; } sbi->s_jquota_fmt = qfmt; @@ -1395,8 +1443,8 @@ set_qf_format: break; case Opt_noquota: if (sb_any_quota_loaded(sb)) { - printk(KERN_ERR "EXT4-fs: Cannot change quota " - "options when quota turned on.\n"); + ext4_msg(sb, KERN_ERR, "Cannot change quota " + "options when quota turned on"); return 0; } clear_opt(sbi->s_mount_opt, QUOTA); @@ -1407,8 +1455,8 @@ set_qf_format: case Opt_quota: case Opt_usrquota: case Opt_grpquota: - printk(KERN_ERR - "EXT4-fs: quota options not supported.\n"); + ext4_msg(sb, KERN_ERR, + "quota options not supported"); break; case Opt_usrjquota: case Opt_grpjquota: @@ -1416,9 +1464,8 @@ set_qf_format: case Opt_offgrpjquota: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: - printk(KERN_ERR - "EXT4-fs: journaled quota options not " - "supported.\n"); + ext4_msg(sb, KERN_ERR, + "journaled quota options not supported"); break; case Opt_noquota: break; @@ -1443,8 +1490,9 @@ set_qf_format: break; case Opt_resize: if (!is_remount) { - printk("EXT4-fs: resize option only available " - "for remount\n"); + ext4_msg(sb, KERN_ERR, + "resize option only available " + "for remount"); return 0; } if (match_int(&args[0], &option) != 0) @@ -1474,14 +1522,21 @@ set_qf_format: case Opt_delalloc: set_opt(sbi->s_mount_opt, DELALLOC); break; + case Opt_block_validity: + set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); + break; + case Opt_noblock_validity: + clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); + break; case Opt_inode_readahead_blks: if (match_int(&args[0], &option)) return 0; if (option < 0 || option > (1 << 30)) return 0; - if (option & (option - 1)) { - printk(KERN_ERR "EXT4-fs: inode_readahead_blks" - " must be a power of 2\n"); + if (!is_power_of_2(option)) { + ext4_msg(sb, KERN_ERR, + "EXT4-fs: inode_readahead_blks" + " must be a power of 2"); return 0; } sbi->s_inode_readahead_blks = option; @@ -1508,9 +1563,9 @@ set_qf_format: set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); break; default: - printk(KERN_ERR - "EXT4-fs: Unrecognized mount option \"%s\" " - "or missing value\n", p); + ext4_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" " + "or missing value", p); return 0; } } @@ -1528,21 +1583,21 @@ set_qf_format: (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || (sbi->s_qf_names[GRPQUOTA] && (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { - printk(KERN_ERR "EXT4-fs: old and new quota " - "format mixing.\n"); + ext4_msg(sb, KERN_ERR, "old and new quota " + "format mixing"); return 0; } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journaled quota format " - "not specified.\n"); + ext4_msg(sb, KERN_ERR, "journaled quota format " + "not specified"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journaled quota format " + ext4_msg(sb, KERN_ERR, "journaled quota format " "specified with no journaling " - "enabled.\n"); + "enabled"); return 0; } } @@ -1557,32 +1612,32 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int res = 0; if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { - printk(KERN_ERR "EXT4-fs warning: revision level too high, " - "forcing read-only mode\n"); + ext4_msg(sb, KERN_ERR, "revision level too high, " + "forcing read-only mode"); res = MS_RDONLY; } if (read_only) return res; if (!(sbi->s_mount_state & EXT4_VALID_FS)) - printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " + "running e2fsck is recommended"); else if ((sbi->s_mount_state & EXT4_ERROR_FS)) - printk(KERN_WARNING - "EXT4-fs warning: mounting fs with errors, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "warning: mounting fs with errors, " + "running e2fsck is recommended"); else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) - printk(KERN_WARNING - "EXT4-fs warning: maximal mount count reached, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "warning: maximal mount count reached, " + "running e2fsck is recommended"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) - printk(KERN_WARNING - "EXT4-fs warning: checktime reached, " - "running e2fsck is recommended\n"); - if (!sbi->s_journal) + ext4_msg(sb, KERN_WARNING, + "warning: checktime reached, " + "running e2fsck is recommended"); + if (!sbi->s_journal) es->s_state &= cpu_to_le16(~EXT4_VALID_FS); if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); @@ -1592,7 +1647,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, if (sbi->s_journal) EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); if (test_opt(sb, DEBUG)) printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " "bpg=%lu, ipg=%lu, mo=%04lx]\n", @@ -1603,11 +1658,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, sbi->s_mount_opt); if (EXT4_SB(sb)->s_journal) { - printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", - sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : + ext4_msg(sb, KERN_INFO, "%s journal on %s", + EXT4_SB(sb)->s_journal->j_inode ? "internal" : "external", EXT4_SB(sb)->s_journal->j_devname); } else { - printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id); + ext4_msg(sb, KERN_INFO, "no journal"); } return res; } @@ -1616,10 +1671,10 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; - struct buffer_head *bh; ext4_group_t flex_group_count; ext4_group_t flex_group; int groups_per_flex = 0; + size_t size; int i; if (!sbi->s_es->s_log_groups_per_flex) { @@ -1634,16 +1689,21 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; - sbi->s_flex_groups = kzalloc(flex_group_count * - sizeof(struct flex_groups), GFP_KERNEL); + size = flex_group_count * sizeof(struct flex_groups); + sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); + if (sbi->s_flex_groups == NULL) { + sbi->s_flex_groups = vmalloc(size); + if (sbi->s_flex_groups) + memset(sbi->s_flex_groups, 0, size); + } if (sbi->s_flex_groups == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory for " - "%u flex groups\n", flex_group_count); + ext4_msg(sb, KERN_ERR, "not enough memory for " + "%u flex groups", flex_group_count); goto failed; } for (i = 0; i < sbi->s_groups_count; i++) { - gdp = ext4_get_group_desc(sb, i, &bh); + gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); atomic_set(&sbi->s_flex_groups[flex_group].free_inodes, @@ -1724,44 +1784,44 @@ static int ext4_check_descriptors(struct super_block *sb) block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u not in group " - "(block %llu)!\n", i, block_bitmap); + "(block %llu)!", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u not in group " - "(block %llu)!\n", i, inode_bitmap); + "(block %llu)!", i, inode_bitmap); return 0; } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u not in group " - "(block %llu)!\n", i, inode_table); + "(block %llu)!", i, inode_table); return 0; } - spin_lock(sb_bgl_lock(sbi, i)); + ext4_lock_group(sb, i); if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { - printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " - "Checksum for group %u failed (%u!=%u)\n", - i, le16_to_cpu(ext4_group_desc_csum(sbi, i, - gdp)), le16_to_cpu(gdp->bg_checksum)); + ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " + "Checksum for group %u failed (%u!=%u)", + i, le16_to_cpu(ext4_group_desc_csum(sbi, i, + gdp)), le16_to_cpu(gdp->bg_checksum)); if (!(sb->s_flags & MS_RDONLY)) { - spin_unlock(sb_bgl_lock(sbi, i)); + ext4_unlock_group(sb, i); return 0; } } - spin_unlock(sb_bgl_lock(sbi, i)); + ext4_unlock_group(sb, i); if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); + sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } @@ -1796,8 +1856,8 @@ static void ext4_orphan_cleanup(struct super_block *sb, } if (bdev_read_only(sb->s_bdev)) { - printk(KERN_ERR "EXT4-fs: write access " - "unavailable, skipping orphan cleanup.\n"); + ext4_msg(sb, KERN_ERR, "write access " + "unavailable, skipping orphan cleanup"); return; } @@ -1811,8 +1871,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, } if (s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", - sb->s_id); + ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs"); sb->s_flags &= ~MS_RDONLY; } #ifdef CONFIG_QUOTA @@ -1823,9 +1882,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, if (EXT4_SB(sb)->s_qf_names[i]) { int ret = ext4_quota_on_mount(sb, i); if (ret < 0) - printk(KERN_ERR - "EXT4-fs: Cannot turn on journaled " - "quota: error %d\n", ret); + ext4_msg(sb, KERN_ERR, + "Cannot turn on journaled " + "quota: error %d", ret); } } #endif @@ -1842,16 +1901,16 @@ static void ext4_orphan_cleanup(struct super_block *sb, list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); vfs_dq_init(inode); if (inode->i_nlink) { - printk(KERN_DEBUG - "%s: truncating inode %lu to %lld bytes\n", + ext4_msg(sb, KERN_DEBUG, + "%s: truncating inode %lu to %lld bytes", __func__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); nr_truncates++; } else { - printk(KERN_DEBUG - "%s: deleting unreferenced inode %lu\n", + ext4_msg(sb, KERN_DEBUG, + "%s: deleting unreferenced inode %lu", __func__, inode->i_ino); jbd_debug(2, "deleting unreferenced inode %lu\n", inode->i_ino); @@ -1863,11 +1922,11 @@ static void ext4_orphan_cleanup(struct super_block *sb, #define PLURAL(x) (x), ((x) == 1) ? "" : "s" if (nr_orphans) - printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", - sb->s_id, PLURAL(nr_orphans)); + ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted", + PLURAL(nr_orphans)); if (nr_truncates) - printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", - sb->s_id, PLURAL(nr_truncates)); + ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up", + PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn quotas off */ for (i = 0; i < MAXQUOTAS; i++) { @@ -1877,6 +1936,7 @@ static void ext4_orphan_cleanup(struct super_block *sb, #endif sb->s_flags = s_flags; /* Restore MS_RDONLY status */ } + /* * Maximal extent format file size. * Resulting logical blkno at s_maxbytes must fit in our on-disk @@ -1927,19 +1987,19 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) loff_t res = EXT4_NDIR_BLOCKS; int meta_blocks; loff_t upper_limit; - /* This is calculated to be the largest file size for a - * dense, bitmapped file such that the total number of - * sectors in the file, including data and all indirect blocks, - * does not exceed 2^48 -1 - * __u32 i_blocks_lo and _u16 i_blocks_high representing the - * total number of 512 bytes blocks of the file + /* This is calculated to be the largest file size for a dense, block + * mapped file such that the file's total number of 512-byte sectors, + * including data and all indirect blocks, does not exceed (2^48 - 1). + * + * __u32 i_blocks_lo and _u16 i_blocks_high represent the total + * number of 512-byte sectors of the file. */ if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { /* - * !has_huge_files or CONFIG_LBD is not enabled - * implies the inode i_block represent total blocks in - * 512 bytes 32 == size of vfs inode i_blocks * 8 + * !has_huge_files or CONFIG_LBD not enabled implies that + * the inode i_block field represents total file blocks in + * 2^32 512-byte sectors == size of vfs inode i_blocks * 8 */ upper_limit = (1LL << 32) - 1; @@ -1981,7 +2041,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) } static ext4_fsblk_t descriptor_loc(struct super_block *sb, - ext4_fsblk_t logical_sb_block, int nr) + ext4_fsblk_t logical_sb_block, int nr) { struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_group_t bg, first_meta_bg; @@ -1995,6 +2055,7 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb, bg = sbi->s_desc_per_block * nr; if (ext4_bg_has_super(sb, bg)) has_super = 1; + return (has_super + ext4_group_first_block_no(sb, bg)); } @@ -2091,8 +2152,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, if (parse_strtoul(buf, 0x40000000, &t)) return -EINVAL; - /* inode_readahead_blks must be a power of 2 */ - if (t & (t-1)) + if (!is_power_of_2(t)) return -EINVAL; sbi->s_inode_readahead_blks = t; @@ -2100,7 +2160,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, } static ssize_t sbi_ui_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) + struct ext4_sb_info *sbi, char *buf) { unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); @@ -2205,7 +2265,6 @@ static struct kobj_type ext4_ktype = { static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) - { struct buffer_head *bh; struct ext4_super_block *es = NULL; @@ -2256,7 +2315,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) { - printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); + ext4_msg(sb, KERN_ERR, "unable to set blocksize"); goto out_fail; } @@ -2272,7 +2331,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (!(bh = sb_bread(sb, logical_sb_block))) { - printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); + ext4_msg(sb, KERN_ERR, "unable to read superblock"); goto out_fail; } /* @@ -2321,6 +2380,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; + sbi->s_mb_history_max = default_mb_history_length; set_opt(sbi->s_mount_opt, BARRIER); @@ -2330,7 +2390,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ set_opt(sbi->s_mount_opt, DELALLOC); - if (!parse_options((char *) data, sb, &journal_devnum, &journal_ioprio, NULL, 0)) goto failed_mount; @@ -2342,9 +2401,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) - printk(KERN_WARNING - "EXT4-fs warning: feature flags set on rev 0 fs, " - "running e2fsck is recommended\n"); + ext4_msg(sb, KERN_WARNING, + "feature flags set on rev 0 fs, " + "running e2fsck is recommended"); /* * Check feature flags regardless of the revision level, since we @@ -2353,16 +2412,18 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); if (features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " - "unsupported optional features (%x).\n", sb->s_id, + ext4_msg(sb, KERN_ERR, + "Couldn't mount because of " + "unsupported optional features (%x)", (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) & ~EXT4_FEATURE_INCOMPAT_SUPP)); goto failed_mount; } features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); if (!(sb->s_flags & MS_RDONLY) && features) { - printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " - "unsupported optional features (%x).\n", sb->s_id, + ext4_msg(sb, KERN_ERR, + "Couldn't mount RDWR because of " + "unsupported optional features (%x)", (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) & ~EXT4_FEATURE_RO_COMPAT_SUPP)); goto failed_mount; @@ -2376,9 +2437,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ if (sizeof(root->i_blocks) < sizeof(u64) && !(sb->s_flags & MS_RDONLY)) { - printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge " + ext4_msg(sb, KERN_ERR, "Filesystem with huge " "files cannot be mounted read-write " - "without CONFIG_LBD.\n", sb->s_id); + "without CONFIG_LBD"); goto failed_mount; } } @@ -2386,17 +2447,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (blocksize < EXT4_MIN_BLOCK_SIZE || blocksize > EXT4_MAX_BLOCK_SIZE) { - printk(KERN_ERR - "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", - blocksize, sb->s_id); + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d", blocksize); goto failed_mount; } if (sb->s_blocksize != blocksize) { - /* Validate the filesystem blocksize */ if (!sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "EXT4-fs: bad block size %d.\n", + ext4_msg(sb, KERN_ERR, "bad block size %d", blocksize); goto failed_mount; } @@ -2406,15 +2465,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) offset = do_div(logical_sb_block, blocksize); bh = sb_bread(sb, logical_sb_block); if (!bh) { - printk(KERN_ERR - "EXT4-fs: Can't read superblock on 2nd try.\n"); + ext4_msg(sb, KERN_ERR, + "Can't read superblock on 2nd try"); goto failed_mount; } es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - printk(KERN_ERR - "EXT4-fs: Magic mismatch, very weird !\n"); + ext4_msg(sb, KERN_ERR, + "Magic mismatch, very weird!"); goto failed_mount; } } @@ -2432,30 +2491,33 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { - printk(KERN_ERR - "EXT4-fs: unsupported inode size: %d\n", + ext4_msg(sb, KERN_ERR, + "unsupported inode size: %d", sbi->s_inode_size); goto failed_mount; } if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); } + sbi->s_desc_size = le16_to_cpu(es->s_desc_size); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || sbi->s_desc_size > EXT4_MAX_DESC_SIZE || !is_power_of_2(sbi->s_desc_size)) { - printk(KERN_ERR - "EXT4-fs: unsupported descriptor size %lu\n", + ext4_msg(sb, KERN_ERR, + "unsupported descriptor size %lu", sbi->s_desc_size); goto failed_mount; } } else sbi->s_desc_size = EXT4_MIN_DESC_SIZE; + sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) goto cantfind_ext4; + sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) goto cantfind_ext4; @@ -2466,6 +2528,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); + for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; @@ -2483,25 +2546,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (sbi->s_blocks_per_group > blocksize * 8) { - printk(KERN_ERR - "EXT4-fs: #blocks per group too big: %lu\n", + ext4_msg(sb, KERN_ERR, + "#blocks per group too big: %lu", sbi->s_blocks_per_group); goto failed_mount; } if (sbi->s_inodes_per_group > blocksize * 8) { - printk(KERN_ERR - "EXT4-fs: #inodes per group too big: %lu\n", + ext4_msg(sb, KERN_ERR, + "#inodes per group too big: %lu", sbi->s_inodes_per_group); goto failed_mount; } if (ext4_blocks_count(es) > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to mount safely\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "filesystem" + " too large to mount safely"); if (sizeof(sector_t) < 8) - printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " - "enabled\n"); + ext4_msg(sb, KERN_WARNING, "CONFIG_LBD not enabled"); goto failed_mount; } @@ -2511,21 +2573,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* check blocks count against device size */ blocks_count = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits; if (blocks_count && ext4_blocks_count(es) > blocks_count) { - printk(KERN_WARNING "EXT4-fs: bad geometry: block count %llu " - "exceeds size of device (%llu blocks)\n", + ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " + "exceeds size of device (%llu blocks)", ext4_blocks_count(es), blocks_count); goto failed_mount; } - /* - * It makes no sense for the first data block to be beyond the end - * of the filesystem. - */ - if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { - printk(KERN_WARNING "EXT4-fs: bad geometry: first data" - "block %u is beyond end of filesystem (%llu)\n", - le32_to_cpu(es->s_first_data_block), - ext4_blocks_count(es)); + /* + * It makes no sense for the first data block to be beyond the end + * of the filesystem. + */ + if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data" + "block %u is beyond end of filesystem (%llu)", + le32_to_cpu(es->s_first_data_block), + ext4_blocks_count(es)); goto failed_mount; } blocks_count = (ext4_blocks_count(es) - @@ -2533,9 +2595,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) EXT4_BLOCKS_PER_GROUP(sb) - 1); do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - printk(KERN_WARNING "EXT4-fs: groups count too large: %u " + ext4_msg(sb, KERN_WARNING, "groups count too large: %u " "(block count %llu, first data block %u, " - "blocks per group %lu)\n", sbi->s_groups_count, + "blocks per group %lu)", sbi->s_groups_count, ext4_blocks_count(es), le32_to_cpu(es->s_first_data_block), EXT4_BLOCKS_PER_GROUP(sb)); @@ -2547,7 +2609,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory\n"); + ext4_msg(sb, KERN_ERR, "not enough memory"); goto failed_mount; } @@ -2562,21 +2624,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) block = descriptor_loc(sb, logical_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); if (!sbi->s_group_desc[i]) { - printk(KERN_ERR "EXT4-fs: " - "can't read group descriptor %d\n", i); + ext4_msg(sb, KERN_ERR, + "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } } if (!ext4_check_descriptors(sb)) { - printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); + ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); goto failed_mount2; } if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) if (!ext4_fill_flex_info(sb)) { - printk(KERN_ERR - "EXT4-fs: unable to initialize " - "flex_bg meta info!\n"); + ext4_msg(sb, KERN_ERR, + "unable to initialize " + "flex_bg meta info!"); goto failed_mount2; } @@ -2598,7 +2660,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); } if (err) { - printk(KERN_ERR "EXT4-fs: insufficient memory\n"); + ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; } @@ -2607,7 +2669,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* * set up enough so that it can read an inode */ - sb->s_op = &ext4_sops; + if (!test_opt(sb, NOLOAD) && + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + sb->s_op = &ext4_sops; + else + sb->s_op = &ext4_nojournal_sops; sb->s_export_op = &ext4_export_ops; sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA @@ -2615,6 +2681,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sb->dq_op = &ext4_quota_operations; #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + mutex_init(&sbi->s_orphan_lock); + mutex_init(&sbi->s_resize_lock); sb->s_root = NULL; @@ -2632,13 +2700,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount3; if (!(sb->s_flags & MS_RDONLY) && EXT4_SB(sb)->s_journal->j_failed_commit) { - printk(KERN_CRIT "EXT4-fs error (device %s): " + ext4_msg(sb, KERN_CRIT, "error: " "ext4_fill_super: Journal transaction " - "%u is corrupt\n", sb->s_id, + "%u is corrupt", EXT4_SB(sb)->s_journal->j_failed_commit); if (test_opt(sb, ERRORS_RO)) { - printk(KERN_CRIT - "Mounting filesystem read-only\n"); + ext4_msg(sb, KERN_CRIT, + "Mounting filesystem read-only"); sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); @@ -2646,14 +2714,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (test_opt(sb, ERRORS_PANIC)) { EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); goto failed_mount4; } } } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { - printk(KERN_ERR "EXT4-fs: required journal recovery " - "suppressed and not mounted read-only\n"); + ext4_msg(sb, KERN_ERR, "required journal recovery " + "suppressed and not mounted read-only"); goto failed_mount4; } else { clear_opt(sbi->s_mount_opt, DATA_FLAGS); @@ -2666,7 +2734,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (ext4_blocks_count(es) > 0xffffffffULL && !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_64BIT)) { - printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n"); + ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); goto failed_mount4; } @@ -2704,8 +2772,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) case EXT4_MOUNT_WRITEBACK_DATA: if (!jbd2_journal_check_available_features (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - printk(KERN_ERR "EXT4-fs: Journal does not support " - "requested data journaling mode\n"); + ext4_msg(sb, KERN_ERR, "Journal does not support " + "requested data journaling mode"); goto failed_mount4; } default: @@ -2717,8 +2785,8 @@ no_journal: if (test_opt(sb, NOBH)) { if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { - printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " - "its supported only with writeback mode\n"); + ext4_msg(sb, KERN_WARNING, "Ignoring nobh option - " + "its supported only with writeback mode"); clear_opt(sbi->s_mount_opt, NOBH); } } @@ -2729,18 +2797,18 @@ no_journal: root = ext4_iget(sb, EXT4_ROOT_INO); if (IS_ERR(root)) { - printk(KERN_ERR "EXT4-fs: get root inode failed\n"); + ext4_msg(sb, KERN_ERR, "get root inode failed"); ret = PTR_ERR(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { iput(root); - printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); + ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); goto failed_mount4; } sb->s_root = d_alloc_root(root); if (!sb->s_root) { - printk(KERN_ERR "EXT4-fs: get root dentry failed\n"); + ext4_msg(sb, KERN_ERR, "get root dentry failed"); iput(root); ret = -ENOMEM; goto failed_mount4; @@ -2769,22 +2837,29 @@ no_journal: sbi->s_inode_size) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; - printk(KERN_INFO "EXT4-fs: required extra inode space not" - "available.\n"); + ext4_msg(sb, KERN_INFO, "required extra inode space not" + "available"); } if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { - printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " - "requested data journaling mode\n"); + ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " + "requested data journaling mode"); clear_opt(sbi->s_mount_opt, DELALLOC); } else if (test_opt(sb, DELALLOC)) - printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); + ext4_msg(sb, KERN_INFO, "delayed allocation enabled"); + + err = ext4_setup_system_zone(sb); + if (err) { + ext4_msg(sb, KERN_ERR, "failed to initialize system " + "zone (%d)\n", err); + goto failed_mount4; + } ext4_ext_init(sb); err = ext4_mb_init(sb, needs_recovery); if (err) { - printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", - err); + ext4_msg(sb, KERN_ERR, "failed to initalize mballoc (%d)", + err); goto failed_mount4; } @@ -2798,19 +2873,11 @@ no_journal: goto failed_mount4; }; - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock - * in numerous places. Here we just pop the lock - it's relatively - * harmless, because we are now ready to accept write_super() requests, - * and aviro says that's the only reason for hanging onto the - * superblock lock. - */ EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) { - printk(KERN_INFO "EXT4-fs: recovery complete.\n"); + ext4_msg(sb, KERN_INFO, "recovery complete"); ext4_mark_recovery_complete(sb, es); } if (EXT4_SB(sb)->s_journal) { @@ -2823,25 +2890,30 @@ no_journal: } else descr = "out journal"; - printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n", - sb->s_id, descr); + ext4_msg(sb, KERN_INFO, "mounted filesystem with%s", descr); lock_kernel(); return 0; cantfind_ext4: if (!silent) - printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", - sb->s_id); + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); goto failed_mount; failed_mount4: - printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "mount failed"); + ext4_release_system_zone(sb); if (sbi->s_journal) { jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; } failed_mount3: + if (sbi->s_flex_groups) { + if (is_vmalloc_addr(sbi->s_flex_groups)) + vfree(sbi->s_flex_groups); + else + kfree(sbi->s_flex_groups); + } percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -2862,6 +2934,7 @@ failed_mount: brelse(bh); out_fail: sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); kfree(sbi); lock_kernel(); return ret; @@ -2906,27 +2979,27 @@ static journal_t *ext4_get_journal(struct super_block *sb, journal_inode = ext4_iget(sb, journal_inum); if (IS_ERR(journal_inode)) { - printk(KERN_ERR "EXT4-fs: no journal found.\n"); + ext4_msg(sb, KERN_ERR, "no journal found"); return NULL; } if (!journal_inode->i_nlink) { make_bad_inode(journal_inode); iput(journal_inode); - printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); + ext4_msg(sb, KERN_ERR, "journal inode is deleted"); return NULL; } jbd_debug(2, "Journal inode found at %p: %lld bytes\n", journal_inode, journal_inode->i_size); if (!S_ISREG(journal_inode->i_mode)) { - printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); + ext4_msg(sb, KERN_ERR, "invalid journal inode"); iput(journal_inode); return NULL; } journal = jbd2_journal_init_inode(journal_inode); if (!journal) { - printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); + ext4_msg(sb, KERN_ERR, "Could not load journal inode"); iput(journal_inode); return NULL; } @@ -2950,13 +3023,13 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)); - bdev = ext4_blkdev_get(j_dev); + bdev = ext4_blkdev_get(j_dev, sb); if (bdev == NULL) return NULL; if (bd_claim(bdev, sb)) { - printk(KERN_ERR - "EXT4-fs: failed to claim external journal device.\n"); + ext4_msg(sb, KERN_ERR, + "failed to claim external journal device"); blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } @@ -2964,8 +3037,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, blocksize = sb->s_blocksize; hblock = bdev_hardsect_size(bdev); if (blocksize < hblock) { - printk(KERN_ERR - "EXT4-fs: blocksize too small for journal device.\n"); + ext4_msg(sb, KERN_ERR, + "blocksize too small for journal device"); goto out_bdev; } @@ -2973,8 +3046,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, offset = EXT4_MIN_BLOCK_SIZE % blocksize; set_blocksize(bdev, blocksize); if (!(bh = __bread(bdev, sb_block, blocksize))) { - printk(KERN_ERR "EXT4-fs: couldn't read superblock of " - "external journal\n"); + ext4_msg(sb, KERN_ERR, "couldn't read superblock of " + "external journal"); goto out_bdev; } @@ -2982,14 +3055,14 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || !(le32_to_cpu(es->s_feature_incompat) & EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { - printk(KERN_ERR "EXT4-fs: external journal has " - "bad superblock\n"); + ext4_msg(sb, KERN_ERR, "external journal has " + "bad superblock"); brelse(bh); goto out_bdev; } if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { - printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); + ext4_msg(sb, KERN_ERR, "journal UUID does not match"); brelse(bh); goto out_bdev; } @@ -3001,25 +3074,26 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, journal = jbd2_journal_init_dev(bdev, sb->s_bdev, start, len, blocksize); if (!journal) { - printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); + ext4_msg(sb, KERN_ERR, "failed to create device journal"); goto out_bdev; } journal->j_private = sb; ll_rw_block(READ, 1, &journal->j_sb_buffer); wait_on_buffer(journal->j_sb_buffer); if (!buffer_uptodate(journal->j_sb_buffer)) { - printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); + ext4_msg(sb, KERN_ERR, "I/O error on journal device"); goto out_journal; } if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT4-fs: External journal has more than one " - "user (unsupported) - %d\n", + ext4_msg(sb, KERN_ERR, "External journal has more than one " + "user (unsupported) - %d", be32_to_cpu(journal->j_superblock->s_nr_users)); goto out_journal; } EXT4_SB(sb)->journal_bdev = bdev; ext4_init_journal_params(sb, journal); return journal; + out_journal: jbd2_journal_destroy(journal); out_bdev: @@ -3041,8 +3115,8 @@ static int ext4_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { - printk(KERN_INFO "EXT4-fs: external journal device major/minor " - "numbers have changed\n"); + ext4_msg(sb, KERN_INFO, "external journal device major/minor " + "numbers have changed"); journal_dev = new_decode_dev(journal_devnum); } else journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); @@ -3054,24 +3128,23 @@ static int ext4_load_journal(struct super_block *sb, * crash? For recovery, we need to check in advance whether we * can get read-write access to the device. */ - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { if (sb->s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT4-fs: INFO: recovery " - "required on readonly filesystem.\n"); + ext4_msg(sb, KERN_INFO, "INFO: recovery " + "required on readonly filesystem"); if (really_read_only) { - printk(KERN_ERR "EXT4-fs: write access " - "unavailable, cannot proceed.\n"); + ext4_msg(sb, KERN_ERR, "write access " + "unavailable, cannot proceed"); return -EROFS; } - printk(KERN_INFO "EXT4-fs: write access will " - "be enabled during recovery.\n"); + ext4_msg(sb, KERN_INFO, "write access will " + "be enabled during recovery"); } } if (journal_inum && journal_dev) { - printk(KERN_ERR "EXT4-fs: filesystem has both journal " - "and inode journals!\n"); + ext4_msg(sb, KERN_ERR, "filesystem has both journal " + "and inode journals!"); return -EINVAL; } @@ -3084,14 +3157,14 @@ static int ext4_load_journal(struct super_block *sb, } if (journal->j_flags & JBD2_BARRIER) - printk(KERN_INFO "EXT4-fs: barriers enabled\n"); + ext4_msg(sb, KERN_INFO, "barriers enabled"); else - printk(KERN_INFO "EXT4-fs: barriers disabled\n"); + ext4_msg(sb, KERN_INFO, "barriers disabled"); if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = jbd2_journal_update_format(journal); if (err) { - printk(KERN_ERR "EXT4-fs: error updating journal.\n"); + ext4_msg(sb, KERN_ERR, "error updating journal"); jbd2_journal_destroy(journal); return err; } @@ -3103,7 +3176,7 @@ static int ext4_load_journal(struct super_block *sb, err = jbd2_journal_load(journal); if (err) { - printk(KERN_ERR "EXT4-fs: error loading journal.\n"); + ext4_msg(sb, KERN_ERR, "error loading journal"); jbd2_journal_destroy(journal); return err; } @@ -3114,18 +3187,17 @@ static int ext4_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { es->s_journal_dev = cpu_to_le32(journal_devnum); - sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } return 0; } -static int ext4_commit_super(struct super_block *sb, - struct ext4_super_block *es, int sync) +static int ext4_commit_super(struct super_block *sb, int sync) { + struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; int error = 0; @@ -3140,8 +3212,8 @@ static int ext4_commit_super(struct super_block *sb, * be remapped. Nothing we can do but to retry the * write and hope for the best. */ - printk(KERN_ERR "EXT4-fs: previous I/O error to " - "superblock detected for %s.\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "previous I/O error to " + "superblock detected"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } @@ -3154,7 +3226,7 @@ static int ext4_commit_super(struct super_block *sb, &EXT4_SB(sb)->s_freeblocks_counter)); es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive( &EXT4_SB(sb)->s_freeinodes_counter)); - + sb->s_dirt = 0; BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); if (sync) { @@ -3164,8 +3236,8 @@ static int ext4_commit_super(struct super_block *sb, error = buffer_write_io_error(sbh); if (error) { - printk(KERN_ERR "EXT4-fs: I/O error while writing " - "superblock for %s.\n", sb->s_id); + ext4_msg(sb, KERN_ERR, "I/O error while writing " + "superblock"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } @@ -3173,7 +3245,6 @@ static int ext4_commit_super(struct super_block *sb, return error; } - /* * Have we just finished recovery? If so, and if we are mounting (or * remounting) the filesystem readonly, then we will end up with a @@ -3192,14 +3263,11 @@ static void ext4_mark_recovery_complete(struct super_block *sb, if (jbd2_journal_flush(journal) < 0) goto out; - lock_super(sb); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - sb->s_dirt = 0; - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); } - unlock_super(sb); out: jbd2_journal_unlock_updates(journal); @@ -3238,7 +3306,7 @@ static void ext4_clear_journal_err(struct super_block *sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); jbd2_journal_clear_err(journal); } @@ -3257,29 +3325,15 @@ int ext4_force_commit(struct super_block *sb) return 0; journal = EXT4_SB(sb)->s_journal; - if (journal) { - sb->s_dirt = 0; + if (journal) ret = ext4_journal_force_commit(journal); - } return ret; } -/* - * Ext4 always journals updates to the superblock itself, so we don't - * have to propagate any other updates to the superblock on disk at this - * point. (We can probably nuke this function altogether, and remove - * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...) - */ static void ext4_write_super(struct super_block *sb) { - if (EXT4_SB(sb)->s_journal) { - if (mutex_trylock(&sb->s_lock) != 0) - BUG(); - sb->s_dirt = 0; - } else { - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - } + ext4_commit_super(sb, 1); } static int ext4_sync_fs(struct super_block *sb, int wait) @@ -3288,16 +3342,9 @@ static int ext4_sync_fs(struct super_block *sb, int wait) tid_t target; trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); - sb->s_dirt = 0; - if (EXT4_SB(sb)->s_journal) { - if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, - &target)) { - if (wait) - jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, - target); - } - } else { - ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait); + if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { + if (wait) + jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); } return ret; } @@ -3310,34 +3357,32 @@ static int ext4_freeze(struct super_block *sb) { int error = 0; journal_t *journal; - sb->s_dirt = 0; - if (!(sb->s_flags & MS_RDONLY)) { - journal = EXT4_SB(sb)->s_journal; + if (sb->s_flags & MS_RDONLY) + return 0; - if (journal) { - /* Now we set up the journal barrier. */ - jbd2_journal_lock_updates(journal); + journal = EXT4_SB(sb)->s_journal; - /* - * We don't want to clear needs_recovery flag when we - * failed to flush the journal. - */ - error = jbd2_journal_flush(journal); - if (error < 0) - goto out; - } + /* Now we set up the journal barrier. */ + jbd2_journal_lock_updates(journal); - /* Journal blocked and flushed, clear needs_recovery flag. */ - EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - if (error) - goto out; + /* + * Don't clear the needs_recovery flag if we failed to flush + * the journal. + */ + error = jbd2_journal_flush(journal); + if (error < 0) { + out: + jbd2_journal_unlock_updates(journal); + return error; } + + /* Journal blocked and flushed, clear needs_recovery flag. */ + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + error = ext4_commit_super(sb, 1); + if (error) + goto out; return 0; -out: - jbd2_journal_unlock_updates(journal); - return error; } /* @@ -3346,14 +3391,15 @@ out: */ static int ext4_unfreeze(struct super_block *sb) { - if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) { - lock_super(sb); - /* Reser the needs_recovery flag before the fs is unlocked. */ - EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); - unlock_super(sb); - jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); - } + if (sb->s_flags & MS_RDONLY) + return 0; + + lock_super(sb); + /* Reset the needs_recovery flag before the fs is unlocked. */ + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_commit_super(sb, 1); + unlock_super(sb); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); return 0; } @@ -3432,22 +3478,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) (sbi->s_mount_state & EXT4_VALID_FS)) es->s_state = cpu_to_le16(sbi->s_mount_state); - /* - * We have to unlock super so that we can wait for - * transactions. - */ - if (sbi->s_journal) { - unlock_super(sb); + if (sbi->s_journal) ext4_mark_recovery_complete(sb, es); - lock_super(sb); - } } else { int ret; if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP))) { - printk(KERN_WARNING "EXT4-fs: %s: couldn't " + ext4_msg(sb, KERN_WARNING, "couldn't " "remount RDWR because of unsupported " - "optional features (%x).\n", sb->s_id, + "optional features (%x)", (le32_to_cpu(sbi->s_es->s_feature_ro_compat) & ~EXT4_FEATURE_RO_COMPAT_SUPP)); err = -EROFS; @@ -3456,17 +3495,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) /* * Make sure the group descriptor checksums - * are sane. If they aren't, refuse to - * remount r/w. + * are sane. If they aren't, refuse to remount r/w. */ for (g = 0; g < sbi->s_groups_count; g++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, g, NULL); if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { - printk(KERN_ERR - "EXT4-fs: ext4_remount: " - "Checksum for group %u failed (%u!=%u)\n", + ext4_msg(sb, KERN_ERR, + "ext4_remount: Checksum for group %u failed (%u!=%u)", g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), le16_to_cpu(gdp->bg_checksum)); err = -EINVAL; @@ -3480,11 +3517,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) * require a full umount/remount for now. */ if (es->s_last_orphan) { - printk(KERN_WARNING "EXT4-fs: %s: couldn't " + ext4_msg(sb, KERN_WARNING, "Couldn't " "remount RDWR because of unprocessed " "orphan inode list. Please " - "umount/remount instead.\n", - sb->s_id); + "umount/remount instead"); err = -EINVAL; goto restore_opts; } @@ -3504,8 +3540,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~MS_RDONLY; } } + ext4_setup_system_zone(sb); if (sbi->s_journal == NULL) - ext4_commit_super(sb, es, 1); + ext4_commit_super(sb, 1); #ifdef CONFIG_QUOTA /* Release old quota file names */ @@ -3515,6 +3552,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) kfree(old_opts.s_qf_names[i]); #endif return 0; + restore_opts: sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.s_mount_opt; @@ -3545,9 +3583,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) if (test_opt(sb, MINIX_DF)) { sbi->s_overhead_last = 0; } else if (sbi->s_blocks_last != ext4_blocks_count(es)) { - ext4_group_t ngroups = sbi->s_groups_count, i; + ext4_group_t i, ngroups = ext4_get_groups_count(sb); ext4_fsblk_t overhead = 0; - smp_rmb(); /* * Compute the overhead (FS structures). This is constant @@ -3599,11 +3636,12 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) le64_to_cpup((void *)es->s_uuid + sizeof(u64)); buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL; buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL; + return 0; } -/* Helper function for writing quotas on sync - we need to start transaction before quota file - * is locked for write. Otherwise the are possible deadlocks: +/* Helper function for writing quotas on sync - we need to start transaction + * before quota file is locked for write. Otherwise the are possible deadlocks: * Process 1 Process 2 * ext4_create() quota_sync() * jbd2_journal_start() write_dquot() @@ -3627,7 +3665,7 @@ static int ext4_write_dquot(struct dquot *dquot) inode = dquot_to_inode(dquot); handle = ext4_journal_start(inode, - EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); @@ -3643,7 +3681,7 @@ static int ext4_acquire_dquot(struct dquot *dquot) handle_t *handle; handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); @@ -3659,7 +3697,7 @@ static int ext4_release_dquot(struct dquot *dquot) handle_t *handle; handle = ext4_journal_start(dquot_to_inode(dquot), - EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) { /* Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); @@ -3707,7 +3745,7 @@ static int ext4_write_info(struct super_block *sb, int type) static int ext4_quota_on_mount(struct super_block *sb, int type) { return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], - EXT4_SB(sb)->s_jquota_fmt, type); + EXT4_SB(sb)->s_jquota_fmt, type); } /* @@ -3738,9 +3776,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ if (path.dentry->d_parent != sb->s_root) - printk(KERN_WARNING - "EXT4-fs: Quota file not on filesystem root. " - "Journaled quota will not work.\n"); + ext4_msg(sb, KERN_WARNING, + "Quota file not on filesystem root. " + "Journaled quota will not work"); } /* @@ -3823,8 +3861,8 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, handle_t *handle = journal_current_handle(); if (EXT4_SB(sb)->s_journal && !handle) { - printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" - " cancelled because transaction is not started.\n", + ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" + " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); return -EIO; } @@ -3878,10 +3916,10 @@ out: #endif -static int ext4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static int ext4_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } static struct file_system_type ext4_fs_type = { @@ -3893,14 +3931,14 @@ static struct file_system_type ext4_fs_type = { }; #ifdef CONFIG_EXT4DEV_COMPAT -static int ext4dev_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static int ext4dev_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data,struct vfsmount *mnt) { - printk(KERN_WARNING "EXT4-fs: Update your userspace programs " - "to mount using ext4\n"); - printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " - "will go away by 2.6.31\n"); - return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); + printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs " + "to mount using ext4\n", dev_name); + printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility " + "will go away by 2.6.31\n", dev_name); + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); } static struct file_system_type ext4dev_fs_type = { @@ -3917,13 +3955,16 @@ static int __init init_ext4_fs(void) { int err; + err = init_ext4_system_zone(); + if (err) + return err; ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); if (!ext4_kset) - return -ENOMEM; + goto out4; ext4_proc_root = proc_mkdir("fs/ext4", NULL); err = init_ext4_mballoc(); if (err) - return err; + goto out3; err = init_ext4_xattr(); if (err) @@ -3948,6 +3989,11 @@ out1: exit_ext4_xattr(); out2: exit_ext4_mballoc(); +out3: + remove_proc_entry("fs/ext4", NULL); + kset_unregister(ext4_kset); +out4: + exit_ext4_system_zone(); return err; } @@ -3962,6 +4008,7 @@ static void __exit exit_ext4_fs(void) exit_ext4_mballoc(); remove_proc_entry("fs/ext4", NULL); kset_unregister(ext4_kset); + exit_ext4_system_zone(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c1462d43e72..941c8425c10 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -30,6 +30,7 @@ #include <linux/dnotify.h> #include <linux/statfs.h> #include <linux/security.h> +#include <linux/ima.h> #include <asm/uaccess.h> @@ -986,6 +987,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) &hugetlbfs_file_operations); if (!file) goto out_dentry; /* inode is already attached */ + ima_counts_get(file); return file; diff --git a/fs/ioctl.c b/fs/ioctl.c index 82d9c42b8ba..286f38dfc6c 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -414,10 +414,6 @@ static int file_ioctl(struct file *filp, unsigned int cmd, switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); - case FS_IOC_FIEMAP: - return ioctl_fiemap(filp, arg); - case FIGETBSZ: - return put_user(inode->i_sb->s_blocksize, p); case FIONREAD: return put_user(i_size_read(inode) - filp->f_pos, p); } @@ -557,6 +553,16 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, error = ioctl_fsthaw(filp); break; + case FS_IOC_FIEMAP: + return ioctl_fiemap(filp, arg); + + case FIGETBSZ: + { + struct inode *inode = filp->f_path.dentry->d_inode; + int __user *p = (int __user *)arg; + return put_user(inode->i_sb->s_blocksize, p); + } + default: if (S_ISREG(filp->f_path.dentry->d_inode->i_mode)) error = file_ioctl(filp, cmd, arg); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 58144102bf2..62be7d294ec 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1781,7 +1781,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) * Journal abort has very specific semantics, which we describe * for journal abort. * - * Two internal function, which provide abort to te jbd layer + * Two internal functions, which provide abort to the jbd layer * itself are here. */ @@ -1879,7 +1879,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) * int jbd2_journal_errno () - returns the journal's error state. * @journal: journal to examine. * - * This is the errno numbet set with jbd2_journal_abort(), the last + * This is the errno number set with jbd2_journal_abort(), the last * time the journal was mounted - if the journal was stopped * without calling abort this will be 0. * @@ -1903,7 +1903,7 @@ int jbd2_journal_errno(journal_t *journal) * int jbd2_journal_clear_err () - clears the journal's error state * @journal: journal to act on. * - * An error must be cleared or Acked to take a FS out of readonly + * An error must be cleared or acked to take a FS out of readonly * mode. */ int jbd2_journal_clear_err(journal_t *journal) @@ -1923,7 +1923,7 @@ int jbd2_journal_clear_err(journal_t *journal) * void jbd2_journal_ack_err() - Ack journal err. * @journal: journal to act on. * - * An error must be cleared or Acked to take a FS out of readonly + * An error must be cleared or acked to take a FS out of readonly * mode. */ void jbd2_journal_ack_err(journal_t *journal) diff --git a/fs/mpage.c b/fs/mpage.c index 680ba60863f..42381bd6543 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -379,7 +379,8 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, struct buffer_head map_bh; unsigned long first_logical_block = 0; - clear_buffer_mapped(&map_bh); + map_bh.b_state = 0; + map_bh.b_size = 0; for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page = list_entry(pages->prev, struct page, lru); @@ -412,7 +413,8 @@ int mpage_readpage(struct page *page, get_block_t get_block) struct buffer_head map_bh; unsigned long first_logical_block = 0; - clear_buffer_mapped(&map_bh); + map_bh.b_state = 0; + map_bh.b_size = 0; bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, &map_bh, &first_logical_block, get_block); if (bio) diff --git a/fs/namei.c b/fs/namei.c index 967c3db9272..c82805d088e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -853,7 +853,8 @@ static int __link_path_walk(const char *name, struct nameidata *nd) err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); if (!err) - err = ima_path_check(&nd->path, MAY_EXEC); + err = ima_path_check(&nd->path, MAY_EXEC, + IMA_COUNT_UPDATE); if (err) break; @@ -1515,7 +1516,8 @@ int may_open(struct path *path, int acc_mode, int flag) return error; error = ima_path_check(path, - acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC), + IMA_COUNT_UPDATE); if (error) return error; /* diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b660435978d..bd584bcf1d9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -55,6 +55,7 @@ #include <linux/security.h> #endif /* CONFIG_NFSD_V4 */ #include <linux/jhash.h> +#include <linux/ima.h> #include <asm/uaccess.h> @@ -735,6 +736,8 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, flags, cred); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); + else + ima_counts_get(*filp); out_nfserr: err = nfserrno(host_err); out: @@ -2024,6 +2027,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct dentry *dentry, int acc) { struct inode *inode = dentry->d_inode; + struct path path; int err; if (acc == NFSD_MAY_NOP) @@ -2096,7 +2100,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, if (err == -EACCES && S_ISREG(inode->i_mode) && acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) err = inode_permission(inode, MAY_EXEC); + if (err) + goto nfsd_out; + /* Do integrity (permission) checking now, but defer incrementing + * IMA counts to the actual file open. + */ + path.mnt = exp->ex_path.mnt; + path.dentry = dentry; + err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC), + IMA_COUNT_LEAVE); +nfsd_out: return err? nfserrno(err) : 0; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 3326bbf9ab9..1539e630c47 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2128,9 +2128,15 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, if (copy_from_user(page, buf, count)) goto out_free; + /* Guard against adverse ptrace interaction */ + length = mutex_lock_interruptible(&task->cred_guard_mutex); + if (length < 0) + goto out_free; + length = security_setprocattr(task, (char*)file->f_path.dentry->d_name.name, (void*)page, count); + mutex_unlock(&task->cred_guard_mutex); out_free: free_page((unsigned long) page); out: diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 788850ba4e7..1fbdea4f08e 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL { #ifndef DP_WINDOW_SIZE -/* #include "cyclomz.h" */ -/****************** ****************** *******************/ -/* - * The data types defined below are used in all ZFIRM interface - * data structures. They accomodate differences between HW - * architectures and compilers. - */ - -typedef __u64 ucdouble; /* 64 bits, unsigned */ -typedef __u32 uclong; /* 32 bits, unsigned */ -typedef __u16 ucshort; /* 16 bits, unsigned */ -typedef __u8 ucchar; /* 8 bits, unsigned */ - /* * Memory Window Sizes */ @@ -507,16 +494,20 @@ struct ZFW_CTRL { /* Per card data structure */ struct cyclades_card { - void __iomem *base_addr; - void __iomem *ctl_addr; - int irq; - unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ - unsigned int first_line; /* minor number of first channel on card */ - unsigned int nports; /* Number of ports in the card */ - int bus_index; /* address shift - 0 for ISA, 1 for PCI */ - int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ - spinlock_t card_lock; - struct cyclades_port *ports; + void __iomem *base_addr; + union { + void __iomem *p9050; + struct RUNTIME_9060 __iomem *p9060; + } ctl_addr; + int irq; + unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */ + unsigned int first_line; /* minor number of first channel on card */ + unsigned int nports; /* Number of ports in the card */ + int bus_index; /* address shift - 0 for ISA, 1 for PCI */ + int intr_enabled; /* FW Interrupt flag - 0 disabled, 1 enabled */ + u32 hw_ver; + spinlock_t card_lock; + struct cyclades_port *ports; }; /*************************************** diff --git a/include/linux/ide.h b/include/linux/ide.h index 9fed365a598..867cb68d846 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -26,6 +26,9 @@ #include <asm/io.h> #include <asm/mutex.h> +/* for request_sense */ +#include <linux/cdrom.h> + #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300) # define SUPPORT_VLB_SYNC 0 #else @@ -324,7 +327,6 @@ struct ide_cmd { unsigned int cursg_ofs; struct request *rq; /* copy of request */ - void *special; /* valid_t generally */ }; /* ATAPI packet command flags */ @@ -360,11 +362,7 @@ struct ide_atapi_pc { /* data buffer */ u8 *buf; - /* current buffer position */ - u8 *cur_pos; int buf_size; - /* missing/available data on the current buffer */ - int b_count; /* the corresponding request */ struct request *rq; @@ -377,10 +375,6 @@ struct ide_atapi_pc { */ u8 pc_buf[IDE_PC_BUFFER_SIZE]; - /* idetape only */ - struct idetape_bh *bh; - char *b_data; - unsigned long timeout; }; @@ -593,16 +587,16 @@ struct ide_drive_s { /* callback for packet commands */ int (*pc_callback)(struct ide_drive_s *, int); - void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *); - int (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *, - unsigned int, int); - ide_startstop_t (*irq_handler)(struct ide_drive_s *); unsigned long atapi_flags; struct ide_atapi_pc request_sense_pc; - struct request request_sense_rq; + + /* current sense rq and buffer */ + bool sense_rq_armed; + struct request sense_rq; + struct request_sense sense_data; }; typedef struct ide_drive_s ide_drive_t; @@ -1174,7 +1168,10 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *); -void ide_retry_pc(ide_drive_t *, struct gendisk *); +void ide_retry_pc(ide_drive_t *drive); + +void ide_prep_sense(ide_drive_t *drive, struct request *rq); +int ide_queue_sense_rq(ide_drive_t *drive, void *special); int ide_cd_expiry(ide_drive_t *); diff --git a/include/linux/ima.h b/include/linux/ima.h index 0e2aa45cb0c..b1b827d091a 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -13,14 +13,17 @@ #include <linux/fs.h> struct linux_binprm; +#define IMA_COUNT_UPDATE 1 +#define IMA_COUNT_LEAVE 0 + #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); extern int ima_inode_alloc(struct inode *inode); extern void ima_inode_free(struct inode *inode); -extern int ima_path_check(struct path *path, int mask); +extern int ima_path_check(struct path *path, int mask, int update_counts); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); -extern void ima_shm_check(struct file *file); +extern void ima_counts_get(struct file *file); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -38,7 +41,7 @@ static inline void ima_inode_free(struct inode *inode) return; } -static inline int ima_path_check(struct path *path, int mask) +static inline int ima_path_check(struct path *path, int mask, int update_counts) { return 0; } @@ -53,7 +56,7 @@ static inline int ima_file_mmap(struct file *file, unsigned long prot) return 0; } -static inline void ima_shm_check(struct file *file) +static inline void ima_counts_get(struct file *file) { return; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 889bf99eca6..6646bfc7b89 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -145,8 +145,8 @@ extern struct cred init_cred; .group_leader = &tsk, \ .real_cred = &init_cred, \ .cred = &init_cred, \ - .cred_exec_mutex = \ - __MUTEX_INITIALIZER(tsk.cred_exec_mutex), \ + .cred_guard_mutex = \ + __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 8cc137911b3..3db5d8d3748 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -119,7 +119,7 @@ struct kvm_run { __u32 error_code; } ex; /* KVM_EXIT_IO */ - struct kvm_io { + struct { #define KVM_EXIT_IO_IN 0 #define KVM_EXIT_IO_OUT 1 __u8 direction; @@ -224,10 +224,10 @@ struct kvm_interrupt { /* for KVM_GET_DIRTY_LOG */ struct kvm_dirty_log { __u32 slot; - __u32 padding; + __u32 padding1; union { void __user *dirty_bitmap; /* one bit per page */ - __u64 padding; + __u64 padding2; }; }; @@ -409,6 +409,10 @@ struct kvm_trace_rec { #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 #endif +#ifdef __KVM_HAVE_MSIX +#define KVM_CAP_DEVICE_MSIX 28 +#endif +#define KVM_CAP_ASSIGN_DEV_IRQ 29 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */ #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30 @@ -482,11 +486,18 @@ struct kvm_irq_routing { #define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \ struct kvm_assigned_pci_dev) #define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing) +/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */ #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq) #define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71) #define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \ struct kvm_assigned_pci_dev) +#define KVM_ASSIGN_SET_MSIX_NR \ + _IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr) +#define KVM_ASSIGN_SET_MSIX_ENTRY \ + _IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry) +#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq) /* * ioctls for vcpu fds @@ -577,6 +588,8 @@ struct kvm_debug_guest { #define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18) #define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19) +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + struct kvm_assigned_pci_dev { __u32 assigned_dev_id; __u32 busnr; @@ -587,6 +600,17 @@ struct kvm_assigned_pci_dev { }; }; +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +#define KVM_DEV_IRQ_HOST_MASK 0x00ff +#define KVM_DEV_IRQ_GUEST_MASK 0xff00 + struct kvm_assigned_irq { __u32 assigned_dev_id; __u32 host_irq; @@ -602,9 +626,19 @@ struct kvm_assigned_irq { }; }; -#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) -#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI -#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0) +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 512 +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 894a56e365e..aacc5449f58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -38,6 +38,7 @@ #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_KVMCLOCK_UPDATE 8 +#define KVM_REQ_KICK 9 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 @@ -72,7 +73,6 @@ struct kvm_vcpu { struct mutex mutex; int cpu; struct kvm_run *run; - int guest_mode; unsigned long requests; unsigned long guest_debug; int fpu_active; @@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); +int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); void kvm_free_physmem(struct kvm *kvm); @@ -319,6 +320,13 @@ struct kvm_irq_ack_notifier { void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; +#define KVM_ASSIGNED_MSIX_PENDING 0x1 +struct kvm_guest_msix_entry { + u32 vector; + u16 entry; + u16 flags; +}; + struct kvm_assigned_dev_kernel { struct kvm_irq_ack_notifier ack_notifier; struct work_struct interrupt_work; @@ -326,18 +334,18 @@ struct kvm_assigned_dev_kernel { int assigned_dev_id; int host_busnr; int host_devfn; + unsigned int entries_nr; int host_irq; bool host_irq_disabled; + struct msix_entry *host_msix_entries; int guest_irq; -#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) -#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1) -#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8) -#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9) + struct kvm_guest_msix_entry *guest_msix_entries; unsigned long irq_requested_type; int irq_source_id; int flags; struct pci_dev *dev; struct kvm *kvm; + spinlock_t assigned_dev_lock; }; struct kvm_irq_mask_notifier { @@ -360,6 +368,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); +/* For vcpu->arch.iommu_flags */ +#define KVM_IOMMU_CACHE_COHERENCY 0x1 + #ifdef CONFIG_IOMMU_API int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index 2b8318c83e5..fb46efbeabe 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -40,4 +40,31 @@ typedef unsigned long hfn_t; typedef hfn_t pfn_t; +union kvm_ioapic_redirect_entry { + u64 bits; + struct { + u8 vector; + u8 delivery_mode:3; + u8 dest_mode:1; + u8 delivery_status:1; + u8 polarity:1; + u8 remote_irr:1; + u8 trig_mode:1; + u8 mask:1; + u8 reserve:7; + u8 reserved[4]; + u8 dest_id; + } fields; +}; + +struct kvm_lapic_irq { + u32 vector; + u32 delivery_mode; + u32 dest_mode; + u32 level; + u32 trig_mode; + u32 shorthand; + u32 dest_id; +}; + #endif /* __KVM_TYPES_H__ */ diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h new file mode 100644 index 00000000000..e461b2c3d71 --- /dev/null +++ b/include/linux/lsm_audit.h @@ -0,0 +1,111 @@ +/* + * Common LSM logging functions + * Heavily borrowed from selinux/avc.h + * + * Author : Etienne BASSET <etienne.basset@ensta.org> + * + * All credits to : Stephen Smalley, <sds@epoch.ncsc.mil> + * All BUGS to : Etienne BASSET <etienne.basset@ensta.org> + */ +#ifndef _LSM_COMMON_LOGGING_ +#define _LSM_COMMON_LOGGING_ + +#include <linux/stddef.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/kdev_t.h> +#include <linux/spinlock.h> +#include <linux/init.h> +#include <linux/audit.h> +#include <linux/in6.h> +#include <linux/path.h> +#include <linux/key.h> +#include <linux/skbuff.h> +#include <asm/system.h> + + +/* Auxiliary data to use in generating the audit record. */ +struct common_audit_data { + char type; +#define LSM_AUDIT_DATA_FS 1 +#define LSM_AUDIT_DATA_NET 2 +#define LSM_AUDIT_DATA_CAP 3 +#define LSM_AUDIT_DATA_IPC 4 +#define LSM_AUDIT_DATA_TASK 5 +#define LSM_AUDIT_DATA_KEY 6 + struct task_struct *tsk; + union { + struct { + struct path path; + struct inode *inode; + } fs; + struct { + int netif; + struct sock *sk; + u16 family; + __be16 dport; + __be16 sport; + union { + struct { + __be32 daddr; + __be32 saddr; + } v4; + struct { + struct in6_addr daddr; + struct in6_addr saddr; + } v6; + } fam; + } net; + int cap; + int ipc_id; + struct task_struct *tsk; +#ifdef CONFIG_KEYS + struct { + key_serial_t key; + char *key_desc; + } key_struct; +#endif + } u; + const char *function; + /* this union contains LSM specific data */ + union { + /* SMACK data */ + struct smack_audit_data { + char *subject; + char *object; + char *request; + int result; + } smack_audit_data; + /* SELinux data */ + struct { + u32 ssid; + u32 tsid; + u16 tclass; + u32 requested; + u32 audited; + struct av_decision *avd; + int result; + } selinux_audit_data; + } lsm_priv; + /* these callback will be implemented by a specific LSM */ + void (*lsm_pre_audit)(struct audit_buffer *, void *); + void (*lsm_post_audit)(struct audit_buffer *, void *); +}; + +#define v4info fam.v4 +#define v6info fam.v6 + +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto); + +/* Initialize an LSM audit data structure. */ +#define COMMON_AUDIT_DATA_INIT(_d, _t) \ + { memset((_d), 0, sizeof(struct common_audit_data)); \ + (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } + +void common_lsm_audit(struct common_audit_data *a); + +#endif diff --git a/include/linux/magic.h b/include/linux/magic.h index 5b4e28bcb78..927138cf305 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -9,6 +9,7 @@ #define DEBUGFS_MAGIC 0x64626720 #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 +#define SELINUX_MAGIC 0xf97cff8c #define TMPFS_MAGIC 0x01021994 #define SQUASHFS_MAGIC 0x73717368 #define EFS_SUPER_MAGIC 0x414A53 diff --git a/include/linux/mm.h b/include/linux/mm.h index 9772d6cbfc8..ad613ed66ab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -581,12 +581,10 @@ static inline void set_page_links(struct page *page, enum zone_type zone, */ static inline unsigned long round_hint_to_min(unsigned long hint) { -#ifdef CONFIG_SECURITY hint &= PAGE_MASK; if (((void *)hint != NULL) && (hint < mmap_min_addr)) return PAGE_ALIGN(mmap_min_addr); -#endif return hint; } diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 0f71812d67d..d7d1c41a0b1 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1996,10 +1996,12 @@ #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U 0xC118 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU 0xC11C #define PCI_DEVICE_ID_OXSEMI_16PCI954 0x9501 +#define PCI_DEVICE_ID_OXSEMI_C950 0x950B #define PCI_DEVICE_ID_OXSEMI_16PCI95N 0x9511 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP 0x9513 #define PCI_DEVICE_ID_OXSEMI_16PCI952 0x9521 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP 0x9523 +#define PCI_SUBDEVICE_ID_OXSEMI_C950 0x0001 #define PCI_VENDOR_ID_CHELSIO 0x1425 diff --git a/include/linux/rational.h b/include/linux/rational.h new file mode 100644 index 00000000000..4f532fcd9ee --- /dev/null +++ b/include/linux/rational.h @@ -0,0 +1,19 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com> + * + * helper functions when coping with rational numbers, + * e.g. when calculating optimum numerator/denominator pairs for + * pll configuration taking into account restricted register size + */ + +#ifndef _LINUX_RATIONAL_H +#define _LINUX_RATIONAL_H + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator); + +#endif /* _LINUX_RATIONAL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d1399660b77..42bf2766111 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1261,7 +1261,9 @@ struct task_struct { * credentials (COW) */ const struct cred *cred; /* effective (overridable) subjective task * credentials (COW) */ - struct mutex cred_exec_mutex; /* execve vs ptrace cred calculation mutex */ + struct mutex cred_guard_mutex; /* guard against foreign influences on + * credential calculations + * (notably. ptrace) */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock @@ -1901,6 +1903,7 @@ extern void sched_dead(struct task_struct *p); extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); +extern void __flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); diff --git a/include/linux/security.h b/include/linux/security.h index d5fd6163606..5eff459b383 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2197,6 +2197,8 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { + if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) + return -EACCES; return 0; } diff --git a/include/linux/serial.h b/include/linux/serial.h index 9136cc5608c..e5bb75a6380 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -96,54 +96,76 @@ struct serial_uart_config { /* * Definitions for async_struct (and serial_struct) flags field + * + * Define ASYNCB_* for convenient use with {test,set,clear}_bit. */ -#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes - on the callout port */ -#define ASYNC_FOURPORT 0x0002 /* Set OU1, OUT2 per AST Fourport settings */ -#define ASYNC_SAK 0x0004 /* Secure Attention Key (Orange book) */ -#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */ - -#define ASYNC_SPD_MASK 0x1030 -#define ASYNC_SPD_HI 0x0010 /* Use 56000 instead of 38400 bps */ - -#define ASYNC_SPD_VHI 0x0020 /* Use 115200 instead of 38400 bps */ -#define ASYNC_SPD_CUST 0x0030 /* Use user-specified divisor */ - -#define ASYNC_SKIP_TEST 0x0040 /* Skip UART test during autoconfiguration */ -#define ASYNC_AUTO_IRQ 0x0080 /* Do automatic IRQ during autoconfiguration */ -#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */ -#define ASYNC_PGRP_LOCKOUT 0x0200 /* Lock out cua opens based on pgrp */ -#define ASYNC_CALLOUT_NOHUP 0x0400 /* Don't do hangups for cua device */ - -#define ASYNC_HARDPPS_CD 0x0800 /* Call hardpps when CD goes high */ - -#define ASYNC_SPD_SHI 0x1000 /* Use 230400 instead of 38400 bps */ -#define ASYNC_SPD_WARP 0x1010 /* Use 460800 instead of 38400 bps */ - -#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */ - -#define ASYNC_BUGGY_UART 0x4000 /* This is a buggy UART, skip some safety - * checks. Note: can be dangerous! */ - -#define ASYNC_AUTOPROBE 0x8000 /* Port was autoprobed by PCI or PNP code */ - -#define ASYNC_FLAGS 0x7FFF /* Possible legal async flags */ -#define ASYNC_USR_MASK 0x3430 /* Legal flags that non-privileged - * users can set or reset */ - -/* Internal flags used only by kernel/chr_drv/serial.c */ -#define ASYNC_INITIALIZED 0x80000000 /* Serial port was initialized */ -#define ASYNC_NORMAL_ACTIVE 0x20000000 /* Normal device is active */ -#define ASYNC_BOOT_AUTOCONF 0x10000000 /* Autoconfigure port on bootup */ -#define ASYNC_CLOSING 0x08000000 /* Serial port is closing */ -#define ASYNC_CTS_FLOW 0x04000000 /* Do CTS flow control */ -#define ASYNC_CHECK_CD 0x02000000 /* i.e., CLOCAL */ -#define ASYNC_SHARE_IRQ 0x01000000 /* for multifunction cards - --- no longer used */ -#define ASYNC_CONS_FLOW 0x00800000 /* flow control for console */ - -#define ASYNC_BOOT_ONLYMCA 0x00400000 /* Probe only if MCA bus */ -#define ASYNC_INTERNAL_FLAGS 0xFFC00000 /* Internal flags */ +#define ASYNCB_HUP_NOTIFY 0 /* Notify getty on hangups and closes + * on the callout port */ +#define ASYNCB_FOURPORT 1 /* Set OU1, OUT2 per AST Fourport settings */ +#define ASYNCB_SAK 2 /* Secure Attention Key (Orange book) */ +#define ASYNCB_SPLIT_TERMIOS 3 /* Separate termios for dialin/callout */ +#define ASYNCB_SPD_HI 4 /* Use 56000 instead of 38400 bps */ +#define ASYNCB_SPD_VHI 5 /* Use 115200 instead of 38400 bps */ +#define ASYNCB_SKIP_TEST 6 /* Skip UART test during autoconfiguration */ +#define ASYNCB_AUTO_IRQ 7 /* Do automatic IRQ during + * autoconfiguration */ +#define ASYNCB_SESSION_LOCKOUT 8 /* Lock out cua opens based on session */ +#define ASYNCB_PGRP_LOCKOUT 9 /* Lock out cua opens based on pgrp */ +#define ASYNCB_CALLOUT_NOHUP 10 /* Don't do hangups for cua device */ +#define ASYNCB_HARDPPS_CD 11 /* Call hardpps when CD goes high */ +#define ASYNCB_SPD_SHI 12 /* Use 230400 instead of 38400 bps */ +#define ASYNCB_LOW_LATENCY 13 /* Request low latency behaviour */ +#define ASYNCB_BUGGY_UART 14 /* This is a buggy UART, skip some safety + * checks. Note: can be dangerous! */ +#define ASYNCB_AUTOPROBE 15 /* Port was autoprobed by PCI or PNP code */ +#define ASYNCB_LAST_USER 15 + +/* Internal flags used only by kernel */ +#define ASYNCB_INITIALIZED 31 /* Serial port was initialized */ +#define ASYNCB_NORMAL_ACTIVE 29 /* Normal device is active */ +#define ASYNCB_BOOT_AUTOCONF 28 /* Autoconfigure port on bootup */ +#define ASYNCB_CLOSING 27 /* Serial port is closing */ +#define ASYNCB_CTS_FLOW 26 /* Do CTS flow control */ +#define ASYNCB_CHECK_CD 25 /* i.e., CLOCAL */ +#define ASYNCB_SHARE_IRQ 24 /* for multifunction cards, no longer used */ +#define ASYNCB_CONS_FLOW 23 /* flow control for console */ +#define ASYNCB_BOOT_ONLYMCA 22 /* Probe only if MCA bus */ +#define ASYNCB_FIRST_KERNEL 22 + +#define ASYNC_HUP_NOTIFY (1U << ASYNCB_HUP_NOTIFY) +#define ASYNC_FOURPORT (1U << ASYNCB_FOURPORT) +#define ASYNC_SAK (1U << ASYNCB_SAK) +#define ASYNC_SPLIT_TERMIOS (1U << ASYNCB_SPLIT_TERMIOS) +#define ASYNC_SPD_HI (1U << ASYNCB_SPD_HI) +#define ASYNC_SPD_VHI (1U << ASYNCB_SPD_VHI) +#define ASYNC_SKIP_TEST (1U << ASYNCB_SKIP_TEST) +#define ASYNC_AUTO_IRQ (1U << ASYNCB_AUTO_IRQ) +#define ASYNC_SESSION_LOCKOUT (1U << ASYNCB_SESSION_LOCKOUT) +#define ASYNC_PGRP_LOCKOUT (1U << ASYNCB_PGRP_LOCKOUT) +#define ASYNC_CALLOUT_NOHUP (1U << ASYNCB_CALLOUT_NOHUP) +#define ASYNC_HARDPPS_CD (1U << ASYNCB_HARDPPS_CD) +#define ASYNC_SPD_SHI (1U << ASYNCB_SPD_SHI) +#define ASYNC_LOW_LATENCY (1U << ASYNCB_LOW_LATENCY) +#define ASYNC_BUGGY_UART (1U << ASYNCB_BUGGY_UART) +#define ASYNC_AUTOPROBE (1U << ASYNCB_AUTOPROBE) + +#define ASYNC_FLAGS ((1U << ASYNCB_LAST_USER) - 1) +#define ASYNC_USR_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI| \ + ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY) +#define ASYNC_SPD_CUST (ASYNC_SPD_HI|ASYNC_SPD_VHI) +#define ASYNC_SPD_WARP (ASYNC_SPD_HI|ASYNC_SPD_SHI) +#define ASYNC_SPD_MASK (ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI) + +#define ASYNC_INITIALIZED (1U << ASYNCB_INITIALIZED) +#define ASYNC_NORMAL_ACTIVE (1U << ASYNCB_NORMAL_ACTIVE) +#define ASYNC_BOOT_AUTOCONF (1U << ASYNCB_BOOT_AUTOCONF) +#define ASYNC_CLOSING (1U << ASYNCB_CLOSING) +#define ASYNC_CTS_FLOW (1U << ASYNCB_CTS_FLOW) +#define ASYNC_CHECK_CD (1U << ASYNCB_CHECK_CD) +#define ASYNC_SHARE_IRQ (1U << ASYNCB_SHARE_IRQ) +#define ASYNC_CONS_FLOW (1U << ASYNCB_CONS_FLOW) +#define ASYNC_BOOT_ONLYMCA (1U << ASYNCB_BOOT_ONLYMCA) +#define ASYNC_INTERNAL_FLAGS (~((1U << ASYNCB_FIRST_KERNEL) - 1)) /* * Multiport serial configuration structure --- external structure diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 57a97e52e58..6fd80c4243f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -41,7 +41,8 @@ #define PORT_XSCALE 15 #define PORT_RM9000 16 /* PMC-Sierra RM9xxx internal UART */ #define PORT_OCTEON 17 /* Cavium OCTEON internal UART */ -#define PORT_MAX_8250 17 /* max port ID */ +#define PORT_AR7 18 /* Texas Instruments AR7 internal UART */ +#define PORT_MAX_8250 18 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed @@ -167,6 +168,9 @@ /* MAX3100 */ #define PORT_MAX3100 86 +/* Timberdale UART */ +#define PORT_TIMBUART 87 + #ifdef __KERNEL__ #include <linux/compiler.h> diff --git a/include/linux/tty.h b/include/linux/tty.h index fc39db95499..1488d8c81aa 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -185,7 +185,7 @@ struct tty_port; struct tty_port_operations { /* Return 1 if the carrier is raised */ int (*carrier_raised)(struct tty_port *port); - void (*raise_dtr_rts)(struct tty_port *port); + void (*dtr_rts)(struct tty_port *port, int raise); }; struct tty_port { @@ -201,6 +201,9 @@ struct tty_port { unsigned char *xmit_buf; /* Optional buffer */ int close_delay; /* Close port delay */ int closing_wait; /* Delay for output */ + int drain_delay; /* Set to zero if no pure time + based drain is needed else + set to size of fifo */ }; /* @@ -223,8 +226,11 @@ struct tty_struct { struct tty_driver *driver; const struct tty_operations *ops; int index; - /* The ldisc objects are protected by tty_ldisc_lock at the moment */ - struct tty_ldisc ldisc; + + /* Protects ldisc changes: Lock tty not pty */ + struct mutex ldisc_mutex; + struct tty_ldisc *ldisc; + struct mutex termios_mutex; spinlock_t ctrl_lock; /* Termios values are protected by the termios mutex */ @@ -311,6 +317,7 @@ struct tty_struct { #define TTY_CLOSING 7 /* ->close() in progress */ #define TTY_LDISC 9 /* Line discipline attached */ #define TTY_LDISC_CHANGING 10 /* Line discipline changing */ +#define TTY_LDISC_OPEN 11 /* Line discipline is open */ #define TTY_HW_COOK_OUT 14 /* Hardware can do output cooking */ #define TTY_HW_COOK_IN 15 /* Hardware can do input cooking */ #define TTY_PTY_LOCK 16 /* pty private */ @@ -403,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b); extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *); extern void tty_ldisc_deref(struct tty_ldisc *); extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *); +extern void tty_ldisc_hangup(struct tty_struct *tty); extern const struct file_operations tty_ldiscs_proc_fops; extern void tty_wakeup(struct tty_struct *tty); @@ -425,6 +433,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx, extern void tty_release_dev(struct file *filp); extern int tty_init_termios(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); +extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); + extern struct mutex tty_mutex; extern void tty_write_unlock(struct tty_struct *tty); @@ -438,6 +449,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port); extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); extern int tty_port_carrier_raised(struct tty_port *port); extern void tty_port_raise_dtr_rts(struct tty_port *port); +extern void tty_port_lower_dtr_rts(struct tty_port *port); extern void tty_port_hangup(struct tty_port *port); extern int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index bcba84ea2d8..3566129384a 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -127,7 +127,8 @@ * the line discipline are close to full, and it should somehow * signal that no more characters should be sent to the tty. * - * Optional: Always invoke via tty_throttle(); + * Optional: Always invoke via tty_throttle(), called under the + * termios lock. * * void (*unthrottle)(struct tty_struct * tty); * @@ -135,7 +136,8 @@ * that characters can now be sent to the tty without fear of * overrunning the input buffers of the line disciplines. * - * Optional: Always invoke via tty_unthrottle(); + * Optional: Always invoke via tty_unthrottle(), called under the + * termios lock. * * void (*stop)(struct tty_struct *tty); * diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 625e9e4639c..8cdfed738fe 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -224,8 +224,7 @@ struct usb_serial_driver { /* Called by console with tty = NULL and by tty */ int (*open)(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); - void (*close)(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); + void (*close)(struct usb_serial_port *port); int (*write)(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); /* Called only by the tty layer */ @@ -241,6 +240,10 @@ struct usb_serial_driver { int (*tiocmget)(struct tty_struct *tty, struct file *file); int (*tiocmset)(struct tty_struct *tty, struct file *file, unsigned int set, unsigned int clear); + /* Called by the tty layer for port level work. There may or may not + be an attached tty at this point */ + void (*dtr_rts)(struct usb_serial_port *port, int on); + int (*carrier_raised)(struct usb_serial_port *port); /* USB events */ void (*read_int_callback)(struct urb *urb); void (*write_int_callback)(struct urb *urb); @@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port, struct file *filp); extern int usb_serial_generic_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); -extern void usb_serial_generic_close(struct tty_struct *tty, - struct usb_serial_port *port, struct file *filp); +extern void usb_serial_generic_close(struct usb_serial_port *port); extern int usb_serial_generic_resume(struct usb_serial *serial); extern int usb_serial_generic_write_room(struct tty_struct *tty); extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty); diff --git a/ipc/shm.c b/ipc/shm.c index 42597160048..15dd238e533 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -384,7 +384,6 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) error = PTR_ERR(file); if (IS_ERR(file)) goto no_file; - ima_shm_check(file); id = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); if (id < 0) { @@ -891,7 +890,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) file = alloc_file(path.mnt, path.dentry, f_mode, &shm_file_operations); if (!file) goto out_free; - ima_shm_check(file); + ima_counts_get(file); file->private_data = sfd; file->f_mapping = shp->shm_file->f_mapping; diff --git a/kernel/cred.c b/kernel/cred.c index 3a039189d70..1bb4d7e5d61 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -167,7 +167,7 @@ EXPORT_SYMBOL(prepare_creds); /* * Prepare credentials for current to perform an execve() - * - The caller must hold current->cred_exec_mutex + * - The caller must hold current->cred_guard_mutex */ struct cred *prepare_exec_creds(void) { @@ -276,7 +276,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) struct cred *new; int ret; - mutex_init(&p->cred_exec_mutex); + mutex_init(&p->cred_guard_mutex); if ( #ifdef CONFIG_KEYS diff --git a/kernel/exit.c b/kernel/exit.c index cab535c427b..51d1fe3fb7a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1472,6 +1472,7 @@ static int wait_consider_task(struct task_struct *parent, int ptrace, */ if (*notask_error) *notask_error = ret; + return 0; } if (likely(!ptrace) && unlikely(p->ptrace)) { diff --git a/kernel/module.c b/kernel/module.c index 2383e60fcf3..278e9b6762b 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -73,6 +73,9 @@ DEFINE_MUTEX(module_mutex); EXPORT_SYMBOL_GPL(module_mutex); static LIST_HEAD(modules); +/* Block module loading/unloading? */ +int modules_disabled = 0; + /* Waiting for a module to finish initializing? */ static DECLARE_WAIT_QUEUE_HEAD(module_wq); @@ -778,7 +781,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, char name[MODULE_NAME_LEN]; int ret, forced = 0; - if (!capable(CAP_SYS_MODULE)) + if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; if (strncpy_from_user(name, name_user, MODULE_NAME_LEN-1) < 0) @@ -2338,7 +2341,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, int ret = 0; /* Must have permission */ - if (!capable(CAP_SYS_MODULE)) + if (!capable(CAP_SYS_MODULE) || modules_disabled) return -EPERM; /* Only one module load at a time, please */ diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 2442d140bd9..f6d8b8cb5e3 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -175,10 +175,11 @@ int ptrace_attach(struct task_struct *task) if (same_thread_group(task, current)) goto out; - /* Protect exec's credential calculations against our interference; - * SUID, SGID and LSM creds get determined differently under ptrace. + /* Protect the target's credential calculations against our + * interference; SUID, SGID and LSM creds get determined differently + * under ptrace. */ - retval = mutex_lock_interruptible(&task->cred_exec_mutex); + retval = mutex_lock_interruptible(&task->cred_guard_mutex); if (retval < 0) goto out; @@ -222,7 +223,7 @@ repeat: bad: write_unlock_irqrestore(&tasklist_lock, flags); task_unlock(task); - mutex_unlock(&task->cred_exec_mutex); + mutex_unlock(&task->cred_guard_mutex); out: return retval; } diff --git a/kernel/signal.c b/kernel/signal.c index dba6ae99978..809a228019a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -247,14 +247,19 @@ void flush_sigqueue(struct sigpending *queue) /* * Flush all pending signals for a task. */ +void __flush_signals(struct task_struct *t) +{ + clear_tsk_thread_flag(t, TIF_SIGPENDING); + flush_sigqueue(&t->pending); + flush_sigqueue(&t->signal->shared_pending); +} + void flush_signals(struct task_struct *t) { unsigned long flags; spin_lock_irqsave(&t->sighand->siglock, flags); - clear_tsk_thread_flag(t, TIF_SIGPENDING); - flush_sigqueue(&t->pending); - flush_sigqueue(&t->signal->shared_pending); + __flush_signals(t); spin_unlock_irqrestore(&t->sighand->siglock, flags); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6a463716ecb..944ba03cae1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -114,6 +114,7 @@ static int ngroups_max = NGROUPS_MAX; #ifdef CONFIG_MODULES extern char modprobe_path[]; +extern int modules_disabled; #endif #ifdef CONFIG_CHR_DEV_SG extern int sg_big_buff; @@ -534,6 +535,17 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dostring, .strategy = &sysctl_string, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "modules_disabled", + .data = &modules_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = &proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, #endif #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET) { @@ -1233,7 +1245,6 @@ static struct ctl_table vm_table[] = { .strategy = &sysctl_jiffies, }, #endif -#ifdef CONFIG_SECURITY { .ctl_name = CTL_UNNUMBERED, .procname = "mmap_min_addr", @@ -1242,7 +1253,6 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = &proc_doulongvec_minmax, }, -#endif #ifdef CONFIG_NUMA { .ctl_name = CTL_UNNUMBERED, diff --git a/lib/Kconfig b/lib/Kconfig index 8ade0a7a91e..9960be04cbb 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -10,6 +10,9 @@ menu "Library routines" config BITREVERSE tristate +config RATIONAL + boolean + config GENERIC_FIND_FIRST_BIT bool diff --git a/lib/Makefile b/lib/Makefile index 33a40e40e3e..1f6edefebff 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -50,6 +50,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y) endif obj-$(CONFIG_BITREVERSE) += bitrev.o +obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o diff --git a/lib/rational.c b/lib/rational.c new file mode 100644 index 00000000000..b3c099b5478 --- /dev/null +++ b/lib/rational.c @@ -0,0 +1,62 @@ +/* + * rational fractions + * + * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com> + * + * helper functions when coping with rational numbers + */ + +#include <linux/rational.h> + +/* + * calculate best rational approximation for a given fraction + * taking into account restricted register size, e.g. to find + * appropriate values for a pll with 5 bit denominator and + * 8 bit numerator register fields, trying to set up with a + * frequency ratio of 3.1415, one would say: + * + * rational_best_approximation(31415, 10000, + * (1 << 8) - 1, (1 << 5) - 1, &n, &d); + * + * you may look at given_numerator as a fixed point number, + * with the fractional part size described in given_denominator. + * + * for theoretical background, see: + * http://en.wikipedia.org/wiki/Continued_fraction + */ + +void rational_best_approximation( + unsigned long given_numerator, unsigned long given_denominator, + unsigned long max_numerator, unsigned long max_denominator, + unsigned long *best_numerator, unsigned long *best_denominator) +{ + unsigned long n, d, n0, d0, n1, d1; + n = given_numerator; + d = given_denominator; + n0 = d1 = 0; + n1 = d0 = 1; + for (;;) { + unsigned long t, a; + if ((n1 > max_numerator) || (d1 > max_denominator)) { + n1 = n0; + d1 = d0; + break; + } + if (d == 0) + break; + t = d; + a = n / d; + d = n % d; + n = t; + t = n0 + a * n1; + n0 = n1; + n1 = t; + t = d0 + a * d1; + d0 = d1; + d1 = t; + } + *best_numerator = n1; + *best_denominator = d1; +} + +EXPORT_SYMBOL(rational_best_approximation); diff --git a/mm/Kconfig b/mm/Kconfig index c2b57d81e15..71830ba7b98 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -226,6 +226,25 @@ config HAVE_MLOCKED_PAGE_BIT config MMU_NOTIFIER bool +config DEFAULT_MMAP_MIN_ADDR + int "Low address space to protect from user allocation" + default 4096 + help + This is the portion of low virtual memory which should be protected + from userspace allocation. Keeping a user from writing to low pages + can help reduce the impact of kernel NULL pointer bugs. + + For most ia64, ppc64 and x86 users with lots of address space + a value of 65536 is reasonable and should cause no problems. + On arm and other archs it should not be higher than 32768. + Programs which use vm86 functionality would either need additional + permissions from either the LSM or the capabilities module or have + this protection disabled. + + This value can be changed after boot using the + /proc/sys/vm/mmap_min_addr tunable. + + config NOMMU_INITIAL_TRIM_EXCESS int "Turn on mmap() excess space trimming before booting" depends on !MMU diff --git a/mm/mmap.c b/mm/mmap.c index 6b7b1a95944..2b43fa1aa3c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -87,6 +87,9 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; struct percpu_counter vm_committed_as; +/* amount of vm to protect from userspace access */ +unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; + /* * Check that a process has enough memory to allocate a new virtual * mapping. 0 means there is enough memory for the allocation to diff --git a/mm/nommu.c b/mm/nommu.c index b571ef70742..2fd2ad5da98 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -69,6 +69,9 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; +/* amount of vm to protect from userspace access */ +unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; + atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); diff --git a/mm/shmem.c b/mm/shmem.c index b25f95ce3db..0132fbd45a2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2659,6 +2659,7 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags) if (error) goto close_file; #endif + ima_counts_get(file); return file; close_file: @@ -2684,7 +2685,6 @@ int shmem_zero_setup(struct vm_area_struct *vma) if (IS_ERR(file)) return PTR_ERR(file); - ima_shm_check(file); if (vma->vm_file) fput(vma->vm_file); vma->vm_file = file; diff --git a/security/Kconfig b/security/Kconfig index bb244774e9d..d23c839038f 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -110,28 +110,8 @@ config SECURITY_ROOTPLUG See <http://www.linuxjournal.com/article.php?sid=6279> for more information about this module. - - If you are unsure how to answer this question, answer N. - -config SECURITY_DEFAULT_MMAP_MIN_ADDR - int "Low address space to protect from user allocation" - depends on SECURITY - default 0 - help - This is the portion of low virtual memory which should be protected - from userspace allocation. Keeping a user from writing to low pages - can help reduce the impact of kernel NULL pointer bugs. - - For most ia64, ppc64 and x86 users with lots of address space - a value of 65536 is reasonable and should cause no problems. - On arm and other archs it should not be higher than 32768. - Programs which use vm86 functionality would either need additional - permissions from either the LSM or the capabilities module or have - this protection disabled. - - This value can be changed after boot using the - /proc/sys/vm/mmap_min_addr tunable. + If you are unsure how to answer this question, answer N. source security/selinux/Kconfig source security/smack/Kconfig diff --git a/security/Makefile b/security/Makefile index fa77021d977..c67557cdaa8 100644 --- a/security/Makefile +++ b/security/Makefile @@ -16,6 +16,9 @@ obj-$(CONFIG_SECURITYFS) += inode.o # Must precede capability.o in order to stack properly. obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o +ifeq ($(CONFIG_AUDIT),y) +obj-$(CONFIG_SECURITY_SMACK) += lsm_audit.o +endif obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o diff --git a/security/commoncap.c b/security/commoncap.c index beac0258c2a..48b7e0228fa 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -28,6 +28,28 @@ #include <linux/prctl.h> #include <linux/securebits.h> +/* + * If a non-root user executes a setuid-root binary in + * !secure(SECURE_NOROOT) mode, then we raise capabilities. + * However if fE is also set, then the intent is for only + * the file capabilities to be applied, and the setuid-root + * bit is left on either to change the uid (plausible) or + * to get full privilege on a kernel without file capabilities + * support. So in that case we do not raise capabilities. + * + * Warn if that happens, once per boot. + */ +static void warn_setuid_and_fcaps_mixed(char *fname) +{ + static int warned; + if (!warned) { + printk(KERN_INFO "warning: `%s' has both setuid-root and" + " effective capabilities. Therefore not raising all" + " capabilities.\n", fname); + warned = 1; + } +} + int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { NETLINK_CB(skb).eff_cap = current_cap(); @@ -464,6 +486,15 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) if (!issecure(SECURE_NOROOT)) { /* + * If the legacy file capability is set, then don't set privs + * for a setuid root binary run by a non-root user. Do set it + * for a root user just to cause least surprise to an admin. + */ + if (effective && new->uid != 0 && new->euid == 0) { + warn_setuid_and_fcaps_mixed(bprm->filename); + goto skip; + } + /* * To support inheritance of root-permissions and suid-root * executables under compatibility mode, we override the * capability sets for the file. @@ -478,6 +509,7 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) if (new->euid == 0) effective = true; } +skip: /* Don't let someone trace a set[ug]id/setpcap binary with the revised * credentials unless they have the appropriate permit diff --git a/security/inode.c b/security/inode.c index f3b91bfbe4c..f7496c6a022 100644 --- a/security/inode.c +++ b/security/inode.c @@ -287,7 +287,7 @@ void securityfs_remove(struct dentry *dentry) { struct dentry *parent; - if (!dentry) + if (!dentry || IS_ERR(dentry)) return; parent = dentry->d_parent; diff --git a/security/integrity/ima/ima_audit.c b/security/integrity/ima/ima_audit.c index 1e082bb987b..ff513ff737f 100644 --- a/security/integrity/ima/ima_audit.c +++ b/security/integrity/ima/ima_audit.c @@ -22,18 +22,9 @@ static int ima_audit; static int __init ima_audit_setup(char *str) { unsigned long audit; - int rc, result = 0; - char *op = "ima_audit"; - char *cause; - rc = strict_strtoul(str, 0, &audit); - if (rc || audit > 1) - result = 1; - else - ima_audit = audit; - cause = ima_audit ? "enabled" : "not_enabled"; - integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL, - op, cause, result, 0); + if (!strict_strtoul(str, 0, &audit)) + ima_audit = audit ? 1 : 0; return 1; } __setup("ima_audit=", ima_audit_setup); @@ -50,23 +41,14 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode, ab = audit_log_start(current->audit_context, GFP_KERNEL, audit_msgno); audit_log_format(ab, "integrity: pid=%d uid=%u auid=%u ses=%u", - current->pid, current->cred->uid, + current->pid, current_cred()->uid, audit_get_loginuid(current), audit_get_sessionid(current)); audit_log_task_context(ab); - switch (audit_msgno) { - case AUDIT_INTEGRITY_DATA: - case AUDIT_INTEGRITY_METADATA: - case AUDIT_INTEGRITY_PCR: - case AUDIT_INTEGRITY_STATUS: - audit_log_format(ab, " op=%s cause=%s", op, cause); - break; - case AUDIT_INTEGRITY_HASH: - audit_log_format(ab, " op=%s hash=%s", op, cause); - break; - default: - audit_log_format(ab, " op=%s", op); - } + audit_log_format(ab, " op="); + audit_log_string(ab, op); + audit_log_format(ab, " cause="); + audit_log_string(ab, cause); audit_log_format(ab, " comm="); audit_log_untrustedstring(ab, current->comm); if (fname) { diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 50d572b74ca..63003a63aae 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -103,7 +103,7 @@ int ima_calc_template_hash(int template_len, void *template, char *digest) return rc; } -static void ima_pcrread(int idx, u8 *pcr) +static void __init ima_pcrread(int idx, u8 *pcr) { if (!ima_used_chip) return; @@ -115,7 +115,7 @@ static void ima_pcrread(int idx, u8 *pcr) /* * Calculate the boot aggregate hash */ -int ima_calc_boot_aggregate(char *digest) +int __init ima_calc_boot_aggregate(char *digest) { struct hash_desc desc; struct scatterlist sg; diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 510186f0b72..6bfc7eaebfd 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -15,6 +15,7 @@ * implemenents security file system for reporting * current measurement list and IMA statistics */ +#include <linux/fcntl.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/rculist.h> @@ -283,6 +284,9 @@ static atomic_t policy_opencount = ATOMIC_INIT(1); */ int ima_open_policy(struct inode * inode, struct file * filp) { + /* No point in being allowed to open it if you aren't going to write */ + if (!(filp->f_flags & O_WRONLY)) + return -EACCES; if (atomic_dec_and_test(&policy_opencount)) return 0; return -EBUSY; @@ -315,7 +319,7 @@ static struct file_operations ima_measure_policy_ops = { .release = ima_release_policy }; -int ima_fs_init(void) +int __init ima_fs_init(void) { ima_dir = securityfs_create_dir("ima", NULL); if (IS_ERR(ima_dir)) @@ -349,7 +353,7 @@ int ima_fs_init(void) goto out; ima_policy = securityfs_create_file("policy", - S_IRUSR | S_IRGRP | S_IWUSR, + S_IWUSR, ima_dir, NULL, &ima_measure_policy_ops); if (IS_ERR(ima_policy)) diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c index ec79f1ee992..b8dd693f879 100644 --- a/security/integrity/ima/ima_iint.c +++ b/security/integrity/ima/ima_iint.c @@ -196,7 +196,7 @@ static void init_once(void *foo) kref_set(&iint->refcount, 1); } -void ima_iintcache_init(void) +void __init ima_iintcache_init(void) { iint_cache = kmem_cache_create("iint_cache", sizeof(struct ima_iint_cache), 0, diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index 0b0bb8c978c..a40da7ae590 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -38,7 +38,7 @@ int ima_used_chip; * a different value.) Violations add a zero entry to the measurement * list and extend the aggregate PCR value with ff...ff's. */ -static void ima_add_boot_aggregate(void) +static void __init ima_add_boot_aggregate(void) { struct ima_template_entry *entry; const char *op = "add_boot_aggregate"; @@ -71,7 +71,7 @@ err_out: audit_cause, result, 0); } -int ima_init(void) +int __init ima_init(void) { u8 pcr_i[IMA_DIGEST_SIZE]; int rc; diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index f4e7266f5ae..6f611874d10 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -29,20 +29,8 @@ int ima_initialized; char *ima_hash = "sha1"; static int __init hash_setup(char *str) { - const char *op = "hash_setup"; - const char *hash = "sha1"; - int result = 0; - int audit_info = 0; - - if (strncmp(str, "md5", 3) == 0) { - hash = "md5"; - ima_hash = str; - } else if (strncmp(str, "sha1", 4) != 0) { - hash = "invalid_hash_type"; - result = 1; - } - integrity_audit_msg(AUDIT_INTEGRITY_HASH, NULL, NULL, op, hash, - result, audit_info); + if (strncmp(str, "md5", 3) == 0) + ima_hash = "md5"; return 1; } __setup("ima_hash=", hash_setup); @@ -128,10 +116,6 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, { int rc = 0; - if (IS_ERR(file)) { - pr_info("%s dentry_open failed\n", filename); - return rc; - } iint->opencount++; iint->readcount++; @@ -141,6 +125,15 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, return rc; } +static void ima_update_counts(struct ima_iint_cache *iint, int mask) +{ + iint->opencount++; + if ((mask & MAY_WRITE) || (mask == 0)) + iint->writecount++; + else if (mask & (MAY_READ | MAY_EXEC)) + iint->readcount++; +} + /** * ima_path_check - based on policy, collect/store measurement. * @path: contains a pointer to the path to be measured @@ -156,10 +149,10 @@ static int get_path_measurement(struct ima_iint_cache *iint, struct file *file, * - Opening a file for read when already open for write, * could result in a file measurement error. * - * Return 0 on success, an error code on failure. - * (Based on the results of appraise_measurement().) + * Always return 0 and audit dentry_open failures. + * (Return code will be based upon measurement appraisal.) */ -int ima_path_check(struct path *path, int mask) +int ima_path_check(struct path *path, int mask, int update_counts) { struct inode *inode = path->dentry->d_inode; struct ima_iint_cache *iint; @@ -173,11 +166,8 @@ int ima_path_check(struct path *path, int mask) return 0; mutex_lock(&iint->mutex); - iint->opencount++; - if ((mask & MAY_WRITE) || (mask == 0)) - iint->writecount++; - else if (mask & (MAY_READ | MAY_EXEC)) - iint->readcount++; + if (update_counts) + ima_update_counts(iint, mask); rc = ima_must_measure(iint, inode, MAY_READ, PATH_CHECK); if (rc < 0) @@ -196,7 +186,19 @@ int ima_path_check(struct path *path, int mask) struct dentry *dentry = dget(path->dentry); struct vfsmount *mnt = mntget(path->mnt); - file = dentry_open(dentry, mnt, O_RDONLY, current->cred); + file = dentry_open(dentry, mnt, O_RDONLY | O_LARGEFILE, + current_cred()); + if (IS_ERR(file)) { + int audit_info = 0; + + integrity_audit_msg(AUDIT_INTEGRITY_PCR, inode, + dentry->d_name.name, + "add_measurement", + "dentry_open failed", + 1, audit_info); + file = NULL; + goto out; + } rc = get_path_measurement(iint, file, dentry->d_name.name); } out: @@ -206,6 +208,7 @@ out: kref_put(&iint->refcount, iint_free); return 0; } +EXPORT_SYMBOL_GPL(ima_path_check); static int process_measurement(struct file *file, const unsigned char *filename, int mask, int function) @@ -234,7 +237,16 @@ out: return rc; } -static void opencount_get(struct file *file) +/* + * ima_opens_get - increment file counts + * + * - for IPC shm and shmat file. + * - for nfsd exported files. + * + * Increment the counts for these files to prevent unnecessary + * imbalance messages. + */ +void ima_counts_get(struct file *file) { struct inode *inode = file->f_dentry->d_inode; struct ima_iint_cache *iint; @@ -246,8 +258,14 @@ static void opencount_get(struct file *file) return; mutex_lock(&iint->mutex); iint->opencount++; + if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) + iint->readcount++; + + if (file->f_mode & FMODE_WRITE) + iint->writecount++; mutex_unlock(&iint->mutex); } +EXPORT_SYMBOL_GPL(ima_counts_get); /** * ima_file_mmap - based on policy, collect/store measurement. @@ -272,18 +290,6 @@ int ima_file_mmap(struct file *file, unsigned long prot) return 0; } -/* - * ima_shm_check - IPC shm and shmat create/fput a file - * - * Maintain the opencount for these files to prevent unnecessary - * imbalance messages. - */ -void ima_shm_check(struct file *file) -{ - opencount_get(file); - return; -} - /** * ima_bprm_check - based on policy, collect/store measurement. * @bprm: contains the linux_binprm structure diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index b5291ad5ef5..e1278399b34 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -45,24 +45,30 @@ struct ima_measure_rule_entry { } lsm[MAX_LSM_RULES]; }; -/* Without LSM specific knowledge, the default policy can only be +/* + * Without LSM specific knowledge, the default policy can only be * written in terms of .action, .func, .mask, .fsmagic, and .uid */ + +/* + * The minimum rule set to allow for full TCB coverage. Measures all files + * opened or mmap for exec and everything read by root. Dangerous because + * normal users can easily run the machine out of memory simply building + * and running executables. + */ static struct ima_measure_rule_entry default_rules[] = { - {.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC, - .flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE,.fsmagic = PROC_SUPER_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = SYSFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = DEBUGFS_MAGIC,.flags = IMA_FSMAGIC}, {.action = DONT_MEASURE,.fsmagic = TMPFS_MAGIC,.flags = IMA_FSMAGIC}, - {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC, - .flags = IMA_FSMAGIC}, - {.action = DONT_MEASURE,.fsmagic = 0xF97CFF8C,.flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE,.fsmagic = SECURITYFS_MAGIC,.flags = IMA_FSMAGIC}, + {.action = DONT_MEASURE,.fsmagic = SELINUX_MAGIC,.flags = IMA_FSMAGIC}, {.action = MEASURE,.func = FILE_MMAP,.mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE,.func = BPRM_CHECK,.mask = MAY_EXEC, .flags = IMA_FUNC | IMA_MASK}, {.action = MEASURE,.func = PATH_CHECK,.mask = MAY_READ,.uid = 0, - .flags = IMA_FUNC | IMA_MASK | IMA_UID} + .flags = IMA_FUNC | IMA_MASK | IMA_UID}, }; static LIST_HEAD(measure_default_rules); @@ -71,6 +77,14 @@ static struct list_head *ima_measure; static DEFINE_MUTEX(ima_measure_mutex); +static bool ima_use_tcb __initdata; +static int __init default_policy_setup(char *str) +{ + ima_use_tcb = 1; + return 1; +} +__setup("ima_tcb", default_policy_setup); + /** * ima_match_rules - determine whether an inode matches the measure rule. * @rule: a pointer to a rule @@ -96,7 +110,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule, if ((rule->flags & IMA_UID) && rule->uid != tsk->cred->uid) return false; for (i = 0; i < MAX_LSM_RULES; i++) { - int rc; + int rc = 0; u32 osid, sid; if (!rule->lsm[i].rule) @@ -109,7 +123,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule, security_inode_getsecid(inode, &osid); rc = security_filter_rule_match(osid, rule->lsm[i].type, - AUDIT_EQUAL, + Audit_equal, rule->lsm[i].rule, NULL); break; @@ -119,7 +133,7 @@ static bool ima_match_rules(struct ima_measure_rule_entry *rule, security_task_getsecid(tsk, &sid); rc = security_filter_rule_match(sid, rule->lsm[i].type, - AUDIT_EQUAL, + Audit_equal, rule->lsm[i].rule, NULL); default: @@ -164,11 +178,17 @@ int ima_match_policy(struct inode *inode, enum ima_hooks func, int mask) * ima_measure points to either the measure_default_rules or the * the new measure_policy_rules. */ -void ima_init_policy(void) +void __init ima_init_policy(void) { - int i; + int i, entries; + + /* if !ima_use_tcb set entries = 0 so we load NO default rules */ + if (ima_use_tcb) + entries = ARRAY_SIZE(default_rules); + else + entries = 0; - for (i = 0; i < ARRAY_SIZE(default_rules); i++) + for (i = 0; i < entries; i++) list_add_tail(&default_rules[i].list, &measure_default_rules); ima_measure = &measure_default_rules; } @@ -227,7 +247,7 @@ static int ima_lsm_rule_init(struct ima_measure_rule_entry *entry, entry->lsm[lsm_rule].type = audit_type; result = security_filter_rule_init(entry->lsm[lsm_rule].type, - AUDIT_EQUAL, args, + Audit_equal, args, &entry->lsm[lsm_rule].rule); return result; } diff --git a/security/lsm_audit.c b/security/lsm_audit.c new file mode 100644 index 00000000000..94b868494b3 --- /dev/null +++ b/security/lsm_audit.c @@ -0,0 +1,386 @@ +/* + * common LSM auditing functions + * + * Based on code written for SELinux by : + * Stephen Smalley, <sds@epoch.ncsc.mil> + * James Morris <jmorris@redhat.com> + * Author : Etienne Basset, <etienne.basset@ensta.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <net/sock.h> +#include <linux/un.h> +#include <net/af_unix.h> +#include <linux/audit.h> +#include <linux/ipv6.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/dccp.h> +#include <linux/sctp.h> +#include <linux/lsm_audit.h> + +/** + * ipv4_skb_to_auditdata : fill auditdata from skb + * @skb : the skb + * @ad : the audit data to fill + * @proto : the layer 4 protocol + * + * return 0 on success + */ +int ipv4_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto) +{ + int ret = 0; + struct iphdr *ih; + + ih = ip_hdr(skb); + if (ih == NULL) + return -EINVAL; + + ad->u.net.v4info.saddr = ih->saddr; + ad->u.net.v4info.daddr = ih->daddr; + + if (proto) + *proto = ih->protocol; + /* non initial fragment */ + if (ntohs(ih->frag_off) & IP_OFFSET) + return 0; + + switch (ih->protocol) { + case IPPROTO_TCP: { + struct tcphdr *th = tcp_hdr(skb); + if (th == NULL) + break; + + ad->u.net.sport = th->source; + ad->u.net.dport = th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr *uh = udp_hdr(skb); + if (uh == NULL) + break; + + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; + break; + } + case IPPROTO_DCCP: { + struct dccp_hdr *dh = dccp_hdr(skb); + if (dh == NULL) + break; + + ad->u.net.sport = dh->dccph_sport; + ad->u.net.dport = dh->dccph_dport; + break; + } + case IPPROTO_SCTP: { + struct sctphdr *sh = sctp_hdr(skb); + if (sh == NULL) + break; + ad->u.net.sport = sh->source; + ad->u.net.dport = sh->dest; + break; + } + default: + ret = -EINVAL; + } + return ret; +} +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * ipv6_skb_to_auditdata : fill auditdata from skb + * @skb : the skb + * @ad : the audit data to fill + * @proto : the layer 4 protocol + * + * return 0 on success + */ +int ipv6_skb_to_auditdata(struct sk_buff *skb, + struct common_audit_data *ad, u8 *proto) +{ + int offset, ret = 0; + struct ipv6hdr *ip6; + u8 nexthdr; + + ip6 = ipv6_hdr(skb); + if (ip6 == NULL) + return -EINVAL; + ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr); + ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr); + ret = 0; + /* IPv6 can have several extension header before the Transport header + * skip them */ + offset = skb_network_offset(skb); + offset += sizeof(*ip6); + nexthdr = ip6->nexthdr; + offset = ipv6_skip_exthdr(skb, offset, &nexthdr); + if (offset < 0) + return 0; + if (proto) + *proto = nexthdr; + switch (nexthdr) { + case IPPROTO_TCP: { + struct tcphdr _tcph, *th; + + th = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); + if (th == NULL) + break; + + ad->u.net.sport = th->source; + ad->u.net.dport = th->dest; + break; + } + case IPPROTO_UDP: { + struct udphdr _udph, *uh; + + uh = skb_header_pointer(skb, offset, sizeof(_udph), &_udph); + if (uh == NULL) + break; + + ad->u.net.sport = uh->source; + ad->u.net.dport = uh->dest; + break; + } + case IPPROTO_DCCP: { + struct dccp_hdr _dccph, *dh; + + dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph); + if (dh == NULL) + break; + + ad->u.net.sport = dh->dccph_sport; + ad->u.net.dport = dh->dccph_dport; + break; + } + case IPPROTO_SCTP: { + struct sctphdr _sctph, *sh; + + sh = skb_header_pointer(skb, offset, sizeof(_sctph), &_sctph); + if (sh == NULL) + break; + ad->u.net.sport = sh->source; + ad->u.net.dport = sh->dest; + break; + } + default: + ret = -EINVAL; + } + return ret; +} +#endif + + +static inline void print_ipv6_addr(struct audit_buffer *ab, + struct in6_addr *addr, __be16 port, + char *name1, char *name2) +{ + if (!ipv6_addr_any(addr)) + audit_log_format(ab, " %s=%pI6", name1, addr); + if (port) + audit_log_format(ab, " %s=%d", name2, ntohs(port)); +} + +static inline void print_ipv4_addr(struct audit_buffer *ab, __be32 addr, + __be16 port, char *name1, char *name2) +{ + if (addr) + audit_log_format(ab, " %s=%pI4", name1, &addr); + if (port) + audit_log_format(ab, " %s=%d", name2, ntohs(port)); +} + +/** + * dump_common_audit_data - helper to dump common audit data + * @a : common audit data + * + */ +static void dump_common_audit_data(struct audit_buffer *ab, + struct common_audit_data *a) +{ + struct inode *inode = NULL; + struct task_struct *tsk = current; + + if (a->tsk) + tsk = a->tsk; + if (tsk && tsk->pid) { + audit_log_format(ab, " pid=%d comm=", tsk->pid); + audit_log_untrustedstring(ab, tsk->comm); + } + + switch (a->type) { + case LSM_AUDIT_DATA_IPC: + audit_log_format(ab, " key=%d ", a->u.ipc_id); + break; + case LSM_AUDIT_DATA_CAP: + audit_log_format(ab, " capability=%d ", a->u.cap); + break; + case LSM_AUDIT_DATA_FS: + if (a->u.fs.path.dentry) { + struct dentry *dentry = a->u.fs.path.dentry; + if (a->u.fs.path.mnt) { + audit_log_d_path(ab, "path=", &a->u.fs.path); + } else { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); + } + inode = dentry->d_inode; + } else if (a->u.fs.inode) { + struct dentry *dentry; + inode = a->u.fs.inode; + dentry = d_find_alias(inode); + if (dentry) { + audit_log_format(ab, " name="); + audit_log_untrustedstring(ab, + dentry->d_name.name); + dput(dentry); + } + } + if (inode) + audit_log_format(ab, " dev=%s ino=%lu", + inode->i_sb->s_id, + inode->i_ino); + break; + case LSM_AUDIT_DATA_TASK: + tsk = a->u.tsk; + if (tsk && tsk->pid) { + audit_log_format(ab, " pid=%d comm=", tsk->pid); + audit_log_untrustedstring(ab, tsk->comm); + } + break; + case LSM_AUDIT_DATA_NET: + if (a->u.net.sk) { + struct sock *sk = a->u.net.sk; + struct unix_sock *u; + int len = 0; + char *p = NULL; + + switch (sk->sk_family) { + case AF_INET: { + struct inet_sock *inet = inet_sk(sk); + + print_ipv4_addr(ab, inet->rcv_saddr, + inet->sport, + "laddr", "lport"); + print_ipv4_addr(ab, inet->daddr, + inet->dport, + "faddr", "fport"); + break; + } + case AF_INET6: { + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *inet6 = inet6_sk(sk); + + print_ipv6_addr(ab, &inet6->rcv_saddr, + inet->sport, + "laddr", "lport"); + print_ipv6_addr(ab, &inet6->daddr, + inet->dport, + "faddr", "fport"); + break; + } + case AF_UNIX: + u = unix_sk(sk); + if (u->dentry) { + struct path path = { + .dentry = u->dentry, + .mnt = u->mnt + }; + audit_log_d_path(ab, "path=", &path); + break; + } + if (!u->addr) + break; + len = u->addr->len-sizeof(short); + p = &u->addr->name->sun_path[0]; + audit_log_format(ab, " path="); + if (*p) + audit_log_untrustedstring(ab, p); + else + audit_log_n_hex(ab, p, len); + break; + } + } + + switch (a->u.net.family) { + case AF_INET: + print_ipv4_addr(ab, a->u.net.v4info.saddr, + a->u.net.sport, + "saddr", "src"); + print_ipv4_addr(ab, a->u.net.v4info.daddr, + a->u.net.dport, + "daddr", "dest"); + break; + case AF_INET6: + print_ipv6_addr(ab, &a->u.net.v6info.saddr, + a->u.net.sport, + "saddr", "src"); + print_ipv6_addr(ab, &a->u.net.v6info.daddr, + a->u.net.dport, + "daddr", "dest"); + break; + } + if (a->u.net.netif > 0) { + struct net_device *dev; + + /* NOTE: we always use init's namespace */ + dev = dev_get_by_index(&init_net, a->u.net.netif); + if (dev) { + audit_log_format(ab, " netif=%s", dev->name); + dev_put(dev); + } + } + break; +#ifdef CONFIG_KEYS + case LSM_AUDIT_DATA_KEY: + audit_log_format(ab, " key_serial=%u", a->u.key_struct.key); + if (a->u.key_struct.key_desc) { + audit_log_format(ab, " key_desc="); + audit_log_untrustedstring(ab, a->u.key_struct.key_desc); + } + break; +#endif + } /* switch (a->type) */ +} + +/** + * common_lsm_audit - generic LSM auditing function + * @a: auxiliary audit data + * + * setup the audit buffer for common security information + * uses callback to print LSM specific information + */ +void common_lsm_audit(struct common_audit_data *a) +{ + struct audit_buffer *ab; + + if (a == NULL) + return; + /* we use GFP_ATOMIC so we won't sleep */ + ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); + + if (ab == NULL) + return; + + if (a->lsm_pre_audit) + a->lsm_pre_audit(ab, a); + + dump_common_audit_data(ab, a); + + if (a->lsm_post_audit) + a->lsm_post_audit(ab, a); + + audit_log_end(ab); +} diff --git a/security/root_plug.c b/security/root_plug.c index 40fb4f15e27..2f7ffa67c4d 100644 --- a/security/root_plug.c +++ b/security/root_plug.c @@ -71,18 +71,6 @@ static int rootplug_bprm_check_security (struct linux_binprm *bprm) } static struct security_operations rootplug_security_ops = { - /* Use the capability functions for some of the hooks */ - .ptrace_may_access = cap_ptrace_may_access, - .ptrace_traceme = cap_ptrace_traceme, - .capget = cap_capget, - .capset = cap_capset, - .capable = cap_capable, - - .bprm_set_creds = cap_bprm_set_creds, - - .task_fix_setuid = cap_task_fix_setuid, - .task_prctl = cap_task_prctl, - .bprm_check_security = rootplug_bprm_check_security, }; diff --git a/security/security.c b/security/security.c index 5284255c5cd..dc7674fbfc7 100644 --- a/security/security.c +++ b/security/security.c @@ -26,9 +26,6 @@ extern void security_fixup_ops(struct security_operations *ops); struct security_operations *security_ops; /* Initialized to NULL */ -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR; - static inline int verify(struct security_operations *ops) { /* verify the security_operations structure exists */ diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 7f9b5fac877..b2ab6085983 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -927,7 +927,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, if (denied) { if (flags & AVC_STRICT) rc = -EACCES; - else if (!selinux_enforcing || security_permissive_sid(ssid)) + else if (!selinux_enforcing || (avd->flags & AVD_FLAGS_PERMISSIVE)) avc_update_node(AVC_CALLBACK_GRANT, requested, ssid, tsid, tclass, avd->seqno); else diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2fcad7c33ea..195906bce26 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1980,10 +1980,6 @@ static int selinux_sysctl(ctl_table *table, int op) u32 tsid, sid; int rc; - rc = secondary_ops->sysctl(table, op); - if (rc) - return rc; - sid = current_sid(); rc = selinux_sysctl_get_sid(table, (op == 0001) ? @@ -2375,10 +2371,8 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) { const struct task_security_struct *tsec = current_security(); struct itimerval itimer; - struct sighand_struct *psig; u32 osid, sid; int rc, i; - unsigned long flags; osid = tsec->osid; sid = tsec->sid; @@ -2398,22 +2392,20 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) memset(&itimer, 0, sizeof itimer); for (i = 0; i < 3; i++) do_setitimer(i, &itimer, NULL); - flush_signals(current); spin_lock_irq(¤t->sighand->siglock); - flush_signal_handlers(current, 1); - sigemptyset(¤t->blocked); - recalc_sigpending(); + if (!(current->signal->flags & SIGNAL_GROUP_EXIT)) { + __flush_signals(current); + flush_signal_handlers(current, 1); + sigemptyset(¤t->blocked); + } spin_unlock_irq(¤t->sighand->siglock); } /* Wake up the parent if it is waiting so that it can recheck * wait permission to the new task SID. */ - read_lock_irq(&tasklist_lock); - psig = current->parent->sighand; - spin_lock_irqsave(&psig->siglock, flags); - wake_up_interruptible(¤t->parent->signal->wait_chldexit); - spin_unlock_irqrestore(&psig->siglock, flags); - read_unlock_irq(&tasklist_lock); + read_lock(&tasklist_lock); + wake_up_interruptible(¤t->real_parent->signal->wait_chldexit); + read_unlock(&tasklist_lock); } /* superblock security operations */ diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 5c3434f7626..ca835795a8b 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -8,14 +8,13 @@ #ifndef _SELINUX_SECURITY_H_ #define _SELINUX_SECURITY_H_ +#include <linux/magic.h> #include "flask.h" #define SECSID_NULL 0x00000000 /* unspecified SID */ #define SECSID_WILD 0xffffffff /* wildcard SID */ #define SECCLASS_NULL 0x0000 /* no class */ -#define SELINUX_MAGIC 0xf97cff8c - /* Identify specific policy version changes */ #define POLICYDB_VERSION_BASE 15 #define POLICYDB_VERSION_BOOL 16 @@ -91,9 +90,11 @@ struct av_decision { u32 auditallow; u32 auditdeny; u32 seqno; + u32 flags; }; -int security_permissive_sid(u32 sid); +/* definitions of av_decision.flags */ +#define AVD_FLAGS_PERMISSIVE 0x0001 int security_compute_av(u32 ssid, u32 tsid, u16 tclass, u32 requested, diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index c6875fd3b9d..dd7cc6de77f 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -112,6 +112,8 @@ static struct nlmsg_perm nlmsg_audit_perms[] = { AUDIT_DEL_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_USER, NETLINK_AUDIT_SOCKET__NLMSG_RELAY }, { AUDIT_SIGNAL_INFO, NETLINK_AUDIT_SOCKET__NLMSG_READ }, + { AUDIT_TRIM, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, + { AUDIT_MAKE_EQUIV, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT }, }; diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 2d5136ec3d5..b4fc506e7a8 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -527,10 +527,10 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) goto out2; length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, - "%x %x %x %x %u", + "%x %x %x %x %u %x", avd.allowed, 0xffffffff, avd.auditallow, avd.auditdeny, - avd.seqno); + avd.seqno, avd.flags); out2: kfree(tcon); out: @@ -803,10 +803,6 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, goto out; } - if (count > PAGE_SIZE) { - ret = -EINVAL; - goto out; - } page = (char *)get_zeroed_page(GFP_KERNEL); if (!page) { ret = -ENOMEM; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index deeec6c013a..500e6f78e11 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -410,6 +410,7 @@ static int context_struct_compute_av(struct context *scontext, avd->auditallow = 0; avd->auditdeny = 0xffffffff; avd->seqno = latest_granting; + avd->flags = 0; /* * Check for all the invalid cases. @@ -528,31 +529,6 @@ inval_class: return 0; } -/* - * Given a sid find if the type has the permissive flag set - */ -int security_permissive_sid(u32 sid) -{ - struct context *context; - u32 type; - int rc; - - read_lock(&policy_rwlock); - - context = sidtab_search(&sidtab, sid); - BUG_ON(!context); - - type = context->type; - /* - * we are intentionally using type here, not type-1, the 0th bit may - * someday indicate that we are globally setting permissive in policy. - */ - rc = ebitmap_get_bit(&policydb.permissive_map, type); - - read_unlock(&policy_rwlock); - return rc; -} - static int security_validtrans_handle_fail(struct context *ocontext, struct context *ncontext, struct context *tcontext, @@ -767,6 +743,10 @@ int security_compute_av(u32 ssid, rc = context_struct_compute_av(scontext, tcontext, tclass, requested, avd); + + /* permissive domain? */ + if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + avd->flags |= AVD_FLAGS_PERMISSIVE; out: read_unlock(&policy_rwlock); return rc; diff --git a/security/smack/smack.h b/security/smack/smack.h index 42ef313f985..243bec175be 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -20,6 +20,7 @@ #include <net/netlabel.h> #include <linux/list.h> #include <linux/rculist.h> +#include <linux/lsm_audit.h> /* * Why 23? CIPSO is constrained to 30, so a 32 byte buffer is @@ -179,6 +180,20 @@ struct smack_known { #define MAY_NOT 0 /* + * Number of access types used by Smack (rwxa) + */ +#define SMK_NUM_ACCESS_TYPE 4 + +/* + * Smack audit data; is empty if CONFIG_AUDIT not set + * to save some stack + */ +struct smk_audit_info { +#ifdef CONFIG_AUDIT + struct common_audit_data a; +#endif +}; +/* * These functions are in smack_lsm.c */ struct inode_smack *new_inode_smack(char *); @@ -186,8 +201,8 @@ struct inode_smack *new_inode_smack(char *); /* * These functions are in smack_access.c */ -int smk_access(char *, char *, int); -int smk_curacc(char *, u32); +int smk_access(char *, char *, int, struct smk_audit_info *); +int smk_curacc(char *, u32, struct smk_audit_info *); int smack_to_cipso(const char *, struct smack_cipso *); void smack_from_cipso(u32, char *, char *); char *smack_from_secid(const u32); @@ -237,4 +252,93 @@ static inline char *smk_of_inode(const struct inode *isp) return sip->smk_inode; } +/* + * logging functions + */ +#define SMACK_AUDIT_DENIED 0x1 +#define SMACK_AUDIT_ACCEPT 0x2 +extern int log_policy; + +void smack_log(char *subject_label, char *object_label, + int request, + int result, struct smk_audit_info *auditdata); + +#ifdef CONFIG_AUDIT + +/* + * some inline functions to set up audit data + * they do nothing if CONFIG_AUDIT is not set + * + */ +static inline void smk_ad_init(struct smk_audit_info *a, const char *func, + char type) +{ + memset(a, 0, sizeof(*a)); + a->a.type = type; + a->a.function = func; +} + +static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, + struct task_struct *t) +{ + a->a.u.tsk = t; +} +static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, + struct dentry *d) +{ + a->a.u.fs.path.dentry = d; +} +static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a, + struct vfsmount *m) +{ + a->a.u.fs.path.mnt = m; +} +static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, + struct inode *i) +{ + a->a.u.fs.inode = i; +} +static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a, + struct path p) +{ + a->a.u.fs.path = p; +} +static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a, + struct sock *sk) +{ + a->a.u.net.sk = sk; +} + +#else /* no AUDIT */ + +static inline void smk_ad_init(struct smk_audit_info *a, const char *func, + char type) +{ +} +static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, + struct task_struct *t) +{ +} +static inline void smk_ad_setfield_u_fs_path_dentry(struct smk_audit_info *a, + struct dentry *d) +{ +} +static inline void smk_ad_setfield_u_fs_path_mnt(struct smk_audit_info *a, + struct vfsmount *m) +{ +} +static inline void smk_ad_setfield_u_fs_inode(struct smk_audit_info *a, + struct inode *i) +{ +} +static inline void smk_ad_setfield_u_fs_path(struct smk_audit_info *a, + struct path p) +{ +} +static inline void smk_ad_setfield_u_net_sk(struct smk_audit_info *a, + struct sock *sk) +{ +} +#endif + #endif /* _SECURITY_SMACK_H */ diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index ac0a2707f6d..513dc1aa16d 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -59,11 +59,18 @@ LIST_HEAD(smack_known_list); */ static u32 smack_next_secid = 10; +/* + * what events do we log + * can be overwritten at run-time by /smack/logging + */ +int log_policy = SMACK_AUDIT_DENIED; + /** * smk_access - determine if a subject has a specific access to an object * @subject_label: a pointer to the subject's Smack label * @object_label: a pointer to the object's Smack label * @request: the access requested, in "MAY" format + * @a : a pointer to the audit data * * This function looks up the subject/object pair in the * access rule list and returns 0 if the access is permitted, @@ -78,10 +85,12 @@ static u32 smack_next_secid = 10; * will be on the list, so checking the pointers may be a worthwhile * optimization. */ -int smk_access(char *subject_label, char *object_label, int request) +int smk_access(char *subject_label, char *object_label, int request, + struct smk_audit_info *a) { u32 may = MAY_NOT; struct smack_rule *srp; + int rc = 0; /* * Hardcoded comparisons. @@ -89,8 +98,10 @@ int smk_access(char *subject_label, char *object_label, int request) * A star subject can't access any object. */ if (subject_label == smack_known_star.smk_known || - strcmp(subject_label, smack_known_star.smk_known) == 0) - return -EACCES; + strcmp(subject_label, smack_known_star.smk_known) == 0) { + rc = -EACCES; + goto out_audit; + } /* * An internet object can be accessed by any subject. * Tasks cannot be assigned the internet label. @@ -100,20 +111,20 @@ int smk_access(char *subject_label, char *object_label, int request) subject_label == smack_known_web.smk_known || strcmp(object_label, smack_known_web.smk_known) == 0 || strcmp(subject_label, smack_known_web.smk_known) == 0) - return 0; + goto out_audit; /* * A star object can be accessed by any subject. */ if (object_label == smack_known_star.smk_known || strcmp(object_label, smack_known_star.smk_known) == 0) - return 0; + goto out_audit; /* * An object can be accessed in any way by a subject * with the same label. */ if (subject_label == object_label || strcmp(subject_label, object_label) == 0) - return 0; + goto out_audit; /* * A hat subject can read any object. * A floor object can be read by any subject. @@ -121,10 +132,10 @@ int smk_access(char *subject_label, char *object_label, int request) if ((request & MAY_ANYREAD) == request) { if (object_label == smack_known_floor.smk_known || strcmp(object_label, smack_known_floor.smk_known) == 0) - return 0; + goto out_audit; if (subject_label == smack_known_hat.smk_known || strcmp(subject_label, smack_known_hat.smk_known) == 0) - return 0; + goto out_audit; } /* * Beyond here an explicit relationship is required. @@ -148,28 +159,36 @@ int smk_access(char *subject_label, char *object_label, int request) * This is a bit map operation. */ if ((request & may) == request) - return 0; - - return -EACCES; + goto out_audit; + + rc = -EACCES; +out_audit: +#ifdef CONFIG_AUDIT + if (a) + smack_log(subject_label, object_label, request, rc, a); +#endif + return rc; } /** * smk_curacc - determine if current has a specific access to an object * @obj_label: a pointer to the object's Smack label * @mode: the access requested, in "MAY" format + * @a : common audit data * * This function checks the current subject label/object label pair * in the access rule list and returns 0 if the access is permitted, * non zero otherwise. It allows that current may have the capability * to override the rules. */ -int smk_curacc(char *obj_label, u32 mode) +int smk_curacc(char *obj_label, u32 mode, struct smk_audit_info *a) { int rc; + char *sp = current_security(); - rc = smk_access(current_security(), obj_label, mode); + rc = smk_access(sp, obj_label, mode, NULL); if (rc == 0) - return 0; + goto out_audit; /* * Return if a specific label has been designated as the @@ -177,14 +196,105 @@ int smk_curacc(char *obj_label, u32 mode) * have that label. */ if (smack_onlycap != NULL && smack_onlycap != current->cred->security) - return rc; + goto out_audit; if (capable(CAP_MAC_OVERRIDE)) return 0; +out_audit: +#ifdef CONFIG_AUDIT + if (a) + smack_log(sp, obj_label, mode, rc, a); +#endif return rc; } +#ifdef CONFIG_AUDIT +/** + * smack_str_from_perm : helper to transalate an int to a + * readable string + * @string : the string to fill + * @access : the int + * + */ +static inline void smack_str_from_perm(char *string, int access) +{ + int i = 0; + if (access & MAY_READ) + string[i++] = 'r'; + if (access & MAY_WRITE) + string[i++] = 'w'; + if (access & MAY_EXEC) + string[i++] = 'x'; + if (access & MAY_APPEND) + string[i++] = 'a'; + string[i] = '\0'; +} +/** + * smack_log_callback - SMACK specific information + * will be called by generic audit code + * @ab : the audit_buffer + * @a : audit_data + * + */ +static void smack_log_callback(struct audit_buffer *ab, void *a) +{ + struct common_audit_data *ad = a; + struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data; + audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function, + sad->result ? "denied" : "granted"); + audit_log_format(ab, " subject="); + audit_log_untrustedstring(ab, sad->subject); + audit_log_format(ab, " object="); + audit_log_untrustedstring(ab, sad->object); + audit_log_format(ab, " requested=%s", sad->request); +} + +/** + * smack_log - Audit the granting or denial of permissions. + * @subject_label : smack label of the requester + * @object_label : smack label of the object being accessed + * @request: requested permissions + * @result: result from smk_access + * @a: auxiliary audit data + * + * Audit the granting or denial of permissions in accordance + * with the policy. + */ +void smack_log(char *subject_label, char *object_label, int request, + int result, struct smk_audit_info *ad) +{ + char request_buffer[SMK_NUM_ACCESS_TYPE + 1]; + struct smack_audit_data *sad; + struct common_audit_data *a = &ad->a; + + /* check if we have to log the current event */ + if (result != 0 && (log_policy & SMACK_AUDIT_DENIED) == 0) + return; + if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0) + return; + + if (a->function == NULL) + a->function = "unknown"; + + /* end preparing the audit data */ + sad = &a->lsm_priv.smack_audit_data; + smack_str_from_perm(request_buffer, request); + sad->subject = subject_label; + sad->object = object_label; + sad->request = request_buffer; + sad->result = result; + a->lsm_pre_audit = smack_log_callback; + + common_lsm_audit(a); +} +#else /* #ifdef CONFIG_AUDIT */ +void smack_log(char *subject_label, char *object_label, int request, + int result, struct smk_audit_info *ad) +{ +} +#endif + static DEFINE_MUTEX(smack_known_lock); /** @@ -209,7 +319,8 @@ struct smack_known *smk_import_entry(const char *string, int len) if (found) smack[i] = '\0'; else if (i >= len || string[i] > '~' || string[i] <= ' ' || - string[i] == '/') { + string[i] == '/' || string[i] == '"' || + string[i] == '\\' || string[i] == '\'') { smack[i] = '\0'; found = 1; } else diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 98b3195347a..0023182078c 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -30,7 +30,6 @@ #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <linux/audit.h> - #include "smack.h" #define task_security(task) (task_cred_xxx((task), security)) @@ -103,14 +102,24 @@ struct inode_smack *new_inode_smack(char *smack) static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) { int rc; + struct smk_audit_info ad; + char *sp, *tsp; rc = cap_ptrace_may_access(ctp, mode); if (rc != 0) return rc; - rc = smk_access(current_security(), task_security(ctp), MAY_READWRITE); + sp = current_security(); + tsp = task_security(ctp); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, ctp); + + /* we won't log here, because rc can be overriden */ + rc = smk_access(sp, tsp, MAY_READWRITE, NULL); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) - return 0; + rc = 0; + + smack_log(sp, tsp, MAY_READWRITE, rc, &ad); return rc; } @@ -125,14 +134,24 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) static int smack_ptrace_traceme(struct task_struct *ptp) { int rc; + struct smk_audit_info ad; + char *sp, *tsp; rc = cap_ptrace_traceme(ptp); if (rc != 0) return rc; - rc = smk_access(task_security(ptp), current_security(), MAY_READWRITE); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, ptp); + + sp = current_security(); + tsp = task_security(ptp); + /* we won't log here, because rc can be overriden */ + rc = smk_access(tsp, sp, MAY_READWRITE, NULL); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) - return 0; + rc = 0; + + smack_log(tsp, sp, MAY_READWRITE, rc, &ad); return rc; } @@ -327,8 +346,14 @@ static int smack_sb_kern_mount(struct super_block *sb, int flags, void *data) static int smack_sb_statfs(struct dentry *dentry) { struct superblock_smack *sbp = dentry->d_sb->s_security; + int rc; + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); - return smk_curacc(sbp->smk_floor, MAY_READ); + rc = smk_curacc(sbp->smk_floor, MAY_READ, &ad); + return rc; } /** @@ -346,8 +371,12 @@ static int smack_sb_mount(char *dev_name, struct path *path, char *type, unsigned long flags, void *data) { struct superblock_smack *sbp = path->mnt->mnt_sb->s_security; + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path(&ad, *path); - return smk_curacc(sbp->smk_floor, MAY_WRITE); + return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad); } /** @@ -361,10 +390,14 @@ static int smack_sb_mount(char *dev_name, struct path *path, static int smack_sb_umount(struct vfsmount *mnt, int flags) { struct superblock_smack *sbp; + struct smk_audit_info ad; - sbp = mnt->mnt_sb->s_security; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, mnt->mnt_mountpoint); + smk_ad_setfield_u_fs_path_mnt(&ad, mnt); - return smk_curacc(sbp->smk_floor, MAY_WRITE); + sbp = mnt->mnt_sb->s_security; + return smk_curacc(sbp->smk_floor, MAY_WRITE, &ad); } /* @@ -441,15 +474,20 @@ static int smack_inode_init_security(struct inode *inode, struct inode *dir, static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { - int rc; char *isp; + struct smk_audit_info ad; + int rc; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); - rc = smk_curacc(isp, MAY_WRITE); + rc = smk_curacc(isp, MAY_WRITE, &ad); if (rc == 0 && new_dentry->d_inode != NULL) { isp = smk_of_inode(new_dentry->d_inode); - rc = smk_curacc(isp, MAY_WRITE); + smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry); + rc = smk_curacc(isp, MAY_WRITE, &ad); } return rc; @@ -466,18 +504,24 @@ static int smack_inode_link(struct dentry *old_dentry, struct inode *dir, static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) { struct inode *ip = dentry->d_inode; + struct smk_audit_info ad; int rc; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + /* * You need write access to the thing you're unlinking */ - rc = smk_curacc(smk_of_inode(ip), MAY_WRITE); - if (rc == 0) + rc = smk_curacc(smk_of_inode(ip), MAY_WRITE, &ad); + if (rc == 0) { /* * You also need write access to the containing directory */ - rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); - + smk_ad_setfield_u_fs_path_dentry(&ad, NULL); + smk_ad_setfield_u_fs_inode(&ad, dir); + rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad); + } return rc; } @@ -491,17 +535,24 @@ static int smack_inode_unlink(struct inode *dir, struct dentry *dentry) */ static int smack_inode_rmdir(struct inode *dir, struct dentry *dentry) { + struct smk_audit_info ad; int rc; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + /* * You need write access to the thing you're removing */ - rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); - if (rc == 0) + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); + if (rc == 0) { /* * You also need write access to the containing directory */ - rc = smk_curacc(smk_of_inode(dir), MAY_WRITE); + smk_ad_setfield_u_fs_path_dentry(&ad, NULL); + smk_ad_setfield_u_fs_inode(&ad, dir); + rc = smk_curacc(smk_of_inode(dir), MAY_WRITE, &ad); + } return rc; } @@ -525,15 +576,19 @@ static int smack_inode_rename(struct inode *old_inode, { int rc; char *isp; + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, old_dentry); isp = smk_of_inode(old_dentry->d_inode); - rc = smk_curacc(isp, MAY_READWRITE); + rc = smk_curacc(isp, MAY_READWRITE, &ad); if (rc == 0 && new_dentry->d_inode != NULL) { isp = smk_of_inode(new_dentry->d_inode); - rc = smk_curacc(isp, MAY_READWRITE); + smk_ad_setfield_u_fs_path_dentry(&ad, new_dentry); + rc = smk_curacc(isp, MAY_READWRITE, &ad); } - return rc; } @@ -548,13 +603,15 @@ static int smack_inode_rename(struct inode *old_inode, */ static int smack_inode_permission(struct inode *inode, int mask) { + struct smk_audit_info ad; /* * No permission to check. Existence test. Yup, it's there. */ if (mask == 0) return 0; - - return smk_curacc(smk_of_inode(inode), mask); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_inode(&ad, inode); + return smk_curacc(smk_of_inode(inode), mask, &ad); } /** @@ -566,13 +623,16 @@ static int smack_inode_permission(struct inode *inode, int mask) */ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) { + struct smk_audit_info ad; /* * Need to allow for clearing the setuid bit. */ if (iattr->ia_valid & ATTR_FORCE) return 0; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); - return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); } /** @@ -584,7 +644,12 @@ static int smack_inode_setattr(struct dentry *dentry, struct iattr *iattr) */ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) { - return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + smk_ad_setfield_u_fs_path_mnt(&ad, mnt); + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); } /** @@ -602,6 +667,7 @@ static int smack_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) static int smack_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { + struct smk_audit_info ad; int rc = 0; if (strcmp(name, XATTR_NAME_SMACK) == 0 || @@ -619,8 +685,11 @@ static int smack_inode_setxattr(struct dentry *dentry, const char *name, } else rc = cap_inode_setxattr(dentry, name, value, size, flags); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + if (rc == 0) - rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); return rc; } @@ -672,7 +741,12 @@ static void smack_inode_post_setxattr(struct dentry *dentry, const char *name, */ static int smack_inode_getxattr(struct dentry *dentry, const char *name) { - return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ); + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); + + return smk_curacc(smk_of_inode(dentry->d_inode), MAY_READ, &ad); } /* @@ -686,6 +760,7 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name) */ static int smack_inode_removexattr(struct dentry *dentry, const char *name) { + struct smk_audit_info ad; int rc = 0; if (strcmp(name, XATTR_NAME_SMACK) == 0 || @@ -696,8 +771,10 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name) } else rc = cap_inode_removexattr(dentry, name); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, dentry); if (rc == 0) - rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE); + rc = smk_curacc(smk_of_inode(dentry->d_inode), MAY_WRITE, &ad); return rc; } @@ -856,12 +933,16 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int rc = 0; + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path(&ad, file->f_path); if (_IOC_DIR(cmd) & _IOC_WRITE) - rc = smk_curacc(file->f_security, MAY_WRITE); + rc = smk_curacc(file->f_security, MAY_WRITE, &ad); if (rc == 0 && (_IOC_DIR(cmd) & _IOC_READ)) - rc = smk_curacc(file->f_security, MAY_READ); + rc = smk_curacc(file->f_security, MAY_READ, &ad); return rc; } @@ -875,7 +956,11 @@ static int smack_file_ioctl(struct file *file, unsigned int cmd, */ static int smack_file_lock(struct file *file, unsigned int cmd) { - return smk_curacc(file->f_security, MAY_WRITE); + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path_dentry(&ad, file->f_path.dentry); + return smk_curacc(file->f_security, MAY_WRITE, &ad); } /** @@ -889,8 +974,12 @@ static int smack_file_lock(struct file *file, unsigned int cmd) static int smack_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { + struct smk_audit_info ad; int rc; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); + smk_ad_setfield_u_fs_path(&ad, file->f_path); + switch (cmd) { case F_DUPFD: case F_GETFD: @@ -898,7 +987,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, case F_GETLK: case F_GETOWN: case F_GETSIG: - rc = smk_curacc(file->f_security, MAY_READ); + rc = smk_curacc(file->f_security, MAY_READ, &ad); break; case F_SETFD: case F_SETFL: @@ -906,10 +995,10 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, case F_SETLKW: case F_SETOWN: case F_SETSIG: - rc = smk_curacc(file->f_security, MAY_WRITE); + rc = smk_curacc(file->f_security, MAY_WRITE, &ad); break; default: - rc = smk_curacc(file->f_security, MAY_READWRITE); + rc = smk_curacc(file->f_security, MAY_READWRITE, &ad); } return rc; @@ -944,14 +1033,21 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, { struct file *file; int rc; + char *tsp = tsk->cred->security; + struct smk_audit_info ad; /* * struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE); + /* we don't log here as rc can be overriden */ + rc = smk_access(file->f_security, tsp, MAY_WRITE, NULL); if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) - return 0; + rc = 0; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, tsk); + smack_log(file->f_security, tsp, MAY_WRITE, rc, &ad); return rc; } @@ -964,7 +1060,10 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, static int smack_file_receive(struct file *file) { int may = 0; + struct smk_audit_info ad; + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_fs_path(&ad, file->f_path); /* * This code relies on bitmasks. */ @@ -973,7 +1072,7 @@ static int smack_file_receive(struct file *file) if (file->f_mode & FMODE_WRITE) may |= MAY_WRITE; - return smk_curacc(file->f_security, may); + return smk_curacc(file->f_security, may, &ad); } /* @@ -1053,6 +1152,22 @@ static int smack_kernel_create_files_as(struct cred *new, } /** + * smk_curacc_on_task - helper to log task related access + * @p: the task object + * @access : the access requested + * + * Return 0 if access is permitted + */ +static int smk_curacc_on_task(struct task_struct *p, int access) +{ + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, p); + return smk_curacc(task_security(p), access, &ad); +} + +/** * smack_task_setpgid - Smack check on setting pgid * @p: the task object * @pgid: unused @@ -1061,7 +1176,7 @@ static int smack_kernel_create_files_as(struct cred *new, */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(task_security(p), MAY_WRITE); + return smk_curacc_on_task(p, MAY_WRITE); } /** @@ -1072,7 +1187,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1083,7 +1198,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1111,7 +1226,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(task_security(p), MAY_WRITE); + rc = smk_curacc_on_task(p, MAY_WRITE); return rc; } @@ -1128,7 +1243,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(task_security(p), MAY_WRITE); + rc = smk_curacc_on_task(p, MAY_WRITE); return rc; } @@ -1140,7 +1255,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1158,7 +1273,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(task_security(p), MAY_WRITE); + rc = smk_curacc_on_task(p, MAY_WRITE); return rc; } @@ -1170,7 +1285,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_READ); + return smk_curacc_on_task(p, MAY_READ); } /** @@ -1181,7 +1296,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(task_security(p), MAY_WRITE); + return smk_curacc_on_task(p, MAY_WRITE); } /** @@ -1199,18 +1314,23 @@ static int smack_task_movememory(struct task_struct *p) static int smack_task_kill(struct task_struct *p, struct siginfo *info, int sig, u32 secid) { + struct smk_audit_info ad; + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, p); /* * Sending a signal requires that the sender * can write the receiver. */ if (secid == 0) - return smk_curacc(task_security(p), MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE, &ad); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE); + return smk_access(smack_from_secid(secid), task_security(p), + MAY_WRITE, &ad); } /** @@ -1221,11 +1341,15 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, */ static int smack_task_wait(struct task_struct *p) { + struct smk_audit_info ad; + char *sp = current_security(); + char *tsp = task_security(p); int rc; - rc = smk_access(current_security(), task_security(p), MAY_WRITE); + /* we don't log here, we can be overriden */ + rc = smk_access(sp, tsp, MAY_WRITE, NULL); if (rc == 0) - return 0; + goto out_log; /* * Allow the operation to succeed if either task @@ -1239,8 +1363,12 @@ static int smack_task_wait(struct task_struct *p) * the smack value. */ if (capable(CAP_MAC_OVERRIDE) || has_capability(p, CAP_MAC_OVERRIDE)) - return 0; - + rc = 0; + /* we log only if we didn't get overriden */ + out_log: + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_TASK); + smk_ad_setfield_u_tsk(&ad, p); + smack_log(sp, tsp, MAY_WRITE, rc, &ad); return rc; } @@ -1456,12 +1584,19 @@ static int smack_netlabel_send(struct sock *sk, struct sockaddr_in *sap) int sk_lbl; char *hostsp; struct socket_smack *ssp = sk->sk_security; + struct smk_audit_info ad; rcu_read_lock(); hostsp = smack_host_label(sap); if (hostsp != NULL) { sk_lbl = SMACK_UNLABELED_SOCKET; - rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + ad.a.u.net.family = sap->sin_family; + ad.a.u.net.dport = sap->sin_port; + ad.a.u.net.v4info.daddr = sap->sin_addr.s_addr; +#endif + rc = smk_access(ssp->smk_out, hostsp, MAY_WRITE, &ad); } else { sk_lbl = SMACK_CIPSO_SOCKET; rc = 0; @@ -1657,6 +1792,25 @@ static void smack_shm_free_security(struct shmid_kernel *shp) } /** + * smk_curacc_shm : check if current has access on shm + * @shp : the object + * @access : access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smk_curacc_shm(struct shmid_kernel *shp, int access) +{ + char *ssp = smack_of_shm(shp); + struct smk_audit_info ad; + +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = shp->shm_perm.id; +#endif + return smk_curacc(ssp, access, &ad); +} + +/** * smack_shm_associate - Smack access check for shm * @shp: the object * @shmflg: access requested @@ -1665,11 +1819,10 @@ static void smack_shm_free_security(struct shmid_kernel *shp) */ static int smack_shm_associate(struct shmid_kernel *shp, int shmflg) { - char *ssp = smack_of_shm(shp); int may; may = smack_flags_to_may(shmflg); - return smk_curacc(ssp, may); + return smk_curacc_shm(shp, may); } /** @@ -1681,7 +1834,6 @@ static int smack_shm_associate(struct shmid_kernel *shp, int shmflg) */ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) { - char *ssp; int may; switch (cmd) { @@ -1704,9 +1856,7 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) default: return -EINVAL; } - - ssp = smack_of_shm(shp); - return smk_curacc(ssp, may); + return smk_curacc_shm(shp, may); } /** @@ -1720,11 +1870,10 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) static int smack_shm_shmat(struct shmid_kernel *shp, char __user *shmaddr, int shmflg) { - char *ssp = smack_of_shm(shp); int may; may = smack_flags_to_may(shmflg); - return smk_curacc(ssp, may); + return smk_curacc_shm(shp, may); } /** @@ -1766,6 +1915,25 @@ static void smack_sem_free_security(struct sem_array *sma) } /** + * smk_curacc_sem : check if current has access on sem + * @sma : the object + * @access : access requested + * + * Returns 0 if current has the requested access, error code otherwise + */ +static int smk_curacc_sem(struct sem_array *sma, int access) +{ + char *ssp = smack_of_sem(sma); + struct smk_audit_info ad; + +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = sma->sem_perm.id; +#endif + return smk_curacc(ssp, access, &ad); +} + +/** * smack_sem_associate - Smack access check for sem * @sma: the object * @semflg: access requested @@ -1774,11 +1942,10 @@ static void smack_sem_free_security(struct sem_array *sma) */ static int smack_sem_associate(struct sem_array *sma, int semflg) { - char *ssp = smack_of_sem(sma); int may; may = smack_flags_to_may(semflg); - return smk_curacc(ssp, may); + return smk_curacc_sem(sma, may); } /** @@ -1790,7 +1957,6 @@ static int smack_sem_associate(struct sem_array *sma, int semflg) */ static int smack_sem_semctl(struct sem_array *sma, int cmd) { - char *ssp; int may; switch (cmd) { @@ -1819,8 +1985,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd) return -EINVAL; } - ssp = smack_of_sem(sma); - return smk_curacc(ssp, may); + return smk_curacc_sem(sma, may); } /** @@ -1837,9 +2002,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd) static int smack_sem_semop(struct sem_array *sma, struct sembuf *sops, unsigned nsops, int alter) { - char *ssp = smack_of_sem(sma); - - return smk_curacc(ssp, MAY_READWRITE); + return smk_curacc_sem(sma, MAY_READWRITE); } /** @@ -1881,6 +2044,25 @@ static char *smack_of_msq(struct msg_queue *msq) } /** + * smk_curacc_msq : helper to check if current has access on msq + * @msq : the msq + * @access : access requested + * + * return 0 if current has access, error otherwise + */ +static int smk_curacc_msq(struct msg_queue *msq, int access) +{ + char *msp = smack_of_msq(msq); + struct smk_audit_info ad; + +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = msq->q_perm.id; +#endif + return smk_curacc(msp, access, &ad); +} + +/** * smack_msg_queue_associate - Smack access check for msg_queue * @msq: the object * @msqflg: access requested @@ -1889,11 +2071,10 @@ static char *smack_of_msq(struct msg_queue *msq) */ static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg) { - char *msp = smack_of_msq(msq); int may; may = smack_flags_to_may(msqflg); - return smk_curacc(msp, may); + return smk_curacc_msq(msq, may); } /** @@ -1905,7 +2086,6 @@ static int smack_msg_queue_associate(struct msg_queue *msq, int msqflg) */ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) { - char *msp; int may; switch (cmd) { @@ -1927,8 +2107,7 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) return -EINVAL; } - msp = smack_of_msq(msq); - return smk_curacc(msp, may); + return smk_curacc_msq(msq, may); } /** @@ -1942,11 +2121,10 @@ static int smack_msg_queue_msgctl(struct msg_queue *msq, int cmd) static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, int msqflg) { - char *msp = smack_of_msq(msq); - int rc; + int may; - rc = smack_flags_to_may(msqflg); - return smk_curacc(msp, rc); + may = smack_flags_to_may(msqflg); + return smk_curacc_msq(msq, may); } /** @@ -1962,9 +2140,7 @@ static int smack_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct task_struct *target, long type, int mode) { - char *msp = smack_of_msq(msq); - - return smk_curacc(msp, MAY_READWRITE); + return smk_curacc_msq(msq, MAY_READWRITE); } /** @@ -1977,10 +2153,14 @@ static int smack_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, static int smack_ipc_permission(struct kern_ipc_perm *ipp, short flag) { char *isp = ipp->security; - int may; + int may = smack_flags_to_may(flag); + struct smk_audit_info ad; - may = smack_flags_to_may(flag); - return smk_curacc(isp, may); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_IPC); + ad.a.u.ipc_id = ipp->id; +#endif + return smk_curacc(isp, may, &ad); } /** @@ -2239,8 +2419,12 @@ static int smack_unix_stream_connect(struct socket *sock, { struct inode *sp = SOCK_INODE(sock); struct inode *op = SOCK_INODE(other); + struct smk_audit_info ad; - return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_READWRITE); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + smk_ad_setfield_u_net_sk(&ad, other->sk); + return smk_access(smk_of_inode(sp), smk_of_inode(op), + MAY_READWRITE, &ad); } /** @@ -2255,8 +2439,11 @@ static int smack_unix_may_send(struct socket *sock, struct socket *other) { struct inode *sp = SOCK_INODE(sock); struct inode *op = SOCK_INODE(other); + struct smk_audit_info ad; - return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE); + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + smk_ad_setfield_u_net_sk(&ad, other->sk); + return smk_access(smk_of_inode(sp), smk_of_inode(op), MAY_WRITE, &ad); } /** @@ -2371,7 +2558,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) char smack[SMK_LABELLEN]; char *csp; int rc; - + struct smk_audit_info ad; if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) return 0; @@ -2389,13 +2576,19 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) netlbl_secattr_destroy(&secattr); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + ad.a.u.net.family = sk->sk_family; + ad.a.u.net.netif = skb->iif; + ipv4_skb_to_auditdata(skb, &ad.a, NULL); +#endif /* * Receiving a packet requires that the other end * be able to write here. Read access is not required. * This is the simplist possible security model * for networking. */ - rc = smk_access(csp, ssp->smk_in, MAY_WRITE); + rc = smk_access(csp, ssp->smk_in, MAY_WRITE, &ad); if (rc != 0) netlbl_skbuff_err(skb, rc, 0); return rc; @@ -2524,6 +2717,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, struct iphdr *hdr; char smack[SMK_LABELLEN]; int rc; + struct smk_audit_info ad; /* handle mapped IPv4 packets arriving via IPv6 sockets */ if (family == PF_INET6 && skb->protocol == htons(ETH_P_IP)) @@ -2537,11 +2731,17 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, strncpy(smack, smack_known_huh.smk_known, SMK_MAXLEN); netlbl_secattr_destroy(&secattr); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NET); + ad.a.u.net.family = family; + ad.a.u.net.netif = skb->iif; + ipv4_skb_to_auditdata(skb, &ad.a, NULL); +#endif /* * Receiving a packet requires that the other end be able to write * here. Read access is not required. */ - rc = smk_access(smack, ssp->smk_in, MAY_WRITE); + rc = smk_access(smack, ssp->smk_in, MAY_WRITE, &ad); if (rc != 0) return rc; @@ -2643,6 +2843,7 @@ static int smack_key_permission(key_ref_t key_ref, const struct cred *cred, key_perm_t perm) { struct key *keyp; + struct smk_audit_info ad; keyp = key_ref_to_ptr(key_ref); if (keyp == NULL) @@ -2658,8 +2859,13 @@ static int smack_key_permission(key_ref_t key_ref, */ if (cred->security == NULL) return -EACCES; - - return smk_access(cred->security, keyp->security, MAY_READWRITE); +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY); + ad.a.u.key_struct.key = keyp->serial; + ad.a.u.key_struct.key_desc = keyp->description; +#endif + return smk_access(cred->security, keyp->security, + MAY_READWRITE, &ad); } #endif /* CONFIG_KEYS */ @@ -2828,15 +3034,7 @@ struct security_operations smack_ops = { .ptrace_may_access = smack_ptrace_may_access, .ptrace_traceme = smack_ptrace_traceme, - .capget = cap_capget, - .capset = cap_capset, - .capable = cap_capable, .syslog = smack_syslog, - .settime = cap_settime, - .vm_enough_memory = cap_vm_enough_memory, - - .bprm_set_creds = cap_bprm_set_creds, - .bprm_secureexec = cap_bprm_secureexec, .sb_alloc_security = smack_sb_alloc_security, .sb_free_security = smack_sb_free_security, @@ -2860,8 +3058,6 @@ struct security_operations smack_ops = { .inode_post_setxattr = smack_inode_post_setxattr, .inode_getxattr = smack_inode_getxattr, .inode_removexattr = smack_inode_removexattr, - .inode_need_killpriv = cap_inode_need_killpriv, - .inode_killpriv = cap_inode_killpriv, .inode_getsecurity = smack_inode_getsecurity, .inode_setsecurity = smack_inode_setsecurity, .inode_listsecurity = smack_inode_listsecurity, @@ -2882,7 +3078,6 @@ struct security_operations smack_ops = { .cred_commit = smack_cred_commit, .kernel_act_as = smack_kernel_act_as, .kernel_create_files_as = smack_kernel_create_files_as, - .task_fix_setuid = cap_task_fix_setuid, .task_setpgid = smack_task_setpgid, .task_getpgid = smack_task_getpgid, .task_getsid = smack_task_getsid, @@ -2896,7 +3091,6 @@ struct security_operations smack_ops = { .task_kill = smack_task_kill, .task_wait = smack_task_wait, .task_to_inode = smack_task_to_inode, - .task_prctl = cap_task_prctl, .ipc_permission = smack_ipc_permission, .ipc_getsecid = smack_ipc_getsecid, @@ -2923,9 +3117,6 @@ struct security_operations smack_ops = { .sem_semctl = smack_sem_semctl, .sem_semop = smack_sem_semop, - .netlink_send = cap_netlink_send, - .netlink_recv = cap_netlink_recv, - .d_instantiate = smack_d_instantiate, .getprocattr = smack_getprocattr, diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 11d2cb19d7a..f83a8098072 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -41,6 +41,7 @@ enum smk_inos { SMK_AMBIENT = 7, /* internet ambient label */ SMK_NETLBLADDR = 8, /* single label hosts */ SMK_ONLYCAP = 9, /* the only "capable" label */ + SMK_LOGGING = 10, /* logging */ }; /* @@ -775,7 +776,7 @@ static ssize_t smk_write_netlbladdr(struct file *file, const char __user *buf, struct sockaddr_in newname; char smack[SMK_LABELLEN]; char *sp; - char data[SMK_NETLBLADDRMAX]; + char data[SMK_NETLBLADDRMAX + 1]; char *host = (char *)&newname.sin_addr.s_addr; int rc; struct netlbl_audit audit_info; @@ -1192,6 +1193,69 @@ static const struct file_operations smk_onlycap_ops = { }; /** + * smk_read_logging - read() for /smack/logging + * @filp: file pointer, not actually used + * @buf: where to put the result + * @cn: maximum to send along + * @ppos: where to start + * + * Returns number of bytes read or error code, as appropriate + */ +static ssize_t smk_read_logging(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[32]; + ssize_t rc; + + if (*ppos != 0) + return 0; + + sprintf(temp, "%d\n", log_policy); + rc = simple_read_from_buffer(buf, count, ppos, temp, strlen(temp)); + return rc; +} + +/** + * smk_write_logging - write() for /smack/logging + * @file: file pointer, not actually used + * @buf: where to get the data from + * @count: bytes sent + * @ppos: where to start + * + * Returns number of bytes written or error code, as appropriate + */ +static ssize_t smk_write_logging(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char temp[32]; + int i; + + if (!capable(CAP_MAC_ADMIN)) + return -EPERM; + + if (count >= sizeof(temp) || count == 0) + return -EINVAL; + + if (copy_from_user(temp, buf, count) != 0) + return -EFAULT; + + temp[count] = '\0'; + + if (sscanf(temp, "%d", &i) != 1) + return -EINVAL; + if (i < 0 || i > 3) + return -EINVAL; + log_policy = i; + return count; +} + + + +static const struct file_operations smk_logging_ops = { + .read = smk_read_logging, + .write = smk_write_logging, +}; +/** * smk_fill_super - fill the /smackfs superblock * @sb: the empty superblock * @data: unused @@ -1221,6 +1285,8 @@ static int smk_fill_super(struct super_block *sb, void *data, int silent) {"netlabel", &smk_netlbladdr_ops, S_IRUGO|S_IWUSR}, [SMK_ONLYCAP] = {"onlycap", &smk_onlycap_ops, S_IRUGO|S_IWUSR}, + [SMK_LOGGING] = + {"logging", &smk_logging_ops, S_IRUGO|S_IWUSR}, /* last one */ {""} }; diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index ddfb9cccf46..fdd1f4b8c44 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -28,7 +28,13 @@ static const char *tomoyo_mode_2[4] = { "disabled", "enabled", "enabled", "enabled" }; -/* Table for profile. */ +/* + * tomoyo_control_array is a static data which contains + * + * (1) functionality name used by /sys/kernel/security/tomoyo/profile . + * (2) initial values for "struct tomoyo_profile". + * (3) max values for "struct tomoyo_profile". + */ static struct { const char *keyword; unsigned int current_value; @@ -39,7 +45,13 @@ static struct { [TOMOYO_VERBOSE] = { "TOMOYO_VERBOSE", 1, 1 }, }; -/* Profile table. Memory is allocated as needed. */ +/* + * tomoyo_profile is a structure which is used for holding the mode of access + * controls. TOMOYO has 4 modes: disabled, learning, permissive, enforcing. + * An administrator can define up to 256 profiles. + * The ->profile of "struct tomoyo_domain_info" is used for remembering + * the profile's number (0 - 255) assigned to that domain. + */ static struct tomoyo_profile { unsigned int value[TOMOYO_MAX_CONTROL_INDEX]; const struct tomoyo_path_info *comment; @@ -428,7 +440,6 @@ void tomoyo_fill_path_info(struct tomoyo_path_info *ptr) const char *name = ptr->name; const int len = strlen(name); - ptr->total_len = len; ptr->const_len = tomoyo_const_part_length(name); ptr->is_dir = len && (name[len - 1] == '/'); ptr->is_patterned = (ptr->const_len < len); @@ -866,7 +877,6 @@ static struct tomoyo_profile *tomoyo_find_or_assign_new_profile(const unsigned if (profile >= TOMOYO_MAX_PROFILES) return NULL; - /***** EXCLUSIVE SECTION START *****/ mutex_lock(&lock); ptr = tomoyo_profile_ptr[profile]; if (ptr) @@ -880,7 +890,6 @@ static struct tomoyo_profile *tomoyo_find_or_assign_new_profile(const unsigned tomoyo_profile_ptr[profile] = ptr; ok: mutex_unlock(&lock); - /***** EXCLUSIVE SECTION END *****/ return ptr; } @@ -1009,7 +1018,19 @@ static int tomoyo_read_profile(struct tomoyo_io_buffer *head) return 0; } -/* Structure for policy manager. */ +/* + * tomoyo_policy_manager_entry is a structure which is used for holding list of + * domainnames or programs which are permitted to modify configuration via + * /sys/kernel/security/tomoyo/ interface. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_policy_manager_list . + * (2) "manager" is a domainname or a program's pathname. + * (3) "is_domain" is a bool which is true if "manager" is a domainname, false + * otherwise. + * (4) "is_deleted" is a bool which is true if marked as deleted, false + * otherwise. + */ struct tomoyo_policy_manager_entry { struct list_head list; /* A path to program or a domainname. */ @@ -1018,7 +1039,36 @@ struct tomoyo_policy_manager_entry { bool is_deleted; /* True if this entry is deleted. */ }; -/* The list for "struct tomoyo_policy_manager_entry". */ +/* + * tomoyo_policy_manager_list is used for holding list of domainnames or + * programs which are permitted to modify configuration via + * /sys/kernel/security/tomoyo/ interface. + * + * An entry is added by + * + * # echo '<kernel> /sbin/mingetty /bin/login /bin/bash' > \ + * /sys/kernel/security/tomoyo/manager + * (if you want to specify by a domainname) + * + * or + * + * # echo '/usr/lib/ccs/editpolicy' > /sys/kernel/security/tomoyo/manager + * (if you want to specify by a program's location) + * + * and is deleted by + * + * # echo 'delete <kernel> /sbin/mingetty /bin/login /bin/bash' > \ + * /sys/kernel/security/tomoyo/manager + * + * or + * + * # echo 'delete /usr/lib/ccs/editpolicy' > \ + * /sys/kernel/security/tomoyo/manager + * + * and all entries are retrieved by + * + * # cat /sys/kernel/security/tomoyo/manager + */ static LIST_HEAD(tomoyo_policy_manager_list); static DECLARE_RWSEM(tomoyo_policy_manager_list_lock); @@ -1050,7 +1100,6 @@ static int tomoyo_update_manager_entry(const char *manager, saved_manager = tomoyo_save_name(manager); if (!saved_manager) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_policy_manager_list_lock); list_for_each_entry(ptr, &tomoyo_policy_manager_list, list) { if (ptr->manager != saved_manager) @@ -1072,7 +1121,6 @@ static int tomoyo_update_manager_entry(const char *manager, error = 0; out: up_write(&tomoyo_policy_manager_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -1117,10 +1165,9 @@ static int tomoyo_read_manager_policy(struct tomoyo_io_buffer *head) list); if (ptr->is_deleted) continue; - if (!tomoyo_io_printf(head, "%s\n", ptr->manager->name)) { - done = false; + done = tomoyo_io_printf(head, "%s\n", ptr->manager->name); + if (!done) break; - } } up_read(&tomoyo_policy_manager_list_lock); head->read_eof = done; @@ -1197,13 +1244,11 @@ static bool tomoyo_is_select_one(struct tomoyo_io_buffer *head, if (sscanf(data, "pid=%u", &pid) == 1) { struct task_struct *p; - /***** CRITICAL SECTION START *****/ read_lock(&tasklist_lock); p = find_task_by_vpid(pid); if (p) domain = tomoyo_real_domain(p); read_unlock(&tasklist_lock); - /***** CRITICAL SECTION END *****/ } else if (!strncmp(data, "domain=", 7)) { if (tomoyo_is_domain_def(data + 7)) { down_read(&tomoyo_domain_list_lock); @@ -1447,15 +1492,14 @@ static int tomoyo_read_domain_policy(struct tomoyo_io_buffer *head) TOMOYO_DOMAIN_FLAGS_IGNORE_GLOBAL_ALLOW_READ) ignore_global_allow_read = TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "\n"; - if (!tomoyo_io_printf(head, - "%s\n" TOMOYO_KEYWORD_USE_PROFILE "%u\n" - "%s%s%s\n", domain->domainname->name, - domain->profile, quota_exceeded, - transition_failed, - ignore_global_allow_read)) { - done = false; + done = tomoyo_io_printf(head, "%s\n" TOMOYO_KEYWORD_USE_PROFILE + "%u\n%s%s%s\n", + domain->domainname->name, + domain->profile, quota_exceeded, + transition_failed, + ignore_global_allow_read); + if (!done) break; - } head->read_step = 2; acl_loop: if (head->read_step == 3) @@ -1463,24 +1507,22 @@ acl_loop: /* Print ACL entries in the domain. */ down_read(&tomoyo_domain_acl_info_list_lock); list_for_each_cookie(apos, head->read_var2, - &domain->acl_info_list) { + &domain->acl_info_list) { struct tomoyo_acl_info *ptr = list_entry(apos, struct tomoyo_acl_info, - list); - if (!tomoyo_print_entry(head, ptr)) { - done = false; + list); + done = tomoyo_print_entry(head, ptr); + if (!done) break; - } } up_read(&tomoyo_domain_acl_info_list_lock); if (!done) break; head->read_step = 3; tail_mark: - if (!tomoyo_io_printf(head, "\n")) { - done = false; + done = tomoyo_io_printf(head, "\n"); + if (!done) break; - } head->read_step = 1; if (head->read_single_domain) break; @@ -1550,11 +1592,10 @@ static int tomoyo_read_domain_profile(struct tomoyo_io_buffer *head) domain = list_entry(pos, struct tomoyo_domain_info, list); if (domain->is_deleted) continue; - if (!tomoyo_io_printf(head, "%u %s\n", domain->profile, - domain->domainname->name)) { - done = false; + done = tomoyo_io_printf(head, "%u %s\n", domain->profile, + domain->domainname->name); + if (!done) break; - } } up_read(&tomoyo_domain_list_lock); head->read_eof = done; @@ -1594,13 +1635,11 @@ static int tomoyo_read_pid(struct tomoyo_io_buffer *head) const int pid = head->read_step; struct task_struct *p; struct tomoyo_domain_info *domain = NULL; - /***** CRITICAL SECTION START *****/ read_lock(&tasklist_lock); p = find_task_by_vpid(pid); if (p) domain = tomoyo_real_domain(p); read_unlock(&tasklist_lock); - /***** CRITICAL SECTION END *****/ if (domain) tomoyo_io_printf(head, "%d %u %s", pid, domain->profile, domain->domainname->name); @@ -2138,7 +2177,13 @@ static ssize_t tomoyo_write(struct file *file, const char __user *buf, return tomoyo_write_control(file, buf, count); } -/* Operations for /sys/kernel/security/tomoyo/ interface. */ +/* + * tomoyo_operations is a "struct file_operations" which is used for handling + * /sys/kernel/security/tomoyo/ interface. + * + * Some files under /sys/kernel/security/tomoyo/ directory accept open(O_RDWR). + * See tomoyo_io_buffer for internals. + */ static const struct file_operations tomoyo_operations = { .open = tomoyo_open, .release = tomoyo_release, diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 678f4ff16aa..6d6ba09af45 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -26,16 +26,43 @@ struct dentry; struct vfsmount; -/* Temporary buffer for holding pathnames. */ +/* + * tomoyo_page_buffer is a structure which is used for holding a pathname + * obtained from "struct dentry" and "struct vfsmount" pair. + * As of now, it is 4096 bytes. If users complain that 4096 bytes is too small + * (because TOMOYO escapes non ASCII printable characters using \ooo format), + * we will make the buffer larger. + */ struct tomoyo_page_buffer { char buffer[4096]; }; -/* Structure for holding a token. */ +/* + * tomoyo_path_info is a structure which is used for holding a string data + * used by TOMOYO. + * This structure has several fields for supporting pattern matching. + * + * (1) "name" is the '\0' terminated string data. + * (2) "hash" is full_name_hash(name, strlen(name)). + * This allows tomoyo_pathcmp() to compare by hash before actually compare + * using strcmp(). + * (3) "const_len" is the length of the initial segment of "name" which + * consists entirely of non wildcard characters. In other words, the length + * which we can compare two strings using strncmp(). + * (4) "is_dir" is a bool which is true if "name" ends with "/", + * false otherwise. + * TOMOYO distinguishes directory and non-directory. A directory ends with + * "/" and non-directory does not end with "/". + * (5) "is_patterned" is a bool which is true if "name" contains wildcard + * characters, false otherwise. This allows TOMOYO to use "hash" and + * strcmp() for string comparison if "is_patterned" is false. + * (6) "depth" is calculated using the number of "/" characters in "name". + * This allows TOMOYO to avoid comparing two pathnames which never match + * (e.g. whether "/var/www/html/index.html" matches "/tmp/sh-thd-\$"). + */ struct tomoyo_path_info { const char *name; u32 hash; /* = full_name_hash(name, strlen(name)) */ - u16 total_len; /* = strlen(name) */ u16 const_len; /* = tomoyo_const_part_length(name) */ bool is_dir; /* = tomoyo_strendswith(name, "/") */ bool is_patterned; /* = tomoyo_path_contains_pattern(name) */ @@ -51,7 +78,20 @@ struct tomoyo_path_info { */ #define TOMOYO_MAX_PATHNAME_LEN 4000 -/* Structure for holding requested pathname. */ +/* + * tomoyo_path_info_with_data is a structure which is used for holding a + * pathname obtained from "struct dentry" and "struct vfsmount" pair. + * + * "struct tomoyo_path_info_with_data" consists of "struct tomoyo_path_info" + * and buffer for the pathname, while "struct tomoyo_page_buffer" consists of + * buffer for the pathname only. + * + * "struct tomoyo_path_info_with_data" is intended to allow TOMOYO to release + * both "struct tomoyo_path_info" and buffer for the pathname by single kfree() + * so that we don't need to return two pointers to the caller. If the caller + * puts "struct tomoyo_path_info" on stack memory, we will be able to remove + * "struct tomoyo_path_info_with_data". + */ struct tomoyo_path_info_with_data { /* Keep "head" first, for this pointer is passed to tomoyo_free(). */ struct tomoyo_path_info head; @@ -61,7 +101,15 @@ struct tomoyo_path_info_with_data { }; /* - * Common header for holding ACL entries. + * tomoyo_acl_info is a structure which is used for holding + * + * (1) "list" which is linked to the ->acl_info_list of + * "struct tomoyo_domain_info" + * (2) "type" which tells + * (a) type & 0x7F : type of the entry (either + * "struct tomoyo_single_path_acl_record" or + * "struct tomoyo_double_path_acl_record") + * (b) type & 0x80 : whether the entry is marked as "deleted". * * Packing "struct tomoyo_acl_info" allows * "struct tomoyo_single_path_acl_record" to embed "u16" and @@ -81,7 +129,28 @@ struct tomoyo_acl_info { /* This ACL entry is deleted. */ #define TOMOYO_ACL_DELETED 0x80 -/* Structure for domain information. */ +/* + * tomoyo_domain_info is a structure which is used for holding permissions + * (e.g. "allow_read /lib/libc-2.5.so") given to each domain. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_domain_list . + * (2) "acl_info_list" which is linked to "struct tomoyo_acl_info". + * (3) "domainname" which holds the name of the domain. + * (4) "profile" which remembers profile number assigned to this domain. + * (5) "is_deleted" is a bool which is true if this domain is marked as + * "deleted", false otherwise. + * (6) "quota_warned" is a bool which is used for suppressing warning message + * when learning mode learned too much entries. + * (7) "flags" which remembers this domain's attributes. + * + * A domain's lifecycle is an analogy of files on / directory. + * Multiple domains with the same domainname cannot be created (as with + * creating files with the same filename fails with -EEXIST). + * If a process reached a domain, that process can reside in that domain after + * that domain is marked as "deleted" (as with a process can access an already + * open()ed file after that file was unlink()ed). + */ struct tomoyo_domain_info { struct list_head list; struct list_head acl_info_list; @@ -108,10 +177,18 @@ struct tomoyo_domain_info { #define TOMOYO_DOMAIN_FLAGS_TRANSITION_FAILED 2 /* - * Structure for "allow_read/write", "allow_execute", "allow_read", - * "allow_write", "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir", - * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar", - * "allow_truncate", "allow_symlink" and "allow_rewrite" directive. + * tomoyo_single_path_acl_record is a structure which is used for holding an + * entry with one pathname operation (e.g. open(), mkdir()). + * It has following fields. + * + * (1) "head" which is a "struct tomoyo_acl_info". + * (2) "perm" which is a bitmask of permitted operations. + * (3) "filename" is the pathname. + * + * Directives held by this structure are "allow_read/write", "allow_execute", + * "allow_read", "allow_write", "allow_create", "allow_unlink", "allow_mkdir", + * "allow_rmdir", "allow_mkfifo", "allow_mksock", "allow_mkblock", + * "allow_mkchar", "allow_truncate", "allow_symlink" and "allow_rewrite". */ struct tomoyo_single_path_acl_record { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_SINGLE_PATH_ACL */ @@ -120,7 +197,18 @@ struct tomoyo_single_path_acl_record { const struct tomoyo_path_info *filename; }; -/* Structure for "allow_rename" and "allow_link" directive. */ +/* + * tomoyo_double_path_acl_record is a structure which is used for holding an + * entry with two pathnames operation (i.e. link() and rename()). + * It has following fields. + * + * (1) "head" which is a "struct tomoyo_acl_info". + * (2) "perm" which is a bitmask of permitted operations. + * (3) "filename1" is the source/old pathname. + * (4) "filename2" is the destination/new pathname. + * + * Directives held by this structure are "allow_rename" and "allow_link". + */ struct tomoyo_double_path_acl_record { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_DOUBLE_PATH_ACL */ u8 perm; @@ -153,7 +241,29 @@ struct tomoyo_double_path_acl_record { #define TOMOYO_VERBOSE 2 #define TOMOYO_MAX_CONTROL_INDEX 3 -/* Structure for reading/writing policy via securityfs interfaces. */ +/* + * tomoyo_io_buffer is a structure which is used for reading and modifying + * configuration via /sys/kernel/security/tomoyo/ interface. + * It has many fields. ->read_var1 , ->read_var2 , ->write_var1 are used as + * cursors. + * + * Since the content of /sys/kernel/security/tomoyo/domain_policy is a list of + * "struct tomoyo_domain_info" entries and each "struct tomoyo_domain_info" + * entry has a list of "struct tomoyo_acl_info", we need two cursors when + * reading (one is for traversing tomoyo_domain_list and the other is for + * traversing "struct tomoyo_acl_info"->acl_info_list ). + * + * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with + * "select ", TOMOYO seeks the cursor ->read_var1 and ->write_var1 to the + * domain with the domainname specified by the rest of that line (NULL is set + * if seek failed). + * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with + * "delete ", TOMOYO deletes an entry or a domain specified by the rest of that + * line (->write_var1 is set to NULL if a domain was deleted). + * If a line written to /sys/kernel/security/tomoyo/domain_policy starts with + * neither "select " nor "delete ", an entry or a domain specified by that line + * is appended. + */ struct tomoyo_io_buffer { int (*read) (struct tomoyo_io_buffer *); int (*write) (struct tomoyo_io_buffer *); diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 2d6748741a2..1d8b1696057 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -19,11 +19,63 @@ /* The initial domain. */ struct tomoyo_domain_info tomoyo_kernel_domain; -/* The list for "struct tomoyo_domain_info". */ +/* + * tomoyo_domain_list is used for holding list of domains. + * The ->acl_info_list of "struct tomoyo_domain_info" is used for holding + * permissions (e.g. "allow_read /lib/libc-2.5.so") given to each domain. + * + * An entry is added by + * + * # ( echo "<kernel>"; echo "allow_execute /sbin/init" ) > \ + * /sys/kernel/security/tomoyo/domain_policy + * + * and is deleted by + * + * # ( echo "<kernel>"; echo "delete allow_execute /sbin/init" ) > \ + * /sys/kernel/security/tomoyo/domain_policy + * + * and all entries are retrieved by + * + * # cat /sys/kernel/security/tomoyo/domain_policy + * + * A domain is added by + * + * # echo "<kernel>" > /sys/kernel/security/tomoyo/domain_policy + * + * and is deleted by + * + * # echo "delete <kernel>" > /sys/kernel/security/tomoyo/domain_policy + * + * and all domains are retrieved by + * + * # grep '^<kernel>' /sys/kernel/security/tomoyo/domain_policy + * + * Normally, a domainname is monotonically getting longer because a domainname + * which the process will belong to if an execve() operation succeeds is + * defined as a concatenation of "current domainname" + "pathname passed to + * execve()". + * See tomoyo_domain_initializer_list and tomoyo_domain_keeper_list for + * exceptions. + */ LIST_HEAD(tomoyo_domain_list); DECLARE_RWSEM(tomoyo_domain_list_lock); -/* Structure for "initialize_domain" and "no_initialize_domain" keyword. */ +/* + * tomoyo_domain_initializer_entry is a structure which is used for holding + * "initialize_domain" and "no_initialize_domain" entries. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_domain_initializer_list . + * (2) "domainname" which is "a domainname" or "the last component of a + * domainname". This field is NULL if "from" clause is not specified. + * (3) "program" which is a program's pathname. + * (4) "is_deleted" is a bool which is true if marked as deleted, false + * otherwise. + * (5) "is_not" is a bool which is true if "no_initialize_domain", false + * otherwise. + * (6) "is_last_name" is a bool which is true if "domainname" is "the last + * component of a domainname", false otherwise. + */ struct tomoyo_domain_initializer_entry { struct list_head list; const struct tomoyo_path_info *domainname; /* This may be NULL */ @@ -34,7 +86,23 @@ struct tomoyo_domain_initializer_entry { bool is_last_name; }; -/* Structure for "keep_domain" and "no_keep_domain" keyword. */ +/* + * tomoyo_domain_keeper_entry is a structure which is used for holding + * "keep_domain" and "no_keep_domain" entries. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_domain_keeper_list . + * (2) "domainname" which is "a domainname" or "the last component of a + * domainname". + * (3) "program" which is a program's pathname. + * This field is NULL if "from" clause is not specified. + * (4) "is_deleted" is a bool which is true if marked as deleted, false + * otherwise. + * (5) "is_not" is a bool which is true if "no_initialize_domain", false + * otherwise. + * (6) "is_last_name" is a bool which is true if "domainname" is "the last + * component of a domainname", false otherwise. + */ struct tomoyo_domain_keeper_entry { struct list_head list; const struct tomoyo_path_info *domainname; @@ -45,7 +113,16 @@ struct tomoyo_domain_keeper_entry { bool is_last_name; }; -/* Structure for "alias" keyword. */ +/* + * tomoyo_alias_entry is a structure which is used for holding "alias" entries. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_alias_list . + * (2) "original_name" which is a dereferenced pathname. + * (3) "aliased_name" which is a symlink's pathname. + * (4) "is_deleted" is a bool which is true if marked as deleted, false + * otherwise. + */ struct tomoyo_alias_entry { struct list_head list; const struct tomoyo_path_info *original_name; @@ -67,14 +144,12 @@ void tomoyo_set_domain_flag(struct tomoyo_domain_info *domain, { /* We need to serialize because this is bitfield operation. */ static DEFINE_SPINLOCK(lock); - /***** CRITICAL SECTION START *****/ spin_lock(&lock); if (!is_delete) domain->flags |= flags; else domain->flags &= ~flags; spin_unlock(&lock); - /***** CRITICAL SECTION END *****/ } /** @@ -94,7 +169,42 @@ const char *tomoyo_get_last_name(const struct tomoyo_domain_info *domain) return cp0; } -/* The list for "struct tomoyo_domain_initializer_entry". */ +/* + * tomoyo_domain_initializer_list is used for holding list of programs which + * triggers reinitialization of domainname. Normally, a domainname is + * monotonically getting longer. But sometimes, we restart daemon programs. + * It would be convenient for us that "a daemon started upon system boot" and + * "the daemon restarted from console" belong to the same domain. Thus, TOMOYO + * provides a way to shorten domainnames. + * + * An entry is added by + * + * # echo 'initialize_domain /usr/sbin/httpd' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and is deleted by + * + * # echo 'delete initialize_domain /usr/sbin/httpd' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and all entries are retrieved by + * + * # grep ^initialize_domain /sys/kernel/security/tomoyo/exception_policy + * + * In the example above, /usr/sbin/httpd will belong to + * "<kernel> /usr/sbin/httpd" domain. + * + * You may specify a domainname using "from" keyword. + * "initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd" + * will cause "/usr/sbin/httpd" executed from "<kernel> /etc/rc.d/init.d/httpd" + * domain to belong to "<kernel> /usr/sbin/httpd" domain. + * + * You may add "no_" prefix to "initialize_domain". + * "initialize_domain /usr/sbin/httpd" and + * "no_initialize_domain /usr/sbin/httpd from <kernel> /etc/rc.d/init.d/httpd" + * will cause "/usr/sbin/httpd" to belong to "<kernel> /usr/sbin/httpd" domain + * unless executed from "<kernel> /etc/rc.d/init.d/httpd" domain. + */ static LIST_HEAD(tomoyo_domain_initializer_list); static DECLARE_RWSEM(tomoyo_domain_initializer_list_lock); @@ -135,7 +245,6 @@ static int tomoyo_update_domain_initializer_entry(const char *domainname, saved_program = tomoyo_save_name(program); if (!saved_program) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_domain_initializer_list_lock); list_for_each_entry(ptr, &tomoyo_domain_initializer_list, list) { if (ptr->is_not != is_not || @@ -161,7 +270,6 @@ static int tomoyo_update_domain_initializer_entry(const char *domainname, error = 0; out: up_write(&tomoyo_domain_initializer_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -193,13 +301,12 @@ bool tomoyo_read_domain_initializer_policy(struct tomoyo_io_buffer *head) from = " from "; domain = ptr->domainname->name; } - if (!tomoyo_io_printf(head, - "%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN - "%s%s%s\n", no, ptr->program->name, from, - domain)) { - done = false; + done = tomoyo_io_printf(head, + "%s" TOMOYO_KEYWORD_INITIALIZE_DOMAIN + "%s%s%s\n", no, ptr->program->name, + from, domain); + if (!done) break; - } } up_read(&tomoyo_domain_initializer_list_lock); return done; @@ -273,7 +380,44 @@ static bool tomoyo_is_domain_initializer(const struct tomoyo_path_info * return flag; } -/* The list for "struct tomoyo_domain_keeper_entry". */ +/* + * tomoyo_domain_keeper_list is used for holding list of domainnames which + * suppresses domain transition. Normally, a domainname is monotonically + * getting longer. But sometimes, we want to suppress domain transition. + * It would be convenient for us that programs executed from a login session + * belong to the same domain. Thus, TOMOYO provides a way to suppress domain + * transition. + * + * An entry is added by + * + * # echo 'keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and is deleted by + * + * # echo 'delete keep_domain <kernel> /usr/sbin/sshd /bin/bash' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and all entries are retrieved by + * + * # grep ^keep_domain /sys/kernel/security/tomoyo/exception_policy + * + * In the example above, any process which belongs to + * "<kernel> /usr/sbin/sshd /bin/bash" domain will remain in that domain, + * unless explicitly specified by "initialize_domain" or "no_keep_domain". + * + * You may specify a program using "from" keyword. + * "keep_domain /bin/pwd from <kernel> /usr/sbin/sshd /bin/bash" + * will cause "/bin/pwd" executed from "<kernel> /usr/sbin/sshd /bin/bash" + * domain to remain in "<kernel> /usr/sbin/sshd /bin/bash" domain. + * + * You may add "no_" prefix to "keep_domain". + * "keep_domain <kernel> /usr/sbin/sshd /bin/bash" and + * "no_keep_domain /usr/bin/passwd from <kernel> /usr/sbin/sshd /bin/bash" will + * cause "/usr/bin/passwd" to belong to + * "<kernel> /usr/sbin/sshd /bin/bash /usr/bin/passwd" domain, unless + * explicitly specified by "initialize_domain". + */ static LIST_HEAD(tomoyo_domain_keeper_list); static DECLARE_RWSEM(tomoyo_domain_keeper_list_lock); @@ -296,7 +440,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname, struct tomoyo_domain_keeper_entry *ptr; const struct tomoyo_path_info *saved_domainname; const struct tomoyo_path_info *saved_program = NULL; - static DEFINE_MUTEX(lock); int error = -ENOMEM; bool is_last_name = false; @@ -315,7 +458,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname, saved_domainname = tomoyo_save_name(domainname); if (!saved_domainname) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_domain_keeper_list_lock); list_for_each_entry(ptr, &tomoyo_domain_keeper_list, list) { if (ptr->is_not != is_not || @@ -341,7 +483,6 @@ static int tomoyo_update_domain_keeper_entry(const char *domainname, error = 0; out: up_write(&tomoyo_domain_keeper_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -394,13 +535,12 @@ bool tomoyo_read_domain_keeper_policy(struct tomoyo_io_buffer *head) from = " from "; program = ptr->program->name; } - if (!tomoyo_io_printf(head, - "%s" TOMOYO_KEYWORD_KEEP_DOMAIN - "%s%s%s\n", no, program, from, - ptr->domainname->name)) { - done = false; + done = tomoyo_io_printf(head, + "%s" TOMOYO_KEYWORD_KEEP_DOMAIN + "%s%s%s\n", no, program, from, + ptr->domainname->name); + if (!done) break; - } } up_read(&tomoyo_domain_keeper_list_lock); return done; @@ -446,7 +586,36 @@ static bool tomoyo_is_domain_keeper(const struct tomoyo_path_info *domainname, return flag; } -/* The list for "struct tomoyo_alias_entry". */ +/* + * tomoyo_alias_list is used for holding list of symlink's pathnames which are + * allowed to be passed to an execve() request. Normally, the domainname which + * the current process will belong to after execve() succeeds is calculated + * using dereferenced pathnames. But some programs behave differently depending + * on the name passed to argv[0]. For busybox, calculating domainname using + * dereferenced pathnames will cause all programs in the busybox to belong to + * the same domain. Thus, TOMOYO provides a way to allow use of symlink's + * pathname for checking execve()'s permission and calculating domainname which + * the current process will belong to after execve() succeeds. + * + * An entry is added by + * + * # echo 'alias /bin/busybox /bin/cat' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and is deleted by + * + * # echo 'delete alias /bin/busybox /bin/cat' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and all entries are retrieved by + * + * # grep ^alias /sys/kernel/security/tomoyo/exception_policy + * + * In the example above, if /bin/cat is a symlink to /bin/busybox and execution + * of /bin/cat is requested, permission is checked for /bin/cat rather than + * /bin/busybox and domainname which the current process will belong to after + * execve() succeeds is calculated using /bin/cat rather than /bin/busybox . + */ static LIST_HEAD(tomoyo_alias_list); static DECLARE_RWSEM(tomoyo_alias_list_lock); @@ -476,7 +645,6 @@ static int tomoyo_update_alias_entry(const char *original_name, saved_aliased_name = tomoyo_save_name(aliased_name); if (!saved_original_name || !saved_aliased_name) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_alias_list_lock); list_for_each_entry(ptr, &tomoyo_alias_list, list) { if (ptr->original_name != saved_original_name || @@ -499,7 +667,6 @@ static int tomoyo_update_alias_entry(const char *original_name, error = 0; out: up_write(&tomoyo_alias_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -522,12 +689,11 @@ bool tomoyo_read_alias_policy(struct tomoyo_io_buffer *head) ptr = list_entry(pos, struct tomoyo_alias_entry, list); if (ptr->is_deleted) continue; - if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n", - ptr->original_name->name, - ptr->aliased_name->name)) { - done = false; + done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALIAS "%s %s\n", + ptr->original_name->name, + ptr->aliased_name->name); + if (!done) break; - } } up_read(&tomoyo_alias_list_lock); return done; @@ -567,7 +733,6 @@ int tomoyo_delete_domain(char *domainname) name.name = domainname; tomoyo_fill_path_info(&name); - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_domain_list_lock); /* Is there an active domain? */ list_for_each_entry(domain, &tomoyo_domain_list, list) { @@ -581,7 +746,6 @@ int tomoyo_delete_domain(char *domainname) break; } up_write(&tomoyo_domain_list_lock); - /***** EXCLUSIVE SECTION END *****/ return 0; } @@ -600,7 +764,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char * struct tomoyo_domain_info *domain = NULL; const struct tomoyo_path_info *saved_domainname; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_domain_list_lock); domain = tomoyo_find_domain(domainname); if (domain) @@ -619,7 +782,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char * domain->domainname != saved_domainname) continue; flag = false; - /***** CRITICAL SECTION START *****/ read_lock(&tasklist_lock); for_each_process(p) { if (tomoyo_real_domain(p) != domain) @@ -628,7 +790,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char * break; } read_unlock(&tasklist_lock); - /***** CRITICAL SECTION END *****/ if (flag) continue; list_for_each_entry(ptr, &domain->acl_info_list, list) { @@ -651,7 +812,6 @@ struct tomoyo_domain_info *tomoyo_find_or_assign_new_domain(const char * } out: up_write(&tomoyo_domain_list_lock); - /***** EXCLUSIVE SECTION END *****/ return domain; } @@ -739,7 +899,7 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm, } /* Check execute permission. */ - retval = tomoyo_check_exec_perm(old_domain, &r, tmp); + retval = tomoyo_check_exec_perm(old_domain, &r); if (retval < 0) goto out; diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index 2316da8ec5b..5ae3a571559 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -14,21 +14,50 @@ #include "realpath.h" #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) -/* Structure for "allow_read" keyword. */ +/* + * tomoyo_globally_readable_file_entry is a structure which is used for holding + * "allow_read" entries. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_globally_readable_list . + * (2) "filename" is a pathname which is allowed to open(O_RDONLY). + * (3) "is_deleted" is a bool which is true if marked as deleted, false + * otherwise. + */ struct tomoyo_globally_readable_file_entry { struct list_head list; const struct tomoyo_path_info *filename; bool is_deleted; }; -/* Structure for "file_pattern" keyword. */ +/* + * tomoyo_pattern_entry is a structure which is used for holding + * "tomoyo_pattern_list" entries. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_pattern_list . + * (2) "pattern" is a pathname pattern which is used for converting pathnames + * to pathname patterns during learning mode. + * (3) "is_deleted" is a bool which is true if marked as deleted, false + * otherwise. + */ struct tomoyo_pattern_entry { struct list_head list; const struct tomoyo_path_info *pattern; bool is_deleted; }; -/* Structure for "deny_rewrite" keyword. */ +/* + * tomoyo_no_rewrite_entry is a structure which is used for holding + * "deny_rewrite" entries. + * It has following fields. + * + * (1) "list" which is linked to tomoyo_no_rewrite_list . + * (2) "pattern" is a pathname which is by default not permitted to modify + * already existing content. + * (3) "is_deleted" is a bool which is true if marked as deleted, false + * otherwise. + */ struct tomoyo_no_rewrite_entry { struct list_head list; const struct tomoyo_path_info *pattern; @@ -141,7 +170,31 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename, struct tomoyo_domain_info * const domain, const bool is_delete); -/* The list for "struct tomoyo_globally_readable_file_entry". */ +/* + * tomoyo_globally_readable_list is used for holding list of pathnames which + * are by default allowed to be open()ed for reading by any process. + * + * An entry is added by + * + * # echo 'allow_read /lib/libc-2.5.so' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and is deleted by + * + * # echo 'delete allow_read /lib/libc-2.5.so' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and all entries are retrieved by + * + * # grep ^allow_read /sys/kernel/security/tomoyo/exception_policy + * + * In the example above, any process is allowed to + * open("/lib/libc-2.5.so", O_RDONLY). + * One exception is, if the domain which current process belongs to is marked + * as "ignore_global_allow_read", current process can't do so unless explicitly + * given "allow_read /lib/libc-2.5.so" to the domain which current process + * belongs to. + */ static LIST_HEAD(tomoyo_globally_readable_list); static DECLARE_RWSEM(tomoyo_globally_readable_list_lock); @@ -166,7 +219,6 @@ static int tomoyo_update_globally_readable_entry(const char *filename, saved_filename = tomoyo_save_name(filename); if (!saved_filename) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_globally_readable_list_lock); list_for_each_entry(ptr, &tomoyo_globally_readable_list, list) { if (ptr->filename != saved_filename) @@ -187,7 +239,6 @@ static int tomoyo_update_globally_readable_entry(const char *filename, error = 0; out: up_write(&tomoyo_globally_readable_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -249,17 +300,44 @@ bool tomoyo_read_globally_readable_policy(struct tomoyo_io_buffer *head) list); if (ptr->is_deleted) continue; - if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n", - ptr->filename->name)) { - done = false; + done = tomoyo_io_printf(head, TOMOYO_KEYWORD_ALLOW_READ "%s\n", + ptr->filename->name); + if (!done) break; - } } up_read(&tomoyo_globally_readable_list_lock); return done; } -/* The list for "struct tomoyo_pattern_entry". */ +/* tomoyo_pattern_list is used for holding list of pathnames which are used for + * converting pathnames to pathname patterns during learning mode. + * + * An entry is added by + * + * # echo 'file_pattern /proc/\$/mounts' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and is deleted by + * + * # echo 'delete file_pattern /proc/\$/mounts' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and all entries are retrieved by + * + * # grep ^file_pattern /sys/kernel/security/tomoyo/exception_policy + * + * In the example above, if a process which belongs to a domain which is in + * learning mode requested open("/proc/1/mounts", O_RDONLY), + * "allow_read /proc/\$/mounts" is automatically added to the domain which that + * process belongs to. + * + * It is not a desirable behavior that we have to use /proc/\$/ instead of + * /proc/self/ when current process needs to access only current process's + * information. As of now, LSM version of TOMOYO is using __d_path() for + * calculating pathname. Non LSM version of TOMOYO is using its own function + * which pretends as if /proc/self/ is not a symlink; so that we can forbid + * current process from accessing other process's information. + */ static LIST_HEAD(tomoyo_pattern_list); static DECLARE_RWSEM(tomoyo_pattern_list_lock); @@ -284,7 +362,6 @@ static int tomoyo_update_file_pattern_entry(const char *pattern, saved_pattern = tomoyo_save_name(pattern); if (!saved_pattern) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_pattern_list_lock); list_for_each_entry(ptr, &tomoyo_pattern_list, list) { if (saved_pattern != ptr->pattern) @@ -305,7 +382,6 @@ static int tomoyo_update_file_pattern_entry(const char *pattern, error = 0; out: up_write(&tomoyo_pattern_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -373,17 +449,44 @@ bool tomoyo_read_file_pattern(struct tomoyo_io_buffer *head) ptr = list_entry(pos, struct tomoyo_pattern_entry, list); if (ptr->is_deleted) continue; - if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN "%s\n", - ptr->pattern->name)) { - done = false; + done = tomoyo_io_printf(head, TOMOYO_KEYWORD_FILE_PATTERN + "%s\n", ptr->pattern->name); + if (!done) break; - } } up_read(&tomoyo_pattern_list_lock); return done; } -/* The list for "struct tomoyo_no_rewrite_entry". */ +/* + * tomoyo_no_rewrite_list is used for holding list of pathnames which are by + * default forbidden to modify already written content of a file. + * + * An entry is added by + * + * # echo 'deny_rewrite /var/log/messages' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and is deleted by + * + * # echo 'delete deny_rewrite /var/log/messages' > \ + * /sys/kernel/security/tomoyo/exception_policy + * + * and all entries are retrieved by + * + * # grep ^deny_rewrite /sys/kernel/security/tomoyo/exception_policy + * + * In the example above, if a process requested to rewrite /var/log/messages , + * the process can't rewrite unless the domain which that process belongs to + * has "allow_rewrite /var/log/messages" entry. + * + * It is not a desirable behavior that we have to add "\040(deleted)" suffix + * when we want to allow rewriting already unlink()ed file. As of now, + * LSM version of TOMOYO is using __d_path() for calculating pathname. + * Non LSM version of TOMOYO is using its own function which doesn't append + * " (deleted)" suffix if the file is already unlink()ed; so that we don't + * need to worry whether the file is already unlink()ed or not. + */ static LIST_HEAD(tomoyo_no_rewrite_list); static DECLARE_RWSEM(tomoyo_no_rewrite_list_lock); @@ -407,7 +510,6 @@ static int tomoyo_update_no_rewrite_entry(const char *pattern, saved_pattern = tomoyo_save_name(pattern); if (!saved_pattern) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_no_rewrite_list_lock); list_for_each_entry(ptr, &tomoyo_no_rewrite_list, list) { if (ptr->pattern != saved_pattern) @@ -428,7 +530,6 @@ static int tomoyo_update_no_rewrite_entry(const char *pattern, error = 0; out: up_write(&tomoyo_no_rewrite_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -489,11 +590,10 @@ bool tomoyo_read_no_rewrite_policy(struct tomoyo_io_buffer *head) ptr = list_entry(pos, struct tomoyo_no_rewrite_entry, list); if (ptr->is_deleted) continue; - if (!tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE "%s\n", - ptr->pattern->name)) { - done = false; + done = tomoyo_io_printf(head, TOMOYO_KEYWORD_DENY_REWRITE + "%s\n", ptr->pattern->name); + if (!done) break; - } } up_read(&tomoyo_no_rewrite_list_lock); return done; @@ -745,7 +845,6 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename, saved_filename = tomoyo_save_name(filename); if (!saved_filename) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_domain_acl_info_list_lock); if (is_delete) goto delete; @@ -800,7 +899,6 @@ static int tomoyo_update_single_path_acl(const u8 type, const char *filename, } out: up_write(&tomoyo_domain_acl_info_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -836,7 +934,6 @@ static int tomoyo_update_double_path_acl(const u8 type, const char *filename1, saved_filename2 = tomoyo_save_name(filename2); if (!saved_filename1 || !saved_filename2) return -ENOMEM; - /***** EXCLUSIVE SECTION START *****/ down_write(&tomoyo_domain_acl_info_list_lock); if (is_delete) goto delete; @@ -884,7 +981,6 @@ static int tomoyo_update_double_path_acl(const u8 type, const char *filename1, } out: up_write(&tomoyo_domain_acl_info_list_lock); - /***** EXCLUSIVE SECTION END *****/ return error; } @@ -1025,13 +1121,11 @@ int tomoyo_check_file_perm(struct tomoyo_domain_info *domain, * * @domain: Pointer to "struct tomoyo_domain_info". * @filename: Check permission for "execute". - * @tmp: Buffer for temporary use. * * Returns 0 on success, negativevalue otherwise. */ int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain, - const struct tomoyo_path_info *filename, - struct tomoyo_page_buffer *tmp) + const struct tomoyo_path_info *filename) { const u8 mode = tomoyo_check_flags(domain, TOMOYO_MAC_FOR_FILE); diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 40927a84cb6..5f2e3326337 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -220,7 +220,6 @@ void *tomoyo_alloc_element(const unsigned int size) = roundup(size, max(sizeof(void *), sizeof(long))); if (word_aligned_size > PATH_MAX) return NULL; - /***** EXCLUSIVE SECTION START *****/ mutex_lock(&lock); if (buf_used_len + word_aligned_size > PATH_MAX) { if (!tomoyo_quota_for_elements || @@ -251,7 +250,6 @@ void *tomoyo_alloc_element(const unsigned int size) } } mutex_unlock(&lock); - /***** EXCLUSIVE SECTION END *****/ return ptr; } @@ -267,7 +265,16 @@ static unsigned int tomoyo_quota_for_savename; */ #define TOMOYO_MAX_HASH 256 -/* Structure for string data. */ +/* + * tomoyo_name_entry is a structure which is used for linking + * "struct tomoyo_path_info" into tomoyo_name_list . + * + * Since tomoyo_name_list manages a list of strings which are shared by + * multiple processes (whereas "struct tomoyo_path_info" inside + * "struct tomoyo_path_info_with_data" is not shared), a reference counter will + * be added to "struct tomoyo_name_entry" rather than "struct tomoyo_path_info" + * when TOMOYO starts supporting garbage collector. + */ struct tomoyo_name_entry { struct list_head list; struct tomoyo_path_info entry; @@ -281,10 +288,10 @@ struct tomoyo_free_memory_block_list { }; /* - * The list for "struct tomoyo_name_entry". - * - * This list is updated only inside tomoyo_save_name(), thus - * no global mutex exists. + * tomoyo_name_list is used for holding string data used by TOMOYO. + * Since same string data is likely used for multiple times (e.g. + * "/lib/libc-2.5.so"), TOMOYO shares string data in the form of + * "const struct tomoyo_path_info *". */ static struct list_head tomoyo_name_list[TOMOYO_MAX_HASH]; @@ -318,7 +325,6 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name) return NULL; } hash = full_name_hash((const unsigned char *) name, len - 1); - /***** EXCLUSIVE SECTION START *****/ mutex_lock(&lock); list_for_each_entry(ptr, &tomoyo_name_list[hash % TOMOYO_MAX_HASH], list) { @@ -366,7 +372,6 @@ const struct tomoyo_path_info *tomoyo_save_name(const char *name) } out: mutex_unlock(&lock); - /***** EXCLUSIVE SECTION END *****/ return ptr ? &ptr->entry : NULL; } diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index e42be5c4f05..3194d09fe0f 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -262,6 +262,10 @@ static int tomoyo_dentry_open(struct file *f, const struct cred *cred) return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags); } +/* + * tomoyo_security_ops is a "struct security_operations" which is used for + * registering TOMOYO. + */ static struct security_operations tomoyo_security_ops = { .name = "tomoyo", .cred_prepare = tomoyo_cred_prepare, diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h index 41c6ebafb9c..0fd588a629c 100644 --- a/security/tomoyo/tomoyo.h +++ b/security/tomoyo/tomoyo.h @@ -17,13 +17,11 @@ struct path; struct inode; struct linux_binprm; struct pt_regs; -struct tomoyo_page_buffer; int tomoyo_check_file_perm(struct tomoyo_domain_info *domain, const char *filename, const u8 perm); int tomoyo_check_exec_perm(struct tomoyo_domain_info *domain, - const struct tomoyo_path_info *filename, - struct tomoyo_page_buffer *buf); + const struct tomoyo_path_info *filename); int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, struct path *path, const int flag); int tomoyo_check_1path_perm(struct tomoyo_domain_info *domain, @@ -90,17 +88,10 @@ static inline struct tomoyo_domain_info *tomoyo_domain(void) return current_cred()->security; } -/* Caller holds tasklist_lock spinlock. */ static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct *task) { - /***** CRITICAL SECTION START *****/ - const struct cred *cred = get_task_cred(task); - struct tomoyo_domain_info *domain = cred->security; - - put_cred(cred); - return domain; - /***** CRITICAL SECTION END *****/ + return task_cred_xxx(task, security); } #endif /* !defined(_SECURITY_TOMOYO_TOMOYO_H) */ diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index c3b99def9cb..1eddae94bab 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -85,7 +85,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx) { - union ioapic_redir_entry *pent; + union kvm_ioapic_redirect_entry *pent; int injected = -1; pent = &ioapic->redirtbl[idx]; @@ -142,149 +142,40 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) } } -static int ioapic_inj_irq(struct kvm_ioapic *ioapic, - struct kvm_vcpu *vcpu, - u8 vector, u8 trig_mode, u8 delivery_mode) -{ - ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode, - delivery_mode); - - ASSERT((delivery_mode == IOAPIC_FIXED) || - (delivery_mode == IOAPIC_LOWEST_PRIORITY)); - - return kvm_apic_set_irq(vcpu, vector, trig_mode); -} - -static void ioapic_inj_nmi(struct kvm_vcpu *vcpu) -{ - kvm_inject_nmi(vcpu); - kvm_vcpu_kick(vcpu); -} - -u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, - u8 dest_mode) -{ - u32 mask = 0; - int i; - struct kvm *kvm = ioapic->kvm; - struct kvm_vcpu *vcpu; - - ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode); - - if (dest_mode == 0) { /* Physical mode. */ - if (dest == 0xFF) { /* Broadcast. */ - for (i = 0; i < KVM_MAX_VCPUS; ++i) - if (kvm->vcpus[i] && kvm->vcpus[i]->arch.apic) - mask |= 1 << i; - return mask; - } - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; - if (kvm_apic_match_physical_addr(vcpu->arch.apic, dest)) { - if (vcpu->arch.apic) - mask = 1 << i; - break; - } - } - } else if (dest != 0) /* Logical mode, MDA non-zero. */ - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; - if (vcpu->arch.apic && - kvm_apic_match_logical_addr(vcpu->arch.apic, dest)) - mask |= 1 << vcpu->vcpu_id; - } - ioapic_debug("mask %x\n", mask); - return mask; -} - static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) { - u8 dest = ioapic->redirtbl[irq].fields.dest_id; - u8 dest_mode = ioapic->redirtbl[irq].fields.dest_mode; - u8 delivery_mode = ioapic->redirtbl[irq].fields.delivery_mode; - u8 vector = ioapic->redirtbl[irq].fields.vector; - u8 trig_mode = ioapic->redirtbl[irq].fields.trig_mode; - u32 deliver_bitmask; - struct kvm_vcpu *vcpu; - int vcpu_id, r = -1; + union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; + struct kvm_lapic_irq irqe; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " "vector=%x trig_mode=%x\n", - dest, dest_mode, delivery_mode, vector, trig_mode); - - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, dest, - dest_mode); - if (!deliver_bitmask) { - ioapic_debug("no target on destination\n"); - return 0; - } + entry->fields.dest, entry->fields.dest_mode, + entry->fields.delivery_mode, entry->fields.vector, + entry->fields.trig_mode); + + irqe.dest_id = entry->fields.dest_id; + irqe.vector = entry->fields.vector; + irqe.dest_mode = entry->fields.dest_mode; + irqe.trig_mode = entry->fields.trig_mode; + irqe.delivery_mode = entry->fields.delivery_mode << 8; + irqe.level = 1; + irqe.shorthand = 0; - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); #ifdef CONFIG_X86 - if (irq == 0) - vcpu = ioapic->kvm->vcpus[0]; -#endif - if (vcpu != NULL) - r = ioapic_inj_irq(ioapic, vcpu, vector, - trig_mode, delivery_mode); - else - ioapic_debug("null lowest prio vcpu: " - "mask=%x vector=%x delivery_mode=%x\n", - deliver_bitmask, vector, IOAPIC_LOWEST_PRIORITY); - break; - case IOAPIC_FIXED: -#ifdef CONFIG_X86 - if (irq == 0) - deliver_bitmask = 1; -#endif - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) { - if (r < 0) - r = 0; - r += ioapic_inj_irq(ioapic, vcpu, vector, - trig_mode, delivery_mode); - } - } - break; - case IOAPIC_NMI: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) { - ioapic_inj_nmi(vcpu); - r = 1; - } - else - ioapic_debug("NMI to vcpu %d failed\n", - vcpu->vcpu_id); - } - break; - default: - printk(KERN_WARNING "Unsupported delivery mode %d\n", - delivery_mode); - break; + /* Always delivery PIT interrupt to vcpu 0 */ + if (irq == 0) { + irqe.dest_mode = 0; /* Physical mode. */ + irqe.dest_id = ioapic->kvm->vcpus[0]->vcpu_id; } - return r; +#endif + return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); } int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) { u32 old_irr = ioapic->irr; u32 mask = 1 << irq; - union ioapic_redir_entry entry; + union kvm_ioapic_redirect_entry entry; int ret = 1; if (irq >= 0 && irq < IOAPIC_NUM_PINS) { @@ -305,7 +196,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, int trigger_mode) { - union ioapic_redir_entry *ent; + union kvm_ioapic_redirect_entry *ent; ent = &ioapic->redirtbl[pin]; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index a34bd5e6436..7080b713c16 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -40,22 +40,7 @@ struct kvm_ioapic { u32 id; u32 irr; u32 pad; - union ioapic_redir_entry { - u64 bits; - struct { - u8 vector; - u8 delivery_mode:3; - u8 dest_mode:1; - u8 delivery_status:1; - u8 polarity:1; - u8 remote_irr:1; - u8 trig_mode:1; - u8 mask:1; - u8 reserve:7; - u8 reserved[4]; - u8 dest_id; - } fields; - } redirtbl[IOAPIC_NUM_PINS]; + union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); @@ -79,13 +64,13 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } -struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, - unsigned long bitmap); +int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, + int short_hand, int dest, int dest_mode); +int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); -u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest, - u8 dest_mode); - +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq); #endif diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 4c403750360..15147583abd 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -39,11 +39,16 @@ int kvm_iommu_map_pages(struct kvm *kvm, pfn_t pfn; int i, r = 0; struct iommu_domain *domain = kvm->arch.iommu_domain; + int flags; /* check if iommu exists and in use */ if (!domain) return 0; + flags = IOMMU_READ | IOMMU_WRITE; + if (kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY) + flags |= IOMMU_CACHE; + for (i = 0; i < npages; i++) { /* check if already mapped */ if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) @@ -53,8 +58,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, r = iommu_map_range(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - PAGE_SIZE, - IOMMU_READ | IOMMU_WRITE); + PAGE_SIZE, flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%lx\n", pfn); @@ -88,7 +92,7 @@ int kvm_assign_device(struct kvm *kvm, { struct pci_dev *pdev = NULL; struct iommu_domain *domain = kvm->arch.iommu_domain; - int r; + int r, last_flags; /* check if iommu exists and in use */ if (!domain) @@ -107,12 +111,29 @@ int kvm_assign_device(struct kvm *kvm, return r; } + last_flags = kvm->arch.iommu_flags; + if (iommu_domain_has_cap(kvm->arch.iommu_domain, + IOMMU_CAP_CACHE_COHERENCY)) + kvm->arch.iommu_flags |= KVM_IOMMU_CACHE_COHERENCY; + + /* Check if need to update IOMMU page table for guest memory */ + if ((last_flags ^ kvm->arch.iommu_flags) == + KVM_IOMMU_CACHE_COHERENCY) { + kvm_iommu_unmap_memslots(kvm); + r = kvm_iommu_map_memslots(kvm); + if (r) + goto out_unmap; + } + printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n", assigned_dev->host_busnr, PCI_SLOT(assigned_dev->host_devfn), PCI_FUNC(assigned_dev->host_devfn)); return 0; +out_unmap: + kvm_iommu_unmap_memslots(kvm); + return r; } int kvm_deassign_device(struct kvm *kvm, diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 864ac5483ba..a8bd466d00c 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -22,6 +22,9 @@ #include <linux/kvm_host.h> #include <asm/msidef.h> +#ifdef CONFIG_IA64 +#include <asm/iosapic.h> +#endif #include "irq.h" @@ -43,57 +46,73 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); } -static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) +inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) { - int vcpu_id, r = -1; - struct kvm_vcpu *vcpu; - struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); - int dest_id = (e->msi.address_lo & MSI_ADDR_DEST_ID_MASK) - >> MSI_ADDR_DEST_ID_SHIFT; - int vector = (e->msi.data & MSI_DATA_VECTOR_MASK) - >> MSI_DATA_VECTOR_SHIFT; - int dest_mode = test_bit(MSI_ADDR_DEST_MODE_SHIFT, - (unsigned long *)&e->msi.address_lo); - int trig_mode = test_bit(MSI_DATA_TRIGGER_SHIFT, - (unsigned long *)&e->msi.data); - int delivery_mode = test_bit(MSI_DATA_DELIVERY_MODE_SHIFT, - (unsigned long *)&e->msi.data); - u32 deliver_bitmask; - - BUG_ON(!ioapic); - - deliver_bitmask = kvm_ioapic_get_delivery_bitmask(ioapic, - dest_id, dest_mode); - /* IOAPIC delivery mode value is the same as MSI here */ - switch (delivery_mode) { - case IOAPIC_LOWEST_PRIORITY: - vcpu = kvm_get_lowest_prio_vcpu(ioapic->kvm, vector, - deliver_bitmask); - if (vcpu != NULL) - r = kvm_apic_set_irq(vcpu, vector, trig_mode); - else - printk(KERN_INFO "kvm: null lowest priority vcpu!\n"); - break; - case IOAPIC_FIXED: - for (vcpu_id = 0; deliver_bitmask != 0; vcpu_id++) { - if (!(deliver_bitmask & (1 << vcpu_id))) - continue; - deliver_bitmask &= ~(1 << vcpu_id); - vcpu = ioapic->kvm->vcpus[vcpu_id]; - if (vcpu) { - if (r < 0) - r = 0; - r += kvm_apic_set_irq(vcpu, vector, trig_mode); - } +#ifdef CONFIG_IA64 + return irq->delivery_mode == + (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); +#else + return irq->delivery_mode == APIC_DM_LOWEST; +#endif +} + +int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, + struct kvm_lapic_irq *irq) +{ + int i, r = -1; + struct kvm_vcpu *vcpu, *lowest = NULL; + + if (irq->dest_mode == 0 && irq->dest_id == 0xff && + kvm_is_dm_lowest_prio(irq)) + printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + vcpu = kvm->vcpus[i]; + + if (!vcpu || !kvm_apic_present(vcpu)) + continue; + + if (!kvm_apic_match_dest(vcpu, src, irq->shorthand, + irq->dest_id, irq->dest_mode)) + continue; + + if (!kvm_is_dm_lowest_prio(irq)) { + if (r < 0) + r = 0; + r += kvm_apic_set_irq(vcpu, irq); + } else { + if (!lowest) + lowest = vcpu; + else if (kvm_apic_compare_prio(vcpu, lowest) < 0) + lowest = vcpu; } - break; - default: - break; } + + if (lowest) + r = kvm_apic_set_irq(lowest, irq); + return r; } +static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int level) +{ + struct kvm_lapic_irq irq; + + irq.dest_id = (e->msi.address_lo & + MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + irq.vector = (e->msi.data & + MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + irq.dest_mode = (1 << MSI_ADDR_DEST_MODE_SHIFT) & e->msi.address_lo; + irq.trig_mode = (1 << MSI_DATA_TRIGGER_SHIFT) & e->msi.data; + irq.delivery_mode = e->msi.data & 0x700; + irq.level = 1; + irq.shorthand = 0; + + /* TODO Deal with RH bit of MSI message address */ + return kvm_irq_delivery_to_apic(kvm, NULL, &irq); +} + /* This should be called with the kvm->lock mutex held * Return value: * < 0 Interrupt was ignored (masked or not delivered for other reasons) @@ -252,7 +271,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, delta = 8; break; case KVM_IRQCHIP_IOAPIC: - e->set = kvm_set_ioapic_irq; + e->set = kvm_set_ioapic_irq; break; default: goto out; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4d0dd390aa5..e21194566b7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -41,6 +41,8 @@ #include <linux/pagemap.h> #include <linux/mman.h> #include <linux/swap.h> +#include <linux/bitops.h> +#include <linux/spinlock.h> #include <asm/processor.h> #include <asm/io.h> @@ -60,9 +62,6 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -static int msi2intx = 1; -module_param(msi2intx, bool, 0); - DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); @@ -95,38 +94,96 @@ static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *h return NULL; } +static int find_index_from_host_irq(struct kvm_assigned_dev_kernel + *assigned_dev, int irq) +{ + int i, index; + struct msix_entry *host_msix_entries; + + host_msix_entries = assigned_dev->host_msix_entries; + + index = -1; + for (i = 0; i < assigned_dev->entries_nr; i++) + if (irq == host_msix_entries[i].vector) { + index = i; + break; + } + if (index < 0) { + printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); + return 0; + } + + return index; +} + static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) { struct kvm_assigned_dev_kernel *assigned_dev; + struct kvm *kvm; + int irq, i; assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, interrupt_work); + kvm = assigned_dev->kvm; /* This is taken to safely inject irq inside the guest. When * the interrupt injection (or the ioapic code) uses a * finer-grained lock, update this */ - mutex_lock(&assigned_dev->kvm->lock); - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI) { - enable_irq(assigned_dev->host_irq); - assigned_dev->host_irq_disabled = false; + mutex_lock(&kvm->lock); + spin_lock_irq(&assigned_dev->assigned_dev_lock); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + struct kvm_guest_msix_entry *guest_entries = + assigned_dev->guest_msix_entries; + for (i = 0; i < assigned_dev->entries_nr; i++) { + if (!(guest_entries[i].flags & + KVM_ASSIGNED_MSIX_PENDING)) + continue; + guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + guest_entries[i].vector, 1); + irq = assigned_dev->host_msix_entries[i].vector; + if (irq != 0) + enable_irq(irq); + assigned_dev->host_irq_disabled = false; + } + } else { + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + if (assigned_dev->irq_requested_type & + KVM_DEV_IRQ_GUEST_MSI) { + enable_irq(assigned_dev->host_irq); + assigned_dev->host_irq_disabled = false; + } } + + spin_unlock_irq(&assigned_dev->assigned_dev_lock); mutex_unlock(&assigned_dev->kvm->lock); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) { + unsigned long flags; struct kvm_assigned_dev_kernel *assigned_dev = (struct kvm_assigned_dev_kernel *) dev_id; + spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int index = find_index_from_host_irq(assigned_dev, irq); + if (index < 0) + goto out; + assigned_dev->guest_msix_entries[index].flags |= + KVM_ASSIGNED_MSIX_PENDING; + } + schedule_work(&assigned_dev->interrupt_work); disable_irq_nosync(irq); assigned_dev->host_irq_disabled = true; +out: + spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); return IRQ_HANDLED; } @@ -134,6 +191,7 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) { struct kvm_assigned_dev_kernel *dev; + unsigned long flags; if (kian->gsi == -1) return; @@ -146,28 +204,30 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) /* The guest irq may be shared so this ack may be * from another device. */ + spin_lock_irqsave(&dev->assigned_dev_lock, flags); if (dev->host_irq_disabled) { enable_irq(dev->host_irq); dev->host_irq_disabled = false; } + spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); } -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +static void deassign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) { - if (!irqchip_in_kernel(kvm)) - return; - kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); + assigned_dev->ack_notifier.gsi = -1; if (assigned_dev->irq_source_id != -1) kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); assigned_dev->irq_source_id = -1; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); +} - if (!assigned_dev->irq_requested_type) - return; - +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ +static void deassign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ /* * In kvm_free_device_irq, cancel_work_sync return true if: * 1. work is scheduled, and then cancelled. @@ -184,17 +244,64 @@ static void kvm_free_assigned_irq(struct kvm *kvm, * now, the kvm state is still legal for probably we also have to wait * interrupt_work done. */ - disable_irq_nosync(assigned_dev->host_irq); - cancel_work_sync(&assigned_dev->interrupt_work); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int i; + for (i = 0; i < assigned_dev->entries_nr; i++) + disable_irq_nosync(assigned_dev-> + host_msix_entries[i].vector); + + cancel_work_sync(&assigned_dev->interrupt_work); + + for (i = 0; i < assigned_dev->entries_nr; i++) + free_irq(assigned_dev->host_msix_entries[i].vector, + (void *)assigned_dev); + + assigned_dev->entries_nr = 0; + kfree(assigned_dev->host_msix_entries); + kfree(assigned_dev->guest_msix_entries); + pci_disable_msix(assigned_dev->dev); + } else { + /* Deal with MSI and INTx */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); - free_irq(assigned_dev->host_irq, (void *)assigned_dev); + free_irq(assigned_dev->host_irq, (void *)assigned_dev); - if (assigned_dev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - pci_disable_msi(assigned_dev->dev); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) + pci_disable_msi(assigned_dev->dev); + } - assigned_dev->irq_requested_type = 0; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); } +static int kvm_deassign_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev, + unsigned long irq_requested_type) +{ + unsigned long guest_irq_type, host_irq_type; + + if (!irqchip_in_kernel(kvm)) + return -EINVAL; + /* no irq assignment to deassign */ + if (!assigned_dev->irq_requested_type) + return -ENXIO; + + host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; + guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; + + if (host_irq_type) + deassign_host_irq(kvm, assigned_dev); + if (guest_irq_type) + deassign_guest_irq(kvm, assigned_dev); + + return 0; +} + +static void kvm_free_assigned_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); +} static void kvm_free_assigned_device(struct kvm *kvm, struct kvm_assigned_dev_kernel @@ -226,190 +333,244 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) } } -static int assigned_device_update_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) +static int assigned_device_enable_host_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) { - adev->guest_irq = airq->guest_irq; - adev->ack_notifier.gsi = airq->guest_irq; + dev->host_irq = dev->dev->irq; + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, + 0, "kvm_assigned_intx_device", (void *)dev)) + return -EIO; + return 0; +} - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_INTX) - return 0; +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_host_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int r; - if (irqchip_in_kernel(kvm)) { - if (!msi2intx && - (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) { - free_irq(adev->host_irq, (void *)adev); - pci_disable_msi(adev->dev); - } + if (!dev->dev->msi_enabled) { + r = pci_enable_msi(dev->dev); + if (r) + return r; + } - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; + dev->host_irq = dev->dev->irq; + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_msi_device", (void *)dev)) { + pci_disable_msi(dev->dev); + return -EIO; + } - if (airq->host_irq) - adev->host_irq = airq->host_irq; - else - adev->host_irq = adev->dev->irq; + return 0; +} +#endif - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, - 0, "kvm_assigned_intx_device", (void *)adev)) - return -EIO; +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_host_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int i, r = -EINVAL; + + /* host_msix_entries and guest_msix_entries should have been + * initialized */ + if (dev->entries_nr == 0) + return r; + + r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); + if (r) + return r; + + for (i = 0; i < dev->entries_nr; i++) { + r = request_irq(dev->host_msix_entries[i].vector, + kvm_assigned_dev_intr, 0, + "kvm_assigned_msix_device", + (void *)dev); + /* FIXME: free requested_irq's on failure */ + if (r) + return r; } - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_INTX | - KVM_ASSIGNED_DEV_HOST_INTX; return 0; } -#ifdef CONFIG_X86 -static int assigned_device_update_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *adev, - struct kvm_assigned_irq *airq) +#endif + +static int assigned_device_enable_guest_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) { - int r; + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = irq->guest_irq; + return 0; +} - adev->guest_irq = airq->guest_irq; - if (airq->flags & KVM_DEV_IRQ_ASSIGN_ENABLE_MSI) { - /* x86 don't care upper address of guest msi message addr */ - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_MSI; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_INTX; - adev->ack_notifier.gsi = -1; - } else if (msi2intx) { - adev->irq_requested_type |= KVM_ASSIGNED_DEV_GUEST_INTX; - adev->irq_requested_type &= ~KVM_ASSIGNED_DEV_GUEST_MSI; - adev->ack_notifier.gsi = airq->guest_irq; - } else { - /* - * Guest require to disable device MSI, we disable MSI and - * re-enable INTx by default again. Notice it's only for - * non-msi2intx. - */ - assigned_device_update_intx(kvm, adev, airq); - return 0; +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_guest_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + return 0; +} +#endif +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_guest_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + return 0; +} +#endif + +static int assign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + __u32 host_irq_type) +{ + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) + return r; + + switch (host_irq_type) { + case KVM_DEV_IRQ_HOST_INTX: + r = assigned_device_enable_host_intx(kvm, dev); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_HOST_MSI: + r = assigned_device_enable_host_msi(kvm, dev); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_HOST_MSIX: + r = assigned_device_enable_host_msix(kvm, dev); + break; +#endif + default: + r = -EINVAL; } - if (adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) - return 0; + if (!r) + dev->irq_requested_type |= host_irq_type; - if (irqchip_in_kernel(kvm)) { - if (!msi2intx) { - if (adev->irq_requested_type & - KVM_ASSIGNED_DEV_HOST_INTX) - free_irq(adev->host_irq, (void *)adev); + return r; +} - r = pci_enable_msi(adev->dev); - if (r) - return r; - } +static int assign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq, + unsigned long guest_irq_type) +{ + int id; + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) + return r; - adev->host_irq = adev->dev->irq; - if (request_irq(adev->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_msi_device", (void *)adev)) - return -EIO; + id = kvm_request_irq_source_id(kvm); + if (id < 0) + return id; + + dev->irq_source_id = id; + + switch (guest_irq_type) { + case KVM_DEV_IRQ_GUEST_INTX: + r = assigned_device_enable_guest_intx(kvm, dev, irq); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_GUEST_MSI: + r = assigned_device_enable_guest_msi(kvm, dev, irq); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_GUEST_MSIX: + r = assigned_device_enable_guest_msix(kvm, dev, irq); + break; +#endif + default: + r = -EINVAL; } - if (!msi2intx) - adev->irq_requested_type = KVM_ASSIGNED_DEV_GUEST_MSI; + if (!r) { + dev->irq_requested_type |= guest_irq_type; + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + } else + kvm_free_irq_source_id(kvm, dev->irq_source_id); - adev->irq_requested_type |= KVM_ASSIGNED_DEV_HOST_MSI; - return 0; + return r; } -#endif +/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq - *assigned_irq) + struct kvm_assigned_irq *assigned_irq) { - int r = 0; + int r = -EINVAL; struct kvm_assigned_dev_kernel *match; - u32 current_flags = 0, changed_flags; + unsigned long host_irq_type, guest_irq_type; - mutex_lock(&kvm->lock); + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + if (!irqchip_in_kernel(kvm)) + return r; + + mutex_lock(&kvm->lock); + r = -ENODEV; match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, assigned_irq->assigned_dev_id); - if (!match) { - mutex_unlock(&kvm->lock); - return -EINVAL; - } - - if (!match->irq_requested_type) { - INIT_WORK(&match->interrupt_work, - kvm_assigned_dev_interrupt_work_handler); - if (irqchip_in_kernel(kvm)) { - /* Register ack nofitier */ - match->ack_notifier.gsi = -1; - match->ack_notifier.irq_acked = - kvm_assigned_dev_ack_irq; - kvm_register_irq_ack_notifier(kvm, - &match->ack_notifier); - - /* Request IRQ source ID */ - r = kvm_request_irq_source_id(kvm); - if (r < 0) - goto out_release; - else - match->irq_source_id = r; - -#ifdef CONFIG_X86 - /* Determine host device irq type, we can know the - * result from dev->msi_enabled */ - if (msi2intx) - pci_enable_msi(match->dev); -#endif - } - } + if (!match) + goto out; - if ((match->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) && - (match->irq_requested_type & KVM_ASSIGNED_DEV_GUEST_MSI)) - current_flags |= KVM_DEV_IRQ_ASSIGN_ENABLE_MSI; + host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); + guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); - changed_flags = assigned_irq->flags ^ current_flags; + r = -EINVAL; + /* can only assign one type at a time */ + if (hweight_long(host_irq_type) > 1) + goto out; + if (hweight_long(guest_irq_type) > 1) + goto out; + if (host_irq_type == 0 && guest_irq_type == 0) + goto out; - if ((changed_flags & KVM_DEV_IRQ_ASSIGN_MSI_ACTION) || - (msi2intx && match->dev->msi_enabled)) { -#ifdef CONFIG_X86 - r = assigned_device_update_msi(kvm, match, assigned_irq); - if (r) { - printk(KERN_WARNING "kvm: failed to enable " - "MSI device!\n"); - goto out_release; - } -#else - r = -ENOTTY; -#endif - } else if (assigned_irq->host_irq == 0 && match->dev->irq == 0) { - /* Host device IRQ 0 means don't support INTx */ - if (!msi2intx) { - printk(KERN_WARNING - "kvm: wait device to enable MSI!\n"); - r = 0; - } else { - printk(KERN_WARNING - "kvm: failed to enable MSI device!\n"); - r = -ENOTTY; - goto out_release; - } - } else { - /* Non-sharing INTx mode */ - r = assigned_device_update_intx(kvm, match, assigned_irq); - if (r) { - printk(KERN_WARNING "kvm: failed to enable " - "INTx device!\n"); - goto out_release; - } - } + r = 0; + if (host_irq_type) + r = assign_host_irq(kvm, match, host_irq_type); + if (r) + goto out; + if (guest_irq_type) + r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); +out: mutex_unlock(&kvm->lock); return r; -out_release: +} + +static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = -ENODEV; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + r = kvm_deassign_irq(kvm, match, assigned_irq->flags); +out: mutex_unlock(&kvm->lock); - kvm_free_assigned_device(kvm, match); return r; } @@ -427,7 +588,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, assigned_dev->assigned_dev_id); if (match) { /* device already assigned */ - r = -EINVAL; + r = -EEXIST; goto out; } @@ -464,8 +625,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, match->host_devfn = assigned_dev->devfn; match->flags = assigned_dev->flags; match->dev = dev; + spin_lock_init(&match->assigned_dev_lock); match->irq_source_id = -1; match->kvm = kvm; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); list_add(&match->list, &kvm->arch.assigned_dev_head); @@ -878,6 +1043,8 @@ static void kvm_destroy_vm(struct kvm *kvm) #endif #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm); +#else + kvm_arch_flush_shadow(kvm); #endif kvm_arch_destroy_vm(kvm); mmdrop(mm); @@ -919,9 +1086,8 @@ int __kvm_set_memory_region(struct kvm *kvm, { int r; gfn_t base_gfn; - unsigned long npages; - int largepages; - unsigned long i; + unsigned long npages, ugfn; + unsigned long largepages, i; struct kvm_memory_slot *memslot; struct kvm_memory_slot old, new; @@ -1010,6 +1176,14 @@ int __kvm_set_memory_region(struct kvm *kvm, new.lpage_info[0].write_count = 1; if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) new.lpage_info[largepages-1].write_count = 1; + ugfn = new.userspace_addr >> PAGE_SHIFT; + /* + * If the gfn and userspace address are not aligned wrt each + * other, disable large page support for this slot + */ + if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1)) + for (i = 0; i < largepages; ++i) + new.lpage_info[i].write_count = 1; } /* Allocate page dirty bitmap if needed */ @@ -1043,8 +1217,10 @@ int __kvm_set_memory_region(struct kvm *kvm, kvm_free_physmem_slot(&old, npages ? &new : NULL); /* Slot deletion case: we have to update the current slot */ + spin_lock(&kvm->mmu_lock); if (!npages) *memslot = old; + spin_unlock(&kvm->mmu_lock); #ifdef CONFIG_DMAR /* map the pages in iommu page table */ r = kvm_iommu_map_pages(kvm, base_gfn, npages); @@ -1454,12 +1630,14 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); - if (kvm_cpu_has_interrupt(vcpu) || - kvm_cpu_has_pending_timer(vcpu) || - kvm_arch_vcpu_runnable(vcpu)) { + if ((kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_has_interrupt(vcpu)) || + kvm_arch_vcpu_runnable(vcpu)) { set_bit(KVM_REQ_UNHALT, &vcpu->requests); break; } + if (kvm_cpu_has_pending_timer(vcpu)) + break; if (signal_pending(current)) break; @@ -1593,6 +1771,88 @@ static int kvm_vcpu_ioctl_set_sigmask(struct kvm_vcpu *vcpu, sigset_t *sigset) return 0; } +#ifdef __KVM_HAVE_MSIX +static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, + struct kvm_assigned_msix_nr *entry_nr) +{ + int r = 0; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry_nr->assigned_dev_id); + if (!adev) { + r = -EINVAL; + goto msix_nr_out; + } + + if (adev->entries_nr == 0) { + adev->entries_nr = entry_nr->entry_nr; + if (adev->entries_nr == 0 || + adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { + r = -EINVAL; + goto msix_nr_out; + } + + adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * + entry_nr->entry_nr, + GFP_KERNEL); + if (!adev->host_msix_entries) { + r = -ENOMEM; + goto msix_nr_out; + } + adev->guest_msix_entries = kzalloc( + sizeof(struct kvm_guest_msix_entry) * + entry_nr->entry_nr, GFP_KERNEL); + if (!adev->guest_msix_entries) { + kfree(adev->host_msix_entries); + r = -ENOMEM; + goto msix_nr_out; + } + } else /* Not allowed set MSI-X number twice */ + r = -EINVAL; +msix_nr_out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, + struct kvm_assigned_msix_entry *entry) +{ + int r = 0, i; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry->assigned_dev_id); + + if (!adev) { + r = -EINVAL; + goto msix_entry_out; + } + + for (i = 0; i < adev->entries_nr; i++) + if (adev->guest_msix_entries[i].vector == 0 || + adev->guest_msix_entries[i].entry == entry->entry) { + adev->guest_msix_entries[i].entry = entry->entry; + adev->guest_msix_entries[i].vector = entry->gsi; + adev->host_msix_entries[i].entry = entry->entry; + break; + } + if (i == adev->entries_nr) { + r = -ENOSPC; + goto msix_entry_out; + } + +msix_entry_out: + mutex_unlock(&kvm->lock); + + return r; +} +#endif + static long kvm_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1864,6 +2124,11 @@ static long kvm_vm_ioctl(struct file *filp, break; } case KVM_ASSIGN_IRQ: { + r = -EOPNOTSUPP; + break; + } +#ifdef KVM_CAP_ASSIGN_DEV_IRQ + case KVM_ASSIGN_DEV_IRQ: { struct kvm_assigned_irq assigned_irq; r = -EFAULT; @@ -1874,6 +2139,18 @@ static long kvm_vm_ioctl(struct file *filp, goto out; break; } + case KVM_DEASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } +#endif #endif #ifdef KVM_CAP_DEVICE_DEASSIGNMENT case KVM_DEASSIGN_PCI_DEVICE: { @@ -1917,7 +2194,29 @@ static long kvm_vm_ioctl(struct file *filp, vfree(entries); break; } +#ifdef __KVM_HAVE_MSIX + case KVM_ASSIGN_SET_MSIX_NR: { + struct kvm_assigned_msix_nr entry_nr; + r = -EFAULT; + if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) + goto out; + r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); + if (r) + goto out; + break; + } + case KVM_ASSIGN_SET_MSIX_ENTRY: { + struct kvm_assigned_msix_entry entry; + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); + if (r) + goto out; + break; + } #endif +#endif /* KVM_CAP_IRQ_ROUTING */ default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } @@ -2112,15 +2411,15 @@ EXPORT_SYMBOL_GPL(kvm_handle_fault_on_reboot); static int kvm_reboot(struct notifier_block *notifier, unsigned long val, void *v) { - if (val == SYS_RESTART) { - /* - * Some (well, at least mine) BIOSes hang on reboot if - * in vmx root mode. - */ - printk(KERN_INFO "kvm: exiting hardware virtualization\n"); - kvm_rebooting = true; - on_each_cpu(hardware_disable, NULL, 1); - } + /* + * Some (well, at least mine) BIOSes hang on reboot if + * in vmx root mode. + * + * And Intel TXT required VMX off for all cpu when system shutdown. + */ + printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + kvm_rebooting = true; + on_each_cpu(hardware_disable, NULL, 1); return NOTIFY_OK; } @@ -2354,9 +2653,6 @@ int kvm_init(void *opaque, unsigned int vcpu_size, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; -#ifndef CONFIG_X86 - msi2intx = 0; -#endif return 0; |