diff options
119 files changed, 2792 insertions, 1937 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 08687e45e19..1a7f53068ec 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -11,7 +11,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ procfs-guide.xml writing_usb_driver.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ - genericirq.xml + genericirq.xml s390-drivers.xml ### # The build process is as follows (targets): diff --git a/Documentation/DocBook/s390-drivers.tmpl b/Documentation/DocBook/s390-drivers.tmpl new file mode 100644 index 00000000000..254e769282a --- /dev/null +++ b/Documentation/DocBook/s390-drivers.tmpl @@ -0,0 +1,149 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" + "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> + +<book id="s390drivers"> + <bookinfo> + <title>Writing s390 channel device drivers</title> + + <authorgroup> + <author> + <firstname>Cornelia</firstname> + <surname>Huck</surname> + <affiliation> + <address> + <email>cornelia.huck@de.ibm.com</email> + </address> + </affiliation> + </author> + </authorgroup> + + <copyright> + <year>2007</year> + <holder>IBM Corp.</holder> + </copyright> + + <legalnotice> + <para> + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later + version. + </para> + + <para> + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + </para> + + <para> + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + </para> + + <para> + For more details see the file COPYING in the source + distribution of Linux. + </para> + </legalnotice> + </bookinfo> + +<toc></toc> + + <chapter id="intro"> + <title>Introduction</title> + <para> + This document describes the interfaces available for device drivers that + drive s390 based channel attached devices. This includes interfaces for + interaction with the hardware and interfaces for interacting with the + common driver core. Those interfaces are provided by the s390 common I/O + layer. + </para> + <para> + The document assumes a familarity with the technical terms associated + with the s390 channel I/O architecture. For a description of this + architecture, please refer to the "z/Architecture: Principles of + Operation", IBM publication no. SA22-7832. + </para> + <para> + While most I/O devices on a s390 system are typically driven through the + channel I/O mechanism described here, there are various other methods + (like the diag interface). These are out of the scope of this document. + </para> + <para> + Some additional information can also be found in the kernel source + under Documentation/s390/driver-model.txt. + </para> + </chapter> + <chapter id="ccw"> + <title>The ccw bus</title> + <para> + The ccw bus typically contains the majority of devices available to + a s390 system. Named after the channel command word (ccw), the basic + command structure used to address its devices, the ccw bus contains + so-called channel attached devices. They are addressed via subchannels, + visible on the css bus. A device driver, however, will never interact + with the subchannel directly, but only via the device on the ccw bus, + the ccw device. + </para> + <sect1 id="channelIO"> + <title>I/O functions for channel-attached devices</title> + <para> + Some hardware structures have been translated into C structures for use + by the common I/O layer and device drivers. For more information on + the hardware structures represented here, please consult the Principles + of Operation. + </para> +!Iinclude/asm-s390/cio.h + </sect1> + <sect1 id="ccwdev"> + <title>ccw devices</title> + <para> + Devices that want to initiate channel I/O need to attach to the ccw bus. + Interaction with the driver core is done via the common I/O layer, which + provides the abstractions of ccw devices and ccw device drivers. + </para> + <para> + The functions that initiate or terminate channel I/O all act upon a + ccw device structure. Device drivers must not bypass those functions + or strange side effects may happen. + </para> +!Iinclude/asm-s390/ccwdev.h +!Edrivers/s390/cio/device.c +!Edrivers/s390/cio/device_ops.c + </sect1> + <sect1 id="cmf"> + <title>The channel-measurement facility</title> + <para> + The channel-measurement facility provides a means to collect + measurement data which is made available by the channel subsystem + for each channel attached device. + </para> +!Iinclude/asm-s390/cmb.h +!Edrivers/s390/cio/cmf.c + </sect1> + </chapter> + + <chapter id="ccwgroup"> + <title>The ccwgroup bus</title> + <para> + The ccwgroup bus only contains artificial devices, created by the user. + Many networking devices (e.g. qeth) are in fact composed of several + ccw devices (like read, write and data channel for qeth). The + ccwgroup bus provides a mechanism to create a meta-device which + contains those ccw devices as slave devices and can be associated + with the netdevice. + </para> + <sect1 id="ccwgroupdevices"> + <title>ccw group devices</title> +!Iinclude/asm-s390/ccwgroup.h +!Edrivers/s390/cio/ccwgroup.c + </sect1> + </chapter> + +</book> diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index 8ee10ec8829..e79ee2db183 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -407,7 +407,7 @@ raiddev /dev/md0 device /dev/hda5 raid-disk 0 device /dev/hdb1 - raid-disl 1 + raid-disk 1 For linear raid, just change the raid-level above to "raid-level linear", for mirrors, change it to "raid-level 1", and for stripe sets with parity, change @@ -457,6 +457,8 @@ ChangeLog Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. +2.1.29: + - Fix a deadlock when mounting read-write. 2.1.28: - Fix a deadlock. 2.1.27: diff --git a/Documentation/s390/00-INDEX b/Documentation/s390/00-INDEX new file mode 100644 index 00000000000..3a2b96302ec --- /dev/null +++ b/Documentation/s390/00-INDEX @@ -0,0 +1,26 @@ +00-INDEX + - this file. +3270.ChangeLog + - ChangeLog for the UTS Global 3270-support patch (outdated). +3270.txt + - how to use the IBM 3270 display system support. +cds.txt + - s390 common device support (common I/O layer). +CommonIO + - common I/O layer command line parameters, procfs and debugfs entries +config3270.sh + - example configuration for 3270 devices. +DASD + - information on the DASD disk device driver. +Debugging390.txt + - hints for debugging on s390 systems. +driver-model.txt + - information on s390 devices and the driver model. +monreader.txt + - information on accessing the z/VM monitor stream from Linux. +s390dbf.txt + - information on using the s390 debug feature. +TAPE + - information on the driver for channel-attached tapes. +zfcpdump + - information on the s390 SCSI dump tool. diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO index 22f82f21bc6..86320aa3fb0 100644 --- a/Documentation/s390/CommonIO +++ b/Documentation/s390/CommonIO @@ -1,5 +1,5 @@ -S/390 common I/O-Layer - command line parameters and /proc entries -================================================================== +S/390 common I/O-Layer - command line parameters, procfs and debugfs entries +============================================================================ Command line parameters ----------------------- @@ -7,9 +7,9 @@ Command line parameters * cio_msg = yes | no Determines whether information on found devices and sensed device - characteristics should be shown during startup, i. e. messages of the types - "Detected device 0.0.4711 on subchannel 0.0.0042" and "SenseID: Device - 0.0.4711 reports: ...". + characteristics should be shown during startup or when new devices are + found, i. e. messages of the types "Detected device 0.0.4711 on subchannel + 0.0.0042" and "SenseID: Device 0.0.4711 reports: ...". Default is off. @@ -26,8 +26,10 @@ Command line parameters An ignored device can be un-ignored later; see the "/proc entries"-section for details. - The devices must be given either as bus ids (0.0.abcd) or as hexadecimal - device numbers (0xabcd or abcd, for 2.4 backward compatibility). + The devices must be given either as bus ids (0.x.abcd) or as hexadecimal + device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you + give a device number 0xabcd, it will be interpreted as 0.0.abcd. + You can use the 'all' keyword to ignore all devices. The '!' operator will cause the I/O-layer to _not_ ignore a device. The command line is parsed from left to right. @@ -81,31 +83,36 @@ Command line parameters will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored devices. - The devices can be specified either by bus id (0.0.abcd) or, for 2.4 backward - compatibility, by the device number in hexadecimal (0xabcd or abcd). + The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward + compatibility, by the device number in hexadecimal (0xabcd or abcd). Device + numbers given as 0xabcd will be interpreted as 0.0.abcd. + +* For some of the information present in the /proc filesystem in 2.4 (namely, + /proc/subchannels and /proc/chpids), see driver-model.txt. + Information formerly in /proc/irq_count is now in /proc/interrupts. + +debugfs entries +--------------- -* /proc/s390dbf/cio_*/ (S/390 debug feature) +* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature) Some views generated by the debug feature to hold various debug outputs. - - /proc/s390dbf/cio_crw/sprintf + - /sys/kernel/debug/s390dbf/cio_crw/sprintf Messages from the processing of pending channel report words (machine check - handling), which will also show when CONFIG_DEBUG_CRW is defined. + handling). - - /proc/s390dbf/cio_msg/sprintf - Various debug messages from the common I/O-layer; generally, messages which - will also show when CONFIG_DEBUG_IO is defined. + - /sys/kernel/debug/s390dbf/cio_msg/sprintf + Various debug messages from the common I/O-layer, including messages + printed when cio_msg=yes. - - /proc/s390dbf/cio_trace/hex_ascii + - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii Logs the calling of functions in the common I/O-layer and, if applicable, which subchannel they were called for, as well as dumps of some data structures (like irb in an error case). The level of logging can be changed to be more or less verbose by piping to - /proc/s390dbf/cio_*/level a number between 0 and 6; see the documentation on - the S/390 debug feature (Documentation/s390/s390dbf.txt) for details. - -* For some of the information present in the /proc filesystem in 2.4 (namely, - /proc/subchannels and /proc/chpids), see driver-model.txt. - Information formerly in /proc/irq_count is now in /proc/interrupts. + /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the + documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt) + for details. diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt index 58919d6a593..3081927cc2d 100644 --- a/Documentation/s390/cds.txt +++ b/Documentation/s390/cds.txt @@ -286,10 +286,10 @@ first: timeout value -EIO: the common I/O layer terminated the request due to an error state -If the concurrent sense flag in the extended status word in the irb is set, the -field irb->scsw.count describes the number of device specific sense bytes -available in the extended control word irb->scsw.ecw[0]. No device sensing by -the device driver itself is required. +If the concurrent sense flag in the extended status word (esw) in the irb is +set, the field erw.scnt in the esw describes the number of device specific +sense bytes available in the extended control word irb->scsw.ecw[]. No device +sensing by the device driver itself is required. The device interrupt handler can use the following definitions to investigate the primary unit check source coded in sense byte 0 : diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 62391fb1f61..ac61cf43a7d 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -547,8 +547,7 @@ static void __cpuinit appldata_online_cpu(int cpu) spin_unlock(&appldata_timer_lock); } -static void -appldata_offline_cpu(int cpu) +static void __cpuinit appldata_offline_cpu(int cpu) { del_virt_timer(&per_cpu(appldata_timer, cpu)); if (atomic_dec_and_test(&appldata_expire_count)) { @@ -560,9 +559,9 @@ appldata_offline_cpu(int cpu) spin_unlock(&appldata_timer_lock); } -static int __cpuinit -appldata_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +static int __cpuinit appldata_cpu_notify(struct notifier_block *self, + unsigned long action, + void *hcpu) { switch (action) { case CPU_ONLINE: @@ -608,63 +607,15 @@ static int __init appldata_init(void) register_hotcpu_notifier(&appldata_nb); appldata_sysctl_header = register_sysctl_table(appldata_dir_table); -#ifdef MODULE - appldata_dir_table[0].de->owner = THIS_MODULE; - appldata_table[0].de->owner = THIS_MODULE; - appldata_table[1].de->owner = THIS_MODULE; -#endif P_DEBUG("Base interface initialized.\n"); return 0; } -/* - * appldata_exit() - * - * stop timer, unregister /proc entries - */ -static void __exit appldata_exit(void) -{ - struct list_head *lh; - struct appldata_ops *ops; - int rc, i; +__initcall(appldata_init); - P_DEBUG("Unloading module ...\n"); - /* - * ops list should be empty, but just in case something went wrong... - */ - spin_lock(&appldata_ops_lock); - list_for_each(lh, &appldata_ops_list) { - ops = list_entry(lh, struct appldata_ops, list); - rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, - (unsigned long) ops->data, ops->size, - ops->mod_lvl); - if (rc != 0) { - P_ERROR("STOP DIAG 0xDC for %s failed, " - "return code: %d\n", ops->name, rc); - } - } - spin_unlock(&appldata_ops_lock); - - for_each_online_cpu(i) - appldata_offline_cpu(i); - - appldata_timer_active = 0; - - unregister_sysctl_table(appldata_sysctl_header); - - destroy_workqueue(appldata_wq); - P_DEBUG("... module unloaded!\n"); -} /**************************** init / exit <END> ******************************/ - -module_init(appldata_init); -module_exit(appldata_exit); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Gerald Schaefer"); -MODULE_DESCRIPTION("Linux-VM Monitor Stream, base infrastructure"); - EXPORT_SYMBOL_GPL(appldata_register_ops); EXPORT_SYMBOL_GPL(appldata_unregister_ops); EXPORT_SYMBOL_GPL(appldata_diag); diff --git a/arch/s390/kernel/audit.c b/arch/s390/kernel/audit.c index d1c76fe10f2..f4932c22ebe 100644 --- a/arch/s390/kernel/audit.c +++ b/arch/s390/kernel/audit.c @@ -2,6 +2,7 @@ #include <linux/types.h> #include <linux/audit.h> #include <asm/unistd.h> +#include "audit.h" static unsigned dir_class[] = { #include <asm-generic/audit_dir_write.h> @@ -40,7 +41,6 @@ int audit_classify_arch(int arch) int audit_classify_syscall(int abi, unsigned syscall) { #ifdef CONFIG_COMPAT - extern int s390_classify_syscall(unsigned); if (abi == AUDIT_ARCH_S390) return s390_classify_syscall(syscall); #endif @@ -61,11 +61,6 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { #ifdef CONFIG_COMPAT - extern __u32 s390_dir_class[]; - extern __u32 s390_write_class[]; - extern __u32 s390_read_class[]; - extern __u32 s390_chattr_class[]; - extern __u32 s390_signal_class[]; audit_register_class(AUDIT_CLASS_WRITE_32, s390_write_class); audit_register_class(AUDIT_CLASS_READ_32, s390_read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE_32, s390_dir_class); diff --git a/arch/s390/kernel/audit.h b/arch/s390/kernel/audit.h new file mode 100644 index 00000000000..12b56f4b5a7 --- /dev/null +++ b/arch/s390/kernel/audit.h @@ -0,0 +1,15 @@ +#ifndef __ARCH_S390_KERNEL_AUDIT_H +#define __ARCH_S390_KERNEL_AUDIT_H + +#include <linux/types.h> + +#ifdef CONFIG_COMPAT +extern int s390_classify_syscall(unsigned); +extern __u32 s390_dir_class[]; +extern __u32 s390_write_class[]; +extern __u32 s390_read_class[]; +extern __u32 s390_chattr_class[]; +extern __u32 s390_signal_class[]; +#endif /* CONFIG_COMPAT */ + +#endif /* __ARCH_S390_KERNEL_AUDIT_H */ diff --git a/arch/s390/kernel/compat_audit.c b/arch/s390/kernel/compat_audit.c index 0569f5126e4..d6487bf879e 100644 --- a/arch/s390/kernel/compat_audit.c +++ b/arch/s390/kernel/compat_audit.c @@ -1,5 +1,6 @@ #undef __s390x__ #include <asm/unistd.h> +#include "audit.h" unsigned s390_dir_class[] = { #include <asm-generic/audit_dir_write.h> diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 6c89f30c8e3..d8c1131e081 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -2,7 +2,7 @@ * arch/s390/kernel/cpcmd.c * * S390 version - * Copyright (C) 1999,2005 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999,2007 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), * Christian Borntraeger (cborntra@de.ibm.com), */ @@ -21,6 +21,49 @@ static DEFINE_SPINLOCK(cpcmd_lock); static char cpcmd_buf[241]; +static int diag8_noresponse(int cmdlen) +{ + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = cmdlen; + + asm volatile( +#ifndef CONFIG_64BIT + " diag %1,%0,0x8\n" +#else /* CONFIG_64BIT */ + " sam31\n" + " diag %1,%0,0x8\n" + " sam64\n" +#endif /* CONFIG_64BIT */ + : "+d" (reg3) : "d" (reg2) : "cc"); + return reg3; +} + +static int diag8_response(int cmdlen, char *response, int *rlen) +{ + register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; + register unsigned long reg3 asm ("3") = (addr_t) response; + register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; + register unsigned long reg5 asm ("5") = *rlen; + + asm volatile( +#ifndef CONFIG_64BIT + " diag %2,%0,0x8\n" + " brc 8,1f\n" + " ar %1,%4\n" +#else /* CONFIG_64BIT */ + " sam31\n" + " diag %2,%0,0x8\n" + " sam64\n" + " brc 8,1f\n" + " agr %1,%4\n" +#endif /* CONFIG_64BIT */ + "1:\n" + : "+d" (reg4), "+d" (reg5) + : "d" (reg2), "d" (reg3), "d" (*rlen) : "cc"); + *rlen = reg5; + return reg4; +} + /* * __cpcmd has some restrictions over cpcmd * - the response buffer must reside below 2GB (if any) @@ -28,59 +71,27 @@ static char cpcmd_buf[241]; */ int __cpcmd(const char *cmd, char *response, int rlen, int *response_code) { - unsigned cmdlen; - int return_code, return_len; + int cmdlen; + int rc; + int response_len; cmdlen = strlen(cmd); BUG_ON(cmdlen > 240); memcpy(cpcmd_buf, cmd, cmdlen); ASCEBC(cpcmd_buf, cmdlen); - if (response != NULL && rlen > 0) { - register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; - register unsigned long reg3 asm ("3") = (addr_t) response; - register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L; - register unsigned long reg5 asm ("5") = rlen; - + if (response) { memset(response, 0, rlen); - asm volatile( -#ifndef CONFIG_64BIT - " diag %2,%0,0x8\n" - " brc 8,1f\n" - " ar %1,%4\n" -#else /* CONFIG_64BIT */ - " sam31\n" - " diag %2,%0,0x8\n" - " sam64\n" - " brc 8,1f\n" - " agr %1,%4\n" -#endif /* CONFIG_64BIT */ - "1:\n" - : "+d" (reg4), "+d" (reg5) - : "d" (reg2), "d" (reg3), "d" (rlen) : "cc"); - return_code = (int) reg4; - return_len = (int) reg5; - EBCASC(response, rlen); + response_len = rlen; + rc = diag8_response(cmdlen, response, &rlen); + EBCASC(response, response_len); } else { - register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf; - register unsigned long reg3 asm ("3") = cmdlen; - return_len = 0; - asm volatile( -#ifndef CONFIG_64BIT - " diag %1,%0,0x8\n" -#else /* CONFIG_64BIT */ - " sam31\n" - " diag %1,%0,0x8\n" - " sam64\n" -#endif /* CONFIG_64BIT */ - : "+d" (reg3) : "d" (reg2) : "cc"); - return_code = (int) reg3; + rc = diag8_noresponse(cmdlen); } - if (response_code != NULL) - *response_code = return_code; - return return_len; + if (response_code) + *response_code = rc; + return rlen; } - EXPORT_SYMBOL(__cpcmd); int cpcmd(const char *cmd, char *response, int rlen, int *response_code) @@ -109,5 +120,4 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) } return len; } - EXPORT_SYMBOL(cpcmd); diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 50d2235df73..c14a336f630 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1162,6 +1162,7 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) unsigned int value; char separator; char *ptr; + int i; ptr = buffer; insn = find_insn(code); @@ -1169,7 +1170,8 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%.5s\t", insn->name); /* Extract the operands. */ separator = 0; - for (ops = formats[insn->format] + 1; *ops != 0; ops++) { + for (ops = formats[insn->format] + 1, i = 0; + *ops != 0 && i < 6; ops++, i++) { operand = operands + *ops; value = extract_operand(code, operand); if ((operand->flags & OPERAND_INDEX) && value == 0) @@ -1241,7 +1243,6 @@ void show_code(struct pt_regs *regs) } /* Find a starting point for the disassembly. */ while (start < 32) { - hops = 0; for (i = 0, hops = 0; start + i < 32 && hops < 3; hops++) { if (!find_insn(code + start + i)) break; diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 8b8f136d9cc..66b51901c87 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -735,10 +735,10 @@ void do_reipl(void) case REIPL_METHOD_CCW_VM: reipl_get_ascii_loadparm(loadparm); if (strlen(loadparm) == 0) - sprintf(buf, "IPL %X", + sprintf(buf, "IPL %X CLEAR", reipl_block_ccw->ipl_info.ccw.devno); else - sprintf(buf, "IPL %X LOADPARM '%s'", + sprintf(buf, "IPL %X CLEAR LOADPARM '%s'", reipl_block_ccw->ipl_info.ccw.devno, loadparm); __cpcmd(buf, NULL, 0, NULL); break; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index b4622a3889b..849120e3e28 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -2,6 +2,7 @@ * Written by Martin Schwidefsky (schwidefsky@de.ibm.com) */ +#include <asm/page.h> #include <asm-generic/vmlinux.lds.h> #ifndef CONFIG_64BIT @@ -18,121 +19,142 @@ jiffies = jiffies_64; SECTIONS { - . = 0x00000000; - _text = .; /* Text and read-only data */ - .text : { - *(.text.head) + . = 0x00000000; + .text : { + _text = .; /* Text and read-only data */ + *(.text.head) TEXT_TEXT - SCHED_TEXT - LOCK_TEXT - KPROBES_TEXT - *(.fixup) - *(.gnu.warning) + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + *(.fixup) + *(.gnu.warning) } = 0x0700 - _etext = .; /* End of text section */ + _etext = .; /* End of text section */ - RODATA + RODATA #ifdef CONFIG_SHARED_KERNEL - . = ALIGN(1048576); /* VM shared segments are 1MB aligned */ + . = ALIGN(0x100000); /* VM shared segments are 1MB aligned */ #endif - . = ALIGN(4096); - _eshared = .; /* End of shareable data */ - - . = ALIGN(16); /* Exception table */ - __start___ex_table = .; - __ex_table : { *(__ex_table) } - __stop___ex_table = .; - - NOTES - - BUG_TABLE - - .data : { /* Data */ - DATA_DATA - CONSTRUCTORS - } - - . = ALIGN(4096); - __nosave_begin = .; - .data_nosave : { *(.data.nosave) } - . = ALIGN(4096); - __nosave_end = .; - - . = ALIGN(4096); - .data.page_aligned : { *(.data.idt) } - - . = ALIGN(256); - .data.cacheline_aligned : { *(.data.cacheline_aligned) } - - . = ALIGN(256); - .data.read_mostly : { *(.data.read_mostly) } - _edata = .; /* End of data section */ - - . = ALIGN(8192); /* init_task */ - .data.init_task : { *(.data.init_task) } - - /* will be freed after init */ - . = ALIGN(4096); /* Init code and data */ - __init_begin = .; - .init.text : { - _sinittext = .; - *(.init.text) - _einittext = .; - } - /* - * .exit.text is discarded at runtime, not link time, - * to deal with references from __bug_table - */ - .exit.text : { *(.exit.text) } - - .init.data : { *(.init.data) } - . = ALIGN(256); - __setup_start = .; - .init.setup : { *(.init.setup) } - __setup_end = .; - __initcall_start = .; - .initcall.init : { - INITCALLS - } - __initcall_end = .; - __con_initcall_start = .; - .con_initcall.init : { *(.con_initcall.init) } - __con_initcall_end = .; - SECURITY_INIT + . = ALIGN(PAGE_SIZE); + _eshared = .; /* End of shareable data */ + + . = ALIGN(16); /* Exception table */ + __ex_table : { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + NOTES + BUG_TABLE + + .data : { /* Data */ + DATA_DATA + CONSTRUCTORS + } + + . = ALIGN(PAGE_SIZE); + .data_nosave : { + __nosave_begin = .; + *(.data.nosave) + } + . = ALIGN(PAGE_SIZE); + __nosave_end = .; + + . = ALIGN(PAGE_SIZE); + .data.page_aligned : { + *(.data.idt) + } + + . = ALIGN(0x100); + .data.cacheline_aligned : { + *(.data.cacheline_aligned) + } + + . = ALIGN(0x100); + .data.read_mostly : { + *(.data.read_mostly) + } + _edata = .; /* End of data section */ + + . = ALIGN(2 * PAGE_SIZE); /* init_task */ + .data.init_task : { + *(.data.init_task) + } + + /* will be freed after init */ + . = ALIGN(PAGE_SIZE); /* Init code and data */ + __init_begin = .; + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } + /* + * .exit.text is discarded at runtime, not link time, + * to deal with references from __bug_table + */ + .exit.text : { + *(.exit.text) + } + + .init.data : { + *(.init.data) + } + . = ALIGN(0x100); + .init.setup : { + __setup_start = .; + *(.init.setup) + __setup_end = .; + } + .initcall.init : { + __initcall_start = .; + INITCALLS + __initcall_end = .; + } + + .con_initcall.init : { + __con_initcall_start = .; + *(.con_initcall.init) + __con_initcall_end = .; + } + SECURITY_INIT #ifdef CONFIG_BLK_DEV_INITRD - . = ALIGN(256); - __initramfs_start = .; - .init.ramfs : { *(.init.initramfs) } - . = ALIGN(2); - __initramfs_end = .; + . = ALIGN(0x100); + .init.ramfs : { + __initramfs_start = .; + *(.init.ramfs) + . = ALIGN(2); + __initramfs_end = .; + } #endif - PERCPU(4096) - . = ALIGN(4096); - __init_end = .; - /* freed after init ends here */ - - __bss_start = .; /* BSS */ - .bss : { *(.bss) } - . = ALIGN(2); - __bss_stop = .; - - _end = . ; - - /* Sections to be discarded */ - /DISCARD/ : { - *(.exit.data) *(.exitcall.exit) - } - - /* Stabs debugging sections. */ - .stab 0 : { *(.stab) } - .stabstr 0 : { *(.stabstr) } - .stab.excl 0 : { *(.stab.excl) } - .stab.exclstr 0 : { *(.stab.exclstr) } - .stab.index 0 : { *(.stab.index) } - .stab.indexstr 0 : { *(.stab.indexstr) } - .comment 0 : { *(.comment) } + + PERCPU(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); + __init_end = .; /* freed after init ends here */ + + /* BSS */ + .bss : { + __bss_start = .; + *(.bss) + . = ALIGN(2); + __bss_stop = .; + } + + _end = . ; + + /* Sections to be discarded */ + /DISCARD/ : { + *(.exit.data) + *(.exitcall.exit) + } + + /* Debugging sections. */ + STABS_DEBUG + DWARF_DEBUG } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 54055194e9a..4c1ac341ec8 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -468,7 +468,7 @@ typedef struct { __u64 refselmk; __u64 refcmpmk; __u64 reserved; -} __attribute__ ((packed)) pfault_refbk_t; +} __attribute__ ((packed, aligned(8))) pfault_refbk_t; int pfault_init(void) { diff --git a/block/Kconfig b/block/Kconfig index 2484e0e9d89..e10895647f7 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -62,6 +62,10 @@ config BLK_DEV_BSG protocols (e.g. Task Management Functions and SMP in Serial Attached SCSI). +config BLOCK_COMPAT + bool + default y + endif # BLOCK source block/Kconfig.iosched diff --git a/block/Makefile b/block/Makefile index 3cfe7cebaa6..826108190f0 100644 --- a/block/Makefile +++ b/block/Makefile @@ -11,4 +11,4 @@ obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o -obj-$(CONFIG_COMPAT) += compat_ioctl.o +obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 4245b7f80a4..ca4d7f0d09b 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -361,8 +361,7 @@ config BLK_DEV_RAM_SIZE default "4096" help The default value is 4096 kilobytes. Only change this if you know - what are you doing. If you are using IBM S/390, then set this to - 8192. + what are you doing. config BLK_DEV_RAM_BLOCKSIZE int "Default RAM disk block size (bytes)" diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 64fbc9759a3..c65d203a846 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -143,7 +143,7 @@ void mmc_remove_host(struct mmc_host *host) device_del(&host->class_dev); - led_trigger_unregister(host->led); + led_trigger_unregister_simple(host->led); spin_lock(&mmc_host_lock); idr_remove(&mmc_host_idr, host->index); diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index d68accea380..78ed633ceb8 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -2652,10 +2652,10 @@ static int bnx2_poll_work(struct bnx2 *bp, int work_done, int budget) REG_RD(bp, BNX2_HC_COMMAND); } - if (bp->status_blk->status_tx_quick_consumer_index0 != bp->hw_tx_cons) + if (sblk->status_tx_quick_consumer_index0 != bp->hw_tx_cons) bnx2_tx_int(bp); - if (bp->status_blk->status_rx_quick_consumer_index0 != bp->hw_rx_cons) + if (sblk->status_rx_quick_consumer_index0 != bp->hw_rx_cons) work_done += bnx2_rx_int(bp, budget - work_done); return work_done; @@ -2665,6 +2665,7 @@ static int bnx2_poll(struct napi_struct *napi, int budget) { struct bnx2 *bp = container_of(napi, struct bnx2, napi); int work_done = 0; + struct status_block *sblk = bp->status_blk; while (1) { work_done = bnx2_poll_work(bp, work_done, budget); @@ -2672,16 +2673,19 @@ static int bnx2_poll(struct napi_struct *napi, int budget) if (unlikely(work_done >= budget)) break; + /* bp->last_status_idx is used below to tell the hw how + * much work has been processed, so we must read it before + * checking for more work. + */ + bp->last_status_idx = sblk->status_idx; + rmb(); if (likely(!bnx2_has_work(bp))) { - bp->last_status_idx = bp->status_blk->status_idx; - rmb(); - netif_rx_complete(bp->dev, napi); if (likely(bp->flags & USING_MSI_FLAG)) { REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | bp->last_status_idx); - return 0; + break; } REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index e795c33b982..30b1cca8144 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3576,7 +3576,7 @@ static int tg3_poll_work(struct tg3 *tp, int work_done, int budget) if (sblk->idx[0].tx_consumer != tp->tx_cons) { tg3_tx(tp); if (unlikely(tp->tg3_flags & TG3_FLAG_TX_RECOVERY_PENDING)) - return 0; + return work_done; } /* run RX thread, within the bounds set by NAPI. @@ -3593,6 +3593,7 @@ static int tg3_poll(struct napi_struct *napi, int budget) { struct tg3 *tp = container_of(napi, struct tg3, napi); int work_done = 0; + struct tg3_hw_status *sblk = tp->hw_status; while (1) { work_done = tg3_poll_work(tp, work_done, budget); @@ -3603,15 +3604,17 @@ static int tg3_poll(struct napi_struct *napi, int budget) if (unlikely(work_done >= budget)) break; - if (likely(!tg3_has_work(tp))) { - struct tg3_hw_status *sblk = tp->hw_status; - - if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) { - tp->last_tag = sblk->status_tag; - rmb(); - } else - sblk->status &= ~SD_STATUS_UPDATED; + if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) { + /* tp->last_tag is used in tg3_restart_ints() below + * to tell the hw how much work has been processed, + * so we must read it before checking for more work. + */ + tp->last_tag = sblk->status_tag; + rmb(); + } else + sblk->status &= ~SD_STATUS_UPDATED; + if (likely(!tg3_has_work(tp))) { netif_rx_complete(tp->dev, napi); tg3_restart_ints(tp); break; @@ -3621,9 +3624,10 @@ static int tg3_poll(struct napi_struct *napi, int budget) return work_done; tx_recovery: + /* work_done is guaranteed to be less than budget. */ netif_rx_complete(tp->dev, napi); schedule_work(&tp->reset_task); - return 0; + return work_done; } static void tg3_irq_quiesce(struct tg3 *tp) diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index aeda5268244..d427daeef51 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -53,6 +53,7 @@ #include <linux/genhd.h> #include <linux/hdreg.h> #include <linux/interrupt.h> +#include <linux/log2.h> #include <asm/ccwdev.h> #include <linux/workqueue.h> #include <asm/debug.h> @@ -456,7 +457,7 @@ dasd_free_chunk(struct list_head *chunk_list, void *mem) static inline int dasd_check_blocksize(int bsize) { - if (bsize < 512 || bsize > 4096 || (bsize & (bsize - 1)) != 0) + if (bsize < 512 || bsize > 4096 || !is_power_of_2(bsize)) return -EMEDIUMTYPE; return 0; } diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c index 0fbacc8b106..f231bc21b1c 100644 --- a/drivers/s390/block/xpram.c +++ b/drivers/s390/block/xpram.c @@ -230,7 +230,7 @@ static int xpram_make_request(struct request_queue *q, struct bio *bio) } } set_bit(BIO_UPTODATE, &bio->bi_flags); - bio_end_io(bio, 0); + bio_endio(bio, 0); return 0; fail: bio_io_error(bio); diff --git a/drivers/s390/char/con3215.c b/drivers/s390/char/con3215.c index 6000bdee408..0e1f35c9ed9 100644 --- a/drivers/s390/char/con3215.c +++ b/drivers/s390/char/con3215.c @@ -667,6 +667,9 @@ raw3215_probe (struct ccw_device *cdev) struct raw3215_info *raw; int line; + /* Console is special. */ + if (raw3215[0] && (cdev->dev.driver_data == raw3215[0])) + return 0; raw = kmalloc(sizeof(struct raw3215_info) + RAW3215_INBUF_SIZE, GFP_KERNEL|GFP_DMA); if (raw == NULL) diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c index fd3479119eb..0b040557db0 100644 --- a/drivers/s390/char/con3270.c +++ b/drivers/s390/char/con3270.c @@ -22,6 +22,7 @@ #include <asm/ebcdic.h> #include "raw3270.h" +#include "tty3270.h" #include "ctrlchar.h" #define CON3270_OUTPUT_BUFFER_SIZE 1024 @@ -507,8 +508,6 @@ con3270_write(struct console *co, const char *str, unsigned int count) spin_unlock_irqrestore(&cp->view.lock,flags); } -extern struct tty_driver *tty3270_driver; - static struct tty_driver * con3270_device(struct console *c, int *index) { diff --git a/drivers/s390/char/sclp.c b/drivers/s390/char/sclp.c index fa62e694405..25629b92dec 100644 --- a/drivers/s390/char/sclp.c +++ b/drivers/s390/char/sclp.c @@ -93,6 +93,7 @@ static volatile enum sclp_mask_state_t { #define SCLP_RETRY_INTERVAL 30 static void sclp_process_queue(void); +static void __sclp_make_read_req(void); static int sclp_init_mask(int calculate); static int sclp_init(void); @@ -115,7 +116,6 @@ sclp_service_call(sclp_cmdw_t command, void *sccb) return 0; } -static inline void __sclp_make_read_req(void); static void __sclp_queue_read_req(void) @@ -318,8 +318,7 @@ sclp_read_cb(struct sclp_req *req, void *data) } /* Prepare read event data request. Called while sclp_lock is locked. */ -static inline void -__sclp_make_read_req(void) +static void __sclp_make_read_req(void) { struct sccb_header *sccb; diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c index 9f244c591ee..da25f8e2415 100644 --- a/drivers/s390/char/tape_3590.c +++ b/drivers/s390/char/tape_3590.c @@ -708,16 +708,22 @@ static void tape_3590_med_state_set(struct tape_device *device, c_info = &TAPE_3590_CRYPT_INFO(device); - if (sense->masst == MSENSE_UNASSOCIATED) { + DBF_EVENT(6, "medium state: %x:%x\n", sense->macst, sense->masst); + switch (sense->macst) { + case 0x04: + case 0x05: + case 0x06: tape_med_state_set(device, MS_UNLOADED); TAPE_3590_CRYPT_INFO(device).medium_status = 0; return; - } - if (sense->masst != MSENSE_ASSOCIATED_MOUNT) { - PRINT_ERR("Unknown medium state: %x\n", sense->masst); + case 0x08: + case 0x09: + tape_med_state_set(device, MS_LOADED); + break; + default: + tape_med_state_set(device, MS_UNKNOWN); return; } - tape_med_state_set(device, MS_LOADED); c_info->medium_status |= TAPE390_MEDIUM_LOADED_MASK; if (sense->flags & MSENSE_CRYPT_MASK) { PRINT_INFO("Medium is encrypted (%04x)\n", sense->flags); @@ -835,15 +841,17 @@ tape_3590_unsolicited_irq(struct tape_device *device, struct irb *irb) /* Probably result of halt ssch */ return TAPE_IO_PENDING; else if (irb->scsw.dstat == 0x85) - /* Device Ready -> check medium state */ - tape_3590_schedule_work(device, TO_MSEN); - else if (irb->scsw.dstat & DEV_STAT_ATTENTION) + /* Device Ready */ + DBF_EVENT(3, "unsol.irq! tape ready: %08x\n", device->cdev_id); + else if (irb->scsw.dstat & DEV_STAT_ATTENTION) { tape_3590_schedule_work(device, TO_READ_ATTMSG); - else { + } else { DBF_EVENT(3, "unsol.irq! dev end: %08x\n", device->cdev_id); PRINT_WARN("Unsolicited IRQ (Device End) caught.\n"); tape_dump_sense(device, NULL, irb); } + /* check medium state */ + tape_3590_schedule_work(device, TO_MSEN); return TAPE_IO_SUCCESS; } diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c index bc33068b9ce..70b1980a08b 100644 --- a/drivers/s390/char/tty3270.c +++ b/drivers/s390/char/tty3270.c @@ -25,8 +25,8 @@ #include <asm/ebcdic.h> #include <asm/uaccess.h> - #include "raw3270.h" +#include "tty3270.h" #include "keyboard.h" #define TTY3270_CHAR_BUF_SIZE 256 @@ -1338,8 +1338,11 @@ tty3270_getpar(struct tty3270 *tp, int ix) static void tty3270_goto_xy(struct tty3270 *tp, int cx, int cy) { - tp->cx = min_t(int, tp->view.cols - 1, max_t(int, 0, cx)); - cy = min_t(int, tp->view.rows - 3, max_t(int, 0, cy)); + int max_cx = max(0, cx); + int max_cy = max(0, cy); + + tp->cx = min_t(int, tp->view.cols - 1, max_cx); + cy = min_t(int, tp->view.rows - 3, max_cy); if (cy != tp->cy) { tty3270_convert_line(tp, tp->cy); tp->cy = cy; diff --git a/drivers/s390/char/tty3270.h b/drivers/s390/char/tty3270.h new file mode 100644 index 00000000000..799da57f039 --- /dev/null +++ b/drivers/s390/char/tty3270.h @@ -0,0 +1,16 @@ +/* + * drivers/s390/char/tty3270.h + * + * Copyright IBM Corp. 2007 + * + */ + +#ifndef __DRIVERS_S390_CHAR_TTY3270_H +#define __DRIVERS_S390_CHAR_TTY3270_H + +#include <linux/tty.h> +#include <linux/tty_driver.h> + +extern struct tty_driver *tty3270_driver; + +#endif /* __DRIVERS_S390_CHAR_TTY3270_H */ diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c index 680b9b58b80..6f40facb1c4 100644 --- a/drivers/s390/char/vmwatchdog.c +++ b/drivers/s390/char/vmwatchdog.c @@ -66,8 +66,8 @@ static int __diag288(enum vmwdt_func func, unsigned int timeout, "0: la %0,0\n" "1:\n" EX_TABLE(0b,1b) - : "=d" (err) : "d"(__func), "d"(__timeout), - "d"(__cmdp), "d"(__cmdl), "0" (-EINVAL) : "1", "cc"); + : "+d" (err) : "d"(__func), "d"(__timeout), + "d"(__cmdp), "d"(__cmdl) : "1", "cc"); return err; } diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 3712ede1672..7073daf7798 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -141,15 +141,16 @@ static int memcpy_real(void *dest, unsigned long src, size_t count) if (count == 0) return 0; - flags = __raw_local_irq_stnsm(0xf8); /* switch to real mode */ + flags = __raw_local_irq_stnsm(0xf8UL); /* switch to real mode */ asm volatile ( "0: mvcle %1,%2,0x0\n" "1: jo 0b\n" " lhi %0,0x0\n" "2:\n" EX_TABLE(1b,2b) - : "+d" (rc) - : "d" (_dest), "d" (_src), "d" (_len1), "d" (_len2) + : "+d" (rc), "+d" (_dest), "+d" (_src), "+d" (_len1), + "+d" (_len2), "=m" (*((long*)dest)) + : "m" (*((long*)src)) : "cc", "memory"); __raw_local_irq_ssm(flags); diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index b0a18f5176a..9c3b9ea1e66 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -152,16 +152,24 @@ __ccwgroup_create_symlinks(struct ccwgroup_device *gdev) return 0; } -/* - * try to add a new ccwgroup device for one driver - * argc and argv[] are a list of bus_id's of devices - * belonging to the driver. +/** + * ccwgroup_create() - create and register a ccw group device + * @root: parent device for the new device + * @creator_id: identifier of creating driver + * @cdrv: ccw driver of slave devices + * @argc: number of slave devices + * @argv: bus ids of slave devices + * + * Create and register a new ccw group device as a child of @root. Slave + * devices are obtained from the list of bus ids given in @argv[] and must all + * belong to @cdrv. + * Returns: + * %0 on success and an error code on failure. + * Context: + * non-atomic */ -int -ccwgroup_create(struct device *root, - unsigned int creator_id, - struct ccw_driver *cdrv, - int argc, char *argv[]) +int ccwgroup_create(struct device *root, unsigned int creator_id, + struct ccw_driver *cdrv, int argc, char *argv[]) { struct ccwgroup_device *gdev; int i; @@ -390,8 +398,13 @@ static struct bus_type ccwgroup_bus_type = { .remove = ccwgroup_remove, }; -int -ccwgroup_driver_register (struct ccwgroup_driver *cdriver) +/** + * ccwgroup_driver_register() - register a ccw group driver + * @cdriver: driver to be registered + * + * This function is mainly a wrapper around driver_register(). + */ +int ccwgroup_driver_register(struct ccwgroup_driver *cdriver) { /* register our new driver with the core */ cdriver->driver.bus = &ccwgroup_bus_type; @@ -406,8 +419,13 @@ __ccwgroup_match_all(struct device *dev, void *data) return 1; } -void -ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver) +/** + * ccwgroup_driver_unregister() - deregister a ccw group driver + * @cdriver: driver to be deregistered + * + * This function is mainly a wrapper around driver_unregister(). + */ +void ccwgroup_driver_unregister(struct ccwgroup_driver *cdriver) { struct device *dev; @@ -427,8 +445,16 @@ ccwgroup_driver_unregister (struct ccwgroup_driver *cdriver) driver_unregister(&cdriver->driver); } -int -ccwgroup_probe_ccwdev(struct ccw_device *cdev) +/** + * ccwgroup_probe_ccwdev() - probe function for slave devices + * @cdev: ccw device to be probed + * + * This is a dummy probe function for ccw devices that are slave devices in + * a ccw group device. + * Returns: + * always %0 + */ +int ccwgroup_probe_ccwdev(struct ccw_device *cdev) { return 0; } @@ -452,8 +478,15 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev) return NULL; } -void -ccwgroup_remove_ccwdev(struct ccw_device *cdev) +/** + * ccwgroup_remove_ccwdev() - remove function for slave devices + * @cdev: ccw device to be removed + * + * This is a remove function for ccw devices that are slave devices in a ccw + * group device. It sets the ccw device offline and also deregisters the + * embedding ccw group device. + */ +void ccwgroup_remove_ccwdev(struct ccw_device *cdev) { struct ccwgroup_device *gdev; diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c index 920dd71e643..42c1f4659ad 100644 --- a/drivers/s390/cio/chp.c +++ b/drivers/s390/cio/chp.c @@ -14,7 +14,7 @@ #include <linux/jiffies.h> #include <linux/wait.h> #include <linux/mutex.h> -#include <asm/errno.h> +#include <linux/errno.h> #include <asm/chpid.h> #include <asm/sclp.h> @@ -55,7 +55,7 @@ static wait_queue_head_t cfg_wait_queue; /* Return channel_path struct for given chpid. */ static inline struct channel_path *chpid_to_chp(struct chp_id chpid) { - return css[chpid.cssid]->chps[chpid.id]; + return channel_subsystems[chpid.cssid]->chps[chpid.id]; } /* Set vary state for given chpid. */ @@ -86,7 +86,7 @@ u8 chp_get_sch_opm(struct subchannel *sch) opm = 0; chp_id_init(&chpid); - for (i=0; i < 8; i++) { + for (i = 0; i < 8; i++) { opm <<= 1; chpid.id = sch->schib.pmcw.chpid[i]; if (chp_get_status(chpid) != 0) @@ -118,7 +118,7 @@ static int s390_vary_chpid(struct chp_id chpid, int on) sprintf(dbf_text, on?"varyon%x.%02x":"varyoff%x.%02x", chpid.cssid, chpid.id); - CIO_TRACE_EVENT( 2, dbf_text); + CIO_TRACE_EVENT(2, dbf_text); status = chp_get_status(chpid); if (!on && !status) { @@ -140,9 +140,11 @@ static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf, loff_t off, size_t count) { struct channel_path *chp; + struct device *device; unsigned int size; - chp = to_channelpath(container_of(kobj, struct device, kobj)); + device = container_of(kobj, struct device, kobj); + chp = to_channelpath(device); if (!chp->cmg_chars) return 0; @@ -193,9 +195,11 @@ static ssize_t chp_measurement_read(struct kobject *kobj, { struct channel_path *chp; struct channel_subsystem *css; + struct device *device; unsigned int size; - chp = to_channelpath(container_of(kobj, struct device, kobj)); + device = container_of(kobj, struct device, kobj); + chp = to_channelpath(device); css = to_css(chp->dev.parent); size = sizeof(struct cmg_entry); @@ -353,7 +357,7 @@ static ssize_t chp_shared_show(struct device *dev, static DEVICE_ATTR(shared, 0444, chp_shared_show, NULL); -static struct attribute * chp_attrs[] = { +static struct attribute *chp_attrs[] = { &dev_attr_status.attr, &dev_attr_configure.attr, &dev_attr_type.attr, @@ -395,7 +399,7 @@ int chp_new(struct chp_id chpid) /* fill in status, etc. */ chp->chpid = chpid; chp->state = 1; - chp->dev.parent = &css[chpid.cssid]->device; + chp->dev.parent = &channel_subsystems[chpid.cssid]->device; chp->dev.release = chp_release; snprintf(chp->dev.bus_id, BUS_ID_SIZE, "chp%x.%02x", chpid.cssid, chpid.id); @@ -430,18 +434,18 @@ int chp_new(struct chp_id chpid) device_unregister(&chp->dev); goto out_free; } - mutex_lock(&css[chpid.cssid]->mutex); - if (css[chpid.cssid]->cm_enabled) { + mutex_lock(&channel_subsystems[chpid.cssid]->mutex); + if (channel_subsystems[chpid.cssid]->cm_enabled) { ret = chp_add_cmg_attr(chp); if (ret) { sysfs_remove_group(&chp->dev.kobj, &chp_attr_group); device_unregister(&chp->dev); - mutex_unlock(&css[chpid.cssid]->mutex); + mutex_unlock(&channel_subsystems[chpid.cssid]->mutex); goto out_free; } } - css[chpid.cssid]->chps[chpid.id] = chp; - mutex_unlock(&css[chpid.cssid]->mutex); + channel_subsystems[chpid.cssid]->chps[chpid.id] = chp; + mutex_unlock(&channel_subsystems[chpid.cssid]->mutex); return ret; out_free: kfree(chp); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index f2708d65be5..46905345159 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -619,6 +619,11 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid) sch->schib.pmcw.ena = 0; if ((sch->lpm & (sch->lpm - 1)) != 0) sch->schib.pmcw.mp = 1; /* multipath mode */ + /* clean up possible residual cmf stuff */ + sch->schib.pmcw.mme = 0; + sch->schib.pmcw.mbfc = 0; + sch->schib.pmcw.mbi = 0; + sch->schib.mba = 0; return 0; out: if (!cio_is_console(schid)) diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c index 34a796913b0..b960f66843e 100644 --- a/drivers/s390/cio/cmf.c +++ b/drivers/s390/cio/cmf.c @@ -45,7 +45,8 @@ #include "ioasm.h" #include "chsc.h" -/* parameter to enable cmf during boot, possible uses are: +/* + * parameter to enable cmf during boot, possible uses are: * "s390cmf" -- enable cmf and allocate 2 MB of ram so measuring can be * used on any subchannel * "s390cmf=<num>" -- enable cmf and allocate enough memory to measure @@ -73,18 +74,20 @@ enum cmb_index { * enum cmb_format - types of supported measurement block formats * * @CMF_BASIC: traditional channel measurement blocks supported - * by all machines that we run on + * by all machines that we run on * @CMF_EXTENDED: improved format that was introduced with the z990 - * machine - * @CMF_AUTODETECT: default: use extended format when running on a z990 - * or later machine, otherwise fall back to basic format - **/ + * machine + * @CMF_AUTODETECT: default: use extended format when running on a machine + * supporting extended format, otherwise fall back to + * basic format + */ enum cmb_format { CMF_BASIC, CMF_EXTENDED, CMF_AUTODETECT = -1, }; -/** + +/* * format - actual format for all measurement blocks * * The format module parameter can be set to a value of 0 (zero) @@ -105,20 +108,21 @@ module_param(format, bool, 0444); * either with the help of a special pool or with kmalloc * @free: free memory allocated with @alloc * @set: enable or disable measurement + * @read: read a measurement entry at an index * @readall: read a measurement block in a common format * @reset: clear the data in the associated measurement block and * reset its time stamp * @align: align an allocated block so that the hardware can use it */ struct cmb_operations { - int (*alloc) (struct ccw_device*); - void(*free) (struct ccw_device*); - int (*set) (struct ccw_device*, u32); - u64 (*read) (struct ccw_device*, int); - int (*readall)(struct ccw_device*, struct cmbdata *); - void (*reset) (struct ccw_device*); - void * (*align) (void *); - + int (*alloc) (struct ccw_device *); + void (*free) (struct ccw_device *); + int (*set) (struct ccw_device *, u32); + u64 (*read) (struct ccw_device *, int); + int (*readall)(struct ccw_device *, struct cmbdata *); + void (*reset) (struct ccw_device *); + void *(*align) (void *); +/* private: */ struct attribute_group *attr_group; }; static struct cmb_operations *cmbops; @@ -130,9 +134,11 @@ struct cmb_data { unsigned long long last_update; /* when last_block was updated */ }; -/* our user interface is designed in terms of nanoseconds, +/* + * Our user interface is designed in terms of nanoseconds, * while the hardware measures total times in its own - * unit.*/ + * unit. + */ static inline u64 time_to_nsec(u32 value) { return ((u64)value) * 128000ull; @@ -159,12 +165,13 @@ static inline u64 time_to_avg_nsec(u32 value, u32 count) return ret; } -/* activate or deactivate the channel monitor. When area is NULL, +/* + * Activate or deactivate the channel monitor. When area is NULL, * the monitor is deactivated. The channel monitor needs to * be active in order to measure subchannels, which also need - * to be enabled. */ -static inline void -cmf_activate(void *area, unsigned int onoff) + * to be enabled. + */ +static inline void cmf_activate(void *area, unsigned int onoff) { register void * __gpr2 asm("2"); register long __gpr1 asm("1"); @@ -175,8 +182,8 @@ cmf_activate(void *area, unsigned int onoff) asm("schm" : : "d" (__gpr2), "d" (__gpr1) ); } -static int -set_schib(struct ccw_device *cdev, u32 mme, int mbfc, unsigned long address) +static int set_schib(struct ccw_device *cdev, u32 mme, int mbfc, + unsigned long address) { int ret; int retry; @@ -466,6 +473,7 @@ static void cmf_generic_reset(struct ccw_device *cdev) * * @mem: pointer to CMBs (only in basic measurement mode) * @list: contains a linked list of all subchannels + * @num_channels: number of channels to be measured * @lock: protect concurrent access to @mem and @list */ struct cmb_area { @@ -481,28 +489,36 @@ static struct cmb_area cmb_area = { .num_channels = 1024, }; - /* ****** old style CMB handling ********/ -/** int maxchannels - * +/* * Basic channel measurement blocks are allocated in one contiguous * block of memory, which can not be moved as long as any channel * is active. Therefore, a maximum number of subchannels needs to * be defined somewhere. This is a module parameter, defaulting to * a resonable value of 1024, or 32 kb of memory. * Current kernels don't allow kmalloc with more than 128kb, so the - * maximum is 4096 + * maximum is 4096. */ module_param_named(maxchannels, cmb_area.num_channels, uint, 0444); /** * struct cmb - basic channel measurement block + * @ssch_rsch_count: number of ssch and rsch + * @sample_count: number of samples + * @device_connect_time: time of device connect + * @function_pending_time: time of function pending + * @device_disconnect_time: time of device disconnect + * @control_unit_queuing_time: time of control unit queuing + * @device_active_only_time: time of device active only + * @reserved: unused in basic measurement mode + * + * The measurement block as used by the hardware. The fields are described + * further in z/Architecture Principles of Operation, chapter 17. * - * cmb as used by the hardware the fields are described in z/Architecture - * Principles of Operation, chapter 17. - * The area to be a contiguous array and may not be reallocated or freed. + * The cmb area made up from these blocks must be a contiguous array and may + * not be reallocated or freed. * Only one cmb area can be present in the system. */ struct cmb { @@ -516,8 +532,9 @@ struct cmb { u32 reserved[2]; }; -/* insert a single device into the cmb_area list - * called with cmb_area.lock held from alloc_cmb +/* + * Insert a single device into the cmb_area list. + * Called with cmb_area.lock held from alloc_cmb. */ static int alloc_cmb_single(struct ccw_device *cdev, struct cmb_data *cmb_data) @@ -532,9 +549,11 @@ static int alloc_cmb_single(struct ccw_device *cdev, goto out; } - /* find first unused cmb in cmb_area.mem. - * this is a little tricky: cmb_area.list - * remains sorted by ->cmb->hw_data pointers */ + /* + * Find first unused cmb in cmb_area.mem. + * This is a little tricky: cmb_area.list + * remains sorted by ->cmb->hw_data pointers. + */ cmb = cmb_area.mem; list_for_each_entry(node, &cmb_area.list, cmb_list) { struct cmb_data *data; @@ -558,8 +577,7 @@ out: return ret; } -static int -alloc_cmb (struct ccw_device *cdev) +static int alloc_cmb(struct ccw_device *cdev) { int ret; struct cmb *mem; @@ -670,7 +688,7 @@ static int set_cmb(struct ccw_device *cdev, u32 mme) return set_schib_wait(cdev, mme, 0, offset); } -static u64 read_cmb (struct ccw_device *cdev, int index) +static u64 read_cmb(struct ccw_device *cdev, int index) { struct cmb *cmb; u32 val; @@ -720,7 +738,7 @@ out: return ret; } -static int readall_cmb (struct ccw_device *cdev, struct cmbdata *data) +static int readall_cmb(struct ccw_device *cdev, struct cmbdata *data) { struct cmb *cmb; struct cmb_data *cmb_data; @@ -793,14 +811,25 @@ static struct cmb_operations cmbops_basic = { .align = align_cmb, .attr_group = &cmf_attr_group, }; - + /* ******** extended cmb handling ********/ /** * struct cmbe - extended channel measurement block + * @ssch_rsch_count: number of ssch and rsch + * @sample_count: number of samples + * @device_connect_time: time of device connect + * @function_pending_time: time of function pending + * @device_disconnect_time: time of device disconnect + * @control_unit_queuing_time: time of control unit queuing + * @device_active_only_time: time of device active only + * @device_busy_time: time of device busy + * @initial_command_response_time: initial command response time + * @reserved: unused * - * cmb as used by the hardware, may be in any 64 bit physical location, - * the fields are described in z/Architecture Principles of Operation, + * The measurement block as used by the hardware. May be in any 64 bit physical + * location. + * The fields are described further in z/Architecture Principles of Operation, * third edition, chapter 17. */ struct cmbe { @@ -816,10 +845,12 @@ struct cmbe { u32 reserved[7]; }; -/* kmalloc only guarantees 8 byte alignment, but we need cmbe +/* + * kmalloc only guarantees 8 byte alignment, but we need cmbe * pointers to be naturally aligned. Make sure to allocate - * enough space for two cmbes */ -static inline struct cmbe* cmbe_align(struct cmbe *c) + * enough space for two cmbes. + */ +static inline struct cmbe *cmbe_align(struct cmbe *c) { unsigned long addr; addr = ((unsigned long)c + sizeof (struct cmbe) - sizeof(long)) & @@ -827,7 +858,7 @@ static inline struct cmbe* cmbe_align(struct cmbe *c) return (struct cmbe*)addr; } -static int alloc_cmbe (struct ccw_device *cdev) +static int alloc_cmbe(struct ccw_device *cdev) { struct cmbe *cmbe; struct cmb_data *cmb_data; @@ -873,7 +904,7 @@ out_free: return ret; } -static void free_cmbe (struct ccw_device *cdev) +static void free_cmbe(struct ccw_device *cdev) { struct cmb_data *cmb_data; @@ -912,7 +943,7 @@ static int set_cmbe(struct ccw_device *cdev, u32 mme) } -static u64 read_cmbe (struct ccw_device *cdev, int index) +static u64 read_cmbe(struct ccw_device *cdev, int index) { struct cmbe *cmb; struct cmb_data *cmb_data; @@ -970,7 +1001,7 @@ out: return ret; } -static int readall_cmbe (struct ccw_device *cdev, struct cmbdata *data) +static int readall_cmbe(struct ccw_device *cdev, struct cmbdata *data) { struct cmbe *cmb; struct cmb_data *cmb_data; @@ -1047,17 +1078,16 @@ static struct cmb_operations cmbops_extended = { .align = align_cmbe, .attr_group = &cmf_attr_group_ext, }; - -static ssize_t -cmb_show_attr(struct device *dev, char *buf, enum cmb_index idx) +static ssize_t cmb_show_attr(struct device *dev, char *buf, enum cmb_index idx) { return sprintf(buf, "%lld\n", (unsigned long long) cmf_read(to_ccwdev(dev), idx)); } -static ssize_t -cmb_show_avg_sample_interval(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t cmb_show_avg_sample_interval(struct device *dev, + struct device_attribute *attr, + char *buf) { struct ccw_device *cdev; long interval; @@ -1079,8 +1109,9 @@ cmb_show_avg_sample_interval(struct device *dev, struct device_attribute *attr, return sprintf(buf, "%ld\n", interval); } -static ssize_t -cmb_show_avg_utilization(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t cmb_show_avg_utilization(struct device *dev, + struct device_attribute *attr, + char *buf) { struct cmbdata data; u64 utilization; @@ -1112,14 +1143,16 @@ cmb_show_avg_utilization(struct device *dev, struct device_attribute *attr, char } #define cmf_attr(name) \ -static ssize_t show_ ## name (struct device * dev, struct device_attribute *attr, char * buf) \ -{ return cmb_show_attr((dev), buf, cmb_ ## name); } \ -static DEVICE_ATTR(name, 0444, show_ ## name, NULL); +static ssize_t show_##name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ return cmb_show_attr((dev), buf, cmb_##name); } \ +static DEVICE_ATTR(name, 0444, show_##name, NULL); #define cmf_attr_avg(name) \ -static ssize_t show_avg_ ## name (struct device * dev, struct device_attribute *attr, char * buf) \ -{ return cmb_show_attr((dev), buf, cmb_ ## name); } \ -static DEVICE_ATTR(avg_ ## name, 0444, show_avg_ ## name, NULL); +static ssize_t show_avg_##name(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ return cmb_show_attr((dev), buf, cmb_##name); } \ +static DEVICE_ATTR(avg_##name, 0444, show_avg_##name, NULL); cmf_attr(ssch_rsch_count); cmf_attr(sample_count); @@ -1131,7 +1164,8 @@ cmf_attr_avg(device_active_only_time); cmf_attr_avg(device_busy_time); cmf_attr_avg(initial_command_response_time); -static DEVICE_ATTR(avg_sample_interval, 0444, cmb_show_avg_sample_interval, NULL); +static DEVICE_ATTR(avg_sample_interval, 0444, cmb_show_avg_sample_interval, + NULL); static DEVICE_ATTR(avg_utilization, 0444, cmb_show_avg_utilization, NULL); static struct attribute *cmf_attributes[] = { @@ -1172,12 +1206,16 @@ static struct attribute_group cmf_attr_group_ext = { .attrs = cmf_attributes_ext, }; -static ssize_t cmb_enable_show(struct device *dev, struct device_attribute *attr, char *buf) +static ssize_t cmb_enable_show(struct device *dev, + struct device_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", to_ccwdev(dev)->private->cmb ? 1 : 0); } -static ssize_t cmb_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t c) +static ssize_t cmb_enable_store(struct device *dev, + struct device_attribute *attr, const char *buf, + size_t c) { struct ccw_device *cdev; int ret; @@ -1202,9 +1240,16 @@ static ssize_t cmb_enable_store(struct device *dev, struct device_attribute *att DEVICE_ATTR(cmb_enable, 0644, cmb_enable_show, cmb_enable_store); -/* enable_cmf/disable_cmf: module interface for cmf (de)activation */ -int -enable_cmf(struct ccw_device *cdev) +/** + * enable_cmf() - switch on the channel measurement for a specific device + * @cdev: The ccw device to be enabled + * + * Returns %0 for success or a negative error value. + * + * Context: + * non-atomic + */ +int enable_cmf(struct ccw_device *cdev) { int ret; @@ -1225,8 +1270,16 @@ enable_cmf(struct ccw_device *cdev) return ret; } -int -disable_cmf(struct ccw_device *cdev) +/** + * disable_cmf() - switch off the channel measurement for a specific device + * @cdev: The ccw device to be disabled + * + * Returns %0 for success or a negative error value. + * + * Context: + * non-atomic + */ +int disable_cmf(struct ccw_device *cdev) { int ret; @@ -1238,14 +1291,32 @@ disable_cmf(struct ccw_device *cdev) return ret; } -u64 -cmf_read(struct ccw_device *cdev, int index) +/** + * cmf_read() - read one value from the current channel measurement block + * @cdev: the channel to be read + * @index: the index of the value to be read + * + * Returns the value read or %0 if the value cannot be read. + * + * Context: + * any + */ +u64 cmf_read(struct ccw_device *cdev, int index) { return cmbops->read(cdev, index); } -int -cmf_readall(struct ccw_device *cdev, struct cmbdata *data) +/** + * cmf_readall() - read the current channel measurement block + * @cdev: the channel to be read + * @data: a pointer to a data block that will be filled + * + * Returns %0 on success, a negative error value otherwise. + * + * Context: + * any + */ +int cmf_readall(struct ccw_device *cdev, struct cmbdata *data) { return cmbops->readall(cdev, data); } @@ -1257,15 +1328,16 @@ int cmf_reenable(struct ccw_device *cdev) return cmbops->set(cdev, 2); } -static int __init -init_cmf(void) +static int __init init_cmf(void) { char *format_string; char *detect_string = "parameter"; - /* We cannot really autoprobe this. If the user did not give a parameter, - see if we are running on z990 or up, otherwise fall back to basic mode. */ - + /* + * If the user did not give a parameter, see if we are running on a + * machine supporting extended measurement blocks, otherwise fall back + * to basic mode. + */ if (format == CMF_AUTODETECT) { if (!css_characteristics_avail || !css_general_characteristics.ext_mb) { @@ -1284,7 +1356,7 @@ init_cmf(void) cmbops = &cmbops_basic; break; case CMF_EXTENDED: - format_string = "extended"; + format_string = "extended"; cmbops = &cmbops_extended; break; default: diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 5635e656c1a..5d83dd47146 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/errno.h> #include <linux/list.h> +#include <linux/reboot.h> #include "css.h" #include "cio.h" @@ -27,7 +28,7 @@ int css_init_done = 0; static int need_reprobe = 0; static int max_ssid = 0; -struct channel_subsystem *css[__MAX_CSSID + 1]; +struct channel_subsystem *channel_subsystems[__MAX_CSSID + 1]; int css_characteristics_avail = 0; @@ -177,7 +178,7 @@ static int css_register_subchannel(struct subchannel *sch) int ret; /* Initialize the subchannel structure */ - sch->dev.parent = &css[0]->device; + sch->dev.parent = &channel_subsystems[0]->device; sch->dev.bus = &css_bus_type; sch->dev.release = &css_subchannel_release; sch->dev.groups = subch_attr_groups; @@ -606,30 +607,55 @@ static int __init setup_css(int nr) { u32 tod_high; int ret; + struct channel_subsystem *css; - memset(css[nr], 0, sizeof(struct channel_subsystem)); - css[nr]->pseudo_subchannel = - kzalloc(sizeof(*css[nr]->pseudo_subchannel), GFP_KERNEL); - if (!css[nr]->pseudo_subchannel) + css = channel_subsystems[nr]; + memset(css, 0, sizeof(struct channel_subsystem)); + css->pseudo_subchannel = + kzalloc(sizeof(*css->pseudo_subchannel), GFP_KERNEL); + if (!css->pseudo_subchannel) return -ENOMEM; - css[nr]->pseudo_subchannel->dev.parent = &css[nr]->device; - css[nr]->pseudo_subchannel->dev.release = css_subchannel_release; - sprintf(css[nr]->pseudo_subchannel->dev.bus_id, "defunct"); - ret = cio_create_sch_lock(css[nr]->pseudo_subchannel); + css->pseudo_subchannel->dev.parent = &css->device; + css->pseudo_subchannel->dev.release = css_subchannel_release; + sprintf(css->pseudo_subchannel->dev.bus_id, "defunct"); + ret = cio_create_sch_lock(css->pseudo_subchannel); if (ret) { - kfree(css[nr]->pseudo_subchannel); + kfree(css->pseudo_subchannel); return ret; } - mutex_init(&css[nr]->mutex); - css[nr]->valid = 1; - css[nr]->cssid = nr; - sprintf(css[nr]->device.bus_id, "css%x", nr); - css[nr]->device.release = channel_subsystem_release; + mutex_init(&css->mutex); + css->valid = 1; + css->cssid = nr; + sprintf(css->device.bus_id, "css%x", nr); + css->device.release = channel_subsystem_release; tod_high = (u32) (get_clock() >> 32); - css_generate_pgid(css[nr], tod_high); + css_generate_pgid(css, tod_high); return 0; } +static int css_reboot_event(struct notifier_block *this, + unsigned long event, + void *ptr) +{ + int ret, i; + + ret = NOTIFY_DONE; + for (i = 0; i <= __MAX_CSSID; i++) { + struct channel_subsystem *css; + + css = channel_subsystems[i]; + if (css->cm_enabled) + if (chsc_secm(css, 0)) + ret = NOTIFY_BAD; + } + + return ret; +} + +static struct notifier_block css_reboot_notifier = { + .notifier_call = css_reboot_event, +}; + /* * Now that the driver core is running, we can setup our channel subsystem. * The struct subchannel's are created during probing (except for the @@ -670,51 +696,63 @@ init_channel_subsystem (void) } /* Setup css structure. */ for (i = 0; i <= __MAX_CSSID; i++) { - css[i] = kmalloc(sizeof(struct channel_subsystem), GFP_KERNEL); - if (!css[i]) { + struct channel_subsystem *css; + + css = kmalloc(sizeof(struct channel_subsystem), GFP_KERNEL); + if (!css) { ret = -ENOMEM; goto out_unregister; } + channel_subsystems[i] = css; ret = setup_css(i); if (ret) goto out_free; - ret = device_register(&css[i]->device); + ret = device_register(&css->device); if (ret) goto out_free_all; if (css_characteristics_avail && css_chsc_characteristics.secm) { - ret = device_create_file(&css[i]->device, + ret = device_create_file(&css->device, &dev_attr_cm_enable); if (ret) goto out_device; } - ret = device_register(&css[i]->pseudo_subchannel->dev); + ret = device_register(&css->pseudo_subchannel->dev); if (ret) goto out_file; } + ret = register_reboot_notifier(&css_reboot_notifier); + if (ret) + goto out_pseudo; css_init_done = 1; ctl_set_bit(6, 28); for_each_subchannel(__init_channel_subsystem, NULL); return 0; +out_pseudo: + device_unregister(&channel_subsystems[i]->pseudo_subchannel->dev); out_file: - device_remove_file(&css[i]->device, &dev_attr_cm_enable); + device_remove_file(&channel_subsystems[i]->device, + &dev_attr_cm_enable); out_device: - device_unregister(&css[i]->device); + device_unregister(&channel_subsystems[i]->device); out_free_all: - kfree(css[i]->pseudo_subchannel->lock); - kfree(css[i]->pseudo_subchannel); + kfree(channel_subsystems[i]->pseudo_subchannel->lock); + kfree(channel_subsystems[i]->pseudo_subchannel); out_free: - kfree(css[i]); + kfree(channel_subsystems[i]); out_unregister: while (i > 0) { + struct channel_subsystem *css; + i--; - device_unregister(&css[i]->pseudo_subchannel->dev); + css = channel_subsystems[i]; + device_unregister(&css->pseudo_subchannel->dev); if (css_characteristics_avail && css_chsc_characteristics.secm) - device_remove_file(&css[i]->device, + device_remove_file(&css->device, &dev_attr_cm_enable); - device_unregister(&css[i]->device); + device_unregister(&css->device); } out_bus: bus_unregister(&css_bus_type); diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 5d65e83ca66..81215ef3243 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -167,7 +167,7 @@ struct channel_subsystem { #define to_css(dev) container_of(dev, struct channel_subsystem, device) extern struct bus_type css_bus_type; -extern struct channel_subsystem *css[]; +extern struct channel_subsystem *channel_subsystems[]; /* Some helper functions for disconnected state. */ int device_is_disconnected(struct subchannel *); @@ -191,6 +191,5 @@ int sch_is_pseudo_sch(struct subchannel *); extern struct workqueue_struct *slow_path_wq; -int subchannel_add_files (struct device *); extern struct attribute_group *subch_attr_groups[]; #endif diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index e44d92eac8e..39f02b48e4c 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -21,6 +21,7 @@ #include <asm/ccwdev.h> #include <asm/cio.h> #include <asm/param.h> /* HZ */ +#include <asm/cmb.h> #include "cio.h" #include "cio_debug.h" @@ -357,8 +358,18 @@ ccw_device_remove_disconnected(struct ccw_device *cdev) cdev->private->dev_id.devno); } -int -ccw_device_set_offline(struct ccw_device *cdev) +/** + * ccw_device_set_offline() - disable a ccw device for I/O + * @cdev: target ccw device + * + * This function calls the driver's set_offline() function for @cdev, if + * given, and then disables @cdev. + * Returns: + * %0 on success and a negative error value on failure. + * Context: + * enabled, ccw device lock not held + */ +int ccw_device_set_offline(struct ccw_device *cdev) { int ret; @@ -396,8 +407,19 @@ ccw_device_set_offline(struct ccw_device *cdev) return ret; } -int -ccw_device_set_online(struct ccw_device *cdev) +/** + * ccw_device_set_online() - enable a ccw device for I/O + * @cdev: target ccw device + * + * This function first enables @cdev and then calls the driver's set_online() + * function for @cdev, if given. If set_online() returns an error, @cdev is + * disabled again. + * Returns: + * %0 on success and a negative error value on failure. + * Context: + * enabled, ccw device lock not held + */ +int ccw_device_set_online(struct ccw_device *cdev) { int ret; @@ -947,8 +969,7 @@ out: wake_up(&ccw_device_init_wq); } -void -ccw_device_call_sch_unregister(struct work_struct *work) +static void ccw_device_call_sch_unregister(struct work_struct *work) { struct ccw_device_private *priv; struct ccw_device *cdev; @@ -1101,6 +1122,7 @@ io_subchannel_probe (struct subchannel *sch) * device, e.g. the console. */ cdev = sch->dev.driver_data; + cdev->dev.groups = ccwdev_attr_groups; device_initialize(&cdev->dev); ccw_device_register(cdev); /* @@ -1326,8 +1348,19 @@ __ccwdev_check_busid(struct device *dev, void *id) } -struct ccw_device * -get_ccwdev_by_busid(struct ccw_driver *cdrv, const char *bus_id) +/** + * get_ccwdev_by_busid() - obtain device from a bus id + * @cdrv: driver the device is owned by + * @bus_id: bus id of the device to be searched + * + * This function searches all devices owned by @cdrv for a device with a bus + * id matching @bus_id. + * Returns: + * If a match is found, its reference count of the found device is increased + * and it is returned; else %NULL is returned. + */ +struct ccw_device *get_ccwdev_by_busid(struct ccw_driver *cdrv, + const char *bus_id) { struct device *dev; struct device_driver *drv; @@ -1401,16 +1434,34 @@ ccw_device_remove (struct device *dev) return 0; } +static void ccw_device_shutdown(struct device *dev) +{ + struct ccw_device *cdev; + + cdev = to_ccwdev(dev); + if (cdev->drv && cdev->drv->shutdown) + cdev->drv->shutdown(cdev); + disable_cmf(cdev); +} + struct bus_type ccw_bus_type = { .name = "ccw", .match = ccw_bus_match, .uevent = ccw_uevent, .probe = ccw_device_probe, .remove = ccw_device_remove, + .shutdown = ccw_device_shutdown, }; -int -ccw_driver_register (struct ccw_driver *cdriver) +/** + * ccw_driver_register() - register a ccw driver + * @cdriver: driver to be registered + * + * This function is mainly a wrapper around driver_register(). + * Returns: + * %0 on success and a negative error value on failure. + */ +int ccw_driver_register(struct ccw_driver *cdriver) { struct device_driver *drv = &cdriver->driver; @@ -1420,8 +1471,13 @@ ccw_driver_register (struct ccw_driver *cdriver) return driver_register(drv); } -void -ccw_driver_unregister (struct ccw_driver *cdriver) +/** + * ccw_driver_unregister() - deregister a ccw driver + * @cdriver: driver to be deregistered + * + * This function is mainly a wrapper around driver_unregister(). + */ +void ccw_driver_unregister(struct ccw_driver *cdriver) { driver_unregister(&cdriver->driver); } diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index b66338b7657..0d408960043 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -80,7 +80,6 @@ void io_subchannel_recog_done(struct ccw_device *cdev); int ccw_device_cancel_halt_clear(struct ccw_device *); void ccw_device_do_unreg_rereg(struct work_struct *); -void ccw_device_call_sch_unregister(struct work_struct *); void ccw_device_move_to_orphanage(struct work_struct *); int ccw_device_is_orphan(struct ccw_device *); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index 8633dc53769..8867443b806 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -446,7 +446,8 @@ static void __ccw_device_get_common_pgid(struct ccw_device *cdev) if (cdev->private->pgid[last].inf.ps.state1 == SNID_STATE1_RESET) /* No previous pgid found */ - memcpy(&cdev->private->pgid[0], &css[0]->global_pgid, + memcpy(&cdev->private->pgid[0], + &channel_subsystems[0]->global_pgid, sizeof(struct pgid)); else /* Use existing pgid */ @@ -543,51 +544,6 @@ ccw_device_recog_timeout(struct ccw_device *cdev, enum dev_event dev_event) } -static void -ccw_device_nopath_notify(struct work_struct *work) -{ - struct ccw_device_private *priv; - struct ccw_device *cdev; - struct subchannel *sch; - int ret; - unsigned long flags; - - priv = container_of(work, struct ccw_device_private, kick_work); - cdev = priv->cdev; - spin_lock_irqsave(cdev->ccwlock, flags); - sch = to_subchannel(cdev->dev.parent); - /* Extra sanity. */ - if (sch->lpm) - goto out_unlock; - if (sch->driver && sch->driver->notify) { - spin_unlock_irqrestore(cdev->ccwlock, flags); - ret = sch->driver->notify(&sch->dev, CIO_NO_PATH); - spin_lock_irqsave(cdev->ccwlock, flags); - } else - ret = 0; - if (!ret) { - if (get_device(&sch->dev)) { - /* Driver doesn't want to keep device. */ - cio_disable_subchannel(sch); - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister); - queue_work(ccw_device_work, - &cdev->private->kick_work); - } else - put_device(&sch->dev); - } - } else { - cio_disable_subchannel(sch); - ccw_device_set_timeout(cdev, 0); - cdev->private->flags.fake_irb = 0; - cdev->private->state = DEV_STATE_DISCONNECTED; - wake_up(&cdev->private->wait_q); - } -out_unlock: - spin_unlock_irqrestore(cdev->ccwlock, flags); -} - void ccw_device_verify_done(struct ccw_device *cdev, int err) { @@ -631,12 +587,9 @@ ccw_device_verify_done(struct ccw_device *cdev, int err) default: /* Reset oper notify indication after verify error. */ cdev->private->flags.donotify = 0; - if (cdev->online) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify); - queue_work(ccw_device_notify_work, - &cdev->private->kick_work); - } else + if (cdev->online) + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + else ccw_device_done(cdev, DEV_STATE_NOT_OPER); break; } @@ -690,11 +643,7 @@ ccw_device_disband_done(struct ccw_device *cdev, int err) break; default: cdev->private->flags.donotify = 0; - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister); - queue_work(ccw_device_work, &cdev->private->kick_work); - } + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); ccw_device_done(cdev, DEV_STATE_NOT_OPER); break; } @@ -765,59 +714,16 @@ ccw_device_recog_notoper(struct ccw_device *cdev, enum dev_event dev_event) } /* - * Handle not operational event while offline. + * Handle not operational event in non-special state. */ -static void -ccw_device_offline_notoper(struct ccw_device *cdev, enum dev_event dev_event) +static void ccw_device_generic_notoper(struct ccw_device *cdev, + enum dev_event dev_event) { struct subchannel *sch; cdev->private->state = DEV_STATE_NOT_OPER; sch = to_subchannel(cdev->dev.parent); - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister); - queue_work(ccw_device_work, &cdev->private->kick_work); - } - wake_up(&cdev->private->wait_q); -} - -/* - * Handle not operational event while online. - */ -static void -ccw_device_online_notoper(struct ccw_device *cdev, enum dev_event dev_event) -{ - struct subchannel *sch; - int ret; - - sch = to_subchannel(cdev->dev.parent); - if (sch->driver->notify) { - spin_unlock_irq(cdev->ccwlock); - ret = sch->driver->notify(&sch->dev, - sch->lpm ? CIO_GONE : CIO_NO_PATH); - spin_lock_irq(cdev->ccwlock); - } else - ret = 0; - if (ret) { - ccw_device_set_timeout(cdev, 0); - cdev->private->flags.fake_irb = 0; - cdev->private->state = DEV_STATE_DISCONNECTED; - wake_up(&cdev->private->wait_q); - return; - } - cdev->private->state = DEV_STATE_NOT_OPER; - cio_disable_subchannel(sch); - if (sch->schib.scsw.actl != 0) { - // FIXME: not-oper indication to device driver ? - ccw_device_call_handler(cdev); - } - if (get_device(&cdev->dev)) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_call_sch_unregister); - queue_work(ccw_device_work, &cdev->private->kick_work); - } - wake_up(&cdev->private->wait_q); + css_schedule_eval(sch->schid); } /* @@ -915,18 +821,9 @@ ccw_device_online_timeout(struct ccw_device *cdev, enum dev_event dev_event) cdev->private->state = DEV_STATE_TIMEOUT_KILL; return; } - if (ret == -ENODEV) { - struct subchannel *sch; - - sch = to_subchannel(cdev->dev.parent); - if (!sch->lpm) { - PREPARE_WORK(&cdev->private->kick_work, - ccw_device_nopath_notify); - queue_work(ccw_device_notify_work, - &cdev->private->kick_work); - } else - dev_fsm_event(cdev, DEV_EVENT_NOTOPER); - } else if (cdev->handler) + if (ret == -ENODEV) + dev_fsm_event(cdev, DEV_EVENT_NOTOPER); + else if (cdev->handler) cdev->handler(cdev, cdev->private->intparm, ERR_PTR(-ETIMEDOUT)); } @@ -1233,7 +1130,7 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_SENSE_PGID] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_sense_pgid_irq, [DEV_EVENT_TIMEOUT] = ccw_device_onoff_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, @@ -1245,50 +1142,50 @@ fsm_func_t *dev_jumptable[NR_DEV_STATES][NR_DEV_EVENTS] = { [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_OFFLINE] = { - [DEV_EVENT_NOTOPER] = ccw_device_offline_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_offline_irq, [DEV_EVENT_TIMEOUT] = ccw_device_nop, [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_VERIFY] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_verify_irq, [DEV_EVENT_TIMEOUT] = ccw_device_onoff_timeout, [DEV_EVENT_VERIFY] = ccw_device_delay_verify, }, [DEV_STATE_ONLINE] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_irq, [DEV_EVENT_TIMEOUT] = ccw_device_online_timeout, [DEV_EVENT_VERIFY] = ccw_device_online_verify, }, [DEV_STATE_W4SENSE] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_w4sense, [DEV_EVENT_TIMEOUT] = ccw_device_nop, [DEV_EVENT_VERIFY] = ccw_device_online_verify, }, [DEV_STATE_DISBAND_PGID] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_disband_irq, [DEV_EVENT_TIMEOUT] = ccw_device_onoff_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_BOXED] = { - [DEV_EVENT_NOTOPER] = ccw_device_offline_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_stlck_done, [DEV_EVENT_TIMEOUT] = ccw_device_stlck_done, [DEV_EVENT_VERIFY] = ccw_device_nop, }, /* states to wait for i/o completion before doing something */ [DEV_STATE_CLEAR_VERIFY] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_clear_verify, [DEV_EVENT_TIMEOUT] = ccw_device_nop, [DEV_EVENT_VERIFY] = ccw_device_nop, }, [DEV_STATE_TIMEOUT_KILL] = { - [DEV_EVENT_NOTOPER] = ccw_device_online_notoper, + [DEV_EVENT_NOTOPER] = ccw_device_generic_notoper, [DEV_EVENT_INTERRUPT] = ccw_device_killing_irq, [DEV_EVENT_TIMEOUT] = ccw_device_killing_timeout, [DEV_EVENT_VERIFY] = ccw_device_nop, //FIXME diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 14eba854b15..7fd2dadc329 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -25,6 +25,16 @@ #include "device.h" #include "chp.h" +/** + * ccw_device_set_options_mask() - set some options and unset the rest + * @cdev: device for which the options are to be set + * @flags: options to be set + * + * All flags specified in @flags are set, all flags not specified in @flags + * are cleared. + * Returns: + * %0 on success, -%EINVAL on an invalid flag combination. + */ int ccw_device_set_options_mask(struct ccw_device *cdev, unsigned long flags) { /* @@ -40,6 +50,15 @@ int ccw_device_set_options_mask(struct ccw_device *cdev, unsigned long flags) return 0; } +/** + * ccw_device_set_options() - set some options + * @cdev: device for which the options are to be set + * @flags: options to be set + * + * All flags specified in @flags are set, the remainder is left untouched. + * Returns: + * %0 on success, -%EINVAL if an invalid flag combination would ensue. + */ int ccw_device_set_options(struct ccw_device *cdev, unsigned long flags) { /* @@ -59,6 +78,13 @@ int ccw_device_set_options(struct ccw_device *cdev, unsigned long flags) return 0; } +/** + * ccw_device_clear_options() - clear some options + * @cdev: device for which the options are to be cleared + * @flags: options to be cleared + * + * All flags specified in @flags are cleared, the remainder is left untouched. + */ void ccw_device_clear_options(struct ccw_device *cdev, unsigned long flags) { cdev->private->options.fast &= (flags & CCWDEV_EARLY_NOTIFICATION) == 0; @@ -67,8 +93,22 @@ void ccw_device_clear_options(struct ccw_device *cdev, unsigned long flags) cdev->private->options.force &= (flags & CCWDEV_ALLOW_FORCE) == 0; } -int -ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) +/** + * ccw_device_clear() - terminate I/O request processing + * @cdev: target ccw device + * @intparm: interruption parameter; value is only used if no I/O is + * outstanding, otherwise the intparm associated with the I/O request + * is returned + * + * ccw_device_clear() calls csch on @cdev's subchannel. + * Returns: + * %0 on success, + * -%ENODEV on device not operational, + * -%EINVAL on invalid device state. + * Context: + * Interrupts disabled, ccw device lock held + */ +int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) { struct subchannel *sch; int ret; @@ -89,10 +129,33 @@ ccw_device_clear(struct ccw_device *cdev, unsigned long intparm) return ret; } -int -ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, __u8 key, - unsigned long flags) +/** + * ccw_device_start_key() - start a s390 channel program with key + * @cdev: target ccw device + * @cpa: logical start address of channel program + * @intparm: user specific interruption parameter; will be presented back to + * @cdev's interrupt handler. Allows a device driver to associate + * the interrupt with a particular I/O request. + * @lpm: defines the channel path to be used for a specific I/O request. A + * value of 0 will make cio use the opm. + * @key: storage key to be used for the I/O + * @flags: additional flags; defines the action to be performed for I/O + * processing. + * + * Start a S/390 channel program. When the interrupt arrives, the + * IRQ handler is called, either immediately, delayed (dev-end missing, + * or sense required) or never (no IRQ handler registered). + * Returns: + * %0, if the operation was successful; + * -%EBUSY, if the device is busy, or status pending; + * -%EACCES, if no path specified in @lpm is operational; + * -%ENODEV, if the device is not operational. + * Context: + * Interrupts disabled, ccw device lock held + */ +int ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, __u8 key, + unsigned long flags) { struct subchannel *sch; int ret; @@ -135,11 +198,38 @@ ccw_device_start_key(struct ccw_device *cdev, struct ccw1 *cpa, return ret; } - -int -ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, __u8 key, - unsigned long flags, int expires) +/** + * ccw_device_start_timeout_key() - start a s390 channel program with timeout and key + * @cdev: target ccw device + * @cpa: logical start address of channel program + * @intparm: user specific interruption parameter; will be presented back to + * @cdev's interrupt handler. Allows a device driver to associate + * the interrupt with a particular I/O request. + * @lpm: defines the channel path to be used for a specific I/O request. A + * value of 0 will make cio use the opm. + * @key: storage key to be used for the I/O + * @flags: additional flags; defines the action to be performed for I/O + * processing. + * @expires: timeout value in jiffies + * + * Start a S/390 channel program. When the interrupt arrives, the + * IRQ handler is called, either immediately, delayed (dev-end missing, + * or sense required) or never (no IRQ handler registered). + * This function notifies the device driver if the channel program has not + * completed during the time specified by @expires. If a timeout occurs, the + * channel program is terminated via xsch, hsch or csch, and the device's + * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT). + * Returns: + * %0, if the operation was successful; + * -%EBUSY, if the device is busy, or status pending; + * -%EACCES, if no path specified in @lpm is operational; + * -%ENODEV, if the device is not operational. + * Context: + * Interrupts disabled, ccw device lock held + */ +int ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, __u8 key, + unsigned long flags, int expires) { int ret; @@ -152,18 +242,67 @@ ccw_device_start_timeout_key(struct ccw_device *cdev, struct ccw1 *cpa, return ret; } -int -ccw_device_start(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, unsigned long flags) +/** + * ccw_device_start() - start a s390 channel program + * @cdev: target ccw device + * @cpa: logical start address of channel program + * @intparm: user specific interruption parameter; will be presented back to + * @cdev's interrupt handler. Allows a device driver to associate + * the interrupt with a particular I/O request. + * @lpm: defines the channel path to be used for a specific I/O request. A + * value of 0 will make cio use the opm. + * @flags: additional flags; defines the action to be performed for I/O + * processing. + * + * Start a S/390 channel program. When the interrupt arrives, the + * IRQ handler is called, either immediately, delayed (dev-end missing, + * or sense required) or never (no IRQ handler registered). + * Returns: + * %0, if the operation was successful; + * -%EBUSY, if the device is busy, or status pending; + * -%EACCES, if no path specified in @lpm is operational; + * -%ENODEV, if the device is not operational. + * Context: + * Interrupts disabled, ccw device lock held + */ +int ccw_device_start(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, unsigned long flags) { return ccw_device_start_key(cdev, cpa, intparm, lpm, PAGE_DEFAULT_KEY, flags); } -int -ccw_device_start_timeout(struct ccw_device *cdev, struct ccw1 *cpa, - unsigned long intparm, __u8 lpm, unsigned long flags, - int expires) +/** + * ccw_device_start_timeout() - start a s390 channel program with timeout + * @cdev: target ccw device + * @cpa: logical start address of channel program + * @intparm: user specific interruption parameter; will be presented back to + * @cdev's interrupt handler. Allows a device driver to associate + * the interrupt with a particular I/O request. + * @lpm: defines the channel path to be used for a specific I/O request. A + * value of 0 will make cio use the opm. + * @flags: additional flags; defines the action to be performed for I/O + * processing. + * @expires: timeout value in jiffies + * + * Start a S/390 channel program. When the interrupt arrives, the + * IRQ handler is called, either immediately, delayed (dev-end missing, + * or sense required) or never (no IRQ handler registered). + * This function notifies the device driver if the channel program has not + * completed during the time specified by @expires. If a timeout occurs, the + * channel program is terminated via xsch, hsch or csch, and the device's + * interrupt handler will be called with an irb containing ERR_PTR(-%ETIMEDOUT). + * Returns: + * %0, if the operation was successful; + * -%EBUSY, if the device is busy, or status pending; + * -%EACCES, if no path specified in @lpm is operational; + * -%ENODEV, if the device is not operational. + * Context: + * Interrupts disabled, ccw device lock held + */ +int ccw_device_start_timeout(struct ccw_device *cdev, struct ccw1 *cpa, + unsigned long intparm, __u8 lpm, + unsigned long flags, int expires) { return ccw_device_start_timeout_key(cdev, cpa, intparm, lpm, PAGE_DEFAULT_KEY, flags, @@ -171,8 +310,23 @@ ccw_device_start_timeout(struct ccw_device *cdev, struct ccw1 *cpa, } -int -ccw_device_halt(struct ccw_device *cdev, unsigned long intparm) +/** + * ccw_device_halt() - halt I/O request processing + * @cdev: target ccw device + * @intparm: interruption parameter; value is only used if no I/O is + * outstanding, otherwise the intparm associated with the I/O request + * is returned + * + * ccw_device_halt() calls hsch on @cdev's subchannel. + * Returns: + * %0 on success, + * -%ENODEV on device not operational, + * -%EINVAL on invalid device state, + * -%EBUSY on device busy or interrupt pending. + * Context: + * Interrupts disabled, ccw device lock held + */ +int ccw_device_halt(struct ccw_device *cdev, unsigned long intparm) { struct subchannel *sch; int ret; @@ -193,8 +347,20 @@ ccw_device_halt(struct ccw_device *cdev, unsigned long intparm) return ret; } -int -ccw_device_resume(struct ccw_device *cdev) +/** + * ccw_device_resume() - resume channel program execution + * @cdev: target ccw device + * + * ccw_device_resume() calls rsch on @cdev's subchannel. + * Returns: + * %0 on success, + * -%ENODEV on device not operational, + * -%EINVAL on invalid device state, + * -%EBUSY on device busy or interrupt pending. + * Context: + * Interrupts disabled, ccw device lock held + */ +int ccw_device_resume(struct ccw_device *cdev) { struct subchannel *sch; @@ -260,11 +426,21 @@ ccw_device_call_handler(struct ccw_device *cdev) return 1; } -/* - * Search for CIW command in extended sense data. +/** + * ccw_device_get_ciw() - Search for CIW command in extended sense data. + * @cdev: ccw device to inspect + * @ct: command type to look for + * + * During SenseID, command information words (CIWs) describing special + * commands available to the device may have been stored in the extended + * sense data. This function searches for CIWs of a specified command + * type in the extended sense data. + * Returns: + * %NULL if no extended sense data has been stored or if no CIW of the + * specified command type could be found, + * else a pointer to the CIW of the specified command type. */ -struct ciw * -ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct) +struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct) { int ciw_cnt; @@ -276,8 +452,14 @@ ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct) return NULL; } -__u8 -ccw_device_get_path_mask(struct ccw_device *cdev) +/** + * ccw_device_get_path_mask() - get currently available paths + * @cdev: ccw device to be queried + * Returns: + * %0 if no subchannel for the device is available, + * else the mask of currently available paths for the ccw device's subchannel. + */ +__u8 ccw_device_get_path_mask(struct ccw_device *cdev) { struct subchannel *sch; @@ -357,8 +539,7 @@ out_unlock: return ret; } -void * -ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no) +void *ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no) { struct subchannel *sch; struct chp_id chpid; diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index d8d479876ec..40a3208c7cf 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -1024,9 +1024,9 @@ __qdio_outbound_processing(struct qdio_q *q) } static void -qdio_outbound_processing(struct qdio_q *q) +qdio_outbound_processing(unsigned long q) { - __qdio_outbound_processing(q); + __qdio_outbound_processing((struct qdio_q *) q); } /************************* INBOUND ROUTINES *******************************/ @@ -1449,9 +1449,10 @@ out: } static void -tiqdio_inbound_processing(struct qdio_q *q) +tiqdio_inbound_processing(unsigned long q) { - __tiqdio_inbound_processing(q, atomic_read(&spare_indicator_usecount)); + __tiqdio_inbound_processing((struct qdio_q *) q, + atomic_read(&spare_indicator_usecount)); } static void @@ -1494,9 +1495,9 @@ again: } static void -qdio_inbound_processing(struct qdio_q *q) +qdio_inbound_processing(unsigned long q) { - __qdio_inbound_processing(q); + __qdio_inbound_processing((struct qdio_q *) q); } /************************* MAIN ROUTINES *******************************/ @@ -1760,12 +1761,15 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, q->handler=input_handler; q->dev_st_chg_ind=irq_ptr->dev_st_chg_ind; - q->tasklet.data=(unsigned long)q; /* q->is_thinint_q isn't valid at this time, but - * irq_ptr->is_thinint_irq is */ - q->tasklet.func=(void(*)(unsigned long)) - ((irq_ptr->is_thinint_irq)?&tiqdio_inbound_processing: - &qdio_inbound_processing); + * irq_ptr->is_thinint_irq is + */ + if (irq_ptr->is_thinint_irq) + tasklet_init(&q->tasklet, tiqdio_inbound_processing, + (unsigned long) q); + else + tasklet_init(&q->tasklet, qdio_inbound_processing, + (unsigned long) q); /* actually this is not used for inbound queues. yet. */ atomic_set(&q->busy_siga_counter,0); @@ -1836,13 +1840,10 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, q->last_move_ftc=0; q->handler=output_handler; - q->tasklet.data=(unsigned long)q; - q->tasklet.func=(void(*)(unsigned long)) - &qdio_outbound_processing; - q->timer.function=(void(*)(unsigned long)) - &qdio_outbound_processing; - q->timer.data = (long)q; - init_timer(&q->timer); + tasklet_init(&q->tasklet, qdio_outbound_processing, + (unsigned long) q); + setup_timer(&q->timer, qdio_outbound_processing, + (unsigned long) q); atomic_set(&q->busy_siga_counter,0); q->timing.busy_start=0; @@ -3726,7 +3727,7 @@ qdio_performance_stats_store(struct bus_type *bus, const char *buf, size_t count #endif /* CONFIG_64BIT */ } } else { - QDIO_PRINT_WARN("QDIO performance_stats: write 0 or 1 to this file!\n"); + QDIO_PRINT_ERR("QDIO performance_stats: write 0 or 1 to this file!\n"); return -EINVAL; } return count; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 90bd2201451..d334b0f7a1e 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -1231,8 +1231,9 @@ static void ap_reset_domain(void) { int i; - for (i = 0; i < AP_DEVICES; i++) - ap_reset_queue(AP_MKQID(i, ap_domain_index)); + if (ap_domain_index != -1) + for (i = 0; i < AP_DEVICES; i++) + ap_reset_queue(AP_MKQID(i, ap_domain_index)); } static void ap_reset_all(void) diff --git a/drivers/s390/crypto/zcrypt_mono.c b/drivers/s390/crypto/zcrypt_mono.c index 2a9349ad68b..44253fdd413 100644 --- a/drivers/s390/crypto/zcrypt_mono.c +++ b/drivers/s390/crypto/zcrypt_mono.c @@ -45,7 +45,7 @@ /** * The module initialization code. */ -int __init zcrypt_init(void) +static int __init zcrypt_init(void) { int rc; @@ -86,7 +86,7 @@ out: /** * The module termination code. */ -void __exit zcrypt_exit(void) +static void __exit zcrypt_exit(void) { zcrypt_cex2a_exit(); zcrypt_pcixcc_exit(); diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 64948788d30..70b9ddc8cf9 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -277,7 +277,7 @@ static int XCRB_msg_to_type6CPRB_msgX(struct zcrypt_device *zdev, }; struct { struct type6_hdr hdr; - struct ica_CPRBX cprbx; + struct CPRBX cprbx; } __attribute__((packed)) *msg = ap_msg->message; int rcblen = CEIL4(xcRB->request_control_blk_length); @@ -432,14 +432,17 @@ static int convert_type86_ica(struct zcrypt_device *zdev, } if (service_rc == 8 && service_rs == 770) { PDEBUG("Invalid key length on PCIXCC/CEX2C\n"); - zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD; - return -EAGAIN; + return -EINVAL; } if (service_rc == 8 && service_rs == 783) { PDEBUG("Extended bitlengths not enabled on PCIXCC/CEX2C\n"); zdev->min_mod_size = PCIXCC_MIN_MOD_SIZE_OLD; return -EAGAIN; } + if (service_rc == 12 && service_rs == 769) { + PDEBUG("Invalid key on PCIXCC/CEX2C\n"); + return -EINVAL; + } PRINTK("Unknown service rc/rs (PCIXCC/CEX2C): %d/%d\n", service_rc, service_rs); zdev->online = 0; diff --git a/drivers/s390/crypto/zcrypt_pcixcc.h b/drivers/s390/crypto/zcrypt_pcixcc.h index a78ff307fd1..8cb7d7a6973 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.h +++ b/drivers/s390/crypto/zcrypt_pcixcc.h @@ -28,51 +28,6 @@ #ifndef _ZCRYPT_PCIXCC_H_ #define _ZCRYPT_PCIXCC_H_ -/** - * CPRBX - * Note that all shorts and ints are big-endian. - * All pointer fields are 16 bytes long, and mean nothing. - * - * A request CPRB is followed by a request_parameter_block. - * - * The request (or reply) parameter block is organized thus: - * function code - * VUD block - * key block - */ -struct CPRBX { - unsigned short cprb_len; /* CPRB length 220 */ - unsigned char cprb_ver_id; /* CPRB version id. 0x02 */ - unsigned char pad_000[3]; /* Alignment pad bytes */ - unsigned char func_id[2]; /* function id 0x5432 */ - unsigned char cprb_flags[4]; /* Flags */ - unsigned int req_parml; /* request parameter buffer len */ - unsigned int req_datal; /* request data buffer */ - unsigned int rpl_msgbl; /* reply message block length */ - unsigned int rpld_parml; /* replied parameter block len */ - unsigned int rpl_datal; /* reply data block len */ - unsigned int rpld_datal; /* replied data block len */ - unsigned int req_extbl; /* request extension block len */ - unsigned char pad_001[4]; /* reserved */ - unsigned int rpld_extbl; /* replied extension block len */ - unsigned char req_parmb[16]; /* request parm block 'address' */ - unsigned char req_datab[16]; /* request data block 'address' */ - unsigned char rpl_parmb[16]; /* reply parm block 'address' */ - unsigned char rpl_datab[16]; /* reply data block 'address' */ - unsigned char req_extb[16]; /* request extension block 'addr'*/ - unsigned char rpl_extb[16]; /* reply extension block 'addres'*/ - unsigned short ccp_rtcode; /* server return code */ - unsigned short ccp_rscode; /* server reason code */ - unsigned int mac_data_len; /* Mac Data Length */ - unsigned char logon_id[8]; /* Logon Identifier */ - unsigned char mac_value[8]; /* Mac Value */ - unsigned char mac_content_flgs;/* Mac content flag byte */ - unsigned char pad_002; /* Alignment */ - unsigned short domain; /* Domain */ - unsigned char pad_003[12]; /* Domain masks */ - unsigned char pad_004[36]; /* reserved */ -} __attribute__((packed)); - int zcrypt_pcixcc_init(void); void zcrypt_pcixcc_exit(void); diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index 1c8f71a5985..c0d1c0eb320 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -28,7 +28,7 @@ static void zfcp_ccw_remove(struct ccw_device *); static int zfcp_ccw_set_online(struct ccw_device *); static int zfcp_ccw_set_offline(struct ccw_device *); static int zfcp_ccw_notify(struct ccw_device *, int); -static void zfcp_ccw_shutdown(struct device *); +static void zfcp_ccw_shutdown(struct ccw_device *); static struct ccw_device_id zfcp_ccw_device_id[] = { {CCW_DEVICE_DEVTYPE(ZFCP_CONTROL_UNIT_TYPE, @@ -51,9 +51,7 @@ static struct ccw_driver zfcp_ccw_driver = { .set_online = zfcp_ccw_set_online, .set_offline = zfcp_ccw_set_offline, .notify = zfcp_ccw_notify, - .driver = { - .shutdown = zfcp_ccw_shutdown, - }, + .shutdown = zfcp_ccw_shutdown, }; MODULE_DEVICE_TABLE(ccw, zfcp_ccw_device_id); @@ -277,12 +275,12 @@ zfcp_ccw_register(void) * Makes sure that QDIO queues are down when the system gets stopped. */ static void -zfcp_ccw_shutdown(struct device *dev) +zfcp_ccw_shutdown(struct ccw_device *cdev) { struct zfcp_adapter *adapter; down(&zfcp_data.config_sema); - adapter = dev_get_drvdata(dev); + adapter = dev_get_drvdata(&cdev->dev); zfcp_erp_adapter_shutdown(adapter, 0); zfcp_erp_wait(adapter); up(&zfcp_data.config_sema); diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 5f3212440f6..ffa3bf75694 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -19,8 +19,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include <asm/debug.h> #include <linux/ctype.h> +#include <asm/debug.h> #include "zfcp_ext.h" static u32 dbfsize = 4; @@ -35,17 +35,17 @@ static int zfcp_dbf_stck(char *out_buf, const char *label, unsigned long long stck) { unsigned long long sec; - struct timespec xtime; + struct timespec dbftime; int len = 0; stck -= 0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096); sec = stck >> 12; do_div(sec, 1000000); - xtime.tv_sec = sec; + dbftime.tv_sec = sec; stck -= (sec * 1000000) << 12; - xtime.tv_nsec = ((stck * 1000) >> 12); + dbftime.tv_nsec = ((stck * 1000) >> 12); len += sprintf(out_buf + len, "%-24s%011lu:%06lu\n", - label, xtime.tv_sec, xtime.tv_nsec); + label, dbftime.tv_sec, dbftime.tv_nsec); return len; } diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index d8cd75ce2d9..16b4418ab25 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -54,7 +54,7 @@ static int zfcp_erp_strategy_check_adapter(struct zfcp_adapter *, int); static int zfcp_erp_strategy_statechange(int, u32, struct zfcp_adapter *, struct zfcp_port *, struct zfcp_unit *, int); -static inline int zfcp_erp_strategy_statechange_detected(atomic_t *, u32); +static int zfcp_erp_strategy_statechange_detected(atomic_t *, u32); static int zfcp_erp_strategy_followup_actions(int, struct zfcp_adapter *, struct zfcp_port *, struct zfcp_unit *, int); @@ -106,8 +106,8 @@ static void zfcp_erp_action_cleanup(int, struct zfcp_adapter *, static void zfcp_erp_action_ready(struct zfcp_erp_action *); static int zfcp_erp_action_exists(struct zfcp_erp_action *); -static inline void zfcp_erp_action_to_ready(struct zfcp_erp_action *); -static inline void zfcp_erp_action_to_running(struct zfcp_erp_action *); +static void zfcp_erp_action_to_ready(struct zfcp_erp_action *); +static void zfcp_erp_action_to_running(struct zfcp_erp_action *); static void zfcp_erp_memwait_handler(unsigned long); @@ -952,7 +952,7 @@ zfcp_erp_memwait_handler(unsigned long data) * action gets an appropriate flag and will be processed * accordingly */ -void zfcp_erp_timeout_handler(unsigned long data) +static void zfcp_erp_timeout_handler(unsigned long data) { struct zfcp_erp_action *erp_action = (struct zfcp_erp_action *) data; struct zfcp_adapter *adapter = erp_action->adapter; @@ -1491,7 +1491,7 @@ zfcp_erp_strategy_statechange(int action, return retval; } -static inline int +static int zfcp_erp_strategy_statechange_detected(atomic_t * target_status, u32 erp_status) { return @@ -2001,7 +2001,7 @@ zfcp_erp_adapter_strategy_generic(struct zfcp_erp_action *erp_action, int close) * returns: 0 - successful setup * !0 - failed setup */ -int +static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *erp_action) { int retval; @@ -3248,8 +3248,7 @@ static void zfcp_erp_action_dismiss_unit(struct zfcp_unit *unit) zfcp_erp_action_dismiss(&unit->erp_action); } -static inline void -zfcp_erp_action_to_running(struct zfcp_erp_action *erp_action) +static void zfcp_erp_action_to_running(struct zfcp_erp_action *erp_action) { struct zfcp_adapter *adapter = erp_action->adapter; @@ -3258,8 +3257,7 @@ zfcp_erp_action_to_running(struct zfcp_erp_action *erp_action) list_move(&erp_action->list, &erp_action->adapter->erp_running_head); } -static inline void -zfcp_erp_action_to_ready(struct zfcp_erp_action *erp_action) +static void zfcp_erp_action_to_ready(struct zfcp_erp_action *erp_action) { struct zfcp_adapter *adapter = erp_action->adapter; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 74901e981e1..d2fc2384c3b 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -491,6 +491,7 @@ struct dlm_ls { uint64_t ls_recover_seq; struct dlm_recover *ls_recover_args; struct rw_semaphore ls_in_recovery; /* block local requests */ + struct rw_semaphore ls_recv_active; /* block dlm_recv */ struct list_head ls_requestqueue;/* queue remote requests */ struct mutex ls_requestqueue_mutex; char *ls_recover_buf; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 2082daf083d..3915b8e1414 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3638,55 +3638,8 @@ static void receive_lookup_reply(struct dlm_ls *ls, struct dlm_message *ms) dlm_put_lkb(lkb); } -int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) +static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms) { - struct dlm_message *ms = (struct dlm_message *) hd; - struct dlm_ls *ls; - int error = 0; - - if (!recovery) - dlm_message_in(ms); - - ls = dlm_find_lockspace_global(hd->h_lockspace); - if (!ls) { - log_print("drop message %d from %d for unknown lockspace %d", - ms->m_type, nodeid, hd->h_lockspace); - return -EINVAL; - } - - /* recovery may have just ended leaving a bunch of backed-up requests - in the requestqueue; wait while dlm_recoverd clears them */ - - if (!recovery) - dlm_wait_requestqueue(ls); - - /* recovery may have just started while there were a bunch of - in-flight requests -- save them in requestqueue to be processed - after recovery. we can't let dlm_recvd block on the recovery - lock. if dlm_recoverd is calling this function to clear the - requestqueue, it needs to be interrupted (-EINTR) if another - recovery operation is starting. */ - - while (1) { - if (dlm_locking_stopped(ls)) { - if (recovery) { - error = -EINTR; - goto out; - } - error = dlm_add_requestqueue(ls, nodeid, hd); - if (error == -EAGAIN) - continue; - else { - error = -EINTR; - goto out; - } - } - - if (dlm_lock_recovery_try(ls)) - break; - schedule(); - } - switch (ms->m_type) { /* messages sent to a master node */ @@ -3761,17 +3714,90 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery) log_error(ls, "unknown message type %d", ms->m_type); } - dlm_unlock_recovery(ls); - out: - dlm_put_lockspace(ls); dlm_astd_wake(); - return error; } +/* If the lockspace is in recovery mode (locking stopped), then normal + messages are saved on the requestqueue for processing after recovery is + done. When not in recovery mode, we wait for dlm_recoverd to drain saved + messages off the requestqueue before we process new ones. This occurs right + after recovery completes when we transition from saving all messages on + requestqueue, to processing all the saved messages, to processing new + messages as they arrive. */ -/* - * Recovery related - */ +static void dlm_receive_message(struct dlm_ls *ls, struct dlm_message *ms, + int nodeid) +{ + if (dlm_locking_stopped(ls)) { + dlm_add_requestqueue(ls, nodeid, (struct dlm_header *) ms); + } else { + dlm_wait_requestqueue(ls); + _receive_message(ls, ms); + } +} + +/* This is called by dlm_recoverd to process messages that were saved on + the requestqueue. */ + +void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms) +{ + _receive_message(ls, ms); +} + +/* This is called by the midcomms layer when something is received for + the lockspace. It could be either a MSG (normal message sent as part of + standard locking activity) or an RCOM (recovery message sent as part of + lockspace recovery). */ + +void dlm_receive_buffer(struct dlm_header *hd, int nodeid) +{ + struct dlm_message *ms = (struct dlm_message *) hd; + struct dlm_rcom *rc = (struct dlm_rcom *) hd; + struct dlm_ls *ls; + int type = 0; + + switch (hd->h_cmd) { + case DLM_MSG: + dlm_message_in(ms); + type = ms->m_type; + break; + case DLM_RCOM: + dlm_rcom_in(rc); + type = rc->rc_type; + break; + default: + log_print("invalid h_cmd %d from %u", hd->h_cmd, nodeid); + return; + } + + if (hd->h_nodeid != nodeid) { + log_print("invalid h_nodeid %d from %d lockspace %x", + hd->h_nodeid, nodeid, hd->h_lockspace); + return; + } + + ls = dlm_find_lockspace_global(hd->h_lockspace); + if (!ls) { + log_print("invalid h_lockspace %x from %d cmd %d type %d", + hd->h_lockspace, nodeid, hd->h_cmd, type); + + if (hd->h_cmd == DLM_RCOM && type == DLM_RCOM_STATUS) + dlm_send_ls_not_ready(nodeid, rc); + return; + } + + /* this rwsem allows dlm_ls_stop() to wait for all dlm_recv threads to + be inactive (in this ls) before transitioning to recovery mode */ + + down_read(&ls->ls_recv_active); + if (hd->h_cmd == DLM_MSG) + dlm_receive_message(ls, ms, nodeid); + else + dlm_receive_rcom(ls, rc, nodeid); + up_read(&ls->ls_recv_active); + + dlm_put_lockspace(ls); +} static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) { @@ -4429,7 +4455,8 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, if (lvb_in && ua->lksb.sb_lvbptr) memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN); - ua->castparam = ua_tmp->castparam; + if (ua_tmp->castparam) + ua->castparam = ua_tmp->castparam; ua->user_lksb = ua_tmp->user_lksb; error = set_unlock_args(flags, ua, &args); @@ -4474,7 +4501,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp, goto out; ua = (struct dlm_user_args *)lkb->lkb_astparam; - ua->castparam = ua_tmp->castparam; + if (ua_tmp->castparam) + ua->castparam = ua_tmp->castparam; ua->user_lksb = ua_tmp->user_lksb; error = set_unlock_args(flags, ua, &args); diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 1720313c22d..ada04680a1e 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -16,7 +16,8 @@ void dlm_print_rsb(struct dlm_rsb *r); void dlm_dump_rsb(struct dlm_rsb *r); void dlm_print_lkb(struct dlm_lkb *lkb); -int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery); +void dlm_receive_message_saved(struct dlm_ls *ls, struct dlm_message *ms); +void dlm_receive_buffer(struct dlm_header *hd, int nodeid); int dlm_modes_compat(int mode1, int mode2); int dlm_find_rsb(struct dlm_ls *ls, char *name, int namelen, unsigned int flags, struct dlm_rsb **r_ret); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 1dc72105ab1..628eaa669e6 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -519,6 +519,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, ls->ls_recover_seq = 0; ls->ls_recover_args = NULL; init_rwsem(&ls->ls_in_recovery); + init_rwsem(&ls->ls_recv_active); INIT_LIST_HEAD(&ls->ls_requestqueue); mutex_init(&ls->ls_requestqueue_mutex); mutex_init(&ls->ls_clear_proc_locks); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9e9d2e82f40..58bf3f5cdbe 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -334,18 +334,8 @@ static void close_connection(struct connection *con, bool and_other) con->rx_page = NULL; } - /* If we are an 'othercon' then NULL the pointer to us - from the parent and tidy ourself up */ - if (test_bit(CF_IS_OTHERCON, &con->flags)) { - struct connection *parent = __nodeid2con(con->nodeid, 0); - parent->othercon = NULL; - kmem_cache_free(con_cache, con); - } - else { - /* Parent connections get reused */ - con->retries = 0; - mutex_unlock(&con->sock_mutex); - } + con->retries = 0; + mutex_unlock(&con->sock_mutex); } /* We only send shutdown messages to nodes that are not part of the cluster */ @@ -731,6 +721,8 @@ static int tcp_accept_from_sock(struct connection *con) INIT_WORK(&othercon->swork, process_send_sockets); INIT_WORK(&othercon->rwork, process_recv_sockets); set_bit(CF_IS_OTHERCON, &othercon->flags); + } + if (!othercon->sock) { newcon->othercon = othercon; othercon->sock = newsock; newsock->sk->sk_user_data = othercon; @@ -1272,14 +1264,15 @@ static void send_to_sock(struct connection *con) if (len) { ret = sendpage(con->sock, e->page, offset, len, msg_flags); - if (ret == -EAGAIN || ret == 0) + if (ret == -EAGAIN || ret == 0) { + cond_resched(); goto out; + } if (ret <= 0) goto send_error; - } else { + } /* Don't starve people filling buffers */ cond_resched(); - } spin_lock(&con->writequeue_lock); e->offset += ret; diff --git a/fs/dlm/member.c b/fs/dlm/member.c index d09977528f6..e9cdcab306e 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -18,10 +18,6 @@ #include "rcom.h" #include "config.h" -/* - * Following called by dlm_recoverd thread - */ - static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new) { struct dlm_member *memb = NULL; @@ -250,18 +246,30 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) return error; } -/* - * Following called from lockspace.c - */ +/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before + dlm_ls_start() is called on any of them to start the new recovery. */ int dlm_ls_stop(struct dlm_ls *ls) { int new; /* - * A stop cancels any recovery that's in progress (see RECOVERY_STOP, - * dlm_recovery_stopped()) and prevents any new locks from being - * processed (see RUNNING, dlm_locking_stopped()). + * Prevent dlm_recv from being in the middle of something when we do + * the stop. This includes ensuring dlm_recv isn't processing a + * recovery message (rcom), while dlm_recoverd is aborting and + * resetting things from an in-progress recovery. i.e. we want + * dlm_recoverd to abort its recovery without worrying about dlm_recv + * processing an rcom at the same time. Stopping dlm_recv also makes + * it easy for dlm_receive_message() to check locking stopped and add a + * message to the requestqueue without races. + */ + + down_write(&ls->ls_recv_active); + + /* + * Abort any recovery that's in progress (see RECOVERY_STOP, + * dlm_recovery_stopped()) and tell any other threads running in the + * dlm to quit any processing (see RUNNING, dlm_locking_stopped()). */ spin_lock(&ls->ls_recover_lock); @@ -271,8 +279,14 @@ int dlm_ls_stop(struct dlm_ls *ls) spin_unlock(&ls->ls_recover_lock); /* + * Let dlm_recv run again, now any normal messages will be saved on the + * requestqueue for later. + */ + + up_write(&ls->ls_recv_active); + + /* * This in_recovery lock does two things: - * * 1) Keeps this function from returning until all threads are out * of locking routines and locking is truely stopped. * 2) Keeps any new requests from being processed until it's unlocked @@ -284,9 +298,8 @@ int dlm_ls_stop(struct dlm_ls *ls) /* * The recoverd suspend/resume makes sure that dlm_recoverd (if - * running) has noticed the clearing of RUNNING above and quit - * processing the previous recovery. This will be true for all nodes - * before any nodes start the new recovery. + * running) has noticed RECOVERY_STOP above and quit processing the + * previous recovery. */ dlm_recoverd_suspend(ls); diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c index a5126e0c68a..f8c69dda16a 100644 --- a/fs/dlm/midcomms.c +++ b/fs/dlm/midcomms.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -27,7 +27,6 @@ #include "dlm_internal.h" #include "lowcomms.h" #include "config.h" -#include "rcom.h" #include "lock.h" #include "midcomms.h" @@ -117,19 +116,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base, offset &= (limit - 1); len -= msglen; - switch (msg->h_cmd) { - case DLM_MSG: - dlm_receive_message(msg, nodeid, 0); - break; - - case DLM_RCOM: - dlm_receive_rcom(msg, nodeid); - break; - - default: - log_print("unknown msg type %x from %u: %u %u %u %u", - msg->h_cmd, nodeid, msglen, len, offset, ret); - } + dlm_receive_buffer(msg, nodeid); } if (msg != (struct dlm_header *) __tmp) diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 188b91c027e..ae2fd97fa4a 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in) dlm_recover_process_copy(ls, rc_in); } -static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) +/* If the lockspace doesn't exist then still send a status message + back; it's possible that it just doesn't have its global_id yet. */ + +int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in) { struct dlm_rcom *rc; struct rcom_config *rf; @@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc) return rv; } -/* Called by dlm_recvd; corresponds to dlm_receive_message() but special +/* Called by dlm_recv; corresponds to dlm_receive_message() but special recovery-only comms are sent through here. */ -void dlm_receive_rcom(struct dlm_header *hd, int nodeid) +void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) { - struct dlm_rcom *rc = (struct dlm_rcom *) hd; - struct dlm_ls *ls; - - dlm_rcom_in(rc); - - /* If the lockspace doesn't exist then still send a status message - back; it's possible that it just doesn't have its global_id yet. */ - - ls = dlm_find_lockspace_global(hd->h_lockspace); - if (!ls) { - log_print("lockspace %x from %d type %x not found", - hd->h_lockspace, nodeid, rc->rc_type); - if (rc->rc_type == DLM_RCOM_STATUS) - send_ls_not_ready(nodeid, rc); - return; - } - if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) { log_debug(ls, "ignoring recovery message %x from %d", rc->rc_type, nodeid); @@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) if (is_old_reply(ls, rc)) goto out; - if (nodeid != rc->rc_header.h_nodeid) { - log_error(ls, "bad rcom nodeid %d from %d", - rc->rc_header.h_nodeid, nodeid); - goto out; - } - switch (rc->rc_type) { case DLM_RCOM_STATUS: receive_rcom_status(ls, rc); @@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid) DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type);); } out: - dlm_put_lockspace(ls); + return; } diff --git a/fs/dlm/rcom.h b/fs/dlm/rcom.h index d7984321ff4..b09abd29ba3 100644 --- a/fs/dlm/rcom.h +++ b/fs/dlm/rcom.h @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -18,7 +18,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid); int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name,int last_len); int dlm_send_rcom_lookup(struct dlm_rsb *r, int dir_nodeid); int dlm_send_rcom_lock(struct dlm_rsb *r, struct dlm_lkb *lkb); -void dlm_receive_rcom(struct dlm_header *hd, int nodeid); +void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid); +int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in); #endif diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 66575997861..4b89e20eebe 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c @@ -24,19 +24,28 @@ /* If the start for which we're re-enabling locking (seq) has been superseded - by a newer stop (ls_recover_seq), we need to leave locking disabled. */ + by a newer stop (ls_recover_seq), we need to leave locking disabled. + + We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees + locking stopped and b) adds a message to the requestqueue, but dlm_recoverd + enables locking and clears the requestqueue between a and b. */ static int enable_locking(struct dlm_ls *ls, uint64_t seq) { int error = -EINTR; + down_write(&ls->ls_recv_active); + spin_lock(&ls->ls_recover_lock); if (ls->ls_recover_seq == seq) { set_bit(LSFL_RUNNING, &ls->ls_flags); + /* unblocks processes waiting to enter the dlm */ up_write(&ls->ls_in_recovery); error = 0; } spin_unlock(&ls->ls_recover_lock); + + up_write(&ls->ls_recv_active); return error; } diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c index 65008d79c96..0de04f17cce 100644 --- a/fs/dlm/requestqueue.c +++ b/fs/dlm/requestqueue.c @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -20,7 +20,7 @@ struct rq_entry { struct list_head list; int nodeid; - char request[1]; + char request[0]; }; /* @@ -30,42 +30,39 @@ struct rq_entry { * lockspace is enabled on some while still suspended on others. */ -int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd) { struct rq_entry *e; int length = hd->h_length; - int rv = 0; e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL); if (!e) { - log_print("dlm_add_requestqueue: out of memory\n"); - return 0; + log_print("dlm_add_requestqueue: out of memory len %d", length); + return; } e->nodeid = nodeid; memcpy(e->request, hd, length); - /* We need to check dlm_locking_stopped() after taking the mutex to - avoid a race where dlm_recoverd enables locking and runs - process_requestqueue between our earlier dlm_locking_stopped check - and this addition to the requestqueue. */ - mutex_lock(&ls->ls_requestqueue_mutex); - if (dlm_locking_stopped(ls)) - list_add_tail(&e->list, &ls->ls_requestqueue); - else { - log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid); - kfree(e); - rv = -EAGAIN; - } + list_add_tail(&e->list, &ls->ls_requestqueue); mutex_unlock(&ls->ls_requestqueue_mutex); - return rv; } +/* + * Called by dlm_recoverd to process normal messages saved while recovery was + * happening. Normal locking has been enabled before this is called. dlm_recv + * upon receiving a message, will wait for all saved messages to be drained + * here before processing the message it got. If a new dlm_ls_stop() arrives + * while we're processing these saved messages, it may block trying to suspend + * dlm_recv if dlm_recv is waiting for us in dlm_wait_requestqueue. In that + * case, we don't abort since locking_stopped is still 0. If dlm_recv is not + * waiting for us, then this processing may be aborted due to locking_stopped. + */ + int dlm_process_requestqueue(struct dlm_ls *ls) { struct rq_entry *e; - struct dlm_header *hd; int error = 0; mutex_lock(&ls->ls_requestqueue_mutex); @@ -79,14 +76,7 @@ int dlm_process_requestqueue(struct dlm_ls *ls) e = list_entry(ls->ls_requestqueue.next, struct rq_entry, list); mutex_unlock(&ls->ls_requestqueue_mutex); - hd = (struct dlm_header *) e->request; - error = dlm_receive_message(hd, e->nodeid, 1); - - if (error == -EINTR) { - /* entry is left on requestqueue */ - log_debug(ls, "process_requestqueue abort eintr"); - break; - } + dlm_receive_message_saved(ls, (struct dlm_message *)e->request); mutex_lock(&ls->ls_requestqueue_mutex); list_del(&e->list); @@ -106,10 +96,12 @@ int dlm_process_requestqueue(struct dlm_ls *ls) /* * After recovery is done, locking is resumed and dlm_recoverd takes all the - * saved requests and processes them as they would have been by dlm_recvd. At - * the same time, dlm_recvd will start receiving new requests from remote - * nodes. We want to delay dlm_recvd processing new requests until - * dlm_recoverd has finished processing the old saved requests. + * saved requests and processes them as they would have been by dlm_recv. At + * the same time, dlm_recv will start receiving new requests from remote nodes. + * We want to delay dlm_recv processing new requests until dlm_recoverd has + * finished processing the old saved requests. We don't check for locking + * stopped here because dlm_ls_stop won't stop locking until it's suspended us + * (dlm_recv). */ void dlm_wait_requestqueue(struct dlm_ls *ls) @@ -118,8 +110,6 @@ void dlm_wait_requestqueue(struct dlm_ls *ls) mutex_lock(&ls->ls_requestqueue_mutex); if (list_empty(&ls->ls_requestqueue)) break; - if (dlm_locking_stopped(ls)) - break; mutex_unlock(&ls->ls_requestqueue_mutex); schedule(); } diff --git a/fs/dlm/requestqueue.h b/fs/dlm/requestqueue.h index 6a53ea03335..aba34fc05ee 100644 --- a/fs/dlm/requestqueue.h +++ b/fs/dlm/requestqueue.h @@ -1,7 +1,7 @@ /****************************************************************************** ******************************************************************************* ** -** Copyright (C) 2005 Red Hat, Inc. All rights reserved. +** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -13,7 +13,7 @@ #ifndef __REQUESTQUEUE_DOT_H__ #define __REQUESTQUEUE_DOT_H__ -int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); +void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd); int dlm_process_requestqueue(struct dlm_ls *ls); void dlm_wait_requestqueue(struct dlm_ls *ls); void dlm_purge_requestqueue(struct dlm_ls *ls); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index cd805a66880..93fa427bb5f 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -93,9 +93,10 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, map_bh(bh, inode->i_sb, block); set_buffer_uptodate(bh); + if (!gfs2_is_jdata(ip)) + mark_buffer_dirty(bh); if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) gfs2_trans_add_bh(ip->i_gl, bh, 0); - mark_buffer_dirty(bh); if (release) { unlock_page(page); @@ -1085,6 +1086,33 @@ static int do_shrink(struct gfs2_inode *ip, u64 size) return error; } +static int do_touch(struct gfs2_inode *ip, u64 size) +{ + struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + struct buffer_head *dibh; + int error; + + error = gfs2_trans_begin(sdp, RES_DINODE, 0); + if (error) + return error; + + down_write(&ip->i_rw_mutex); + + error = gfs2_meta_inode_buffer(ip, &dibh); + if (error) + goto do_touch_out; + + ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; + gfs2_trans_add_bh(ip->i_gl, dibh, 1); + gfs2_dinode_out(ip, dibh->b_data); + brelse(dibh); + +do_touch_out: + up_write(&ip->i_rw_mutex); + gfs2_trans_end(sdp); + return error; +} + /** * gfs2_truncatei - make a file a given size * @ip: the inode @@ -1105,8 +1133,11 @@ int gfs2_truncatei(struct gfs2_inode *ip, u64 size) if (size > ip->i_di.di_size) error = do_grow(ip, size); - else + else if (size < ip->i_di.di_size) error = do_shrink(ip, size); + else + /* update time stamps */ + error = do_touch(ip, size); return error; } diff --git a/fs/gfs2/daemon.c b/fs/gfs2/daemon.c index 3548d9f31e0..3731ab0771d 100644 --- a/fs/gfs2/daemon.c +++ b/fs/gfs2/daemon.c @@ -35,30 +35,6 @@ The kthread functions used to start these daemons block and flush signals. */ /** - * gfs2_scand - Look for cached glocks and inodes to toss from memory - * @sdp: Pointer to GFS2 superblock - * - * One of these daemons runs, finding candidates to add to sd_reclaim_list. - * See gfs2_glockd() - */ - -int gfs2_scand(void *data) -{ - struct gfs2_sbd *sdp = data; - unsigned long t; - - while (!kthread_should_stop()) { - gfs2_scand_internal(sdp); - t = gfs2_tune_get(sdp, gt_scand_secs) * HZ; - if (freezing(current)) - refrigerator(); - schedule_timeout_interruptible(t); - } - - return 0; -} - -/** * gfs2_glockd - Reclaim unused glock structures * @sdp: Pointer to GFS2 superblock * diff --git a/fs/gfs2/daemon.h b/fs/gfs2/daemon.h index 801007120fb..0de9b355795 100644 --- a/fs/gfs2/daemon.h +++ b/fs/gfs2/daemon.h @@ -10,7 +10,6 @@ #ifndef __DAEMON_DOT_H__ #define __DAEMON_DOT_H__ -int gfs2_scand(void *data); int gfs2_glockd(void *data); int gfs2_recoverd(void *data); int gfs2_logd(void *data); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 2beb2f401aa..9949bb746a5 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1043,6 +1043,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) error = gfs2_meta_inode_buffer(dip, &dibh); if (!gfs2_assert_withdraw(GFS2_SB(&dip->i_inode), !error)) { + gfs2_trans_add_bh(dip->i_gl, dibh, 1); dip->i_di.di_blocks++; gfs2_set_inode_blocks(&dip->i_inode); gfs2_dinode_out(dip, dibh->b_data); @@ -1501,7 +1502,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) inode = gfs2_inode_lookup(dir->i_sb, be16_to_cpu(dent->de_type), be64_to_cpu(dent->de_inum.no_addr), - be64_to_cpu(dent->de_inum.no_formal_ino)); + be64_to_cpu(dent->de_inum.no_formal_ino), 0); brelse(bh); return inode; } diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c index 1ab3e9d7388..aa8dbf303f6 100644 --- a/fs/gfs2/eaops.c +++ b/fs/gfs2/eaops.c @@ -200,28 +200,28 @@ static int security_eo_remove(struct gfs2_inode *ip, struct gfs2_ea_request *er) return gfs2_ea_remove_i(ip, er); } -static struct gfs2_eattr_operations gfs2_user_eaops = { +static const struct gfs2_eattr_operations gfs2_user_eaops = { .eo_get = user_eo_get, .eo_set = user_eo_set, .eo_remove = user_eo_remove, .eo_name = "user", }; -struct gfs2_eattr_operations gfs2_system_eaops = { +const struct gfs2_eattr_operations gfs2_system_eaops = { .eo_get = system_eo_get, .eo_set = system_eo_set, .eo_remove = system_eo_remove, .eo_name = "system", }; -static struct gfs2_eattr_operations gfs2_security_eaops = { +static const struct gfs2_eattr_operations gfs2_security_eaops = { .eo_get = security_eo_get, .eo_set = security_eo_set, .eo_remove = security_eo_remove, .eo_name = "security", }; -struct gfs2_eattr_operations *gfs2_ea_ops[] = { +const struct gfs2_eattr_operations *gfs2_ea_ops[] = { NULL, &gfs2_user_eaops, &gfs2_system_eaops, diff --git a/fs/gfs2/eaops.h b/fs/gfs2/eaops.h index 508b4f7a244..da2f7fbbb40 100644 --- a/fs/gfs2/eaops.h +++ b/fs/gfs2/eaops.h @@ -22,9 +22,9 @@ struct gfs2_eattr_operations { unsigned int gfs2_ea_name2type(const char *name, const char **truncated_name); -extern struct gfs2_eattr_operations gfs2_system_eaops; +extern const struct gfs2_eattr_operations gfs2_system_eaops; -extern struct gfs2_eattr_operations *gfs2_ea_ops[]; +extern const struct gfs2_eattr_operations *gfs2_ea_ops[]; #endif /* __EAOPS_DOT_H__ */ diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 3f0974e1afe..a37efe4aae6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -25,8 +25,10 @@ #include <asm/uaccess.h> #include <linux/seq_file.h> #include <linux/debugfs.h> -#include <linux/module.h> -#include <linux/kallsyms.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/workqueue.h> +#include <linux/jiffies.h> #include "gfs2.h" #include "incore.h" @@ -48,7 +50,6 @@ struct glock_iter { int hash; /* hash bucket index */ struct gfs2_sbd *sdp; /* incore superblock */ struct gfs2_glock *gl; /* current glock struct */ - struct hlist_head *hb_list; /* current hash bucket ptr */ struct seq_file *seq; /* sequence file for debugfs */ char string[512]; /* scratch space */ }; @@ -59,8 +60,13 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); static void gfs2_glock_drop_th(struct gfs2_glock *gl); +static void run_queue(struct gfs2_glock *gl); + static DECLARE_RWSEM(gfs2_umount_flush_sem); static struct dentry *gfs2_root; +static struct task_struct *scand_process; +static unsigned int scand_secs = 5; +static struct workqueue_struct *glock_workqueue; #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) @@ -276,6 +282,18 @@ static struct gfs2_glock *gfs2_glock_find(const struct gfs2_sbd *sdp, return gl; } +static void glock_work_func(struct work_struct *work) +{ + struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); + + spin_lock(&gl->gl_spin); + if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags)) + set_bit(GLF_DEMOTE, &gl->gl_flags); + run_queue(gl); + spin_unlock(&gl->gl_spin); + gfs2_glock_put(gl); +} + /** * gfs2_glock_get() - Get a glock, or create one if one doesn't exist * @sdp: The GFS2 superblock @@ -315,6 +333,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_name = name; atomic_set(&gl->gl_ref, 1); gl->gl_state = LM_ST_UNLOCKED; + gl->gl_demote_state = LM_ST_EXCLUSIVE; gl->gl_hash = hash; gl->gl_owner_pid = 0; gl->gl_ip = 0; @@ -323,10 +342,12 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, gl->gl_req_bh = NULL; gl->gl_vn = 0; gl->gl_stamp = jiffies; + gl->gl_tchange = jiffies; gl->gl_object = NULL; gl->gl_sbd = sdp; gl->gl_aspace = NULL; lops_init_le(&gl->gl_le, &gfs2_glock_lops); + INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); /* If this glock protects actual on-disk data or metadata blocks, create a VFS inode to manage the pages/buffers holding them. */ @@ -440,6 +461,8 @@ static void wait_on_holder(struct gfs2_holder *gh) static void gfs2_demote_wake(struct gfs2_glock *gl) { + BUG_ON(!spin_is_locked(&gl->gl_spin)); + gl->gl_demote_state = LM_ST_EXCLUSIVE; clear_bit(GLF_DEMOTE, &gl->gl_flags); smp_mb__after_clear_bit(); wake_up_bit(&gl->gl_flags, GLF_DEMOTE); @@ -545,12 +568,14 @@ static int rq_demote(struct gfs2_glock *gl) return 0; } set_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); if (gl->gl_demote_state == LM_ST_UNLOCKED || - gl->gl_state != LM_ST_EXCLUSIVE) + gl->gl_state != LM_ST_EXCLUSIVE) { + spin_unlock(&gl->gl_spin); gfs2_glock_drop_th(gl); - else + } else { + spin_unlock(&gl->gl_spin); gfs2_glock_xmote_th(gl, NULL); + } spin_lock(&gl->gl_spin); return 0; @@ -679,24 +704,25 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ -static void handle_callback(struct gfs2_glock *gl, unsigned int state, int remote) +static void handle_callback(struct gfs2_glock *gl, unsigned int state, + int remote, unsigned long delay) { + int bit = delay ? GLF_PENDING_DEMOTE : GLF_DEMOTE; + spin_lock(&gl->gl_spin); - if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { + set_bit(bit, &gl->gl_flags); + if (gl->gl_demote_state == LM_ST_EXCLUSIVE) { gl->gl_demote_state = state; gl->gl_demote_time = jiffies; if (remote && gl->gl_ops->go_type == LM_TYPE_IOPEN && gl->gl_object) { - struct inode *inode = igrab(gl->gl_object); + gfs2_glock_schedule_for_reclaim(gl); spin_unlock(&gl->gl_spin); - if (inode) { - d_prune_aliases(inode); - iput(inode); - } return; } - } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { - gl->gl_demote_state = state; + } else if (gl->gl_demote_state != LM_ST_UNLOCKED && + gl->gl_demote_state != state) { + gl->gl_demote_state = LM_ST_UNLOCKED; } spin_unlock(&gl->gl_spin); } @@ -723,6 +749,7 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) } gl->gl_state = new_state; + gl->gl_tchange = jiffies; } /** @@ -760,10 +787,20 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) if (!gh) { gl->gl_stamp = jiffies; - if (ret & LM_OUT_CANCELED) + if (ret & LM_OUT_CANCELED) { op_done = 0; - else + } else { + spin_lock(&gl->gl_spin); + if (gl->gl_state != gl->gl_demote_state) { + gl->gl_req_bh = NULL; + spin_unlock(&gl->gl_spin); + gfs2_glock_drop_th(gl); + gfs2_glock_put(gl); + return; + } gfs2_demote_wake(gl); + spin_unlock(&gl->gl_spin); + } } else { spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); @@ -799,7 +836,6 @@ out: gl->gl_req_gh = NULL; gl->gl_req_bh = NULL; clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); spin_unlock(&gl->gl_spin); } @@ -817,7 +853,7 @@ out: * */ -void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) +static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) { struct gfs2_sbd *sdp = gl->gl_sbd; int flags = gh ? gh->gh_flags : 0; @@ -871,7 +907,6 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); - gfs2_demote_wake(gl); if (glops->go_inval) glops->go_inval(gl, DIO_METADATA); @@ -884,10 +919,10 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) } spin_lock(&gl->gl_spin); + gfs2_demote_wake(gl); gl->gl_req_gh = NULL; gl->gl_req_bh = NULL; clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); spin_unlock(&gl->gl_spin); gfs2_glock_put(gl); @@ -1067,24 +1102,31 @@ static void add_to_queue(struct gfs2_holder *gh) if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); - existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); - printk(KERN_INFO "lock type : %d lock state : %d\n", - existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); - printk(KERN_INFO "lock type : %d lock state : %d\n", - gl->gl_name.ln_type, gl->gl_state); - BUG(); - } - - existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid); - if (existing) { - print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - BUG(); + if (!(gh->gh_flags & GL_FLOCK)) { + existing = find_holder_by_owner(&gl->gl_holders, + gh->gh_owner_pid); + if (existing) { + print_symbol(KERN_WARNING "original: %s\n", + existing->gh_ip); + printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); + printk(KERN_INFO "lock type : %d lock state : %d\n", + existing->gh_gl->gl_name.ln_type, + existing->gh_gl->gl_state); + print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); + printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); + printk(KERN_INFO "lock type : %d lock state : %d\n", + gl->gl_name.ln_type, gl->gl_state); + BUG(); + } + + existing = find_holder_by_owner(&gl->gl_waiters3, + gh->gh_owner_pid); + if (existing) { + print_symbol(KERN_WARNING "original: %s\n", + existing->gh_ip); + print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); + BUG(); + } } if (gh->gh_flags & LM_FLAG_PRIORITY) @@ -1195,9 +1237,10 @@ void gfs2_glock_dq(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; const struct gfs2_glock_operations *glops = gl->gl_ops; + unsigned delay = 0; if (gh->gh_flags & GL_NOCACHE) - handle_callback(gl, LM_ST_UNLOCKED, 0); + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); gfs2_glmutex_lock(gl); @@ -1215,8 +1258,14 @@ void gfs2_glock_dq(struct gfs2_holder *gh) } clear_bit(GLF_LOCK, &gl->gl_flags); - run_queue(gl); spin_unlock(&gl->gl_spin); + + gfs2_glock_hold(gl); + if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && + !test_bit(GLF_DEMOTE, &gl->gl_flags)) + delay = gl->gl_ops->go_min_hold_time; + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); } void gfs2_glock_dq_wait(struct gfs2_holder *gh) @@ -1443,18 +1492,21 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, unsigned int state) { struct gfs2_glock *gl; + unsigned long delay = 0; + unsigned long holdtime; + unsigned long now = jiffies; gl = gfs2_glock_find(sdp, name); if (!gl) return; - handle_callback(gl, state, 1); - - spin_lock(&gl->gl_spin); - run_queue(gl); - spin_unlock(&gl->gl_spin); + holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; + if (time_before(now, holdtime)) + delay = holdtime - now; - gfs2_glock_put(gl); + handle_callback(gl, state, 1, delay); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) + gfs2_glock_put(gl); } /** @@ -1495,7 +1547,8 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data) return; if (!gfs2_assert_warn(sdp, gl->gl_req_bh)) gl->gl_req_bh(gl, async->lc_ret); - gfs2_glock_put(gl); + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + gfs2_glock_put(gl); up_read(&gfs2_umount_flush_sem); return; } @@ -1588,7 +1641,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp) if (gfs2_glmutex_trylock(gl)) { if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl)) - handle_callback(gl, LM_ST_UNLOCKED, 0); + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); gfs2_glmutex_unlock(gl); } @@ -1617,7 +1670,7 @@ static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, goto out; gl = list_entry(head->first, struct gfs2_glock, gl_list); while(1) { - if (gl->gl_sbd == sdp) { + if (!sdp || gl->gl_sbd == sdp) { gfs2_glock_hold(gl); read_unlock(gl_lock_addr(hash)); if (prev) @@ -1635,6 +1688,7 @@ out: read_unlock(gl_lock_addr(hash)); if (prev) gfs2_glock_put(prev); + cond_resched(); return has_entries; } @@ -1663,20 +1717,6 @@ out_schedule: } /** - * gfs2_scand_internal - Look for glocks and inodes to toss from memory - * @sdp: the filesystem - * - */ - -void gfs2_scand_internal(struct gfs2_sbd *sdp) -{ - unsigned int x; - - for (x = 0; x < GFS2_GL_HASH_SIZE; x++) - examine_bucket(scan_glock, sdp, x); -} - -/** * clear_glock - look at a glock and see if we can free it from glock cache * @gl: the glock to look at * @@ -1701,7 +1741,7 @@ static void clear_glock(struct gfs2_glock *gl) if (gfs2_glmutex_trylock(gl)) { if (list_empty(&gl->gl_holders) && gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0); + handle_callback(gl, LM_ST_UNLOCKED, 0, 0); gfs2_glmutex_unlock(gl); } } @@ -1843,7 +1883,7 @@ static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) spin_lock(&gl->gl_spin); - print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, + print_dbg(gi, "Glock 0x%p (%u, 0x%llx)\n", gl, gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number); print_dbg(gi, " gl_flags ="); for (x = 0; x < 32; x++) { @@ -1963,6 +2003,35 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) return error; } +/** + * gfs2_scand - Look for cached glocks and inodes to toss from memory + * @sdp: Pointer to GFS2 superblock + * + * One of these daemons runs, finding candidates to add to sd_reclaim_list. + * See gfs2_glockd() + */ + +static int gfs2_scand(void *data) +{ + unsigned x; + unsigned delay; + + while (!kthread_should_stop()) { + for (x = 0; x < GFS2_GL_HASH_SIZE; x++) + examine_bucket(scan_glock, NULL, x); + if (freezing(current)) + refrigerator(); + delay = scand_secs; + if (delay < 1) + delay = 1; + schedule_timeout_interruptible(delay * HZ); + } + + return 0; +} + + + int __init gfs2_glock_init(void) { unsigned i; @@ -1974,52 +2043,69 @@ int __init gfs2_glock_init(void) rwlock_init(&gl_hash_locks[i]); } #endif + + scand_process = kthread_run(gfs2_scand, NULL, "gfs2_scand"); + if (IS_ERR(scand_process)) + return PTR_ERR(scand_process); + + glock_workqueue = create_workqueue("glock_workqueue"); + if (IS_ERR(glock_workqueue)) { + kthread_stop(scand_process); + return PTR_ERR(glock_workqueue); + } + return 0; } +void gfs2_glock_exit(void) +{ + destroy_workqueue(glock_workqueue); + kthread_stop(scand_process); +} + +module_param(scand_secs, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scand_secs, "The number of seconds between scand runs"); + static int gfs2_glock_iter_next(struct glock_iter *gi) { + struct gfs2_glock *gl; + +restart: read_lock(gl_lock_addr(gi->hash)); - while (1) { - if (!gi->hb_list) { /* If we don't have a hash bucket yet */ - gi->hb_list = &gl_hash_table[gi->hash].hb_list; - if (hlist_empty(gi->hb_list)) { - read_unlock(gl_lock_addr(gi->hash)); - gi->hash++; - read_lock(gl_lock_addr(gi->hash)); - gi->hb_list = NULL; - if (gi->hash >= GFS2_GL_HASH_SIZE) { - read_unlock(gl_lock_addr(gi->hash)); - return 1; - } - else - continue; - } - if (!hlist_empty(gi->hb_list)) { - gi->gl = list_entry(gi->hb_list->first, - struct gfs2_glock, - gl_list); - } - } else { - if (gi->gl->gl_list.next == NULL) { - read_unlock(gl_lock_addr(gi->hash)); - gi->hash++; - read_lock(gl_lock_addr(gi->hash)); - gi->hb_list = NULL; - continue; - } - gi->gl = list_entry(gi->gl->gl_list.next, - struct gfs2_glock, gl_list); - } + gl = gi->gl; + if (gl) { + gi->gl = hlist_entry(gl->gl_list.next, + struct gfs2_glock, gl_list); if (gi->gl) - break; + gfs2_glock_hold(gi->gl); } read_unlock(gl_lock_addr(gi->hash)); + if (gl) + gfs2_glock_put(gl); + if (gl && gi->gl == NULL) + gi->hash++; + while(gi->gl == NULL) { + if (gi->hash >= GFS2_GL_HASH_SIZE) + return 1; + read_lock(gl_lock_addr(gi->hash)); + gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, + struct gfs2_glock, gl_list); + if (gi->gl) + gfs2_glock_hold(gi->gl); + read_unlock(gl_lock_addr(gi->hash)); + gi->hash++; + } + + if (gi->sdp != gi->gl->gl_sbd) + goto restart; + return 0; } static void gfs2_glock_iter_free(struct glock_iter *gi) { + if (gi->gl) + gfs2_glock_put(gi->gl); kfree(gi); } @@ -2033,9 +2119,8 @@ static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) gi->sdp = sdp; gi->hash = 0; - gi->gl = NULL; - gi->hb_list = NULL; gi->seq = NULL; + gi->gl = NULL; memset(gi->string, 0, sizeof(gi->string)); if (gfs2_glock_iter_next(gi)) { @@ -2055,7 +2140,7 @@ static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) if (!gi) return NULL; - while (n--) { + while(n--) { if (gfs2_glock_iter_next(gi)) { gfs2_glock_iter_free(gi); return NULL; @@ -2082,7 +2167,9 @@ static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) { - /* nothing for now */ + struct glock_iter *gi = iter_ptr; + if (gi) + gfs2_glock_iter_free(gi); } static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) @@ -2095,7 +2182,7 @@ static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) return 0; } -static struct seq_operations gfs2_glock_seq_ops = { +static const struct seq_operations gfs2_glock_seq_ops = { .start = gfs2_glock_seq_start, .next = gfs2_glock_seq_next, .stop = gfs2_glock_seq_stop, diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 7721ca3fff9..b16f604eea9 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -26,6 +26,7 @@ #define GL_SKIP 0x00000100 #define GL_ATIME 0x00000200 #define GL_NOCACHE 0x00000400 +#define GL_FLOCK 0x00000800 #define GL_NOCANCEL 0x00001000 #define GLR_TRYFAILED 13 @@ -132,11 +133,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data); void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); void gfs2_reclaim_glock(struct gfs2_sbd *sdp); - -void gfs2_scand_internal(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); int __init gfs2_glock_init(void); +void gfs2_glock_exit(void); + int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); int gfs2_register_debugfs(void); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 777ca46010e..4670dcb2a87 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -41,7 +41,6 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) struct list_head *head = &gl->gl_ail_list; struct gfs2_bufdata *bd; struct buffer_head *bh; - u64 blkno; int error; blocks = atomic_read(&gl->gl_ail_count); @@ -57,19 +56,12 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) bd = list_entry(head->next, struct gfs2_bufdata, bd_ail_gl_list); bh = bd->bd_bh; - blkno = bh->b_blocknr; + gfs2_remove_from_ail(NULL, bd); + bd->bd_bh = NULL; + bh->b_private = NULL; + bd->bd_blkno = bh->b_blocknr; gfs2_assert_withdraw(sdp, !buffer_busy(bh)); - - bd->bd_ail = NULL; - list_del(&bd->bd_ail_st_list); - list_del(&bd->bd_ail_gl_list); - atomic_dec(&gl->gl_ail_count); - brelse(bh); - gfs2_log_unlock(sdp); - - gfs2_trans_add_revoke(sdp, blkno); - - gfs2_log_lock(sdp); + gfs2_trans_add_revoke(sdp, bd); } gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); gfs2_log_unlock(sdp); @@ -156,9 +148,11 @@ static void inode_go_sync(struct gfs2_glock *gl) ip = NULL; if (test_bit(GLF_DIRTY, &gl->gl_flags)) { - if (ip) + if (ip && !gfs2_is_jdata(ip)) filemap_fdatawrite(ip->i_inode.i_mapping); gfs2_log_flush(gl->gl_sbd, gl); + if (ip && gfs2_is_jdata(ip)) + filemap_fdatawrite(ip->i_inode.i_mapping); gfs2_meta_sync(gl); if (ip) { struct address_space *mapping = ip->i_inode.i_mapping; @@ -452,6 +446,7 @@ const struct gfs2_glock_operations gfs2_inode_glops = { .go_lock = inode_go_lock, .go_unlock = inode_go_unlock, .go_type = LM_TYPE_INODE, + .go_min_hold_time = HZ / 10, }; const struct gfs2_glock_operations gfs2_rgrp_glops = { @@ -462,6 +457,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, .go_type = LM_TYPE_RGRP, + .go_min_hold_time = HZ / 10, }; const struct gfs2_glock_operations gfs2_trans_glops = { diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 170ba93829c..eaddfb5a8e6 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -11,6 +11,7 @@ #define __INCORE_DOT_H__ #include <linux/fs.h> +#include <linux/workqueue.h> #define DIO_WAIT 0x00000010 #define DIO_METADATA 0x00000020 @@ -113,7 +114,13 @@ struct gfs2_bufdata { struct buffer_head *bd_bh; struct gfs2_glock *bd_gl; - struct list_head bd_list_tr; + union { + struct list_head list_tr; + u64 blkno; + } u; +#define bd_list_tr u.list_tr +#define bd_blkno u.blkno + struct gfs2_log_element bd_le; struct gfs2_ail *bd_ail; @@ -130,6 +137,7 @@ struct gfs2_glock_operations { int (*go_lock) (struct gfs2_holder *gh); void (*go_unlock) (struct gfs2_holder *gh); const int go_type; + const unsigned long go_min_hold_time; }; enum { @@ -161,6 +169,7 @@ enum { GLF_LOCK = 1, GLF_STICKY = 2, GLF_DEMOTE = 3, + GLF_PENDING_DEMOTE = 4, GLF_DIRTY = 5, }; @@ -193,6 +202,7 @@ struct gfs2_glock { u64 gl_vn; unsigned long gl_stamp; + unsigned long gl_tchange; void *gl_object; struct list_head gl_reclaim; @@ -203,6 +213,7 @@ struct gfs2_glock { struct gfs2_log_element gl_le; struct list_head gl_ail_list; atomic_t gl_ail_count; + struct delayed_work gl_work; }; struct gfs2_alloc { @@ -293,11 +304,6 @@ struct gfs2_file { struct gfs2_holder f_fl_gh; }; -struct gfs2_revoke { - struct gfs2_log_element rv_le; - u64 rv_blkno; -}; - struct gfs2_revoke_replay { struct list_head rr_list; u64 rr_blkno; @@ -335,12 +341,6 @@ struct gfs2_quota_data { unsigned long qd_last_touched; }; -struct gfs2_log_buf { - struct list_head lb_list; - struct buffer_head *lb_bh; - struct buffer_head *lb_real; -}; - struct gfs2_trans { unsigned long tr_ip; @@ -429,7 +429,6 @@ struct gfs2_tune { unsigned int gt_log_flush_secs; unsigned int gt_jindex_refresh_secs; /* Check for new journal index */ - unsigned int gt_scand_secs; unsigned int gt_recoverd_secs; unsigned int gt_logd_secs; unsigned int gt_quotad_secs; @@ -574,7 +573,6 @@ struct gfs2_sbd { /* Daemon stuff */ - struct task_struct *sd_scand_process; struct task_struct *sd_recoverd_process; struct task_struct *sd_logd_process; struct task_struct *sd_quotad_process; @@ -609,13 +607,13 @@ struct gfs2_sbd { unsigned int sd_log_num_revoke; unsigned int sd_log_num_rg; unsigned int sd_log_num_databuf; - unsigned int sd_log_num_jdata; struct list_head sd_log_le_gl; struct list_head sd_log_le_buf; struct list_head sd_log_le_revoke; struct list_head sd_log_le_rg; struct list_head sd_log_le_databuf; + struct list_head sd_log_le_ordered; unsigned int sd_log_blks_free; struct mutex sd_log_reserve_mutex; @@ -627,7 +625,8 @@ struct gfs2_sbd { unsigned long sd_log_flush_time; struct rw_semaphore sd_log_flush_lock; - struct list_head sd_log_flush_list; + atomic_t sd_log_in_flight; + wait_queue_head_t sd_log_flush_wait; unsigned int sd_log_flush_head; u64 sd_log_flush_wrapped; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 34f7bcdea1e..5f6dc32946c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -77,6 +77,49 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) return iget5_locked(sb, hash, iget_test, iget_set, &no_addr); } +struct gfs2_skip_data { + u64 no_addr; + int skipped; +}; + +static int iget_skip_test(struct inode *inode, void *opaque) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_skip_data *data = opaque; + + if (ip->i_no_addr == data->no_addr && inode->i_private != NULL){ + if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)){ + data->skipped = 1; + return 0; + } + return 1; + } + return 0; +} + +static int iget_skip_set(struct inode *inode, void *opaque) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_skip_data *data = opaque; + + if (data->skipped) + return 1; + inode->i_ino = (unsigned long)(data->no_addr); + ip->i_no_addr = data->no_addr; + return 0; +} + +static struct inode *gfs2_iget_skip(struct super_block *sb, + u64 no_addr) +{ + struct gfs2_skip_data data; + unsigned long hash = (unsigned long)no_addr; + + data.no_addr = no_addr; + data.skipped = 0; + return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data); +} + /** * GFS2 lookup code fills in vfs inode contents based on info obtained * from directory entry inside gfs2_inode_lookup(). This has caused issues @@ -112,6 +155,7 @@ void gfs2_set_iop(struct inode *inode) * @sb: The super block * @no_addr: The inode number * @type: The type of the inode + * @skip_freeing: set this not return an inode if it is currently being freed. * * Returns: A VFS inode, or an error */ @@ -119,13 +163,19 @@ void gfs2_set_iop(struct inode *inode) struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, u64 no_addr, - u64 no_formal_ino) + u64 no_formal_ino, int skip_freeing) { - struct inode *inode = gfs2_iget(sb, no_addr); - struct gfs2_inode *ip = GFS2_I(inode); + struct inode *inode; + struct gfs2_inode *ip; struct gfs2_glock *io_gl; int error; + if (skip_freeing) + inode = gfs2_iget_skip(sb, no_addr); + else + inode = gfs2_iget(sb, no_addr); + ip = GFS2_I(inode); + if (!inode) return ERR_PTR(-ENOBUFS); @@ -244,6 +294,11 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) return 0; } +static void gfs2_inode_bh(struct gfs2_inode *ip, struct buffer_head *bh) +{ + ip->i_cache[0] = bh; +} + /** * gfs2_inode_refresh - Refresh the incore copy of the dinode * @ip: The GFS2 inode @@ -688,7 +743,7 @@ out: static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, const struct gfs2_inum_host *inum, unsigned int mode, unsigned int uid, unsigned int gid, - const u64 *generation, dev_t dev) + const u64 *generation, dev_t dev, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); struct gfs2_dinode *di; @@ -743,13 +798,15 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, di->di_mtime_nsec = cpu_to_be32(tv.tv_nsec); di->di_ctime_nsec = cpu_to_be32(tv.tv_nsec); memset(&di->di_reserved, 0, sizeof(di->di_reserved)); + + set_buffer_uptodate(dibh); - brelse(dibh); + *bhp = dibh; } static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, unsigned int mode, const struct gfs2_inum_host *inum, - const u64 *generation, dev_t dev) + const u64 *generation, dev_t dev, struct buffer_head **bhp) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); unsigned int uid, gid; @@ -770,7 +827,7 @@ static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl, if (error) goto out_quota; - init_dinode(dip, gl, inum, mode, uid, gid, generation, dev); + init_dinode(dip, gl, inum, mode, uid, gid, generation, dev, bhp); gfs2_quota_change(dip, +1, uid, gid); gfs2_trans_end(sdp); @@ -909,6 +966,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, struct gfs2_inum_host inum = { .no_addr = 0, .no_formal_ino = 0 }; int error; u64 generation; + struct buffer_head *bh=NULL; if (!name->len || name->len > GFS2_FNAMESIZE) return ERR_PTR(-ENAMETOOLONG); @@ -935,16 +993,18 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, if (error) goto fail_gunlock; - error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev); + error = make_dinode(dip, ghs[1].gh_gl, mode, &inum, &generation, dev, &bh); if (error) goto fail_gunlock2; inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, - inum.no_formal_ino); + inum.no_formal_ino, 0); if (IS_ERR(inode)) goto fail_gunlock2; + gfs2_inode_bh(GFS2_I(inode), bh); + error = gfs2_inode_refresh(GFS2_I(inode)); if (error) goto fail_gunlock2; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 4517ac82c01..351ac87ab38 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -49,7 +49,8 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, void gfs2_inode_attr_in(struct gfs2_inode *ip); void gfs2_set_iop(struct inode *inode); struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, - u64 no_addr, u64 no_formal_ino); + u64 no_addr, u64 no_formal_ino, + int skip_freeing); struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr); int gfs2_inode_refresh(struct gfs2_inode *ip); diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index 24d70f73b65..9e8265d2837 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/spinlock.h> -#include <linux/module.h> #include <linux/types.h> #include <linux/string.h> #include <linux/list.h> diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c index fba1f1d87e4..1f7b038530b 100644 --- a/fs/gfs2/locking/dlm/plock.c +++ b/fs/gfs2/locking/dlm/plock.c @@ -346,15 +346,16 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, static unsigned int dev_poll(struct file *file, poll_table *wait) { + unsigned int mask = 0; + poll_wait(file, &send_wq, wait); spin_lock(&ops_lock); - if (!list_empty(&send_list)) { - spin_unlock(&ops_lock); - return POLLIN | POLLRDNORM; - } + if (!list_empty(&send_list)) + mask = POLLIN | POLLRDNORM; spin_unlock(&ops_lock); - return 0; + + return mask; } static const struct file_operations dev_fops = { diff --git a/fs/gfs2/locking/dlm/thread.c b/fs/gfs2/locking/dlm/thread.c index 1aca51e4509..bd938f06481 100644 --- a/fs/gfs2/locking/dlm/thread.c +++ b/fs/gfs2/locking/dlm/thread.c @@ -268,20 +268,16 @@ static inline int check_drop(struct gdlm_ls *ls) return 0; } -static int gdlm_thread(void *data) +static int gdlm_thread(void *data, int blist) { struct gdlm_ls *ls = (struct gdlm_ls *) data; struct gdlm_lock *lp = NULL; - int blist = 0; uint8_t complete, blocking, submit, drop; DECLARE_WAITQUEUE(wait, current); /* Only thread1 is allowed to do blocking callbacks since gfs may wait for a completion callback within a blocking cb. */ - if (current == ls->thread1) - blist = 1; - while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); add_wait_queue(&ls->thread_wait, &wait); @@ -333,12 +329,22 @@ static int gdlm_thread(void *data) return 0; } +static int gdlm_thread1(void *data) +{ + return gdlm_thread(data, 1); +} + +static int gdlm_thread2(void *data) +{ + return gdlm_thread(data, 0); +} + int gdlm_init_threads(struct gdlm_ls *ls) { struct task_struct *p; int error; - p = kthread_run(gdlm_thread, ls, "lock_dlm1"); + p = kthread_run(gdlm_thread1, ls, "lock_dlm1"); error = IS_ERR(p); if (error) { log_error("can't start lock_dlm1 thread %d", error); @@ -346,7 +352,7 @@ int gdlm_init_threads(struct gdlm_ls *ls) } ls->thread1 = p; - p = kthread_run(gdlm_thread, ls, "lock_dlm2"); + p = kthread_run(gdlm_thread2, ls, "lock_dlm2"); error = IS_ERR(p); if (error) { log_error("can't start lock_dlm2 thread %d", error); diff --git a/fs/gfs2/locking/nolock/main.c b/fs/gfs2/locking/nolock/main.c index 0d149c8c493..d3b8ce6fbbe 100644 --- a/fs/gfs2/locking/nolock/main.c +++ b/fs/gfs2/locking/nolock/main.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/slab.h> -#include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/fs.h> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index f49a12e2408..7df70247325 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -60,6 +60,26 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, } /** + * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters + * @mapping: The associated mapping (maybe NULL) + * @bd: The gfs2_bufdata to remove + * + * The log lock _must_ be held when calling this function + * + */ + +void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd) +{ + bd->bd_ail = NULL; + list_del_init(&bd->bd_ail_st_list); + list_del_init(&bd->bd_ail_gl_list); + atomic_dec(&bd->bd_gl->gl_ail_count); + if (mapping) + gfs2_meta_cache_flush(GFS2_I(mapping->host)); + brelse(bd->bd_bh); +} + +/** * gfs2_ail1_start_one - Start I/O on a part of the AIL * @sdp: the filesystem * @tr: the part of the AIL @@ -83,17 +103,9 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) gfs2_assert(sdp, bd->bd_ail == ai); - if (!bh){ - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); - continue; - } - if (!buffer_busy(bh)) { - if (!buffer_uptodate(bh)) { - gfs2_log_unlock(sdp); + if (!buffer_uptodate(bh)) gfs2_io_error_bh(sdp, bh); - gfs2_log_lock(sdp); - } list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); continue; } @@ -103,9 +115,16 @@ static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); + get_bh(bh); gfs2_log_unlock(sdp); - wait_on_buffer(bh); - ll_rw_block(WRITE, 1, &bh); + lock_buffer(bh); + if (test_clear_buffer_dirty(bh)) { + bh->b_end_io = end_buffer_write_sync; + submit_bh(WRITE, bh); + } else { + unlock_buffer(bh); + brelse(bh); + } gfs2_log_lock(sdp); retry = 1; @@ -130,11 +149,6 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl bd_ail_st_list) { bh = bd->bd_bh; - if (!bh){ - list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); - continue; - } - gfs2_assert(sdp, bd->bd_ail == ai); if (buffer_busy(bh)) { @@ -155,13 +169,14 @@ static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int fl static void gfs2_ail1_start(struct gfs2_sbd *sdp, int flags) { - struct list_head *head = &sdp->sd_ail1_list; + struct list_head *head; u64 sync_gen; struct list_head *first; struct gfs2_ail *first_ai, *ai, *tmp; int done = 0; gfs2_log_lock(sdp); + head = &sdp->sd_ail1_list; if (list_empty(head)) { gfs2_log_unlock(sdp); return; @@ -233,11 +248,7 @@ static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) bd = list_entry(head->prev, struct gfs2_bufdata, bd_ail_st_list); gfs2_assert(sdp, bd->bd_ail == ai); - bd->bd_ail = NULL; - list_del(&bd->bd_ail_st_list); - list_del(&bd->bd_ail_gl_list); - atomic_dec(&bd->bd_gl->gl_ail_count); - brelse(bd->bd_bh); + gfs2_remove_from_ail(bd->bd_bh->b_page->mapping, bd); } } @@ -439,10 +450,10 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) return tail; } -static inline void log_incr_head(struct gfs2_sbd *sdp) +void gfs2_log_incr_head(struct gfs2_sbd *sdp) { if (sdp->sd_log_flush_head == sdp->sd_log_tail) - gfs2_assert_withdraw(sdp, sdp->sd_log_flush_head == sdp->sd_log_head); + BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head); if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { sdp->sd_log_flush_head = 0; @@ -451,6 +462,23 @@ static inline void log_incr_head(struct gfs2_sbd *sdp) } /** + * gfs2_log_write_endio - End of I/O for a log buffer + * @bh: The buffer head + * @uptodate: I/O Status + * + */ + +static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate) +{ + struct gfs2_sbd *sdp = bh->b_private; + bh->b_private = NULL; + + end_buffer_write_sync(bh, uptodate); + if (atomic_dec_and_test(&sdp->sd_log_in_flight)) + wake_up(&sdp->sd_log_flush_wait); +} + +/** * gfs2_log_get_buf - Get and initialize a buffer to use for log control data * @sdp: The GFS2 superblock * @@ -460,25 +488,43 @@ static inline void log_incr_head(struct gfs2_sbd *sdp) struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) { u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); - struct gfs2_log_buf *lb; struct buffer_head *bh; - lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); - list_add(&lb->lb_list, &sdp->sd_log_flush_list); - - bh = lb->lb_bh = sb_getblk(sdp->sd_vfs, blkno); + bh = sb_getblk(sdp->sd_vfs, blkno); lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); set_buffer_uptodate(bh); clear_buffer_dirty(bh); - unlock_buffer(bh); - - log_incr_head(sdp); + gfs2_log_incr_head(sdp); + atomic_inc(&sdp->sd_log_in_flight); + bh->b_private = sdp; + bh->b_end_io = gfs2_log_write_endio; return bh; } /** + * gfs2_fake_write_endio - + * @bh: The buffer head + * @uptodate: The I/O Status + * + */ + +static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate) +{ + struct buffer_head *real_bh = bh->b_private; + struct gfs2_bufdata *bd = real_bh->b_private; + struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd; + + end_buffer_write_sync(bh, uptodate); + free_buffer_head(bh); + unlock_buffer(real_bh); + brelse(real_bh); + if (atomic_dec_and_test(&sdp->sd_log_in_flight)) + wake_up(&sdp->sd_log_flush_wait); +} + +/** * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log * @sdp: the filesystem * @data: the data the buffer_head should point to @@ -490,22 +536,20 @@ struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, struct buffer_head *real) { u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); - struct gfs2_log_buf *lb; struct buffer_head *bh; - lb = kzalloc(sizeof(struct gfs2_log_buf), GFP_NOFS | __GFP_NOFAIL); - list_add(&lb->lb_list, &sdp->sd_log_flush_list); - lb->lb_real = real; - - bh = lb->lb_bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); + bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); atomic_set(&bh->b_count, 1); - bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate); + bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); set_bh_page(bh, real->b_page, bh_offset(real)); bh->b_blocknr = blkno; bh->b_size = sdp->sd_sb.sb_bsize; bh->b_bdev = sdp->sd_vfs->s_bdev; + bh->b_private = real; + bh->b_end_io = gfs2_fake_write_endio; - log_incr_head(sdp); + gfs2_log_incr_head(sdp); + atomic_inc(&sdp->sd_log_in_flight); return bh; } @@ -572,45 +616,75 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) gfs2_assert_withdraw(sdp, !pull); sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); - log_incr_head(sdp); + gfs2_log_incr_head(sdp); } static void log_flush_commit(struct gfs2_sbd *sdp) { - struct list_head *head = &sdp->sd_log_flush_list; - struct gfs2_log_buf *lb; - struct buffer_head *bh; - int flushcount = 0; + DEFINE_WAIT(wait); + + if (atomic_read(&sdp->sd_log_in_flight)) { + do { + prepare_to_wait(&sdp->sd_log_flush_wait, &wait, + TASK_UNINTERRUPTIBLE); + if (atomic_read(&sdp->sd_log_in_flight)) + io_schedule(); + } while(atomic_read(&sdp->sd_log_in_flight)); + finish_wait(&sdp->sd_log_flush_wait, &wait); + } - while (!list_empty(head)) { - lb = list_entry(head->next, struct gfs2_log_buf, lb_list); - list_del(&lb->lb_list); - bh = lb->lb_bh; + log_write_header(sdp, 0, 0); +} - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); - if (lb->lb_real) { - while (atomic_read(&bh->b_count) != 1) /* Grrrr... */ - schedule(); - free_buffer_head(bh); - } else +static void gfs2_ordered_write(struct gfs2_sbd *sdp) +{ + struct gfs2_bufdata *bd; + struct buffer_head *bh; + LIST_HEAD(written); + + gfs2_log_lock(sdp); + while (!list_empty(&sdp->sd_log_le_ordered)) { + bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); + list_move(&bd->bd_le.le_list, &written); + bh = bd->bd_bh; + if (!buffer_dirty(bh)) + continue; + get_bh(bh); + gfs2_log_unlock(sdp); + lock_buffer(bh); + if (test_clear_buffer_dirty(bh)) { + bh->b_end_io = end_buffer_write_sync; + submit_bh(WRITE, bh); + } else { + unlock_buffer(bh); brelse(bh); - kfree(lb); - flushcount++; + } + gfs2_log_lock(sdp); } + list_splice(&written, &sdp->sd_log_le_ordered); + gfs2_log_unlock(sdp); +} - /* If nothing was journaled, the header is unplanned and unwanted. */ - if (flushcount) { - log_write_header(sdp, 0, 0); - } else { - unsigned int tail; - tail = current_tail(sdp); +static void gfs2_ordered_wait(struct gfs2_sbd *sdp) +{ + struct gfs2_bufdata *bd; + struct buffer_head *bh; - gfs2_ail1_empty(sdp, 0); - if (sdp->sd_log_tail != tail) - log_pull_tail(sdp, tail); + gfs2_log_lock(sdp); + while (!list_empty(&sdp->sd_log_le_ordered)) { + bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list); + bh = bd->bd_bh; + if (buffer_locked(bh)) { + get_bh(bh); + gfs2_log_unlock(sdp); + wait_on_buffer(bh); + brelse(bh); + gfs2_log_lock(sdp); + continue; + } + list_del_init(&bd->bd_le.le_list); } + gfs2_log_unlock(sdp); } /** @@ -640,10 +714,16 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) INIT_LIST_HEAD(&ai->ai_ail1_list); INIT_LIST_HEAD(&ai->ai_ail2_list); - gfs2_assert_withdraw(sdp, - sdp->sd_log_num_buf + sdp->sd_log_num_jdata == - sdp->sd_log_commited_buf + - sdp->sd_log_commited_databuf); + if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) { + printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf, + sdp->sd_log_commited_buf); + gfs2_assert_withdraw(sdp, 0); + } + if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) { + printk(KERN_INFO "GFS2: log databuf %u %u\n", + sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf); + gfs2_assert_withdraw(sdp, 0); + } gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); @@ -651,8 +731,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) sdp->sd_log_flush_wrapped = 0; ai->ai_first = sdp->sd_log_flush_head; + gfs2_ordered_write(sdp); lops_before_commit(sdp); - if (!list_empty(&sdp->sd_log_flush_list)) + gfs2_ordered_wait(sdp); + + if (sdp->sd_log_head != sdp->sd_log_flush_head) log_flush_commit(sdp); else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ gfs2_log_lock(sdp); @@ -744,7 +827,6 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_gl); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); - gfs2_assert_withdraw(sdp, !sdp->sd_log_num_jdata); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 8e7aa0f2910..dae28240062 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -52,12 +52,14 @@ int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags); int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks); void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks); +void gfs2_log_incr_head(struct gfs2_sbd *sdp); struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp); struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, struct buffer_head *real); void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl); void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); +void gfs2_remove_from_ail(struct address_space *mapping, struct gfs2_bufdata *bd); void gfs2_log_shutdown(struct gfs2_sbd *sdp); void gfs2_meta_syncfs(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 3b395c41b2f..6c27cea761c 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -27,7 +27,104 @@ #include "trans.h" #include "util.h" -static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +/** + * gfs2_pin - Pin a buffer in memory + * @sdp: The superblock + * @bh: The buffer to be pinned + * + * The log lock must be held when calling this function + */ +static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) +{ + struct gfs2_bufdata *bd; + + gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); + + clear_buffer_dirty(bh); + if (test_set_buffer_pinned(bh)) + gfs2_assert_withdraw(sdp, 0); + if (!buffer_uptodate(bh)) + gfs2_io_error_bh(sdp, bh); + bd = bh->b_private; + /* If this buffer is in the AIL and it has already been written + * to in-place disk block, remove it from the AIL. + */ + if (bd->bd_ail) + list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); + get_bh(bh); +} + +/** + * gfs2_unpin - Unpin a buffer + * @sdp: the filesystem the buffer belongs to + * @bh: The buffer to unpin + * @ai: + * + */ + +static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, + struct gfs2_ail *ai) +{ + struct gfs2_bufdata *bd = bh->b_private; + + gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); + + if (!buffer_pinned(bh)) + gfs2_assert_withdraw(sdp, 0); + + lock_buffer(bh); + mark_buffer_dirty(bh); + clear_buffer_pinned(bh); + + gfs2_log_lock(sdp); + if (bd->bd_ail) { + list_del(&bd->bd_ail_st_list); + brelse(bh); + } else { + struct gfs2_glock *gl = bd->bd_gl; + list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list); + atomic_inc(&gl->gl_ail_count); + } + bd->bd_ail = ai; + list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); + gfs2_log_unlock(sdp); + unlock_buffer(bh); +} + + +static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh) +{ + return (struct gfs2_log_descriptor *)bh->b_data; +} + +static inline __be64 *bh_log_ptr(struct buffer_head *bh) +{ + struct gfs2_log_descriptor *ld = bh_log_desc(bh); + return (__force __be64 *)(ld + 1); +} + +static inline __be64 *bh_ptr_end(struct buffer_head *bh) +{ + return (__force __be64 *)(bh->b_data + bh->b_size); +} + + +static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type) +{ + struct buffer_head *bh = gfs2_log_get_buf(sdp); + struct gfs2_log_descriptor *ld = bh_log_desc(bh); + ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); + ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); + ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); + ld->ld_type = cpu_to_be32(ld_type); + ld->ld_length = 0; + ld->ld_data1 = 0; + ld->ld_data2 = 0; + memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); + return bh; +} + +static void __glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) { struct gfs2_glock *gl; struct gfs2_trans *tr = current->journal_info; @@ -38,15 +135,19 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) return; - gfs2_log_lock(sdp); - if (!list_empty(&le->le_list)){ - gfs2_log_unlock(sdp); + if (!list_empty(&le->le_list)) return; - } + gfs2_glock_hold(gl); set_bit(GLF_DIRTY, &gl->gl_flags); sdp->sd_log_num_gl++; list_add(&le->le_list, &sdp->sd_log_le_gl); +} + +static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) +{ + gfs2_log_lock(sdp); + __glock_lo_add(sdp, le); gfs2_log_unlock(sdp); } @@ -71,30 +172,25 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); struct gfs2_trans *tr; + lock_buffer(bd->bd_bh); gfs2_log_lock(sdp); - if (!list_empty(&bd->bd_list_tr)) { - gfs2_log_unlock(sdp); - return; - } + if (!list_empty(&bd->bd_list_tr)) + goto out; tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); - gfs2_log_unlock(sdp); - if (!list_empty(&le->le_list)) - return; - - gfs2_trans_add_gl(bd->bd_gl); - + goto out; + __glock_lo_add(sdp, &bd->bd_gl->gl_le); gfs2_meta_check(sdp, bd->bd_bh); gfs2_pin(sdp, bd->bd_bh); - gfs2_log_lock(sdp); sdp->sd_log_num_buf++; list_add(&le->le_list, &sdp->sd_log_le_buf); - gfs2_log_unlock(sdp); - tr->tr_num_buf_new++; +out: + gfs2_log_unlock(sdp); + unlock_buffer(bd->bd_bh); } static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) @@ -117,8 +213,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) struct buffer_head *bh; struct gfs2_log_descriptor *ld; struct gfs2_bufdata *bd1 = NULL, *bd2; - unsigned int total = sdp->sd_log_num_buf; - unsigned int offset = BUF_OFFSET; + unsigned int total; unsigned int limit; unsigned int num; unsigned n; @@ -127,22 +222,20 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) limit = buf_limit(sdp); /* for 4k blocks, limit = 503 */ + gfs2_log_lock(sdp); + total = sdp->sd_log_num_buf; bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); while(total) { num = total; if (total > limit) num = limit; - bh = gfs2_log_get_buf(sdp); - ld = (struct gfs2_log_descriptor *)bh->b_data; - ptr = (__be64 *)(bh->b_data + offset); - ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); - ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); - ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); - ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA); + gfs2_log_unlock(sdp); + bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA); + gfs2_log_lock(sdp); + ld = bh_log_desc(bh); + ptr = bh_log_ptr(bh); ld->ld_length = cpu_to_be32(num + 1); ld->ld_data1 = cpu_to_be32(num); - ld->ld_data2 = cpu_to_be32(0); - memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); n = 0; list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, @@ -152,21 +245,27 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) break; } - set_buffer_dirty(bh); - ll_rw_block(WRITE, 1, &bh); + gfs2_log_unlock(sdp); + submit_bh(WRITE, bh); + gfs2_log_lock(sdp); n = 0; list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, bd_le.le_list) { + get_bh(bd2->bd_bh); + gfs2_log_unlock(sdp); + lock_buffer(bd2->bd_bh); bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - set_buffer_dirty(bh); - ll_rw_block(WRITE, 1, &bh); + submit_bh(WRITE, bh); + gfs2_log_lock(sdp); if (++n >= num) break; } + BUG_ON(total < num); total -= num; } + gfs2_log_unlock(sdp); } static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) @@ -270,11 +369,8 @@ static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) tr = current->journal_info; tr->tr_touched = 1; tr->tr_num_revoke++; - - gfs2_log_lock(sdp); sdp->sd_log_num_revoke++; list_add(&le->le_list, &sdp->sd_log_le_revoke); - gfs2_log_unlock(sdp); } static void revoke_lo_before_commit(struct gfs2_sbd *sdp) @@ -284,32 +380,25 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) struct buffer_head *bh; unsigned int offset; struct list_head *head = &sdp->sd_log_le_revoke; - struct gfs2_revoke *rv; + struct gfs2_bufdata *bd; if (!sdp->sd_log_num_revoke) return; - bh = gfs2_log_get_buf(sdp); - ld = (struct gfs2_log_descriptor *)bh->b_data; - ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); - ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); - ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); - ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE); + bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE); + ld = bh_log_desc(bh); ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, sizeof(u64))); ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); - ld->ld_data2 = cpu_to_be32(0); - memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); offset = sizeof(struct gfs2_log_descriptor); while (!list_empty(head)) { - rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list); - list_del_init(&rv->rv_le.le_list); + bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); + list_del_init(&bd->bd_le.le_list); sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - set_buffer_dirty(bh); - ll_rw_block(WRITE, 1, &bh); + submit_bh(WRITE, bh); bh = gfs2_log_get_buf(sdp); mh = (struct gfs2_meta_header *)bh->b_data; @@ -319,15 +408,14 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) offset = sizeof(struct gfs2_meta_header); } - *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno); - kfree(rv); + *(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno); + kmem_cache_free(gfs2_bufdata_cachep, bd); offset += sizeof(u64); } gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - set_buffer_dirty(bh); - ll_rw_block(WRITE, 1, &bh); + submit_bh(WRITE, bh); } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, @@ -466,222 +554,136 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) struct address_space *mapping = bd->bd_bh->b_page->mapping; struct gfs2_inode *ip = GFS2_I(mapping->host); + lock_buffer(bd->bd_bh); gfs2_log_lock(sdp); - if (!list_empty(&bd->bd_list_tr)) { - gfs2_log_unlock(sdp); - return; - } + if (!list_empty(&bd->bd_list_tr)) + goto out; tr->tr_touched = 1; if (gfs2_is_jdata(ip)) { tr->tr_num_buf++; list_add(&bd->bd_list_tr, &tr->tr_list_buf); } - gfs2_log_unlock(sdp); if (!list_empty(&le->le_list)) - return; + goto out; - gfs2_trans_add_gl(bd->bd_gl); + __glock_lo_add(sdp, &bd->bd_gl->gl_le); if (gfs2_is_jdata(ip)) { - sdp->sd_log_num_jdata++; gfs2_pin(sdp, bd->bd_bh); tr->tr_num_databuf_new++; + sdp->sd_log_num_databuf++; + list_add(&le->le_list, &sdp->sd_log_le_databuf); + } else { + list_add(&le->le_list, &sdp->sd_log_le_ordered); } - gfs2_log_lock(sdp); - sdp->sd_log_num_databuf++; - list_add(&le->le_list, &sdp->sd_log_le_databuf); +out: gfs2_log_unlock(sdp); + unlock_buffer(bd->bd_bh); } -static int gfs2_check_magic(struct buffer_head *bh) +static void gfs2_check_magic(struct buffer_head *bh) { - struct page *page = bh->b_page; void *kaddr; __be32 *ptr; - int rv = 0; - kaddr = kmap_atomic(page, KM_USER0); + clear_buffer_escaped(bh); + kaddr = kmap_atomic(bh->b_page, KM_USER0); ptr = kaddr + bh_offset(bh); if (*ptr == cpu_to_be32(GFS2_MAGIC)) - rv = 1; + set_buffer_escaped(bh); kunmap_atomic(kaddr, KM_USER0); - - return rv; } -/** - * databuf_lo_before_commit - Scan the data buffers, writing as we go - * - * Here we scan through the lists of buffers and make the assumption - * that any buffer thats been pinned is being journaled, and that - * any unpinned buffer is an ordered write data buffer and therefore - * will be written back rather than journaled. - */ -static void databuf_lo_before_commit(struct gfs2_sbd *sdp) +static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, + struct list_head *list, struct list_head *done, + unsigned int n) { - LIST_HEAD(started); - struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; - struct buffer_head *bh = NULL,*bh1 = NULL; + struct buffer_head *bh1; struct gfs2_log_descriptor *ld; - unsigned int limit; - unsigned int total_dbuf; - unsigned int total_jdata = sdp->sd_log_num_jdata; - unsigned int num, n; - __be64 *ptr = NULL; + struct gfs2_bufdata *bd; + __be64 *ptr; + + if (!bh) + return; - limit = databuf_limit(sdp); + ld = bh_log_desc(bh); + ld->ld_length = cpu_to_be32(n + 1); + ld->ld_data1 = cpu_to_be32(n); - /* - * Start writing ordered buffers, write journaled buffers - * into the log along with a header - */ + ptr = bh_log_ptr(bh); + + get_bh(bh); + submit_bh(WRITE, bh); gfs2_log_lock(sdp); - total_dbuf = sdp->sd_log_num_databuf; - bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, - bd_le.le_list); - while(total_dbuf) { - num = total_jdata; - if (num > limit) - num = limit; - n = 0; - list_for_each_entry_safe_continue(bd1, bdt, - &sdp->sd_log_le_databuf, - bd_le.le_list) { - /* store off the buffer head in a local ptr since - * gfs2_bufdata might change when we drop the log lock - */ - bh1 = bd1->bd_bh; - - /* An ordered write buffer */ - if (bh1 && !buffer_pinned(bh1)) { - list_move(&bd1->bd_le.le_list, &started); - if (bd1 == bd2) { - bd2 = NULL; - bd2 = list_prepare_entry(bd2, - &sdp->sd_log_le_databuf, - bd_le.le_list); - } - total_dbuf--; - if (bh1) { - if (buffer_dirty(bh1)) { - get_bh(bh1); - - gfs2_log_unlock(sdp); - - ll_rw_block(SWRITE, 1, &bh1); - brelse(bh1); - - gfs2_log_lock(sdp); - } - continue; - } - continue; - } else if (bh1) { /* A journaled buffer */ - int magic; - gfs2_log_unlock(sdp); - if (!bh) { - bh = gfs2_log_get_buf(sdp); - ld = (struct gfs2_log_descriptor *) - bh->b_data; - ptr = (__be64 *)(bh->b_data + - DATABUF_OFFSET); - ld->ld_header.mh_magic = - cpu_to_be32(GFS2_MAGIC); - ld->ld_header.mh_type = - cpu_to_be32(GFS2_METATYPE_LD); - ld->ld_header.mh_format = - cpu_to_be32(GFS2_FORMAT_LD); - ld->ld_type = - cpu_to_be32(GFS2_LOG_DESC_JDATA); - ld->ld_length = cpu_to_be32(num + 1); - ld->ld_data1 = cpu_to_be32(num); - ld->ld_data2 = cpu_to_be32(0); - memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); - } - magic = gfs2_check_magic(bh1); - *ptr++ = cpu_to_be64(bh1->b_blocknr); - *ptr++ = cpu_to_be64((__u64)magic); - clear_buffer_escaped(bh1); - if (unlikely(magic != 0)) - set_buffer_escaped(bh1); - gfs2_log_lock(sdp); - if (++n >= num) - break; - } else if (!bh1) { - total_dbuf--; - sdp->sd_log_num_databuf--; - list_del_init(&bd1->bd_le.le_list); - if (bd1 == bd2) { - bd2 = NULL; - bd2 = list_prepare_entry(bd2, - &sdp->sd_log_le_databuf, - bd_le.le_list); - } - kmem_cache_free(gfs2_bufdata_cachep, bd1); - } + while(!list_empty(list)) { + bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); + list_move_tail(&bd->bd_le.le_list, done); + get_bh(bd->bd_bh); + while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) { + gfs2_log_incr_head(sdp); + ptr += 2; } gfs2_log_unlock(sdp); - if (bh) { - set_buffer_mapped(bh); - set_buffer_dirty(bh); - ll_rw_block(WRITE, 1, &bh); - bh = NULL; + lock_buffer(bd->bd_bh); + if (buffer_escaped(bd->bd_bh)) { + void *kaddr; + bh1 = gfs2_log_get_buf(sdp); + kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0); + memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh), + bh1->b_size); + kunmap_atomic(kaddr, KM_USER0); + *(__be32 *)bh1->b_data = 0; + clear_buffer_escaped(bd->bd_bh); + unlock_buffer(bd->bd_bh); + brelse(bd->bd_bh); + } else { + bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); } - n = 0; + submit_bh(WRITE, bh1); gfs2_log_lock(sdp); - list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, - bd_le.le_list) { - if (!bd2->bd_bh) - continue; - /* copy buffer if it needs escaping */ - gfs2_log_unlock(sdp); - if (unlikely(buffer_escaped(bd2->bd_bh))) { - void *kaddr; - struct page *page = bd2->bd_bh->b_page; - bh = gfs2_log_get_buf(sdp); - kaddr = kmap_atomic(page, KM_USER0); - memcpy(bh->b_data, - kaddr + bh_offset(bd2->bd_bh), - sdp->sd_sb.sb_bsize); - kunmap_atomic(kaddr, KM_USER0); - *(__be32 *)bh->b_data = 0; - } else { - bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - } - set_buffer_dirty(bh); - ll_rw_block(WRITE, 1, &bh); - gfs2_log_lock(sdp); - if (++n >= num) - break; - } - bh = NULL; - BUG_ON(total_dbuf < num); - total_dbuf -= num; - total_jdata -= num; + ptr += 2; } gfs2_log_unlock(sdp); + brelse(bh); +} - /* Wait on all ordered buffers */ - while (!list_empty(&started)) { - gfs2_log_lock(sdp); - bd1 = list_entry(started.next, struct gfs2_bufdata, - bd_le.le_list); - list_del_init(&bd1->bd_le.le_list); - sdp->sd_log_num_databuf--; - bh = bd1->bd_bh; - if (bh) { - bh->b_private = NULL; - get_bh(bh); - gfs2_log_unlock(sdp); - wait_on_buffer(bh); - brelse(bh); - } else - gfs2_log_unlock(sdp); +/** + * databuf_lo_before_commit - Scan the data buffers, writing as we go + * + */ - kmem_cache_free(gfs2_bufdata_cachep, bd1); - } +static void databuf_lo_before_commit(struct gfs2_sbd *sdp) +{ + struct gfs2_bufdata *bd = NULL; + struct buffer_head *bh = NULL; + unsigned int n = 0; + __be64 *ptr = NULL, *end = NULL; + LIST_HEAD(processed); + LIST_HEAD(in_progress); - /* We've removed all the ordered write bufs here, so only jdata left */ - gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); + gfs2_log_lock(sdp); + while (!list_empty(&sdp->sd_log_le_databuf)) { + if (ptr == end) { + gfs2_log_unlock(sdp); + gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); + n = 0; + bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA); + ptr = bh_log_ptr(bh); + end = bh_ptr_end(bh) - 1; + gfs2_log_lock(sdp); + continue; + } + bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list); + list_move_tail(&bd->bd_le.le_list, &in_progress); + gfs2_check_magic(bd->bd_bh); + *ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr); + *ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0); + n++; + } + gfs2_log_unlock(sdp); + gfs2_write_blocks(sdp, bh, &in_progress, &processed, n); + gfs2_log_lock(sdp); + list_splice(&processed, &sdp->sd_log_le_databuf); + gfs2_log_unlock(sdp); } static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, @@ -765,11 +767,9 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); list_del_init(&bd->bd_le.le_list); sdp->sd_log_num_databuf--; - sdp->sd_log_num_jdata--; gfs2_unpin(sdp, bd->bd_bh, ai); } gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); - gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata); } @@ -817,10 +817,10 @@ const struct gfs2_log_operations gfs2_databuf_lops = { const struct gfs2_log_operations *gfs2_log_ops[] = { &gfs2_glock_lops, + &gfs2_databuf_lops, &gfs2_buf_lops, - &gfs2_revoke_lops, &gfs2_rg_lops, - &gfs2_databuf_lops, + &gfs2_revoke_lops, NULL, }; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index d5d4e68b880..79c91fd8381 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -107,6 +107,8 @@ static int __init init_gfs2_fs(void) fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: + gfs2_glock_exit(); + if (gfs2_bufdata_cachep) kmem_cache_destroy(gfs2_bufdata_cachep); @@ -127,6 +129,7 @@ fail: static void __exit exit_gfs2_fs(void) { + gfs2_glock_exit(); gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 8da343b34ae..4da423985e4 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -297,74 +297,35 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, unlock_page(bh->b_page); } -/** - * gfs2_pin - Pin a buffer in memory - * @sdp: the filesystem the buffer belongs to - * @bh: The buffer to be pinned - * - */ - -void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) +void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) { + struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host); struct gfs2_bufdata *bd = bh->b_private; - - gfs2_assert_withdraw(sdp, test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)); - - if (test_set_buffer_pinned(bh)) - gfs2_assert_withdraw(sdp, 0); - - wait_on_buffer(bh); - - /* If this buffer is in the AIL and it has already been written - to in-place disk block, remove it from the AIL. */ - - gfs2_log_lock(sdp); - if (bd->bd_ail && !buffer_in_io(bh)) - list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); - gfs2_log_unlock(sdp); - - clear_buffer_dirty(bh); - wait_on_buffer(bh); - - if (!buffer_uptodate(bh)) - gfs2_io_error_bh(sdp, bh); - - get_bh(bh); -} - -/** - * gfs2_unpin - Unpin a buffer - * @sdp: the filesystem the buffer belongs to - * @bh: The buffer to unpin - * @ai: - * - */ - -void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, - struct gfs2_ail *ai) -{ - struct gfs2_bufdata *bd = bh->b_private; - - gfs2_assert_withdraw(sdp, buffer_uptodate(bh)); - - if (!buffer_pinned(bh)) - gfs2_assert_withdraw(sdp, 0); - - mark_buffer_dirty(bh); - clear_buffer_pinned(bh); - - gfs2_log_lock(sdp); - if (bd->bd_ail) { - list_del(&bd->bd_ail_st_list); + if (test_clear_buffer_pinned(bh)) { + list_del_init(&bd->bd_le.le_list); + if (meta) { + gfs2_assert_warn(sdp, sdp->sd_log_num_buf); + sdp->sd_log_num_buf--; + tr->tr_num_buf_rm++; + } else { + gfs2_assert_warn(sdp, sdp->sd_log_num_databuf); + sdp->sd_log_num_databuf--; + tr->tr_num_databuf_rm++; + } + tr->tr_touched = 1; brelse(bh); - } else { - struct gfs2_glock *gl = bd->bd_gl; - list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list); - atomic_inc(&gl->gl_ail_count); } - bd->bd_ail = ai; - list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); - gfs2_log_unlock(sdp); + if (bd) { + if (bd->bd_ail) { + gfs2_remove_from_ail(NULL, bd); + bh->b_private = NULL; + bd->bd_bh = NULL; + bd->bd_blkno = bh->b_blocknr; + gfs2_trans_add_revoke(sdp, bd); + } + } + clear_buffer_dirty(bh); + clear_buffer_uptodate(bh); } /** @@ -383,44 +344,11 @@ void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) while (blen) { bh = getbuf(ip->i_gl, bstart, NO_CREATE); if (bh) { - struct gfs2_bufdata *bd = bh->b_private; - - if (test_clear_buffer_pinned(bh)) { - struct gfs2_trans *tr = current->journal_info; - struct gfs2_inode *bh_ip = - GFS2_I(bh->b_page->mapping->host); - - gfs2_log_lock(sdp); - list_del_init(&bd->bd_le.le_list); - gfs2_assert_warn(sdp, sdp->sd_log_num_buf); - sdp->sd_log_num_buf--; - gfs2_log_unlock(sdp); - if (bh_ip->i_inode.i_private != NULL) - tr->tr_num_databuf_rm++; - else - tr->tr_num_buf_rm++; - brelse(bh); - } - if (bd) { - gfs2_log_lock(sdp); - if (bd->bd_ail) { - u64 blkno = bh->b_blocknr; - bd->bd_ail = NULL; - list_del(&bd->bd_ail_st_list); - list_del(&bd->bd_ail_gl_list); - atomic_dec(&bd->bd_gl->gl_ail_count); - brelse(bh); - gfs2_log_unlock(sdp); - gfs2_trans_add_revoke(sdp, blkno); - } else - gfs2_log_unlock(sdp); - } - lock_buffer(bh); - clear_buffer_dirty(bh); - clear_buffer_uptodate(bh); + gfs2_log_lock(sdp); + gfs2_remove_from_journal(bh, current->journal_info, 1); + gfs2_log_unlock(sdp); unlock_buffer(bh); - brelse(bh); } @@ -446,10 +374,10 @@ void gfs2_meta_cache_flush(struct gfs2_inode *ip) for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) { bh_slot = &ip->i_cache[x]; - if (!*bh_slot) - break; - brelse(*bh_slot); - *bh_slot = NULL; + if (*bh_slot) { + brelse(*bh_slot); + *bh_slot = NULL; + } } spin_unlock(&ip->i_spin); diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 527bf19d969..b7048222ebb 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -50,9 +50,9 @@ int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, int meta); -void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh); -void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, - struct gfs2_ail *ai); + +void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, + int meta); void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index 4864659555d..b941f9f9f95 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -42,6 +42,7 @@ enum { Opt_nosuiddir, Opt_data_writeback, Opt_data_ordered, + Opt_err, }; static match_table_t tokens = { @@ -64,7 +65,8 @@ static match_table_t tokens = { {Opt_suiddir, "suiddir"}, {Opt_nosuiddir, "nosuiddir"}, {Opt_data_writeback, "data=writeback"}, - {Opt_data_ordered, "data=ordered"} + {Opt_data_ordered, "data=ordered"}, + {Opt_err, NULL} }; /** @@ -237,6 +239,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) case Opt_data_ordered: args->ar_data = GFS2_DATA_ORDERED; break; + case Opt_err: default: fs_info(sdp, "unknown option: %s\n", o); error = -EINVAL; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index 42a5f58f6fc..873a511ef2b 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -90,7 +90,7 @@ static int gfs2_get_block_noalloc(struct inode *inode, sector_t lblock, error = gfs2_block_map(inode, lblock, 0, bh_result); if (error) return error; - if (bh_result->b_blocknr == 0) + if (!buffer_mapped(bh_result)) return -EIO; return 0; } @@ -414,7 +414,8 @@ static int gfs2_prepare_write(struct file *file, struct page *page, if (ind_blocks || data_blocks) rblocks += RES_STATFS + RES_QUOTA; - error = gfs2_trans_begin(sdp, rblocks, 0); + error = gfs2_trans_begin(sdp, rblocks, + PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); if (error) goto out_trans_fail; @@ -616,58 +617,50 @@ static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock) return dblock; } -static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh) +static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) { struct gfs2_bufdata *bd; + lock_buffer(bh); gfs2_log_lock(sdp); + clear_buffer_dirty(bh); bd = bh->b_private; if (bd) { - bd->bd_bh = NULL; - bh->b_private = NULL; - if (!bd->bd_ail && list_empty(&bd->bd_le.le_list)) - kmem_cache_free(gfs2_bufdata_cachep, bd); + if (!list_empty(&bd->bd_le.le_list) && !buffer_pinned(bh)) + list_del_init(&bd->bd_le.le_list); + else + gfs2_remove_from_journal(bh, current->journal_info, 0); } - gfs2_log_unlock(sdp); - - lock_buffer(bh); - clear_buffer_dirty(bh); bh->b_bdev = NULL; clear_buffer_mapped(bh); clear_buffer_req(bh); clear_buffer_new(bh); - clear_buffer_delay(bh); + gfs2_log_unlock(sdp); unlock_buffer(bh); } static void gfs2_invalidatepage(struct page *page, unsigned long offset) { struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); - struct buffer_head *head, *bh, *next; - unsigned int curr_off = 0; + struct buffer_head *bh, *head; + unsigned long pos = 0; BUG_ON(!PageLocked(page)); if (offset == 0) ClearPageChecked(page); if (!page_has_buffers(page)) - return; + goto out; bh = head = page_buffers(page); do { - unsigned int next_off = curr_off + bh->b_size; - next = bh->b_this_page; - - if (offset <= curr_off) - discard_buffer(sdp, bh); - - curr_off = next_off; - bh = next; + if (offset <= pos) + gfs2_discard(sdp, bh); + pos += bh->b_size; + bh = bh->b_this_page; } while (bh != head); - - if (!offset) +out: + if (offset == 0) try_to_release_page(page, 0); - - return; } /** @@ -736,59 +729,6 @@ out: } /** - * stuck_releasepage - We're stuck in gfs2_releasepage(). Print stuff out. - * @bh: the buffer we're stuck on - * - */ - -static void stuck_releasepage(struct buffer_head *bh) -{ - struct inode *inode = bh->b_page->mapping->host; - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; - struct gfs2_bufdata *bd = bh->b_private; - struct gfs2_glock *gl; -static unsigned limit = 0; - - if (limit > 3) - return; - limit++; - - fs_warn(sdp, "stuck in gfs2_releasepage() %p\n", inode); - fs_warn(sdp, "blkno = %llu, bh->b_count = %d\n", - (unsigned long long)bh->b_blocknr, atomic_read(&bh->b_count)); - fs_warn(sdp, "pinned = %u\n", buffer_pinned(bh)); - fs_warn(sdp, "bh->b_private = %s\n", (bd) ? "!NULL" : "NULL"); - - if (!bd) - return; - - gl = bd->bd_gl; - - fs_warn(sdp, "gl = (%u, %llu)\n", - gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number); - - fs_warn(sdp, "bd_list_tr = %s, bd_le.le_list = %s\n", - (list_empty(&bd->bd_list_tr)) ? "no" : "yes", - (list_empty(&bd->bd_le.le_list)) ? "no" : "yes"); - - if (gl->gl_ops == &gfs2_inode_glops) { - struct gfs2_inode *ip = gl->gl_object; - unsigned int x; - - if (!ip) - return; - - fs_warn(sdp, "ip = %llu %llu\n", - (unsigned long long)ip->i_no_formal_ino, - (unsigned long long)ip->i_no_addr); - - for (x = 0; x < GFS2_MAX_META_HEIGHT; x++) - fs_warn(sdp, "ip->i_cache[%u] = %s\n", - x, (ip->i_cache[x]) ? "!NULL" : "NULL"); - } -} - -/** * gfs2_releasepage - free the metadata associated with a page * @page: the page that's being released * @gfp_mask: passed from Linux VFS, ignored by us @@ -805,41 +745,39 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) struct gfs2_sbd *sdp = aspace->i_sb->s_fs_info; struct buffer_head *bh, *head; struct gfs2_bufdata *bd; - unsigned long t = jiffies + gfs2_tune_get(sdp, gt_stall_secs) * HZ; if (!page_has_buffers(page)) - goto out; + return 0; + gfs2_log_lock(sdp); head = bh = page_buffers(page); do { - while (atomic_read(&bh->b_count)) { - if (!atomic_read(&aspace->i_writecount)) - return 0; - - if (!(gfp_mask & __GFP_WAIT)) - return 0; - - if (time_after_eq(jiffies, t)) { - stuck_releasepage(bh); - /* should we withdraw here? */ - return 0; - } - - yield(); - } - + if (atomic_read(&bh->b_count)) + goto cannot_release; + bd = bh->b_private; + if (bd && bd->bd_ail) + goto cannot_release; gfs2_assert_warn(sdp, !buffer_pinned(bh)); gfs2_assert_warn(sdp, !buffer_dirty(bh)); + bh = bh->b_this_page; + } while(bh != head); + gfs2_log_unlock(sdp); + head = bh = page_buffers(page); + do { gfs2_log_lock(sdp); bd = bh->b_private; if (bd) { gfs2_assert_warn(sdp, bd->bd_bh == bh); gfs2_assert_warn(sdp, list_empty(&bd->bd_list_tr)); - gfs2_assert_warn(sdp, !bd->bd_ail); - bd->bd_bh = NULL; - if (!list_empty(&bd->bd_le.le_list)) - bd = NULL; + if (!list_empty(&bd->bd_le.le_list)) { + if (!buffer_pinned(bh)) + list_del_init(&bd->bd_le.le_list); + else + bd = NULL; + } + if (bd) + bd->bd_bh = NULL; bh->b_private = NULL; } gfs2_log_unlock(sdp); @@ -849,8 +787,10 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask) bh = bh->b_this_page; } while (bh != head); -out: return try_to_free_buffers(page); +cannot_release: + gfs2_log_unlock(sdp); + return 0; } const struct address_space_operations gfs2_file_aops = { diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index b8312edee0e..e2d1347796a 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -237,7 +237,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj) inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, - 0); + 0, 0); if (!inode) goto fail; if (IS_ERR(inode)) { diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 94d76ace0b9..46a9e10ff17 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -571,7 +571,8 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) int error = 0; state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED; - flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE; + flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE + | GL_FLOCK; mutex_lock(&fp->f_fl_mutex); @@ -579,21 +580,19 @@ static int do_flock(struct file *file, int cmd, struct file_lock *fl) if (gl) { if (fl_gh->gh_state == state) goto out; - gfs2_glock_hold(gl); flock_lock_file_wait(file, &(struct file_lock){.fl_type = F_UNLCK}); - gfs2_glock_dq_uninit(fl_gh); + gfs2_glock_dq_wait(fl_gh); + gfs2_holder_reinit(state, flags, fl_gh); } else { error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr, &gfs2_flock_glops, CREATE, &gl); if (error) goto out; + gfs2_holder_init(gl, state, flags, fl_gh); + gfs2_glock_put(gl); } - - gfs2_holder_init(gl, state, flags, fl_gh); - gfs2_glock_put(gl); - error = gfs2_glock_nq(fl_gh); if (error) { gfs2_holder_uninit(fl_gh); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index cf5aa505054..17de58e83d9 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -28,18 +28,18 @@ #include "lm.h" #include "mount.h" #include "ops_fstype.h" +#include "ops_dentry.h" #include "ops_super.h" #include "recovery.h" #include "rgrp.h" #include "super.h" #include "sys.h" #include "util.h" +#include "log.h" #define DO 0 #define UNDO 1 -extern struct dentry_operations gfs2_dops; - static struct gfs2_sbd *init_sbd(struct super_block *sb) { struct gfs2_sbd *sdp; @@ -82,13 +82,15 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) INIT_LIST_HEAD(&sdp->sd_log_le_revoke); INIT_LIST_HEAD(&sdp->sd_log_le_rg); INIT_LIST_HEAD(&sdp->sd_log_le_databuf); + INIT_LIST_HEAD(&sdp->sd_log_le_ordered); mutex_init(&sdp->sd_log_reserve_mutex); INIT_LIST_HEAD(&sdp->sd_ail1_list); INIT_LIST_HEAD(&sdp->sd_ail2_list); init_rwsem(&sdp->sd_log_flush_lock); - INIT_LIST_HEAD(&sdp->sd_log_flush_list); + atomic_set(&sdp->sd_log_in_flight, 0); + init_waitqueue_head(&sdp->sd_log_flush_wait); INIT_LIST_HEAD(&sdp->sd_revoke_list); @@ -145,7 +147,8 @@ static int init_names(struct gfs2_sbd *sdp, int silent) snprintf(sdp->sd_proto_name, GFS2_FSNAME_LEN, "%s", proto); snprintf(sdp->sd_table_name, GFS2_FSNAME_LEN, "%s", table); - while ((table = strchr(sdp->sd_table_name, '/'))) + table = sdp->sd_table_name; + while ((table = strchr(table, '/'))) *table = '_'; out: @@ -161,14 +164,6 @@ static int init_locking(struct gfs2_sbd *sdp, struct gfs2_holder *mount_gh, if (undo) goto fail_trans; - p = kthread_run(gfs2_scand, sdp, "gfs2_scand"); - error = IS_ERR(p); - if (error) { - fs_err(sdp, "can't start scand thread: %d\n", error); - return error; - } - sdp->sd_scand_process = p; - for (sdp->sd_glockd_num = 0; sdp->sd_glockd_num < sdp->sd_args.ar_num_glockd; sdp->sd_glockd_num++) { @@ -229,14 +224,13 @@ fail: while (sdp->sd_glockd_num--) kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); - kthread_stop(sdp->sd_scand_process); return error; } static inline struct inode *gfs2_lookup_root(struct super_block *sb, u64 no_addr) { - return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); + return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); } static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) @@ -301,8 +295,9 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) fs_err(sdp, "can't get root dentry\n"); error = -ENOMEM; iput(inode); - } - sb->s_root->d_op = &gfs2_dops; + } else + sb->s_root->d_op = &gfs2_dops; + out: gfs2_glock_dq_uninit(&sb_gh); return error; @@ -368,7 +363,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) ip = GFS2_I(sdp->sd_jdesc->jd_inode); error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT, + LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE, &sdp->sd_jinode_gh); if (error) { fs_err(sdp, "can't acquire journal inode glock: %d\n", @@ -818,7 +813,6 @@ static struct super_block* get_gfs2_sb(const char *dev_name) struct nameidata nd; struct file_system_type *fstype; struct super_block *sb = NULL, *s; - struct list_head *l; int error; error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); @@ -830,8 +824,7 @@ static struct super_block* get_gfs2_sb(const char *dev_name) error = vfs_getattr(nd.mnt, nd.dentry, &stat); fstype = get_fs_type("gfs2"); - list_for_each(l, &fstype->fs_supers) { - s = list_entry(l, struct super_block, s_instances); + list_for_each_entry(s, &fstype->fs_supers, s_instances) { if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || (S_ISDIR(stat.mode) && s == nd.dentry->d_inode->i_sb)) { sb = s; @@ -861,7 +854,7 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, error = -ENOENT; goto error; } - sdp = (struct gfs2_sbd*) sb->s_fs_info; + sdp = sb->s_fs_info; if (sdp->sd_vfs_meta) { printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); error = -EBUSY; @@ -896,7 +889,10 @@ error: static void gfs2_kill_sb(struct super_block *sb) { - gfs2_delete_debugfs_file(sb->s_fs_info); + if (sb->s_fs_info) { + gfs2_delete_debugfs_file(sb->s_fs_info); + gfs2_meta_syncfs(sb->s_fs_info); + } kill_block_super(sb); } diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 911c115b5c6..291f0c7eaa3 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); break; } else if (PTR_ERR(inode) != -EEXIST || - (nd->intent.open.flags & O_EXCL)) { + (nd && (nd->intent.open.flags & O_EXCL))) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); } @@ -278,17 +278,25 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); - error = gfs2_glock_nq_m(3, ghs); + error = gfs2_glock_nq(ghs); /* parent */ if (error) - goto out; + goto out_parent; + + error = gfs2_glock_nq(ghs + 1); /* child */ + if (error) + goto out_child; + + error = gfs2_glock_nq(ghs + 2); /* rgrp */ + if (error) + goto out_rgrp; error = gfs2_unlink_ok(dip, &dentry->d_name, ip); if (error) - goto out_gunlock; + goto out_rgrp; error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); if (error) - goto out_gunlock; + goto out_rgrp; error = gfs2_dir_del(dip, &dentry->d_name); if (error) @@ -298,12 +306,15 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) out_end_trans: gfs2_trans_end(sdp); -out_gunlock: - gfs2_glock_dq_m(3, ghs); -out: - gfs2_holder_uninit(ghs); - gfs2_holder_uninit(ghs + 1); + gfs2_glock_dq(ghs + 2); +out_rgrp: gfs2_holder_uninit(ghs + 2); + gfs2_glock_dq(ghs + 1); +out_child: + gfs2_holder_uninit(ghs + 1); + gfs2_glock_dq(ghs); +out_parent: + gfs2_holder_uninit(ghs); gfs2_glock_dq_uninit(&ri_gh); return error; } @@ -894,12 +905,17 @@ static int gfs2_permission(struct inode *inode, int mask, struct nameidata *nd) static int setattr_size(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); int error; if (attr->ia_size != ip->i_di.di_size) { - error = vmtruncate(inode, attr->ia_size); + error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); if (error) return error; + error = vmtruncate(inode, attr->ia_size); + gfs2_trans_end(sdp); + if (error) + return error; } error = gfs2_truncatei(ip, attr->ia_size); diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 603d940f115..950f31460e8 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -92,7 +92,6 @@ static void gfs2_put_super(struct super_block *sb) kthread_stop(sdp->sd_recoverd_process); while (sdp->sd_glockd_num--) kthread_stop(sdp->sd_glockd_process[sdp->sd_glockd_num]); - kthread_stop(sdp->sd_scand_process); if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_ro(sdp); @@ -456,12 +455,15 @@ static void gfs2_delete_inode(struct inode *inode) } error = gfs2_dinode_dealloc(ip); - /* - * Must do this before unlock to avoid trying to write back - * potentially dirty data now that inode no longer exists - * on disk. - */ + if (error) + goto out_unlock; + + error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); + if (error) + goto out_unlock; + /* Needs to be done before glock release & also in a transaction */ truncate_inode_pages(&inode->i_data, 0); + gfs2_trans_end(sdp); out_unlock: gfs2_glock_dq(&ip->i_iopen_gh); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 6e546ee8f3d..addb51e0f13 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -70,6 +70,7 @@ struct gfs2_quota_host { u64 qu_limit; u64 qu_warn; s64 qu_value; + u32 qu_ll_next; }; struct gfs2_quota_change_host { @@ -580,6 +581,7 @@ static void gfs2_quota_in(struct gfs2_quota_host *qu, const void *buf) qu->qu_limit = be64_to_cpu(str->qu_limit); qu->qu_warn = be64_to_cpu(str->qu_warn); qu->qu_value = be64_to_cpu(str->qu_value); + qu->qu_ll_next = be32_to_cpu(str->qu_ll_next); } static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) @@ -589,6 +591,7 @@ static void gfs2_quota_out(const struct gfs2_quota_host *qu, void *buf) str->qu_limit = cpu_to_be64(qu->qu_limit); str->qu_warn = cpu_to_be64(qu->qu_warn); str->qu_value = cpu_to_be64(qu->qu_value); + str->qu_ll_next = cpu_to_be32(qu->qu_ll_next); memset(&str->qu_reserved, 0, sizeof(str->qu_reserved)); } @@ -614,6 +617,16 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, s64 value; int err = -EIO; + if (gfs2_is_stuffed(ip)) { + struct gfs2_alloc *al = NULL; + al = gfs2_alloc_get(ip); + /* just request 1 blk */ + al->al_requested = 1; + gfs2_inplace_reserve(ip); + gfs2_unstuff_dinode(ip, NULL); + gfs2_inplace_release(ip); + gfs2_alloc_put(ip); + } page = grab_cache_page(mapping, index); if (!page) return -ENOMEM; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 5ada38c99a2..beb6c7ac008 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -469,7 +469,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) }; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, - LM_FLAG_NOEXP, &ji_gh); + LM_FLAG_NOEXP | GL_NOCACHE, &ji_gh); if (error) goto fail_gunlock_j; } else { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index ce48c4594ec..708c287e1d0 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -31,6 +31,7 @@ #include "inode.h" #define BFITNOENT ((u32)~0) +#define NO_BLOCK ((u64)~0) /* * These routines are used by the resource group routines (rgrp.c) @@ -116,8 +117,7 @@ static unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, unsigned char *buffer, * @buffer: the buffer that holds the bitmaps * @buflen: the length (in bytes) of the buffer * @goal: start search at this block's bit-pair (within @buffer) - * @old_state: GFS2_BLKST_XXX the state of the block we're looking for; - * bit 0 = alloc(1)/free(0), bit 1 = meta(1)/data(0) + * @old_state: GFS2_BLKST_XXX the state of the block we're looking for. * * Scope of @goal and returned block number is only within this bitmap buffer, * not entire rgrp or filesystem. @buffer will be offset from the actual @@ -137,9 +137,13 @@ static u32 gfs2_bitfit(struct gfs2_rgrpd *rgd, unsigned char *buffer, byte = buffer + (goal / GFS2_NBBY); bit = (goal % GFS2_NBBY) * GFS2_BIT_SIZE; end = buffer + buflen; - alloc = (old_state & 1) ? 0 : 0x55; + alloc = (old_state == GFS2_BLKST_FREE) ? 0x55 : 0; while (byte < end) { + /* If we're looking for a free block we can eliminate all + bitmap settings with 0x55, which represents four data + blocks in a row. If we're looking for a data block, we can + eliminate 0x00 which corresponds to four free blocks. */ if ((*byte & 0x55) == alloc) { blk += (8 - bit) >> 1; @@ -859,23 +863,28 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) static struct inode *try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked) { struct inode *inode; - u32 goal = 0; + u32 goal = 0, block; u64 no_addr; + struct gfs2_sbd *sdp = rgd->rd_sbd; for(;;) { if (goal >= rgd->rd_data) break; - goal = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, - GFS2_BLKST_UNLINKED); - if (goal == BFITNOENT) + down_write(&sdp->sd_log_flush_lock); + block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, + GFS2_BLKST_UNLINKED); + up_write(&sdp->sd_log_flush_lock); + if (block == BFITNOENT) break; - no_addr = goal + rgd->rd_data0; + /* rgblk_search can return a block < goal, so we need to + keep it marching forward. */ + no_addr = block + rgd->rd_data0; goal++; - if (no_addr < *last_unlinked) + if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) continue; *last_unlinked = no_addr; inode = gfs2_inode_lookup(rgd->rd_sbd->sd_vfs, DT_UNKNOWN, - no_addr, -1); + no_addr, -1, 1); if (!IS_ERR(inode)) return inode; } @@ -1152,7 +1161,7 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) struct gfs2_alloc *al = &ip->i_alloc; struct inode *inode; int error = 0; - u64 last_unlinked = 0; + u64 last_unlinked = NO_BLOCK; if (gfs2_assert_warn(sdp, al->al_requested)) return -EINVAL; @@ -1289,7 +1298,9 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, allocatable block anywhere else, we want to be able wrap around and search in the first part of our first-searched bit block. */ for (x = 0; x <= length; x++) { - if (bi->bi_clone) + /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone + bitmaps, so we must search the originals for that. */ + if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone) blk = gfs2_bitfit(rgd, bi->bi_clone + bi->bi_offset, bi->bi_len, goal, old_state); else @@ -1305,9 +1316,7 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, goal = 0; } - if (old_state != new_state) { - gfs2_assert_withdraw(rgd->rd_sbd, blk != BFITNOENT); - + if (blk != BFITNOENT && old_state != new_state) { gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1); gfs2_setbit(rgd, bi->bi_bh->b_data + bi->bi_offset, bi->bi_len, blk, new_state); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index a2da76b5ae4..dd3e737f528 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -58,7 +58,6 @@ void gfs2_tune_init(struct gfs2_tune *gt) gt->gt_incore_log_blocks = 1024; gt->gt_log_flush_secs = 60; gt->gt_jindex_refresh_secs = 60; - gt->gt_scand_secs = 15; gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quotad_secs = 5; diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index c26c21b53c1..ba3a1729cc1 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -442,7 +442,6 @@ TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(quota_cache_secs, 1); TUNE_ATTR(stall_secs, 1); TUNE_ATTR(statfs_quantum, 1); -TUNE_ATTR_DAEMON(scand_secs, scand_process); TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); TUNE_ATTR_DAEMON(logd_secs, logd_process); TUNE_ATTR_DAEMON(quotad_secs, quotad_process); @@ -464,7 +463,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_quota_cache_secs.attr, &tune_attr_stall_secs.attr, &tune_attr_statfs_quantum.attr, - &tune_attr_scand_secs.attr, &tune_attr_recoverd_secs.attr, &tune_attr_logd_secs.attr, &tune_attr_quotad_secs.attr, diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index f8dabf8446b..717983e2c2a 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -142,25 +142,25 @@ void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta) lops_add(sdp, &bd->bd_le); } -void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno) +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) { - struct gfs2_revoke *rv = kmalloc(sizeof(struct gfs2_revoke), - GFP_NOFS | __GFP_NOFAIL); - lops_init_le(&rv->rv_le, &gfs2_revoke_lops); - rv->rv_blkno = blkno; - lops_add(sdp, &rv->rv_le); + BUG_ON(!list_empty(&bd->bd_le.le_list)); + BUG_ON(!list_empty(&bd->bd_ail_st_list)); + BUG_ON(!list_empty(&bd->bd_ail_gl_list)); + lops_init_le(&bd->bd_le, &gfs2_revoke_lops); + lops_add(sdp, &bd->bd_le); } void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) { - struct gfs2_revoke *rv; + struct gfs2_bufdata *bd; int found = 0; gfs2_log_lock(sdp); - list_for_each_entry(rv, &sdp->sd_log_le_revoke, rv_le.le_list) { - if (rv->rv_blkno == blkno) { - list_del(&rv->rv_le.le_list); + list_for_each_entry(bd, &sdp->sd_log_le_revoke, bd_le.le_list) { + if (bd->bd_blkno == blkno) { + list_del_init(&bd->bd_le.le_list); gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); sdp->sd_log_num_revoke--; found = 1; @@ -172,7 +172,7 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno) if (found) { struct gfs2_trans *tr = current->journal_info; - kfree(rv); + kmem_cache_free(gfs2_bufdata_cachep, bd); tr->tr_num_revoke_rm++; } } diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index 23d4cbe1de5..043d5f4b9c4 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h @@ -32,7 +32,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp); void gfs2_trans_add_gl(struct gfs2_glock *gl); void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta); -void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, u64 blkno); +void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno); void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd); diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index af4ef808fa9..345798ebd36 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -17,6 +17,18 @@ ToDo/Notes: happen is unclear however so it is worth waiting until someone hits the problem. +2.1.29 - Fix a deadlock at mount time. + + - During mount the VFS holds s_umount lock on the superblock. So when + we try to empty the journal $LogFile contents by calling + ntfs_attr_set() when the machine does not have much memory and the + journal is large ntfs_attr_set() results in the VM trying to balance + dirty pages which in turn tries to that the s_umount lock and thus we + get a deadlock. The solution is to not use ntfs_attr_set() and + instead do the zeroing by hand at the block level rather than page + cache level. + - Fix sparse warnings. + 2.1.28 - Fix a deadlock. - Fix deadlock in fs/ntfs/inode.c::ntfs_put_inode(). Thanks to Sergey diff --git a/fs/ntfs/Makefile b/fs/ntfs/Makefile index 82550838556..58b6be99254 100644 --- a/fs/ntfs/Makefile +++ b/fs/ntfs/Makefile @@ -6,7 +6,7 @@ ntfs-objs := aops.o attrib.o collate.o compress.o debug.o dir.o file.o \ index.o inode.o mft.o mst.o namei.o runlist.o super.o sysctl.o \ unistr.o upcase.o -EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.28\" +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.29\" ifeq ($(CONFIG_NTFS_DEBUG),y) EXTRA_CFLAGS += -DDEBUG diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 6e5c2534f4b..cfdc7900d27 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -2,7 +2,7 @@ * aops.c - NTFS kernel address space operations and page cache handling. * Part of the Linux-NTFS project. * - * Copyright (c) 2001-2006 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -396,7 +396,7 @@ static int ntfs_readpage(struct file *file, struct page *page) loff_t i_size; struct inode *vi; ntfs_inode *ni, *base_ni; - u8 *kaddr; + u8 *addr; ntfs_attr_search_ctx *ctx; MFT_RECORD *mrec; unsigned long flags; @@ -491,15 +491,15 @@ retry_readpage: /* Race with shrinking truncate. */ attr_len = i_size; } - kaddr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page, KM_USER0); /* Copy the data to the page. */ - memcpy(kaddr, (u8*)ctx->attr + + memcpy(addr, (u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), attr_len); /* Zero the remainder of the page. */ - memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); flush_dcache_page(page); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(addr, KM_USER0); put_unm_err_out: ntfs_attr_put_search_ctx(ctx); unm_err_out: @@ -1344,7 +1344,7 @@ static int ntfs_writepage(struct page *page, struct writeback_control *wbc) loff_t i_size; struct inode *vi = page->mapping->host; ntfs_inode *base_ni = NULL, *ni = NTFS_I(vi); - char *kaddr; + char *addr; ntfs_attr_search_ctx *ctx = NULL; MFT_RECORD *m = NULL; u32 attr_len; @@ -1484,14 +1484,14 @@ retry_writepage: /* Shrinking cannot fail. */ BUG_ON(err); } - kaddr = kmap_atomic(page, KM_USER0); + addr = kmap_atomic(page, KM_USER0); /* Copy the data from the page to the mft record. */ memcpy((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), - kaddr, attr_len); + addr, attr_len); /* Zero out of bounds area in the page cache page. */ - memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); - kunmap_atomic(kaddr, KM_USER0); + memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); + kunmap_atomic(addr, KM_USER0); flush_dcache_page(page); flush_dcache_mft_record_page(ctx->ntfs_ino); /* We are done with the page. */ diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c index 1c08fefe487..92dabdcf2b8 100644 --- a/fs/ntfs/attrib.c +++ b/fs/ntfs/attrib.c @@ -1,7 +1,7 @@ /** * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2006 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -2500,7 +2500,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) struct page *page; u8 *kaddr; pgoff_t idx, end; - unsigned int start_ofs, end_ofs, size; + unsigned start_ofs, end_ofs, size; ntfs_debug("Entering for ofs 0x%llx, cnt 0x%llx, val 0x%hx.", (long long)ofs, (long long)cnt, val); @@ -2548,6 +2548,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) kunmap_atomic(kaddr, KM_USER0); set_page_dirty(page); page_cache_release(page); + balance_dirty_pages_ratelimited(mapping); + cond_resched(); if (idx == end) goto done; idx++; @@ -2604,6 +2606,8 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val) kunmap_atomic(kaddr, KM_USER0); set_page_dirty(page); page_cache_release(page); + balance_dirty_pages_ratelimited(mapping); + cond_resched(); } done: ntfs_debug("Done."); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index ffcc504a166..c814204d4ea 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -1,7 +1,7 @@ /* * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2006 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -26,7 +26,6 @@ #include <linux/swap.h> #include <linux/uio.h> #include <linux/writeback.h> -#include <linux/sched.h> #include <asm/page.h> #include <asm/uaccess.h> @@ -362,7 +361,7 @@ static inline void ntfs_fault_in_pages_readable(const char __user *uaddr, volatile char c; /* Set @end to the first byte outside the last page we care about. */ - end = (const char __user*)PAGE_ALIGN((ptrdiff_t __user)uaddr + bytes); + end = (const char __user*)PAGE_ALIGN((unsigned long)uaddr + bytes); while (!__get_user(c, uaddr) && (uaddr += PAGE_SIZE, uaddr < end)) ; @@ -532,7 +531,8 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages, blocksize_bits = vol->sb->s_blocksize_bits; u = 0; do { - struct page *page = pages[u]; + page = pages[u]; + BUG_ON(!page); /* * create_empty_buffers() will create uptodate/dirty buffers if * the page is uptodate/dirty. @@ -1291,7 +1291,7 @@ static inline size_t ntfs_copy_from_user(struct page **pages, size_t bytes) { struct page **last_page = pages + nr_pages; - char *kaddr; + char *addr; size_t total = 0; unsigned len; int left; @@ -1300,13 +1300,13 @@ static inline size_t ntfs_copy_from_user(struct page **pages, len = PAGE_CACHE_SIZE - ofs; if (len > bytes) len = bytes; - kaddr = kmap_atomic(*pages, KM_USER0); - left = __copy_from_user_inatomic(kaddr + ofs, buf, len); - kunmap_atomic(kaddr, KM_USER0); + addr = kmap_atomic(*pages, KM_USER0); + left = __copy_from_user_inatomic(addr + ofs, buf, len); + kunmap_atomic(addr, KM_USER0); if (unlikely(left)) { /* Do it the slow way. */ - kaddr = kmap(*pages); - left = __copy_from_user(kaddr + ofs, buf, len); + addr = kmap(*pages); + left = __copy_from_user(addr + ofs, buf, len); kunmap(*pages); if (unlikely(left)) goto err_out; @@ -1408,26 +1408,26 @@ static inline size_t ntfs_copy_from_user_iovec(struct page **pages, size_t *iov_ofs, size_t bytes) { struct page **last_page = pages + nr_pages; - char *kaddr; + char *addr; size_t copied, len, total = 0; do { len = PAGE_CACHE_SIZE - ofs; if (len > bytes) len = bytes; - kaddr = kmap_atomic(*pages, KM_USER0); - copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs, + addr = kmap_atomic(*pages, KM_USER0); + copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, *iov, *iov_ofs, len); - kunmap_atomic(kaddr, KM_USER0); + kunmap_atomic(addr, KM_USER0); if (unlikely(copied != len)) { /* Do it the slow way. */ - kaddr = kmap(*pages); - copied = __ntfs_copy_from_user_iovec_inatomic(kaddr + ofs, + addr = kmap(*pages); + copied = __ntfs_copy_from_user_iovec_inatomic(addr + ofs, *iov, *iov_ofs, len); /* * Zero the rest of the target like __copy_from_user(). */ - memset(kaddr + ofs + copied, 0, len - copied); + memset(addr + ofs + copied, 0, len - copied); kunmap(*pages); if (unlikely(copied != len)) goto err_out; @@ -1735,8 +1735,6 @@ static int ntfs_commit_pages_after_write(struct page **pages, read_unlock_irqrestore(&ni->size_lock, flags); BUG_ON(initialized_size != i_size); if (end > initialized_size) { - unsigned long flags; - write_lock_irqsave(&ni->size_lock, flags); ni->initialized_size = end; i_size_write(vi, end); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index b532a730cec..e9da092e277 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -34,7 +34,6 @@ #include "dir.h" #include "debug.h" #include "inode.h" -#include "attrib.h" #include "lcnalloc.h" #include "malloc.h" #include "mft.h" @@ -2500,8 +2499,6 @@ retry_truncate: /* Resize the attribute record to best fit the new attribute size. */ if (new_size < vol->mft_record_size && !ntfs_resident_attr_value_resize(m, a, new_size)) { - unsigned long flags; - /* The resize succeeded! */ flush_dcache_mft_record_page(ctx->ntfs_ino); mark_mft_record_dirty(ctx->ntfs_ino); diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index acfed325f4e..d7932e95b1f 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -1,7 +1,7 @@ /* * logfile.c - NTFS kernel journal handling. Part of the Linux-NTFS project. * - * Copyright (c) 2002-2005 Anton Altaparmakov + * Copyright (c) 2002-2007 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -724,24 +724,139 @@ bool ntfs_is_logfile_clean(struct inode *log_vi, const RESTART_PAGE_HEADER *rp) */ bool ntfs_empty_logfile(struct inode *log_vi) { - ntfs_volume *vol = NTFS_SB(log_vi->i_sb); + VCN vcn, end_vcn; + ntfs_inode *log_ni = NTFS_I(log_vi); + ntfs_volume *vol = log_ni->vol; + struct super_block *sb = vol->sb; + runlist_element *rl; + unsigned long flags; + unsigned block_size, block_size_bits; + int err; + bool should_wait = true; ntfs_debug("Entering."); - if (!NVolLogFileEmpty(vol)) { - int err; - - err = ntfs_attr_set(NTFS_I(log_vi), 0, i_size_read(log_vi), - 0xff); - if (unlikely(err)) { - ntfs_error(vol->sb, "Failed to fill $LogFile with " - "0xff bytes (error code %i).", err); - return false; - } - /* Set the flag so we do not have to do it again on remount. */ - NVolSetLogFileEmpty(vol); + if (NVolLogFileEmpty(vol)) { + ntfs_debug("Done."); + return true; } + /* + * We cannot use ntfs_attr_set() because we may be still in the middle + * of a mount operation. Thus we do the emptying by hand by first + * zapping the page cache pages for the $LogFile/$DATA attribute and + * then emptying each of the buffers in each of the clusters specified + * by the runlist by hand. + */ + block_size = sb->s_blocksize; + block_size_bits = sb->s_blocksize_bits; + vcn = 0; + read_lock_irqsave(&log_ni->size_lock, flags); + end_vcn = (log_ni->initialized_size + vol->cluster_size_mask) >> + vol->cluster_size_bits; + read_unlock_irqrestore(&log_ni->size_lock, flags); + truncate_inode_pages(log_vi->i_mapping, 0); + down_write(&log_ni->runlist.lock); + rl = log_ni->runlist.rl; + if (unlikely(!rl || vcn < rl->vcn || !rl->length)) { +map_vcn: + err = ntfs_map_runlist_nolock(log_ni, vcn, NULL); + if (err) { + ntfs_error(sb, "Failed to map runlist fragment (error " + "%d).", -err); + goto err; + } + rl = log_ni->runlist.rl; + BUG_ON(!rl || vcn < rl->vcn || !rl->length); + } + /* Seek to the runlist element containing @vcn. */ + while (rl->length && vcn >= rl[1].vcn) + rl++; + do { + LCN lcn; + sector_t block, end_block; + s64 len; + + /* + * If this run is not mapped map it now and start again as the + * runlist will have been updated. + */ + lcn = rl->lcn; + if (unlikely(lcn == LCN_RL_NOT_MAPPED)) { + vcn = rl->vcn; + goto map_vcn; + } + /* If this run is not valid abort with an error. */ + if (unlikely(!rl->length || lcn < LCN_HOLE)) + goto rl_err; + /* Skip holes. */ + if (lcn == LCN_HOLE) + continue; + block = lcn << vol->cluster_size_bits >> block_size_bits; + len = rl->length; + if (rl[1].vcn > end_vcn) + len = end_vcn - rl->vcn; + end_block = (lcn + len) << vol->cluster_size_bits >> + block_size_bits; + /* Iterate over the blocks in the run and empty them. */ + do { + struct buffer_head *bh; + + /* Obtain the buffer, possibly not uptodate. */ + bh = sb_getblk(sb, block); + BUG_ON(!bh); + /* Setup buffer i/o submission. */ + lock_buffer(bh); + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + /* Set the entire contents of the buffer to 0xff. */ + memset(bh->b_data, -1, block_size); + if (!buffer_uptodate(bh)) + set_buffer_uptodate(bh); + if (buffer_dirty(bh)) + clear_buffer_dirty(bh); + /* + * Submit the buffer and wait for i/o to complete but + * only for the first buffer so we do not miss really + * serious i/o errors. Once the first buffer has + * completed ignore errors afterwards as we can assume + * that if one buffer worked all of them will work. + */ + submit_bh(WRITE, bh); + if (should_wait) { + should_wait = false; + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) + goto io_err; + } + brelse(bh); + } while (++block < end_block); + } while ((++rl)->vcn < end_vcn); + up_write(&log_ni->runlist.lock); + /* + * Zap the pages again just in case any got instantiated whilst we were + * emptying the blocks by hand. FIXME: We may not have completed + * writing to all the buffer heads yet so this may happen too early. + * We really should use a kernel thread to do the emptying + * asynchronously and then we can also set the volume dirty and output + * an error message if emptying should fail. + */ + truncate_inode_pages(log_vi->i_mapping, 0); + /* Set the flag so we do not have to do it again on remount. */ + NVolSetLogFileEmpty(vol); ntfs_debug("Done."); return true; +io_err: + ntfs_error(sb, "Failed to write buffer. Unmount and run chkdsk."); + goto dirty_err; +rl_err: + ntfs_error(sb, "Runlist is corrupt. Unmount and run chkdsk."); +dirty_err: + NVolSetErrors(vol); + err = -EIO; +err: + up_write(&log_ni->runlist.lock); + ntfs_error(sb, "Failed to fill $LogFile with 0xff bytes (error %d).", + -err); + return false; } #endif /* NTFS_RW */ diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index 9afd72c7ad0..56a9a6d25a2 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -1,7 +1,7 @@ /** * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2005 Anton Altaparmakov + * Copyright (c) 2001-2007 Anton Altaparmakov * Copyright (c) 2002-2005 Richard Russon * * This program/include file is free software; you can redistribute it and/or @@ -1714,7 +1714,7 @@ extend_hole: sizeof(*rl)); /* Adjust the beginning of the tail if necessary. */ if (end > rl->vcn) { - s64 delta = end - rl->vcn; + delta = end - rl->vcn; rl->vcn = end; rl->length -= delta; /* Only adjust the lcn if it is real. */ diff --git a/include/asm-s390/cache.h b/include/asm-s390/cache.h index cdf431b061b..9b866816863 100644 --- a/include/asm-s390/cache.h +++ b/include/asm-s390/cache.h @@ -14,8 +14,6 @@ #define L1_CACHE_BYTES 256 #define L1_CACHE_SHIFT 8 -#define ARCH_KMALLOC_MINALIGN 8 - #define __read_mostly __attribute__((__section__(".data.read_mostly"))) #endif diff --git a/include/asm-s390/ccwdev.h b/include/asm-s390/ccwdev.h index 1aeda27d5a8..066aa70518c 100644 --- a/include/asm-s390/ccwdev.h +++ b/include/asm-s390/ccwdev.h @@ -67,36 +67,55 @@ ccw_device_id_match(const struct ccw_device_id *array, return NULL; } -/* The struct ccw device is our replacement for the globally accessible - * ioinfo array. ioinfo will mutate into a subchannel device later. +/** + * struct ccw_device - channel attached device + * @ccwlock: pointer to device lock + * @id: id of this device + * @drv: ccw driver for this device + * @dev: embedded device structure + * @online: online status of device + * @handler: interrupt handler * - * Reference: Documentation/s390/driver-model.txt */ + * @handler is a member of the device rather than the driver since a driver + * can have different interrupt handlers for different ccw devices + * (multi-subchannel drivers). + */ struct ccw_device { spinlock_t *ccwlock; +/* private: */ struct ccw_device_private *private; /* cio private information */ - struct ccw_device_id id; /* id of this device, driver_info is - set by ccw_find_driver */ - struct ccw_driver *drv; /* */ - struct device dev; /* */ +/* public: */ + struct ccw_device_id id; + struct ccw_driver *drv; + struct device dev; int online; - /* This is sick, but a driver can have different interrupt handlers - for different ccw_devices (multi-subchannel drivers)... */ void (*handler) (struct ccw_device *, unsigned long, struct irb *); }; -/* Each ccw driver registers with the ccw root bus */ +/** + * struct ccw driver - device driver for channel attached devices + * @owner: owning module + * @ids: ids supported by this driver + * @probe: function called on probe + * @remove: function called on remove + * @set_online: called when setting device online + * @set_offline: called when setting device offline + * @notify: notify driver of device state changes + * @shutdown: called at device shutdown + * @driver: embedded device driver structure + * @name: device driver name + */ struct ccw_driver { - struct module *owner; /* for automatic MOD_INC_USE_COUNT */ - struct ccw_device_id *ids; /* probe driver with these devs */ - int (*probe) (struct ccw_device *); /* ask driver to probe dev */ + struct module *owner; + struct ccw_device_id *ids; + int (*probe) (struct ccw_device *); void (*remove) (struct ccw_device *); - /* device is no longer available */ int (*set_online) (struct ccw_device *); int (*set_offline) (struct ccw_device *); int (*notify) (struct ccw_device *, int); - struct device_driver driver; /* higher level structure, don't init - this from your driver */ + void (*shutdown) (struct ccw_device *); + struct device_driver driver; char *name; }; @@ -124,36 +143,10 @@ extern void ccw_device_clear_options(struct ccw_device *, unsigned long); /* Allow forced onlining of boxed devices. */ #define CCWDEV_ALLOW_FORCE 0x0008 -/* - * ccw_device_start() - * - * Start a S/390 channel program. When the interrupt arrives, the - * IRQ handler is called, either immediately, delayed (dev-end missing, - * or sense required) or never (no IRQ handler registered). - * Depending on the action taken, ccw_device_start() returns: - * 0 - Success - * -EBUSY - Device busy, or status pending - * -ENODEV - Device not operational - * -EINVAL - Device invalid for operation - */ extern int ccw_device_start(struct ccw_device *, struct ccw1 *, unsigned long, __u8, unsigned long); -/* - * ccw_device_start_timeout() - * - * This function notifies the device driver if the channel program has not - * completed during the specified time. If a timeout occurs, the channel - * program is terminated via xsch(), hsch() or csch(). - */ extern int ccw_device_start_timeout(struct ccw_device *, struct ccw1 *, unsigned long, __u8, unsigned long, int); -/* - * ccw_device_start_key() - * ccw_device_start_key_timeout() - * - * Same as ccw_device_start() and ccw_device_start_timeout(), except a - * storage key != default key can be provided for the I/O. - */ extern int ccw_device_start_key(struct ccw_device *, struct ccw1 *, unsigned long, __u8, __u8, unsigned long); extern int ccw_device_start_timeout_key(struct ccw_device *, struct ccw1 *, diff --git a/include/asm-s390/ccwgroup.h b/include/asm-s390/ccwgroup.h index 925b3ddfa14..7109c7cab87 100644 --- a/include/asm-s390/ccwgroup.h +++ b/include/asm-s390/ccwgroup.h @@ -4,19 +4,41 @@ struct ccw_device; struct ccw_driver; +/** + * struct ccwgroup_device - ccw group device + * @creator_id: unique number of the driver + * @state: online/offline state + * @count: number of attached slave devices + * @dev: embedded device structure + * @cdev: variable number of slave devices, allocated as needed + */ struct ccwgroup_device { - unsigned long creator_id; /* unique number of the driver */ + unsigned long creator_id; enum { CCWGROUP_OFFLINE, CCWGROUP_ONLINE, } state; +/* private: */ atomic_t onoff; struct mutex reg_mutex; - unsigned int count; /* number of attached slave devices */ - struct device dev; /* master device */ - struct ccw_device *cdev[0]; /* variable number, allocate as needed */ +/* public: */ + unsigned int count; + struct device dev; + struct ccw_device *cdev[0]; }; +/** + * struct ccwgroup_driver - driver for ccw group devices + * @owner: driver owner + * @name: driver name + * @max_slaves: maximum number of slave devices + * @driver_id: unique id + * @probe: function called on probe + * @remove: function called on remove + * @set_online: function called when device is set online + * @set_offline: function called when device is set offline + * @driver: embedded driver structure + */ struct ccwgroup_driver { struct module *owner; char *name; @@ -28,7 +50,7 @@ struct ccwgroup_driver { int (*set_online) (struct ccwgroup_device *); int (*set_offline) (struct ccwgroup_device *); - struct device_driver driver; /* this driver */ + struct device_driver driver; }; extern int ccwgroup_driver_register (struct ccwgroup_driver *cdriver); diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h index 1982fb34416..2f08c16e44a 100644 --- a/include/asm-s390/cio.h +++ b/include/asm-s390/cio.h @@ -15,30 +15,50 @@ #define LPM_ANYPATH 0xff #define __MAX_CSSID 0 -/* - * subchannel status word +/** + * struct scsw - subchannel status word + * @key: subchannel key + * @sctl: suspend control + * @eswf: esw format + * @cc: deferred condition code + * @fmt: format + * @pfch: prefetch + * @isic: initial-status interruption control + * @alcc: adress-limit checking control + * @ssi: supress-suspended interruption + * @zcc: zero condition code + * @ectl: extended control + * @pno: path not operational + * @res: reserved + * @fctl: function control + * @actl: activity control + * @stctl: status control + * @cpa: channel program address + * @dstat: device status + * @cstat: subchannel status + * @count: residual count */ struct scsw { - __u32 key : 4; /* subchannel key */ - __u32 sctl : 1; /* suspend control */ - __u32 eswf : 1; /* ESW format */ - __u32 cc : 2; /* deferred condition code */ - __u32 fmt : 1; /* format */ - __u32 pfch : 1; /* prefetch */ - __u32 isic : 1; /* initial-status interruption control */ - __u32 alcc : 1; /* address-limit checking control */ - __u32 ssi : 1; /* supress-suspended interruption */ - __u32 zcc : 1; /* zero condition code */ - __u32 ectl : 1; /* extended control */ - __u32 pno : 1; /* path not operational */ - __u32 res : 1; /* reserved */ - __u32 fctl : 3; /* function control */ - __u32 actl : 7; /* activity control */ - __u32 stctl : 5; /* status control */ - __u32 cpa; /* channel program address */ - __u32 dstat : 8; /* device status */ - __u32 cstat : 8; /* subchannel status */ - __u32 count : 16; /* residual count */ + __u32 key : 4; + __u32 sctl : 1; + __u32 eswf : 1; + __u32 cc : 2; + __u32 fmt : 1; + __u32 pfch : 1; + __u32 isic : 1; + __u32 alcc : 1; + __u32 ssi : 1; + __u32 zcc : 1; + __u32 ectl : 1; + __u32 pno : 1; + __u32 res : 1; + __u32 fctl : 3; + __u32 actl : 7; + __u32 stctl : 5; + __u32 cpa; + __u32 dstat : 8; + __u32 cstat : 8; + __u32 count : 16; } __attribute__ ((packed)); #define SCSW_FCTL_CLEAR_FUNC 0x1 @@ -110,11 +130,22 @@ struct scsw { #define SNS2_ENV_DATA_PRESENT 0x10 #define SNS2_INPRECISE_END 0x04 +/** + * struct ccw1 - channel command word + * @cmd_code: command code + * @flags: flags, like IDA adressing, etc. + * @count: byte count + * @cda: data address + * + * The ccw is the basic structure to build channel programs that perform + * operations with the device or the control unit. Only Format-1 channel + * command words are supported. + */ struct ccw1 { - __u8 cmd_code; /* command code */ - __u8 flags; /* flags, like IDA addressing, etc. */ - __u16 count; /* byte count */ - __u32 cda; /* data address */ + __u8 cmd_code; + __u8 flags; + __u16 count; + __u32 cda; } __attribute__ ((packed,aligned(8))); #define CCW_FLAG_DC 0x80 @@ -140,102 +171,162 @@ struct ccw1 { #define SENSE_MAX_COUNT 0x20 +/** + * struct erw - extended report word + * @res0: reserved + * @auth: authorization check + * @pvrf: path-verification-required flag + * @cpt: channel-path timeout + * @fsavf: failing storage address validity flag + * @cons: concurrent sense + * @scavf: secondary ccw address validity flag + * @fsaf: failing storage address format + * @scnt: sense count, if @cons == %1 + * @res16: reserved + */ struct erw { - __u32 res0 : 3; /* reserved */ - __u32 auth : 1; /* Authorization check */ - __u32 pvrf : 1; /* path-verification-required flag */ - __u32 cpt : 1; /* channel-path timeout */ - __u32 fsavf : 1; /* Failing storage address validity flag */ - __u32 cons : 1; /* concurrent-sense */ - __u32 scavf : 1; /* Secondary ccw address validity flag */ - __u32 fsaf : 1; /* Failing storage address format */ - __u32 scnt : 6; /* sense count if cons == 1 */ - __u32 res16 : 16; /* reserved */ + __u32 res0 : 3; + __u32 auth : 1; + __u32 pvrf : 1; + __u32 cpt : 1; + __u32 fsavf : 1; + __u32 cons : 1; + __u32 scavf : 1; + __u32 fsaf : 1; + __u32 scnt : 6; + __u32 res16 : 16; } __attribute__ ((packed)); -/* - * subchannel logout area +/** + * struct sublog - subchannel logout area + * @res0: reserved + * @esf: extended status flags + * @lpum: last path used mask + * @arep: ancillary report + * @fvf: field-validity flags + * @sacc: storage access code + * @termc: termination code + * @devsc: device-status check + * @serr: secondary error + * @ioerr: i/o-error alert + * @seqc: sequence code */ struct sublog { - __u32 res0 : 1; /* reserved */ - __u32 esf : 7; /* extended status flags */ - __u32 lpum : 8; /* last path used mask */ - __u32 arep : 1; /* ancillary report */ - __u32 fvf : 5; /* field-validity flags */ - __u32 sacc : 2; /* storage access code */ - __u32 termc : 2; /* termination code */ - __u32 devsc : 1; /* device-status check */ - __u32 serr : 1; /* secondary error */ - __u32 ioerr : 1; /* i/o-error alert */ - __u32 seqc : 3; /* sequence code */ + __u32 res0 : 1; + __u32 esf : 7; + __u32 lpum : 8; + __u32 arep : 1; + __u32 fvf : 5; + __u32 sacc : 2; + __u32 termc : 2; + __u32 devsc : 1; + __u32 serr : 1; + __u32 ioerr : 1; + __u32 seqc : 3; } __attribute__ ((packed)); -/* - * Format 0 Extended Status Word (ESW) +/** + * struct esw0 - Format 0 Extended Status Word (ESW) + * @sublog: subchannel logout + * @erw: extended report word + * @faddr: failing storage address + * @saddr: secondary ccw address */ struct esw0 { - struct sublog sublog; /* subchannel logout */ - struct erw erw; /* extended report word */ - __u32 faddr[2]; /* failing storage address */ - __u32 saddr; /* secondary ccw address */ + struct sublog sublog; + struct erw erw; + __u32 faddr[2]; + __u32 saddr; } __attribute__ ((packed)); -/* - * Format 1 Extended Status Word (ESW) +/** + * struct esw1 - Format 1 Extended Status Word (ESW) + * @zero0: reserved zeros + * @lpum: last path used mask + * @zero16: reserved zeros + * @erw: extended report word + * @zeros: three fullwords of zeros */ struct esw1 { - __u8 zero0; /* reserved zeros */ - __u8 lpum; /* last path used mask */ - __u16 zero16; /* reserved zeros */ - struct erw erw; /* extended report word */ - __u32 zeros[3]; /* 2 fullwords of zeros */ + __u8 zero0; + __u8 lpum; + __u16 zero16; + struct erw erw; + __u32 zeros[3]; } __attribute__ ((packed)); -/* - * Format 2 Extended Status Word (ESW) +/** + * struct esw2 - Format 2 Extended Status Word (ESW) + * @zero0: reserved zeros + * @lpum: last path used mask + * @dcti: device-connect-time interval + * @erw: extended report word + * @zeros: three fullwords of zeros */ struct esw2 { - __u8 zero0; /* reserved zeros */ - __u8 lpum; /* last path used mask */ - __u16 dcti; /* device-connect-time interval */ - struct erw erw; /* extended report word */ - __u32 zeros[3]; /* 2 fullwords of zeros */ + __u8 zero0; + __u8 lpum; + __u16 dcti; + struct erw erw; + __u32 zeros[3]; } __attribute__ ((packed)); -/* - * Format 3 Extended Status Word (ESW) +/** + * struct esw3 - Format 3 Extended Status Word (ESW) + * @zero0: reserved zeros + * @lpum: last path used mask + * @res: reserved + * @erw: extended report word + * @zeros: three fullwords of zeros */ struct esw3 { - __u8 zero0; /* reserved zeros */ - __u8 lpum; /* last path used mask */ - __u16 res; /* reserved */ - struct erw erw; /* extended report word */ - __u32 zeros[3]; /* 2 fullwords of zeros */ + __u8 zero0; + __u8 lpum; + __u16 res; + struct erw erw; + __u32 zeros[3]; } __attribute__ ((packed)); -/* - * interruption response block +/** + * struct irb - interruption response block + * @scsw: subchannel status word + * @esw: extened status word, 4 formats + * @ecw: extended control word + * + * The irb that is handed to the device driver when an interrupt occurs. For + * solicited interrupts, the common I/O layer already performs checks whether + * a field is valid; a field not being valid is always passed as %0. + * If a unit check occured, @ecw may contain sense data; this is retrieved + * by the common I/O layer itself if the device doesn't support concurrent + * sense (so that the device driver never needs to perform basic sene itself). + * For unsolicited interrupts, the irb is passed as-is (expect for sense data, + * if applicable). */ struct irb { - struct scsw scsw; /* subchannel status word */ - union { /* extended status word, 4 formats */ + struct scsw scsw; + union { struct esw0 esw0; struct esw1 esw1; struct esw2 esw2; struct esw3 esw3; } esw; - __u8 ecw[32]; /* extended control word */ + __u8 ecw[32]; } __attribute__ ((packed,aligned(4))); -/* - * command information word (CIW) layout +/** + * struct ciw - command information word (CIW) layout + * @et: entry type + * @reserved: reserved bits + * @ct: command type + * @cmd: command code + * @count: command count */ struct ciw { - __u32 et : 2; /* entry type */ - __u32 reserved : 2; /* reserved */ - __u32 ct : 4; /* command type */ - __u32 cmd : 8; /* command */ - __u32 count : 16; /* coun */ + __u32 et : 2; + __u32 reserved : 2; + __u32 ct : 4; + __u32 cmd : 8; + __u32 count : 16; } __attribute__ ((packed)); #define CIW_TYPE_RCD 0x0 /* read configuration data */ @@ -258,11 +349,32 @@ struct ciw { /* Sick revalidation of device. */ #define CIO_REVALIDATE 0x0008 +/** + * struct ccw_dev_id - unique identifier for ccw devices + * @ssid: subchannel set id + * @devno: device number + * + * This structure is not directly based on any hardware structure. The + * hardware identifies a device by its device number and its subchannel, + * which is in turn identified by its id. In order to get a unique identifier + * for ccw devices across subchannel sets, @struct ccw_dev_id has been + * introduced. + */ struct ccw_dev_id { u8 ssid; u16 devno; }; +/** + * ccw_device_id_is_equal() - compare two ccw_dev_ids + * @dev_id1: a ccw_dev_id + * @dev_id2: another ccw_dev_id + * Returns: + * %1 if the two structures are equal field-by-field, + * %0 if not. + * Context: + * any + */ static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, struct ccw_dev_id *dev_id2) { diff --git a/include/asm-s390/cmb.h b/include/asm-s390/cmb.h index 021e7c3223e..50196857d27 100644 --- a/include/asm-s390/cmb.h +++ b/include/asm-s390/cmb.h @@ -1,29 +1,29 @@ #ifndef S390_CMB_H #define S390_CMB_H /** - * struct cmbdata -- channel measurement block data for user space + * struct cmbdata - channel measurement block data for user space + * @size: size of the stored data + * @elapsed_time: time since last sampling + * @ssch_rsch_count: number of ssch and rsch + * @sample_count: number of samples + * @device_connect_time: time of device connect + * @function_pending_time: time of function pending + * @device_disconnect_time: time of device disconnect + * @control_unit_queuing_time: time of control unit queuing + * @device_active_only_time: time of device active only + * @device_busy_time: time of device busy (ext. format) + * @initial_command_response_time: initial command response time (ext. format) * - * @size: size of the stored data - * @ssch_rsch_count: XXX - * @sample_count: - * @device_connect_time: - * @function_pending_time: - * @device_disconnect_time: - * @control_unit_queuing_time: - * @device_active_only_time: - * @device_busy_time: - * @initial_command_response_time: - * - * all values are stored as 64 bit for simplicity, especially + * All values are stored as 64 bit for simplicity, especially * in 32 bit emulation mode. All time values are normalized to * nanoseconds. * Currently, two formats are known, which differ by the size of * this structure, i.e. the last two members are only set when * the extended channel measurement facility (first shipped in * z990 machines) is activated. - * Potentially, more fields could be added, which results in a + * Potentially, more fields could be added, which would result in a * new ioctl number. - **/ + */ struct cmbdata { __u64 size; __u64 elapsed_time; @@ -41,53 +41,18 @@ struct cmbdata { }; /* enable channel measurement */ -#define BIODASDCMFENABLE _IO(DASD_IOCTL_LETTER,32) +#define BIODASDCMFENABLE _IO(DASD_IOCTL_LETTER, 32) /* enable channel measurement */ -#define BIODASDCMFDISABLE _IO(DASD_IOCTL_LETTER,33) +#define BIODASDCMFDISABLE _IO(DASD_IOCTL_LETTER, 33) /* read channel measurement data */ -#define BIODASDREADALLCMB _IOWR(DASD_IOCTL_LETTER,33,struct cmbdata) +#define BIODASDREADALLCMB _IOWR(DASD_IOCTL_LETTER, 33, struct cmbdata) #ifdef __KERNEL__ struct ccw_device; -/** - * enable_cmf() - switch on the channel measurement for a specific device - * @cdev: The ccw device to be enabled - * returns 0 for success or a negative error value. - * - * Context: - * non-atomic - **/ extern int enable_cmf(struct ccw_device *cdev); - -/** - * disable_cmf() - switch off the channel measurement for a specific device - * @cdev: The ccw device to be disabled - * returns 0 for success or a negative error value. - * - * Context: - * non-atomic - **/ extern int disable_cmf(struct ccw_device *cdev); - -/** - * cmf_read() - read one value from the current channel measurement block - * @cmf: the channel to be read - * @index: the name of the value that is read - * - * Context: - * any - **/ - extern u64 cmf_read(struct ccw_device *cdev, int index); -/** - * cmf_readall() - read one value from the current channel measurement block - * @cmf: the channel to be read - * @data: a pointer to a data block that will be filled - * - * Context: - * any - **/ -extern int cmf_readall(struct ccw_device *cdev, struct cmbdata*data); +extern int cmf_readall(struct ccw_device *cdev, struct cmbdata *data); #endif /* __KERNEL__ */ #endif /* S390_CMB_H */ diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h index f326451ed6e..ceec3826a67 100644 --- a/include/asm-s390/page.h +++ b/include/asm-s390/page.h @@ -9,11 +9,12 @@ #ifndef _S390_PAGE_H #define _S390_PAGE_H +#include <linux/const.h> #include <asm/types.h> /* PAGE_SHIFT determines the page size */ #define PAGE_SHIFT 12 -#define PAGE_SIZE (1UL << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) #define PAGE_DEFAULT_ACC 0 #define PAGE_DEFAULT_KEY (PAGE_DEFAULT_ACC << 4) diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 3208dc6c412..39bb5192dc3 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -107,11 +107,18 @@ extern char empty_zero_page[PAGE_SIZE]; * any out-of-bounds memory accesses will hopefully be caught. * The vmalloc() routines leaves a hole of 4kB between each vmalloced * area for the same reason. ;) + * vmalloc area starts at 4GB to prevent syscall table entry exchanging + * from modules. */ extern unsigned long vmalloc_end; -#define VMALLOC_OFFSET (8*1024*1024) -#define VMALLOC_START (((unsigned long) high_memory + VMALLOC_OFFSET) \ - & ~(VMALLOC_OFFSET-1)) + +#ifdef CONFIG_64BIT +#define VMALLOC_ADDR (max(0x100000000UL, (unsigned long) high_memory)) +#else +#define VMALLOC_ADDR ((unsigned long) high_memory) +#endif +#define VMALLOC_OFFSET (8*1024*1024) +#define VMALLOC_START ((VMALLOC_ADDR + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) #define VMALLOC_END vmalloc_end /* diff --git a/include/asm-s390/s390_ext.h b/include/asm-s390/s390_ext.h index 1e72362cad7..2afc060266a 100644 --- a/include/asm-s390/s390_ext.h +++ b/include/asm-s390/s390_ext.h @@ -5,7 +5,7 @@ * include/asm-s390/s390_ext.h * * S390 version - * Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Copyright IBM Corp. 1999,2007 * Author(s): Holger Smolinski (Holger.Smolinski@de.ibm.com), * Martin Schwidefsky (schwidefsky@de.ibm.com) */ @@ -14,15 +14,11 @@ typedef void (*ext_int_handler_t)(__u16 code); -/* - * Warning: if you change ext_int_info_t you have to change the - * external interrupt handler in entry.S too. - */ typedef struct ext_int_info_t { struct ext_int_info_t *next; ext_int_handler_t handler; __u16 code; -} __attribute__ ((packed)) ext_int_info_t; +} ext_int_info_t; extern ext_int_info_t *ext_int_hash[]; diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index 64a3cd05cae..d866d338555 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -130,6 +130,8 @@ extern void pfault_fini(void); __ret; \ }) +extern void __xchg_called_with_bad_pointer(void); + static inline unsigned long __xchg(unsigned long x, void * ptr, int size) { unsigned long addr, old; @@ -150,8 +152,7 @@ static inline unsigned long __xchg(unsigned long x, void * ptr, int size) : "=&d" (old), "=m" (*(int *) addr) : "d" (x << shift), "d" (~(255 << shift)), "a" (addr), "m" (*(int *) addr) : "memory", "cc", "0"); - x = old >> shift; - break; + return old >> shift; case 2: addr = (unsigned long) ptr; shift = (2 ^ (addr & 2)) << 3; @@ -166,8 +167,7 @@ static inline unsigned long __xchg(unsigned long x, void * ptr, int size) : "=&d" (old), "=m" (*(int *) addr) : "d" (x << shift), "d" (~(65535 << shift)), "a" (addr), "m" (*(int *) addr) : "memory", "cc", "0"); - x = old >> shift; - break; + return old >> shift; case 4: asm volatile( " l %0,0(%3)\n" @@ -176,8 +176,7 @@ static inline unsigned long __xchg(unsigned long x, void * ptr, int size) : "=&d" (old), "=m" (*(int *) ptr) : "d" (x), "a" (ptr), "m" (*(int *) ptr) : "memory", "cc"); - x = old; - break; + return old; #ifdef __s390x__ case 8: asm volatile( @@ -187,11 +186,11 @@ static inline unsigned long __xchg(unsigned long x, void * ptr, int size) : "=&d" (old), "=m" (*(long *) ptr) : "d" (x), "a" (ptr), "m" (*(long *) ptr) : "memory", "cc"); - x = old; - break; + return old; #endif /* __s390x__ */ - } - return x; + } + __xchg_called_with_bad_pointer(); + return x; } /* @@ -206,6 +205,8 @@ static inline unsigned long __xchg(unsigned long x, void * ptr, int size) ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ (unsigned long)(n),sizeof(*(ptr)))) +extern void __cmpxchg_called_with_bad_pointer(void); + static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) { @@ -270,7 +271,8 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) return prev; #endif /* __s390x__ */ } - return old; + __cmpxchg_called_with_bad_pointer(); + return old; } /* diff --git a/include/asm-s390/zcrypt.h b/include/asm-s390/zcrypt.h index b90e55888a5..a5dada61775 100644 --- a/include/asm-s390/zcrypt.h +++ b/include/asm-s390/zcrypt.h @@ -91,7 +91,7 @@ struct ica_rsa_modexpo_crt { * VUD block * key block */ -struct ica_CPRBX { +struct CPRBX { unsigned short cprb_len; /* CPRB length 220 */ unsigned char cprb_ver_id; /* CPRB version id. 0x02 */ unsigned char pad_000[3]; /* Alignment pad bytes */ @@ -130,7 +130,7 @@ struct ica_CPRBX { unsigned char cntrl_domain[4];/* Control domain */ unsigned char S390enf_mask[4];/* S/390 enforcement mask */ unsigned char pad_004[36]; /* reserved */ -}; +} __attribute__((packed)); /** * xcRB diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95be0ac57e7..5ed888b04b2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -20,20 +20,6 @@ #include <asm/scatterlist.h> -#ifdef CONFIG_LBD -# include <asm/div64.h> -# define sector_div(a, b) do_div(a, b) -#else -# define sector_div(n, b)( \ -{ \ - int _res; \ - _res = (n) % (b); \ - (n) /= (b); \ - _res; \ -} \ -) -#endif - struct scsi_ioctl_command; struct request_queue; diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 2e105a12fe2..7e11d23ac36 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -290,12 +290,7 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, #define blk_add_trace_generic(q, rq, rw, what) do { } while (0) #define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) #define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) -static inline int do_blk_trace_setup(struct request_queue *q, - struct block_device *bdev, - struct blk_user_trace_setup *buts) -{ - return 0; -} +#define do_blk_trace_setup(q, bdev, buts) (-ENOTTY) #endif /* CONFIG_BLK_DEV_IO_TRACE */ #endif /* __KERNEL__ */ #endif diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index a44a6a078f0..c3c19f926e6 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -170,6 +170,33 @@ struct gfs2_rgrp { }; /* + * quota linked list: user quotas and group quotas form two separate + * singly linked lists. ll_next stores uids or gids of next quotas in the + * linked list. + +Given the uid/gid, how to calculate the quota file offsets for the corresponding +gfs2_quota structures on disk: + +for user quotas, given uid, +offset = uid * sizeof(struct gfs2_quota); + +for group quotas, given gid, +offset = (gid * sizeof(struct gfs2_quota)) + sizeof(struct gfs2_quota); + + + uid:0 gid:0 uid:12 gid:12 uid:17 gid:17 uid:5142 gid:5142 ++-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ +| valid | valid | :: | valid | valid | :: | valid | inval | :: | inval | valid | ++-------+-------+ +-------+-------+ +-------+- - - -+ +- - - -+-------+ +next:12 next:12 next:17 next:5142 next:NULL next:NULL + | | | | |<-- user quota list | + \______|___________/ \______|___________/ group quota list -->| + | | | + \__________________/ \_______________________________________/ + +*/ + +/* * quota structure */ @@ -177,7 +204,8 @@ struct gfs2_quota { __be64 qu_limit; __be64 qu_warn; __be64 qu_value; - __u8 qu_reserved[64]; + __be32 qu_ll_next; /* location of next quota in list */ + __u8 qu_reserved[60]; }; /* diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 47160fe378c..d9725a28a26 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -42,6 +42,20 @@ extern const char linux_proc_banner[]; #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +#ifdef CONFIG_LBD +# include <asm/div64.h> +# define sector_div(a, b) do_div(a, b) +#else +# define sector_div(n, b)( \ +{ \ + int _res; \ + _res = (n) % (b); \ + (n) /= (b); \ + _res; \ +} \ +) +#endif + /** * upper_32_bits - return bits 32-63 of a number * @n: the number we're accessing |