diff options
131 files changed, 4981 insertions, 5104 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 0d5394920a3..74484e69640 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt @@ -32,9 +32,9 @@ Mailing list: linux-ext4@vger.kernel.org you will need to merge your changes with the version from e2fsprogs 1.41.x. - - Create a new filesystem using the ext4dev filesystem type: + - Create a new filesystem using the ext4 filesystem type: - # mke2fs -t ext4dev /dev/hda1 + # mke2fs -t ext4 /dev/hda1 Or configure an existing ext3 filesystem to support extents and set the test_fs flag to indicate that it's ok for an in-development @@ -47,13 +47,13 @@ Mailing list: linux-ext4@vger.kernel.org # tune2fs -I 256 /dev/hda1 - (Note: we currently do not have tools to convert an ext4dev + (Note: we currently do not have tools to convert an ext4 filesystem back to ext3; so please do not do try this on production filesystems.) - Mounting: - # mount -t ext4dev /dev/hda1 /wherever + # mount -t ext4 /dev/hda1 /wherever - When comparing performance with other filesystems, remember that ext3/4 by default offers higher data integrity guarantees than most. @@ -177,6 +177,11 @@ barrier=<0|1(*)> This enables/disables the use of write barriers in your disks are battery-backed in one way or another, disabling barriers may safely improve performance. +inode_readahead=n This tuning parameter controls the maximum + number of inode table blocks that ext4's inode + table readahead algorithm will pre-read into + the buffer cache. The default value is 32 blocks. + orlov (*) This enables the new Orlov block allocator. It is enabled by default. @@ -252,6 +257,7 @@ stripe=n Number of filesystem blocks that mballoc will try delalloc (*) Deferring block allocation until write-out time. nodelalloc Disable delayed allocation. Blocks are allocation when data is copied from user to page cache. + Data Mode ========= There are 3 different data modes: diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt new file mode 100644 index 00000000000..1e3defcfe50 --- /dev/null +++ b/Documentation/filesystems/fiemap.txt @@ -0,0 +1,228 @@ +============ +Fiemap Ioctl +============ + +The fiemap ioctl is an efficient method for userspace to get file +extent mappings. Instead of block-by-block mapping (such as bmap), fiemap +returns a list of extents. + + +Request Basics +-------------- + +A fiemap request is encoded within struct fiemap: + +struct fiemap { + __u64 fm_start; /* logical offset (inclusive) at + * which to start mapping (in) */ + __u64 fm_length; /* logical length of mapping which + * userspace cares about (in) */ + __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ + __u32 fm_mapped_extents; /* number of extents that were + * mapped (out) */ + __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ +}; + + +fm_start, and fm_length specify the logical range within the file +which the process would like mappings for. Extents returned mirror +those on disk - that is, the logical offset of the 1st returned extent +may start before fm_start, and the range covered by the last returned +extent may end after fm_length. All offsets and lengths are in bytes. + +Certain flags to modify the way in which mappings are looked up can be +set in fm_flags. If the kernel doesn't understand some particular +flags, it will return EBADR and the contents of fm_flags will contain +the set of flags which caused the error. If the kernel is compatible +with all flags passed, the contents of fm_flags will be unmodified. +It is up to userspace to determine whether rejection of a particular +flag is fatal to it's operation. This scheme is intended to allow the +fiemap interface to grow in the future but without losing +compatibility with old software. + +fm_extent_count specifies the number of elements in the fm_extents[] array +that can be used to return extents. If fm_extent_count is zero, then the +fm_extents[] array is ignored (no extents will be returned), and the +fm_mapped_extents count will hold the number of extents needed in +fm_extents[] to hold the file's current mapping. Note that there is +nothing to prevent the file from changing between calls to FIEMAP. + +The following flags can be set in fm_flags: + +* FIEMAP_FLAG_SYNC +If this flag is set, the kernel will sync the file before mapping extents. + +* FIEMAP_FLAG_XATTR +If this flag is set, the extents returned will describe the inodes +extended attribute lookup tree, instead of it's data tree. + + +Extent Mapping +-------------- + +Extent information is returned within the embedded fm_extents array +which userspace must allocate along with the fiemap structure. The +number of elements in the fiemap_extents[] array should be passed via +fm_extent_count. The number of extents mapped by kernel will be +returned via fm_mapped_extents. If the number of fiemap_extents +allocated is less than would be required to map the requested range, +the maximum number of extents that can be mapped in the fm_extent[] +array will be returned and fm_mapped_extents will be equal to +fm_extent_count. In that case, the last extent in the array will not +complete the requested range and will not have the FIEMAP_EXTENT_LAST +flag set (see the next section on extent flags). + +Each extent is described by a single fiemap_extent structure as +returned in fm_extents. + +struct fiemap_extent { + __u64 fe_logical; /* logical offset in bytes for the start of + * the extent */ + __u64 fe_physical; /* physical offset in bytes for the start + * of the extent */ + __u64 fe_length; /* length in bytes for the extent */ + __u64 fe_reserved64[2]; + __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + __u32 fe_reserved[3]; +}; + +All offsets and lengths are in bytes and mirror those on disk. It is valid +for an extents logical offset to start before the request or it's logical +length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is +returned, fe_logical, fe_physical, and fe_length will be aligned to the +block size of the file system. With the exception of extents flagged as +FIEMAP_EXTENT_MERGED, adjacent extents will not be merged. + +The fe_flags field contains flags which describe the extent returned. +A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in +the file so that the process making fiemap calls can determine when no +more extents are available, without having to call the ioctl again. + +Some flags are intentionally vague and will always be set in the +presence of other more specific flags. This way a program looking for +a general property does not have to know all existing and future flags +which imply that property. + +For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL +are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking +for inline or tail-packed data can key on the specific flag. Software +which simply cares not to try operating on non-aligned extents +however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to +worry about all present and future flags which might imply unaligned +data. Note that the opposite is not true - it would be valid for +FIEMAP_EXTENT_NOT_ALIGNED to appear alone. + +* FIEMAP_EXTENT_LAST +This is the last extent in the file. A mapping attempt past this +extent will return nothing. + +* FIEMAP_EXTENT_UNKNOWN +The location of this extent is currently unknown. This may indicate +the data is stored on an inaccessible volume or that no storage has +been allocated for the file yet. + +* FIEMAP_EXTENT_DELALLOC + - This will also set FIEMAP_EXTENT_UNKNOWN. +Delayed allocation - while there is data for this extent, it's +physical location has not been allocated yet. + +* FIEMAP_EXTENT_ENCODED +This extent does not consist of plain filesystem blocks but is +encoded (e.g. encrypted or compressed). Reading the data in this +extent via I/O to the block device will have undefined results. + +Note that it is *always* undefined to try to update the data +in-place by writing to the indicated location without the +assistance of the filesystem, or to access the data using the +information returned by the FIEMAP interface while the filesystem +is mounted. In other words, user applications may only read the +extent data via I/O to the block device while the filesystem is +unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is +clear; user applications must not try reading or writing to the +filesystem via the block device under any other circumstances. + +* FIEMAP_EXTENT_DATA_ENCRYPTED + - This will also set FIEMAP_EXTENT_ENCODED +The data in this extent has been encrypted by the file system. + +* FIEMAP_EXTENT_NOT_ALIGNED +Extent offsets and length are not guaranteed to be block aligned. + +* FIEMAP_EXTENT_DATA_INLINE + This will also set FIEMAP_EXTENT_NOT_ALIGNED +Data is located within a meta data block. + +* FIEMAP_EXTENT_DATA_TAIL + This will also set FIEMAP_EXTENT_NOT_ALIGNED +Data is packed into a block with data from other files. + +* FIEMAP_EXTENT_UNWRITTEN +Unwritten extent - the extent is allocated but it's data has not been +initialized. This indicates the extent's data will be all zero if read +through the filesystem but the contents are undefined if read directly from +the device. + +* FIEMAP_EXTENT_MERGED +This will be set when a file does not support extents, i.e., it uses a block +based addressing scheme. Since returning an extent for each block back to +userspace would be highly inefficient, the kernel will try to merge most +adjacent blocks into 'extents'. + + +VFS -> File System Implementation +--------------------------------- + +File systems wishing to support fiemap must implement a ->fiemap callback on +their inode_operations structure. The fs ->fiemap call is responsible for +defining it's set of supported fiemap flags, and calling a helper function on +each discovered extent: + +struct inode_operations { + ... + + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); + +->fiemap is passed struct fiemap_extent_info which describes the +fiemap request: + +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */ +}; + +It is intended that the file system should not need to access any of this +structure directly. + + +Flag checking should be done at the beginning of the ->fiemap callback via the +fiemap_check_flags() helper: + +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + +The struct fieinfo should be passed in as recieved from ioctl_fiemap(). The +set of fiemap flags which the fs understands should be passed via fs_flags. If +fiemap_check_flags finds invalid user flags, it will place the bad values in +fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from +fiemap_check_flags(), it should immediately exit, returning that error back to +ioctl_fiemap(). + + +For each extent in the request range, the file system should call +the helper function, fiemap_fill_next_extent(): + +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags, u32 dev); + +fiemap_fill_next_extent() will use the passed values to populate the +next free extent in the fm_extents array. 'General' extent flags will +automatically be set from specific flags on behalf of the calling file +system so that the userspace API is not broken. + +fiemap_fill_next_extent() returns 0 on success, and 1 when the +user-supplied fm_extents array is full. If an error is encountered +while copying the extent to user memory, -EFAULT will be returned. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f566ad9bcb7..d831d24d2a6 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -923,45 +923,44 @@ CPUs. The "procs_blocked" line gives the number of processes currently blocked, waiting for I/O to complete. + 1.9 Ext4 file system parameters ------------------------------ -Ext4 file system have one directory per partition under /proc/fs/ext4/ -# ls /proc/fs/ext4/hdc/ -group_prealloc max_to_scan mb_groups mb_history min_to_scan order2_req -stats stream_req - -mb_groups: -This file gives the details of multiblock allocator buddy cache of free blocks - -mb_history: -Multiblock allocation history. - -stats: -This file indicate whether the multiblock allocator should start collecting -statistics. The statistics are shown during unmount - -group_prealloc: -The multiblock allocator normalize the block allocation request to -group_prealloc filesystem blocks if we don't have strip value set. -The stripe value can be specified at mount time or during mke2fs. - -max_to_scan: -How long multiblock allocator can look for a best extent (in found extents) - -min_to_scan: -How long multiblock allocator must look for a best extent - -order2_req: -Multiblock allocator use 2^N search using buddies only for requests greater -than or equal to order2_req. The request size is specfied in file system -blocks. A value of 2 indicate only if the requests are greater than or equal -to 4 blocks. - -stream_req: -Files smaller than stream_req are served by the stream allocator, whose -purpose is to pack requests as close each to other as possible to -produce smooth I/O traffic. Avalue of 16 indicate that file smaller than 16 -filesystem block size will use group based preallocation. + +Information about mounted ext4 file systems can be found in +/proc/fs/ext4. Each mounted filesystem will have a directory in +/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or +/proc/fs/ext4/dm-0). The files in each per-device directory are shown +in Table 1-10, below. + +Table 1-10: Files in /proc/fs/ext4/<devname> +.............................................................................. + File Content + mb_groups details of multiblock allocator buddy cache of free blocks + mb_history multiblock allocation history + stats controls whether the multiblock allocator should start + collecting statistics, which are shown during the unmount + group_prealloc the multiblock allocator will round up allocation + requests to a multiple of this tuning parameter if the + stripe size is not set in the ext4 superblock + max_to_scan The maximum number of extents the multiblock allocator + will search to find the best extent + min_to_scan The minimum number of extents the multiblock allocator + will search to find the best extent + order2_req Tuning parameter which controls the minimum size for + requests (as a power of 2) where the buddy cache is + used + stream_req Files which have fewer blocks than this tunable + parameter will have their blocks allocated out of a + block group specific preallocation pool, so that small + files are packed closely together. Each large file + will have its blocks allocated out of its own unique + preallocation pool. +inode_readahead Tuning parameter which controls the maximum number of + inode table blocks that ext4's inode table readahead + algorithm will pre-read into the buffer cache +.............................................................................. + ------------------------------------------------------------------------------ Summary diff --git a/MAINTAINERS b/MAINTAINERS index 68781ed2b73..587f418ed00 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1659,9 +1659,10 @@ L: linux-ext4@vger.kernel.org S: Maintained EXT4 FILE SYSTEM -P: Stephen Tweedie, Andrew Morton -M: sct@redhat.com, akpm@linux-foundation.org, adilger@sun.com +P: Theodore Ts'o +M: tytso@mit.edu, adilger@sun.com L: linux-ext4@vger.kernel.org +W: http://ext4.wiki.kernel.org S: Maintained F71805F HARDWARE MONITORING DRIVER diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 8dd3336efd7..3c578ef78c4 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -369,7 +369,6 @@ static int __init acpi_rtc_init(void) DBG("RTC unavailable?\n"); return 0; } -/* do this between RTC subsys_initcall() and rtc_cmos driver_initcall() */ -fs_initcall(acpi_rtc_init); +module_init(acpi_rtc_init); #endif diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 59fe051957e..5d312dc9be9 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -503,7 +503,7 @@ int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg) scsi_cmd[0] = ATA_16; scsi_cmd[4] = args[2]; - if (args[0] == WIN_SMART) { /* hack -- ide driver does this too... */ + if (args[0] == ATA_CMD_SMART) { /* hack -- ide driver does this too */ scsi_cmd[6] = args[3]; scsi_cmd[8] = args[1]; scsi_cmd[10] = 0x4f; diff --git a/drivers/block/hd.c b/drivers/block/hd.c index 682243bf2e4..482c0c4b964 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c @@ -39,6 +39,7 @@ #include <linux/ioport.h> #include <linux/init.h> #include <linux/blkpg.h> +#include <linux/ata.h> #include <linux/hdreg.h> #define REALLY_SLOW_IO @@ -370,7 +371,7 @@ repeat: struct hd_i_struct *disk = &hd_info[i]; disk->special_op = disk->recalibrate = 1; hd_out(disk, disk->sect, disk->sect, disk->head-1, - disk->cyl, WIN_SPECIFY, &reset_hd); + disk->cyl, ATA_CMD_INIT_DEV_PARAMS, &reset_hd); if (reset) goto repeat; } else @@ -558,7 +559,7 @@ static int do_special_op(struct hd_i_struct *disk, struct request *req) { if (disk->recalibrate) { disk->recalibrate = 0; - hd_out(disk, disk->sect, 0, 0, 0, WIN_RESTORE, &recal_intr); + hd_out(disk, disk->sect, 0, 0, 0, ATA_CMD_RESTORE, &recal_intr); return reset; } if (disk->head > 16) { @@ -631,13 +632,13 @@ repeat: if (blk_fs_request(req)) { switch (rq_data_dir(req)) { case READ: - hd_out(disk, nsect, sec, head, cyl, WIN_READ, + hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ, &read_intr); if (reset) goto repeat; break; case WRITE: - hd_out(disk, nsect, sec, head, cyl, WIN_WRITE, + hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_WRITE, &write_intr); if (reset) goto repeat; diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 052879a6f85..b50b5dac95b 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -131,29 +131,6 @@ config BLK_DEV_IDEDISK If unsure, say Y. -config IDEDISK_MULTI_MODE - bool "Use multiple sector mode for Programmed Input/Output by default" - help - This setting is irrelevant for most IDE disks, with direct memory - access, to which multiple sector mode does not apply. Multiple sector - mode is a feature of most modern IDE hard drives, permitting the - transfer of multiple sectors per Programmed Input/Output interrupt, - rather than the usual one sector per interrupt. When this feature is - enabled, it can reduce operating system overhead for disk Programmed - Input/Output. On some systems, it also can increase the data - throughput of Programmed Input/Output. Some drives, however, seemed - to run slower with multiple sector mode enabled. Some drives claimed - to support multiple sector mode, but lost data at some settings. - Under rare circumstances, such failures could result in massive - filesystem corruption. - - If you get the following error, try to say Y here: - - hda: set_multmode: status=0x51 { DriveReady SeekComplete Error } - hda: set_multmode: error=0x04 { DriveStatusError } - - If in doubt, say N. - config BLK_DEV_IDECS tristate "PCMCIA IDE support" depends on PCMCIA diff --git a/drivers/ide/Makefile b/drivers/ide/Makefile index 64e0ecdc4ed..308b8a12f31 100644 --- a/drivers/ide/Makefile +++ b/drivers/ide/Makefile @@ -4,8 +4,8 @@ EXTRA_CFLAGS += -Idrivers/ide -ide-core-y += ide.o ide-io.o ide-iops.o ide-lib.o ide-probe.o ide-taskfile.o \ - ide-pio-blacklist.o +ide-core-y += ide.o ide-ioctls.o ide-io.o ide-iops.o ide-lib.o ide-probe.o \ + ide-taskfile.o ide-pio-blacklist.o # core IDE code ide-core-$(CONFIG_IDE_TIMINGS) += ide-timings.o @@ -37,11 +37,12 @@ obj-$(CONFIG_IDE_GENERIC) += ide-generic.o obj-$(CONFIG_BLK_DEV_IDEPNP) += ide-pnp.o ide-cd_mod-y += ide-cd.o ide-cd_ioctl.o ide-cd_verbose.o +ide-floppy_mod-y += ide-floppy.o ide-floppy_ioctl.o obj-$(CONFIG_BLK_DEV_IDEDISK) += ide-disk.o obj-$(CONFIG_BLK_DEV_IDECD) += ide-cd_mod.o +obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy_mod.o obj-$(CONFIG_BLK_DEV_IDETAPE) += ide-tape.o -obj-$(CONFIG_BLK_DEV_IDEFLOPPY) += ide-floppy.o ifeq ($(CONFIG_BLK_DEV_IDECS), y) ide-cs-core-y += legacy/ide-cs.o diff --git a/drivers/ide/arm/icside.c b/drivers/ide/arm/icside.c index df4af408395..70f5b164828 100644 --- a/drivers/ide/arm/icside.c +++ b/drivers/ide/arm/icside.c @@ -10,7 +10,6 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/errno.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/dma-mapping.h> #include <linux/device.h> @@ -265,8 +264,8 @@ static void icside_set_dma_mode(ide_drive_t *drive, const u8 xfer_mode) * If we're going to be doing MW_DMA_1 or MW_DMA_2, we should * take care to note the values in the ID... */ - if (use_dma_info && drive->id->eide_dma_time > cycle_time) - cycle_time = drive->id->eide_dma_time; + if (use_dma_info && drive->id[ATA_ID_EIDE_DMA_TIME] > cycle_time) + cycle_time = drive->id[ATA_ID_EIDE_DMA_TIME]; drive->drive_data = cycle_time; diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c index 4fd91dcf1dc..122ed3c072f 100644 --- a/drivers/ide/arm/palm_bk3710.c +++ b/drivers/ide/arm/palm_bk3710.c @@ -27,7 +27,6 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/ioport.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/delay.h> #include <linux/init.h> @@ -180,7 +179,7 @@ static void palm_bk3710_setpiomode(void __iomem *base, ide_drive_t *mate, val32 |= (t2i << (dev ? 8 : 0)); writel(val32, base + BK3710_DATRCVR); - if (mate && mate->present) { + if (mate) { u8 mode2 = ide_get_best_pio_mode(mate, 255, 4); if (mode2 < mode) @@ -213,7 +212,8 @@ static void palm_bk3710_set_dma_mode(ide_drive_t *drive, u8 xferspeed) palm_bk3710_setudmamode(base, is_slave, xferspeed - XFER_UDMA_0); } else { - palm_bk3710_setdmamode(base, is_slave, drive->id->eide_dma_min, + palm_bk3710_setdmamode(base, is_slave, + drive->id[ATA_ID_EIDE_DMA_MIN], xferspeed); } } @@ -229,7 +229,7 @@ static void palm_bk3710_set_pio_mode(ide_drive_t *drive, u8 pio) * Obtain the drive PIO data for tuning the Palm Chip registers */ cycle_time = ide_pio_cycle_time(drive, pio); - mate = ide_get_paired_drive(drive); + mate = ide_get_pair_dev(drive); palm_bk3710_setpiomode(base, mate, is_slave, cycle_time, pio); } diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index 6f704628c27..2427c380b3d 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -584,7 +584,7 @@ void ide_acpi_get_timing(ide_hwif_t *hwif) * This function executes the _STM ACPI method for the target channel. * * _STM requires Identify Drive data, which has to passed as an argument. - * Unfortunately hd_driveid is a mangled version which we can't readily + * Unfortunately drive->id is a mangled version which we can't readily * use; hence we'll get the information afresh. */ void ide_acpi_push_timing(ide_hwif_t *hwif) @@ -614,10 +614,10 @@ void ide_acpi_push_timing(ide_hwif_t *hwif) in_params[0].buffer.length = sizeof(struct GTM_buffer); in_params[0].buffer.pointer = (u8 *)&hwif->acpidata->gtm; in_params[1].type = ACPI_TYPE_BUFFER; - in_params[1].buffer.length = sizeof(struct hd_driveid); + in_params[1].buffer.length = sizeof(ATA_ID_WORDS * 2); in_params[1].buffer.pointer = (u8 *)&master->idbuff; in_params[2].type = ACPI_TYPE_BUFFER; - in_params[2].buffer.length = sizeof(struct hd_driveid); + in_params[2].buffer.length = sizeof(ATA_ID_WORDS * 2); in_params[2].buffer.pointer = (u8 *)&slave->idbuff; /* Output buffer: _STM has no output */ diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c index adf04f99cde..608c5bade92 100644 --- a/drivers/ide/ide-atapi.c +++ b/drivers/ide/ide-atapi.c @@ -14,12 +14,201 @@ #define debug_log(fmt, args...) do {} while (0) #endif +/* + * Check whether we can support a device, + * based on the ATAPI IDENTIFY command results. + */ +int ide_check_atapi_device(ide_drive_t *drive, const char *s) +{ + u16 *id = drive->id; + u8 gcw[2], protocol, device_type, removable, drq_type, packet_size; + + *((u16 *)&gcw) = id[ATA_ID_CONFIG]; + + protocol = (gcw[1] & 0xC0) >> 6; + device_type = gcw[1] & 0x1F; + removable = (gcw[0] & 0x80) >> 7; + drq_type = (gcw[0] & 0x60) >> 5; + packet_size = gcw[0] & 0x03; + +#ifdef CONFIG_PPC + /* kludge for Apple PowerBook internal zip */ + if (drive->media == ide_floppy && device_type == 5 && + !strstr((char *)&id[ATA_ID_PROD], "CD-ROM") && + strstr((char *)&id[ATA_ID_PROD], "ZIP")) + device_type = 0; +#endif + + if (protocol != 2) + printk(KERN_ERR "%s: %s: protocol (0x%02x) is not ATAPI\n", + s, drive->name, protocol); + else if ((drive->media == ide_floppy && device_type != 0) || + (drive->media == ide_tape && device_type != 1)) + printk(KERN_ERR "%s: %s: invalid device type (0x%02x)\n", + s, drive->name, device_type); + else if (removable == 0) + printk(KERN_ERR "%s: %s: the removable flag is not set\n", + s, drive->name); + else if (drive->media == ide_floppy && drq_type == 3) + printk(KERN_ERR "%s: %s: sorry, DRQ type (0x%02x) not " + "supported\n", s, drive->name, drq_type); + else if (packet_size != 0) + printk(KERN_ERR "%s: %s: packet size (0x%02x) is not 12 " + "bytes\n", s, drive->name, packet_size); + else + return 1; + return 0; +} +EXPORT_SYMBOL_GPL(ide_check_atapi_device); + +/* PIO data transfer routine using the scatter gather table. */ +int ide_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, + unsigned int bcount, int write) +{ + ide_hwif_t *hwif = drive->hwif; + const struct ide_tp_ops *tp_ops = hwif->tp_ops; + xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data; + struct scatterlist *sg = pc->sg; + char *buf; + int count, done = 0; + + while (bcount) { + count = min(sg->length - pc->b_count, bcount); + + if (PageHighMem(sg_page(sg))) { + unsigned long flags; + + local_irq_save(flags); + buf = kmap_atomic(sg_page(sg), KM_IRQ0) + sg->offset; + xf(drive, NULL, buf + pc->b_count, count); + kunmap_atomic(buf - sg->offset, KM_IRQ0); + local_irq_restore(flags); + } else { + buf = sg_virt(sg); + xf(drive, NULL, buf + pc->b_count, count); + } + + bcount -= count; + pc->b_count += count; + done += count; + + if (pc->b_count == sg->length) { + if (!--pc->sg_cnt) + break; + pc->sg = sg = sg_next(sg); + pc->b_count = 0; + } + } + + if (bcount) { + printk(KERN_ERR "%s: %d leftover bytes, %s\n", drive->name, + bcount, write ? "padding with zeros" + : "discarding data"); + ide_pad_transfer(drive, write, bcount); + } + + return done; +} +EXPORT_SYMBOL_GPL(ide_io_buffers); + +void ide_init_pc(struct ide_atapi_pc *pc) +{ + memset(pc, 0, sizeof(*pc)); + pc->buf = pc->pc_buf; + pc->buf_size = IDE_PC_BUFFER_SIZE; +} +EXPORT_SYMBOL_GPL(ide_init_pc); + +/* + * Generate a new packet command request in front of the request queue, before + * the current request, so that it will be processed immediately, on the next + * pass through the driver. + */ +void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk, + struct ide_atapi_pc *pc, struct request *rq) +{ + blk_rq_init(NULL, rq); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_flags |= REQ_PREEMPT; + rq->buffer = (char *)pc; + rq->rq_disk = disk; + memcpy(rq->cmd, pc->c, 12); + if (drive->media == ide_tape) + rq->cmd[13] = REQ_IDETAPE_PC1; + ide_do_drive_cmd(drive, rq); +} +EXPORT_SYMBOL_GPL(ide_queue_pc_head); + +/* + * Add a special packet command request to the tail of the request queue, + * and wait for it to be serviced. + */ +int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk, + struct ide_atapi_pc *pc) +{ + struct request *rq; + int error; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->buffer = (char *)pc; + memcpy(rq->cmd, pc->c, 12); + if (drive->media == ide_tape) + rq->cmd[13] = REQ_IDETAPE_PC1; + error = blk_execute_rq(drive->queue, disk, rq, 0); + blk_put_request(rq); + + return error; +} +EXPORT_SYMBOL_GPL(ide_queue_pc_tail); + +int ide_do_test_unit_ready(ide_drive_t *drive, struct gendisk *disk) +{ + struct ide_atapi_pc pc; + + ide_init_pc(&pc); + pc.c[0] = TEST_UNIT_READY; + + return ide_queue_pc_tail(drive, disk, &pc); +} +EXPORT_SYMBOL_GPL(ide_do_test_unit_ready); + +int ide_do_start_stop(ide_drive_t *drive, struct gendisk *disk, int start) +{ + struct ide_atapi_pc pc; + + ide_init_pc(&pc); + pc.c[0] = START_STOP; + pc.c[4] = start; + + if (drive->media == ide_tape) + pc.flags |= PC_FLAG_WAIT_FOR_DSC; + + return ide_queue_pc_tail(drive, disk, &pc); +} +EXPORT_SYMBOL_GPL(ide_do_start_stop); + +int ide_set_media_lock(ide_drive_t *drive, struct gendisk *disk, int on) +{ + struct ide_atapi_pc pc; + + if (drive->atapi_flags & IDE_AFLAG_NO_DOORLOCK) + return 0; + + ide_init_pc(&pc); + pc.c[0] = ALLOW_MEDIUM_REMOVAL; + pc.c[4] = on; + + return ide_queue_pc_tail(drive, disk, &pc); +} +EXPORT_SYMBOL_GPL(ide_set_media_lock); + /* TODO: unify the code thus making some arguments go away */ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), - void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int)) + int (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned, int)) { ide_hwif_t *hwif = drive->hwif; struct request *rq = hwif->hwgroup->rq; @@ -41,7 +230,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, if (pc->flags & PC_FLAG_DMA_IN_PROGRESS) { if (hwif->dma_ops->dma_end(drive) || - (drive->media == ide_tape && !scsi && (stat & ERR_STAT))) { + (drive->media == ide_tape && !scsi && (stat & ATA_ERR))) { if (drive->media == ide_floppy && !scsi) printk(KERN_ERR "%s: DMA %s error\n", drive->name, rq_data_dir(pc->rq) @@ -56,7 +245,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, } /* No more interrupts */ - if ((stat & DRQ_STAT) == 0) { + if ((stat & ATA_DRQ) == 0) { debug_log("Packet command completed, %d bytes transferred\n", pc->xferred); @@ -65,10 +254,10 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, local_irq_enable_in_hardirq(); if (drive->media == ide_tape && !scsi && - (stat & ERR_STAT) && rq->cmd[0] == REQUEST_SENSE) - stat &= ~ERR_STAT; + (stat & ATA_ERR) && rq->cmd[0] == REQUEST_SENSE) + stat &= ~ATA_ERR; - if ((stat & ERR_STAT) || (pc->flags & PC_FLAG_DMA_ERROR)) { + if ((stat & ATA_ERR) || (pc->flags & PC_FLAG_DMA_ERROR)) { /* Error detected */ debug_log("%s: I/O error\n", drive->name); @@ -95,7 +284,7 @@ ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, cmd_finished: pc->error = 0; if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && - (stat & SEEK_STAT) == 0) { + (stat & ATA_DSC) == 0) { dsc_handle(drive); return ide_stopped; } @@ -117,17 +306,18 @@ cmd_finished: /* Get the number of bytes to transfer on this interrupt. */ ide_read_bcount_and_ireason(drive, &bcount, &ireason); - if (ireason & CD) { + if (ireason & ATAPI_COD) { printk(KERN_ERR "%s: CoD != 0 in %s\n", drive->name, __func__); return ide_do_reset(drive); } - if (((ireason & IO) == IO) == !!(pc->flags & PC_FLAG_WRITING)) { + if (((ireason & ATAPI_IO) == ATAPI_IO) == + !!(pc->flags & PC_FLAG_WRITING)) { /* Hopefully, we will never get here */ printk(KERN_ERR "%s: We wanted to %s, but the device wants us " "to %s!\n", drive->name, - (ireason & IO) ? "Write" : "Read", - (ireason & IO) ? "Read" : "Write"); + (ireason & ATAPI_IO) ? "Write" : "Read", + (ireason & ATAPI_IO) ? "Read" : "Write"); return ide_do_reset(drive); } @@ -171,9 +361,14 @@ cmd_finished: if ((drive->media == ide_floppy && !scsi && !pc->buf) || (drive->media == ide_tape && !scsi && pc->bh) || - (scsi && pc->sg)) - io_buffers(drive, pc, bcount, !!(pc->flags & PC_FLAG_WRITING)); - else + (scsi && pc->sg)) { + int done = io_buffers(drive, pc, bcount, + !!(pc->flags & PC_FLAG_WRITING)); + + /* FIXME: don't do partial completions */ + if (drive->media == ide_floppy && !scsi) + ide_end_request(drive, 1, done >> 9); + } else xferfunc(drive, NULL, pc->cur_pos, bcount); /* Update the current position */ @@ -205,7 +400,8 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason) { int retries = 100; - while (retries-- && ((ireason & CD) == 0 || (ireason & IO))) { + while (retries-- && ((ireason & ATAPI_COD) == 0 || + (ireason & ATAPI_IO))) { printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " "a packet command, retrying\n", drive->name); udelay(100); @@ -214,8 +410,8 @@ static u8 ide_wait_ireason(ide_drive_t *drive, u8 ireason) printk(KERN_ERR "%s: (IO,CoD != (0,1) while issuing " "a packet command, ignoring\n", drive->name); - ireason |= CD; - ireason &= ~IO; + ireason |= ATAPI_COD; + ireason &= ~ATAPI_IO; } } @@ -231,7 +427,7 @@ ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_startstop_t startstop; u8 ireason; - if (ide_wait_stat(&startstop, drive, DRQ_STAT, BUSY_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, ATA_DRQ, ATA_BUSY, WAIT_READY)) { printk(KERN_ERR "%s: Strange, packet command initiated yet " "DRQ isn't asserted\n", drive->name); return startstop; @@ -241,7 +437,7 @@ ide_startstop_t ide_transfer_pc(ide_drive_t *drive, struct ide_atapi_pc *pc, if (drive->media == ide_tape && !drive->scsi) ireason = ide_wait_ireason(drive, ireason); - if ((ireason & CD) == 0 || (ireason & IO)) { + if ((ireason & ATAPI_COD) == 0 || (ireason & ATAPI_IO)) { printk(KERN_ERR "%s: (IO,CoD) != (0,1) while issuing " "a packet command\n", drive->name); return ide_do_reset(drive); @@ -303,7 +499,7 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_atapi_pc *pc, /* Issue the packet command */ if (drive->atapi_flags & IDE_AFLAG_DRQ_INTERRUPT) { - ide_execute_command(drive, WIN_PACKETCMD, handler, + ide_execute_command(drive, ATA_CMD_PACKET, handler, timeout, NULL); return ide_started; } else { diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 03c2cb6a58b..465a92ca017 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -436,7 +436,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) ide_dump_status_no_sense(drive, "media error (blank)", stat); do_end_request = 1; - } else if ((err & ~ABRT_ERR) != 0) { + } else if ((err & ~ATA_ABORTED) != 0) { /* go to the default handler for other errors */ ide_error(drive, "cdrom_decode_status", stat); return 1; @@ -457,7 +457,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) * If we got a CHECK_CONDITION status, queue * a request sense command. */ - if (stat & ERR_STAT) + if (stat & ATA_ERR) cdrom_queue_request_sense(drive, NULL, NULL); } else { blk_dump_rq_flags(rq, "ide-cd: bad rq"); @@ -468,7 +468,7 @@ static int cdrom_decode_status(ide_drive_t *drive, int good_stat, int *stat_ret) return 1; end_request: - if (stat & ERR_STAT) { + if (stat & ATA_ERR) { unsigned long flags; spin_lock_irqsave(&ide_lock, flags); @@ -541,7 +541,7 @@ static ide_startstop_t cdrom_start_packet_command(ide_drive_t *drive, drive->waiting_for_dma = 0; /* packet command */ - ide_execute_command(drive, WIN_PACKETCMD, handler, + ide_execute_command(drive, ATA_CMD_PACKET, handler, ATAPI_WAIT_PC, cdrom_timer_expiry); return ide_started; } else { @@ -574,7 +574,7 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, */ /* check for errors */ - if (cdrom_decode_status(drive, DRQ_STAT, NULL)) + if (cdrom_decode_status(drive, ATA_DRQ, NULL)) return ide_stopped; /* ok, next interrupt will be DMA interrupt */ @@ -582,8 +582,8 @@ static ide_startstop_t cdrom_transfer_packet_command(ide_drive_t *drive, drive->waiting_for_dma = 1; } else { /* otherwise, we must wait for DRQ to get set */ - if (ide_wait_stat(&startstop, drive, DRQ_STAT, - BUSY_STAT, WAIT_READY)) + if (ide_wait_stat(&startstop, drive, ATA_DRQ, + ATA_BUSY, WAIT_READY)) return startstop; } @@ -938,7 +938,7 @@ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive) thislen = len; /* If DRQ is clear, the command has completed. */ - if ((stat & DRQ_STAT) == 0) { + if ((stat & ATA_DRQ) == 0) { if (blk_fs_request(rq)) { /* * If we're not done reading/writing, complain. @@ -1164,13 +1164,12 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) { struct request_queue *q = drive->queue; unsigned int alignment; - unsigned long addr; - unsigned long stack_mask = ~(THREAD_SIZE - 1); + char *buf; if (rq->bio) - addr = (unsigned long)bio_data(rq->bio); + buf = bio_data(rq->bio); else - addr = (unsigned long)rq->data; + buf = rq->data; info->dma = drive->using_dma; @@ -1181,11 +1180,8 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq) * separate masks. */ alignment = queue_dma_alignment(q) | q->dma_pad_mask; - if (addr & alignment || rq->data_len & alignment) - info->dma = 0; - - if (!((addr & stack_mask) ^ - ((unsigned long)current->stack & stack_mask))) + if ((unsigned long)buf & alignment || rq->data_len & alignment + || object_is_on_stack(buf)) info->dma = 0; } } @@ -1206,7 +1202,7 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq, unsigned long elapsed = jiffies - info->start_seek; int stat = hwif->tp_ops->read_status(hwif); - if ((stat & SEEK_STAT) != SEEK_STAT) { + if ((stat & ATA_DSC) != ATA_DSC) { if (elapsed < IDECD_SEEK_TIMEOUT) { ide_stall_queue(drive, IDECD_SEEK_TIMER); @@ -1813,13 +1809,12 @@ static ide_proc_entry_t idecd_proc[] = { { NULL, 0, NULL, NULL } }; -static void ide_cdrom_add_settings(ide_drive_t *drive) -{ - ide_add_setting(drive, "dsc_overlap", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, - &drive->dsc_overlap, NULL); -} -#else -static inline void ide_cdrom_add_settings(ide_drive_t *drive) { ; } +ide_devset_rw_field(dsc_overlap, dsc_overlap); + +static const struct ide_proc_devset idecd_settings[] = { + IDE_PROC_DEVSET(dsc_overlap, 0, 1), + { 0 }, +}; #endif static const struct cd_list_entry ide_cd_quirks_list[] = { @@ -1866,14 +1861,14 @@ static const struct cd_list_entry ide_cd_quirks_list[] = { { NULL, NULL, 0 } }; -static unsigned int ide_cd_flags(struct hd_driveid *id) +static unsigned int ide_cd_flags(u16 *id) { const struct cd_list_entry *cle = ide_cd_quirks_list; while (cle->id_model) { - if (strcmp(cle->id_model, id->model) == 0 && + if (strcmp(cle->id_model, (char *)&id[ATA_ID_PROD]) == 0 && (cle->id_firmware == NULL || - strstr(id->fw_rev, cle->id_firmware))) + strstr((char *)&id[ATA_ID_FW_REV], cle->id_firmware))) return cle->cd_flags; cle++; } @@ -1885,7 +1880,8 @@ static int ide_cdrom_setup(ide_drive_t *drive) { struct cdrom_info *cd = drive->driver_data; struct cdrom_device_info *cdi = &cd->devinfo; - struct hd_driveid *id = drive->id; + u16 *id = drive->id; + char *fw_rev = (char *)&id[ATA_ID_FW_REV]; int nslots; blk_queue_prep_rq(drive->queue, ide_cdrom_prep_fn); @@ -1900,15 +1896,15 @@ static int ide_cdrom_setup(ide_drive_t *drive) drive->atapi_flags = IDE_AFLAG_MEDIA_CHANGED | IDE_AFLAG_NO_EJECT | ide_cd_flags(id); - if ((id->config & 0x0060) == 0x20) + if ((id[ATA_ID_CONFIG] & 0x0060) == 0x20) drive->atapi_flags |= IDE_AFLAG_DRQ_INTERRUPT; if ((drive->atapi_flags & IDE_AFLAG_VERTOS_300_SSD) && - id->fw_rev[4] == '1' && id->fw_rev[6] <= '2') + fw_rev[4] == '1' && fw_rev[6] <= '2') drive->atapi_flags |= (IDE_AFLAG_TOCTRACKS_AS_BCD | IDE_AFLAG_TOCADDR_AS_BCD); else if ((drive->atapi_flags & IDE_AFLAG_VERTOS_600_ESD) && - id->fw_rev[4] == '1' && id->fw_rev[6] <= '2') + fw_rev[4] == '1' && fw_rev[6] <= '2') drive->atapi_flags |= IDE_AFLAG_TOCTRACKS_AS_BCD; else if (drive->atapi_flags & IDE_AFLAG_SANYO_3CD) /* 3 => use CD in slot 0 */ @@ -1927,7 +1923,8 @@ static int ide_cdrom_setup(ide_drive_t *drive) cd->devinfo.handle = NULL; return 1; } - ide_cdrom_add_settings(drive); + + ide_proc_register_driver(drive, cd->driver); return 0; } @@ -1972,12 +1969,12 @@ static ide_driver_t ide_cdrom_driver = { .remove = ide_cd_remove, .version = IDECD_VERSION, .media = ide_cdrom, - .supports_dsc_overlap = 1, .do_request = ide_cd_do_request, .end_request = ide_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idecd_proc, + .settings = idecd_settings, #endif }; @@ -2112,10 +2109,10 @@ static int ide_cd_probe(ide_drive_t *drive) if (!strstr("ide-cdrom", drive->driver_req)) goto failed; - if (!drive->present) - goto failed; + if (drive->media != ide_cdrom && drive->media != ide_optical) goto failed; + /* skip drives that we were told to ignore */ if (ignore != NULL) { if (strstr(ignore, drive->name)) { @@ -2137,8 +2134,6 @@ static int ide_cd_probe(ide_drive_t *drive) ide_init_disk(g, drive); - ide_proc_register_driver(drive, &ide_cdrom_driver); - kref_init(&info->kref); info->drive = drive; @@ -2153,7 +2148,6 @@ static int ide_cd_probe(ide_drive_t *drive) g->driverfs_dev = &drive->gendev; g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE; if (ide_cdrom_setup(drive)) { - ide_proc_unregister_driver(drive, &ide_cdrom_driver); ide_cd_release(&info->kref); goto failed; } diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 33ea8c04871..01846f244b4 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -30,10 +30,8 @@ #include <linux/delay.h> #include <linux/mutex.h> #include <linux/leds.h> - -#define _IDE_DISK - #include <linux/ide.h> +#include <linux/hdreg.h> #include <asm/byteorder.h> #include <asm/irq.h> @@ -90,68 +88,19 @@ static void ide_disk_put(struct ide_disk_obj *idkp) mutex_unlock(&idedisk_ref_mutex); } -/* - * lba_capacity_is_ok() performs a sanity check on the claimed "lba_capacity" - * value for this drive (from its reported identification information). - * - * Returns: 1 if lba_capacity looks sensible - * 0 otherwise - * - * It is called only once for each drive. - */ -static int lba_capacity_is_ok(struct hd_driveid *id) -{ - unsigned long lba_sects, chs_sects, head, tail; - - /* No non-LBA info .. so valid! */ - if (id->cyls == 0) - return 1; - - /* - * The ATA spec tells large drives to return - * C/H/S = 16383/16/63 independent of their size. - * Some drives can be jumpered to use 15 heads instead of 16. - * Some drives can be jumpered to use 4092 cyls instead of 16383. - */ - if ((id->cyls == 16383 - || (id->cyls == 4092 && id->cur_cyls == 16383)) && - id->sectors == 63 && - (id->heads == 15 || id->heads == 16) && - (id->lba_capacity >= 16383*63*id->heads)) - return 1; - - lba_sects = id->lba_capacity; - chs_sects = id->cyls * id->heads * id->sectors; - - /* perform a rough sanity check on lba_sects: within 10% is OK */ - if ((lba_sects - chs_sects) < chs_sects/10) - return 1; - - /* some drives have the word order reversed */ - head = ((lba_sects >> 16) & 0xffff); - tail = (lba_sects & 0xffff); - lba_sects = (head | (tail << 16)); - if ((lba_sects - chs_sects) < chs_sects/10) { - id->lba_capacity = lba_sects; - return 1; /* lba_capacity is (now) good */ - } - - return 0; /* lba_capacity value may be bad */ -} - static const u8 ide_rw_cmds[] = { - WIN_MULTREAD, - WIN_MULTWRITE, - WIN_MULTREAD_EXT, - WIN_MULTWRITE_EXT, - WIN_READ, - WIN_WRITE, - WIN_READ_EXT, - WIN_WRITE_EXT, - WIN_READDMA, - WIN_WRITEDMA, - WIN_READDMA_EXT, - WIN_WRITEDMA_EXT, + ATA_CMD_READ_MULTI, + ATA_CMD_WRITE_MULTI, + ATA_CMD_READ_MULTI_EXT, + ATA_CMD_WRITE_MULTI_EXT, + ATA_CMD_PIO_READ, + ATA_CMD_PIO_WRITE, + ATA_CMD_PIO_READ_EXT, + ATA_CMD_PIO_WRITE_EXT, + ATA_CMD_READ, + ATA_CMD_WRITE, + ATA_CMD_READ_EXT, + ATA_CMD_WRITE_EXT, }; static const u8 ide_data_phases[] = { @@ -322,9 +271,9 @@ static u64 idedisk_read_native_max_address(ide_drive_t *drive, int lba48) /* Create IDE/ATA command request structure */ memset(&args, 0, sizeof(ide_task_t)); if (lba48) - tf->command = WIN_READ_NATIVE_MAX_EXT; + tf->command = ATA_CMD_READ_NATIVE_MAX_EXT; else - tf->command = WIN_READ_NATIVE_MAX; + tf->command = ATA_CMD_READ_NATIVE_MAX; tf->device = ATA_LBA; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; if (lba48) @@ -359,10 +308,10 @@ static u64 idedisk_set_max_address(ide_drive_t *drive, u64 addr_req, int lba48) tf->hob_lbal = (addr_req >>= 8) & 0xff; tf->hob_lbam = (addr_req >>= 8) & 0xff; tf->hob_lbah = (addr_req >>= 8) & 0xff; - tf->command = WIN_SET_MAX_EXT; + tf->command = ATA_CMD_SET_MAX_EXT; } else { tf->device = (addr_req >>= 8) & 0x0f; - tf->command = WIN_SET_MAX; + tf->command = ATA_CMD_SET_MAX; } tf->device |= ATA_LBA; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; @@ -385,25 +334,6 @@ static unsigned long long sectors_to_MB(unsigned long long n) } /* - * Bits 10 of command_set_1 and cfs_enable_1 must be equal, - * so on non-buggy drives we need test only one. - * However, we should also check whether these fields are valid. - */ -static inline int idedisk_supports_hpa(const struct hd_driveid *id) -{ - return (id->command_set_1 & 0x0400) && (id->cfs_enable_1 & 0x0400); -} - -/* - * The same here. - */ -static inline int idedisk_supports_lba48(const struct hd_driveid *id) -{ - return (id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400) - && id->lba_capacity_2; -} - -/* * Some disks report total number of sectors instead of * maximum sector address. We list them here. */ @@ -417,7 +347,7 @@ static const struct drive_list_entry hpa_list[] = { static void idedisk_check_hpa(ide_drive_t *drive) { unsigned long long capacity, set_max; - int lba48 = idedisk_supports_lba48(drive->id); + int lba48 = ata_id_lba48_enabled(drive->id); capacity = drive->capacity64; @@ -453,23 +383,23 @@ static void idedisk_check_hpa(ide_drive_t *drive) static void init_idedisk_capacity(ide_drive_t *drive) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; /* * If this drive supports the Host Protected Area feature set, * then we may need to change our opinion about the drive's capacity. */ - int hpa = idedisk_supports_hpa(id); + int hpa = ata_id_hpa_enabled(id); - if (idedisk_supports_lba48(id)) { + if (ata_id_lba48_enabled(id)) { /* drive speaks 48-bit LBA */ drive->select.b.lba = 1; - drive->capacity64 = id->lba_capacity_2; + drive->capacity64 = ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); if (hpa) idedisk_check_hpa(drive); - } else if ((id->capability & 2) && lba_capacity_is_ok(id)) { + } else if (ata_id_has_lba(id) && ata_id_is_lba_capacity_ok(id)) { /* drive speaks 28-bit LBA */ drive->select.b.lba = 1; - drive->capacity64 = id->lba_capacity; + drive->capacity64 = ata_id_u32(id, ATA_ID_LBA_CAPACITY); if (hpa) idedisk_check_hpa(drive); } else { @@ -480,7 +410,7 @@ static void init_idedisk_capacity(ide_drive_t *drive) static sector_t idedisk_capacity(ide_drive_t *drive) { - return drive->capacity64 - drive->sect0; + return drive->capacity64; } #ifdef CONFIG_IDE_PROC_FS @@ -490,10 +420,10 @@ static int smart_enable(ide_drive_t *drive) struct ide_taskfile *tf = &args.tf; memset(&args, 0, sizeof(ide_task_t)); - tf->feature = SMART_ENABLE; - tf->lbam = SMART_LCYL_PASS; - tf->lbah = SMART_HCYL_PASS; - tf->command = WIN_SMART; + tf->feature = ATA_SMART_ENABLE; + tf->lbam = ATA_SMART_LBAM_PASS; + tf->lbah = ATA_SMART_LBAH_PASS; + tf->command = ATA_CMD_SMART; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; return ide_no_data_taskfile(drive, &args); } @@ -506,9 +436,9 @@ static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd) memset(&args, 0, sizeof(ide_task_t)); tf->feature = sub_cmd; tf->nsect = 0x01; - tf->lbam = SMART_LCYL_PASS; - tf->lbah = SMART_HCYL_PASS; - tf->command = WIN_SMART; + tf->lbam = ATA_SMART_LBAM_PASS; + tf->lbah = ATA_SMART_LBAH_PASS; + tf->command = ATA_CMD_SMART; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; args.data_phase = TASKFILE_IN; (void) smart_enable(drive); @@ -523,7 +453,7 @@ static int proc_idedisk_read_cache int len; if (drive->id_read) - len = sprintf(out, "%i\n", drive->id->buf_size / 2); + len = sprintf(out, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); else len = sprintf(out, "(none)\n"); @@ -549,13 +479,14 @@ static int proc_idedisk_read_smart(char *page, char **start, off_t off, if (get_smart_data(drive, page, sub_cmd) == 0) { unsigned short *val = (unsigned short *) page; - char *out = ((char *)val) + (SECTOR_WORDS * 4); + char *out = (char *)val + SECTOR_SIZE; + page = out; do { out += sprintf(out, "%04x%c", le16_to_cpu(*val), (++i & 7) ? ' ' : '\n'); val += 1; - } while (i < (SECTOR_WORDS * 2)); + } while (i < SECTOR_SIZE / 2); len = out - page; } @@ -566,14 +497,14 @@ static int proc_idedisk_read_sv (char *page, char **start, off_t off, int count, int *eof, void *data) { return proc_idedisk_read_smart(page, start, off, count, eof, data, - SMART_READ_VALUES); + ATA_SMART_READ_VALUES); } static int proc_idedisk_read_st (char *page, char **start, off_t off, int count, int *eof, void *data) { return proc_idedisk_read_smart(page, start, off, count, eof, data, - SMART_READ_THRESHOLDS); + ATA_SMART_READ_THRESHOLDS); } static ide_proc_entry_t idedisk_proc[] = { @@ -595,11 +526,11 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) BUG_ON(task == NULL); memset(task, 0, sizeof(*task)); - if (ide_id_has_flush_cache_ext(drive->id) && + if (ata_id_flush_ext_enabled(drive->id) && (drive->capacity64 >= (1UL << 28))) - task->tf.command = WIN_FLUSH_CACHE_EXT; + task->tf.command = ATA_CMD_FLUSH_EXT; else - task->tf.command = WIN_FLUSH_CACHE; + task->tf.command = ATA_CMD_FLUSH; task->tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE | IDE_TFLAG_DYN; task->data_phase = TASKFILE_NO_DATA; @@ -609,6 +540,8 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) rq->special = task; } +ide_devset_get(multcount, mult_count); + /* * This is tightly woven into the driver->do_special can not touch. * DON'T do it again until a total personality rewrite is committed. @@ -618,7 +551,7 @@ static int set_multcount(ide_drive_t *drive, int arg) struct request *rq; int error; - if (arg < 0 || arg > drive->id->max_multsect) + if (arg < 0 || arg > (drive->id[ATA_ID_MAX_MULTSECT] & 0xff)) return -EINVAL; if (drive->special.b.set_multmode) @@ -635,22 +568,21 @@ static int set_multcount(ide_drive_t *drive, int arg) return (drive->mult_count == arg) ? 0 : -EIO; } +ide_devset_get(nowerr, nowerr); + static int set_nowerr(ide_drive_t *drive, int arg) { if (arg < 0 || arg > 1) return -EINVAL; - if (ide_spin_wait_hwgroup(drive)) - return -EBUSY; drive->nowerr = arg; drive->bad_wstat = arg ? BAD_R_STAT : BAD_W_STAT; - spin_unlock_irq(&ide_lock); return 0; } static void update_ordered(ide_drive_t *drive) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; unsigned ordered = QUEUE_ORDERED_NONE; prepare_flush_fn *prep_fn = NULL; @@ -666,9 +598,9 @@ static void update_ordered(ide_drive_t *drive) * not available so we don't need to recheck that. */ capacity = idedisk_capacity(drive); - barrier = ide_id_has_flush_cache(id) && !drive->noflush && + barrier = ata_id_flush_enabled(id) && !drive->noflush && (drive->addressing == 0 || capacity <= (1ULL << 28) || - ide_id_has_flush_cache_ext(id)); + ata_id_flush_ext_enabled(id)); printk(KERN_INFO "%s: cache flushes %ssupported\n", drive->name, barrier ? "" : "not "); @@ -683,7 +615,9 @@ static void update_ordered(ide_drive_t *drive) blk_queue_ordered(drive->queue, ordered, prep_fn); } -static int write_cache(ide_drive_t *drive, int arg) +ide_devset_get(wcache, wcache); + +static int set_wcache(ide_drive_t *drive, int arg) { ide_task_t args; int err = 1; @@ -691,11 +625,11 @@ static int write_cache(ide_drive_t *drive, int arg) if (arg < 0 || arg > 1) return -EINVAL; - if (ide_id_has_flush_cache(drive->id)) { + if (ata_id_flush_enabled(drive->id)) { memset(&args, 0, sizeof(ide_task_t)); args.tf.feature = arg ? - SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE; - args.tf.command = WIN_SETFEATURES; + SETFEATURES_WC_ON : SETFEATURES_WC_OFF; + args.tf.command = ATA_CMD_SET_FEATURES; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; err = ide_no_data_taskfile(drive, &args); if (err == 0) @@ -712,14 +646,16 @@ static int do_idedisk_flushcache(ide_drive_t *drive) ide_task_t args; memset(&args, 0, sizeof(ide_task_t)); - if (ide_id_has_flush_cache_ext(drive->id)) - args.tf.command = WIN_FLUSH_CACHE_EXT; + if (ata_id_flush_ext_enabled(drive->id)) + args.tf.command = ATA_CMD_FLUSH_EXT; else - args.tf.command = WIN_FLUSH_CACHE; + args.tf.command = ATA_CMD_FLUSH; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; return ide_no_data_taskfile(drive, &args); } +ide_devset_get(acoustic, acoustic); + static int set_acoustic(ide_drive_t *drive, int arg) { ide_task_t args; @@ -728,22 +664,24 @@ static int set_acoustic(ide_drive_t *drive, int arg) return -EINVAL; memset(&args, 0, sizeof(ide_task_t)); - args.tf.feature = arg ? SETFEATURES_EN_AAM : SETFEATURES_DIS_AAM; + args.tf.feature = arg ? SETFEATURES_AAM_ON : SETFEATURES_AAM_OFF; args.tf.nsect = arg; - args.tf.command = WIN_SETFEATURES; + args.tf.command = ATA_CMD_SET_FEATURES; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; ide_no_data_taskfile(drive, &args); drive->acoustic = arg; return 0; } +ide_devset_get(addressing, addressing); + /* * drive->addressing: * 0: 28-bit * 1: 48-bit * 2: 48-bit capable doing 28-bit */ -static int set_lba_addressing(ide_drive_t *drive, int arg) +static int set_addressing(ide_drive_t *drive, int arg) { if (arg < 0 || arg > 2) return -EINVAL; @@ -753,52 +691,54 @@ static int set_lba_addressing(ide_drive_t *drive, int arg) if (drive->hwif->host_flags & IDE_HFLAG_NO_LBA48) return 0; - if (!idedisk_supports_lba48(drive->id)) + if (ata_id_lba48_enabled(drive->id) == 0) return -EIO; + drive->addressing = arg; + return 0; } +ide_devset_rw(acoustic, acoustic); +ide_devset_rw(address, addressing); +ide_devset_rw(multcount, multcount); +ide_devset_rw(wcache, wcache); + +ide_devset_rw_sync(nowerr, nowerr); + #ifdef CONFIG_IDE_PROC_FS -static void idedisk_add_settings(ide_drive_t *drive) -{ - struct hd_driveid *id = drive->id; - - ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 65535, 1, 1, - &drive->bios_cyl, NULL); - ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, - &drive->bios_head, NULL); - ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, - &drive->bios_sect, NULL); - ide_add_setting(drive, "address", SETTING_RW, TYPE_BYTE, 0, 2, 1, 1, - &drive->addressing, set_lba_addressing); - ide_add_setting(drive, "multcount", SETTING_RW, TYPE_BYTE, 0, - id->max_multsect, 1, 1, &drive->mult_count, - set_multcount); - ide_add_setting(drive, "nowerr", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, - &drive->nowerr, set_nowerr); - ide_add_setting(drive, "lun", SETTING_RW, TYPE_INT, 0, 7, 1, 1, - &drive->lun, NULL); - ide_add_setting(drive, "wcache", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, - &drive->wcache, write_cache); - ide_add_setting(drive, "acoustic", SETTING_RW, TYPE_BYTE, 0, 254, 1, 1, - &drive->acoustic, set_acoustic); - ide_add_setting(drive, "failures", SETTING_RW, TYPE_INT, 0, 65535, 1, 1, - &drive->failures, NULL); - ide_add_setting(drive, "max_failures", SETTING_RW, TYPE_INT, 0, 65535, - 1, 1, &drive->max_failures, NULL); -} -#else -static inline void idedisk_add_settings(ide_drive_t *drive) { ; } +ide_devset_rw_field(bios_cyl, bios_cyl); +ide_devset_rw_field(bios_head, bios_head); +ide_devset_rw_field(bios_sect, bios_sect); +ide_devset_rw_field(failures, failures); +ide_devset_rw_field(lun, lun); +ide_devset_rw_field(max_failures, max_failures); + +static const struct ide_proc_devset idedisk_settings[] = { + IDE_PROC_DEVSET(acoustic, 0, 254), + IDE_PROC_DEVSET(address, 0, 2), + IDE_PROC_DEVSET(bios_cyl, 0, 65535), + IDE_PROC_DEVSET(bios_head, 0, 255), + IDE_PROC_DEVSET(bios_sect, 0, 63), + IDE_PROC_DEVSET(failures, 0, 65535), + IDE_PROC_DEVSET(lun, 0, 7), + IDE_PROC_DEVSET(max_failures, 0, 65535), + IDE_PROC_DEVSET(multcount, 0, 16), + IDE_PROC_DEVSET(nowerr, 0, 1), + IDE_PROC_DEVSET(wcache, 0, 1), + { 0 }, +}; #endif static void idedisk_setup(ide_drive_t *drive) { + struct ide_disk_obj *idkp = drive->driver_data; ide_hwif_t *hwif = drive->hwif; - struct hd_driveid *id = drive->id; + u16 *id = drive->id; + char *m = (char *)&id[ATA_ID_PROD]; unsigned long long capacity; - idedisk_add_settings(drive); + ide_proc_register_driver(drive, idkp->driver); if (drive->id_read == 0) return; @@ -807,11 +747,11 @@ static void idedisk_setup(ide_drive_t *drive) /* * Removable disks (eg. SYQUEST); ignore 'WD' drives */ - if (id->model[0] != 'W' || id->model[1] != 'D') + if (m[0] != 'W' || m[1] != 'D') drive->doorlocking = 1; } - (void)set_lba_addressing(drive, 1); + (void)set_addressing(drive, 1); if (drive->addressing == 1) { int max_s = 2048; @@ -853,8 +793,7 @@ static void idedisk_setup(ide_drive_t *drive) capacity = idedisk_capacity(drive); if (!drive->forced_geom) { - - if (idedisk_supports_lba48(drive->id)) { + if (ata_id_lba48_enabled(drive->id)) { /* compatibility */ drive->bios_sect = 63; drive->bios_head = 255; @@ -880,22 +819,22 @@ static void idedisk_setup(ide_drive_t *drive) drive->name, capacity, sectors_to_MB(capacity)); /* Only print cache size when it was specified */ - if (id->buf_size) - printk(KERN_CONT " w/%dKiB Cache", id->buf_size / 2); + if (id[ATA_ID_BUF_SIZE]) + printk(KERN_CONT " w/%dKiB Cache", id[ATA_ID_BUF_SIZE] / 2); printk(KERN_CONT ", CHS=%d/%d/%d\n", drive->bios_cyl, drive->bios_head, drive->bios_sect); /* write cache enabled? */ - if ((id->csfo & 1) || (id->cfs_enable_1 & (1 << 5))) + if ((id[ATA_ID_CSFO] & 1) || ata_id_wcache_enabled(id)) drive->wcache = 1; - write_cache(drive, 1); + set_wcache(drive, 1); } static void ide_cacheflush_p(ide_drive_t *drive) { - if (!drive->wcache || !ide_id_has_flush_cache(drive->id)) + if (!drive->wcache || ata_id_flush_enabled(drive->id) == 0) return; if (do_idedisk_flushcache(drive)) @@ -937,7 +876,7 @@ static int ide_disk_probe(ide_drive_t *drive); */ static void ide_disk_resume(ide_drive_t *drive) { - if (idedisk_supports_hpa(drive->id)) + if (ata_id_hpa_enabled(drive->id)) init_idedisk_capacity(drive); } @@ -980,12 +919,12 @@ static ide_driver_t idedisk_driver = { .shutdown = ide_device_shutdown, .version = IDEDISK_VERSION, .media = ide_disk, - .supports_dsc_overlap = 0, .do_request = ide_do_rw_disk, .end_request = ide_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idedisk_proc, + .settings = idedisk_settings, #endif }; @@ -994,7 +933,7 @@ static int idedisk_set_doorlock(ide_drive_t *drive, int on) ide_task_t task; memset(&task, 0, sizeof(task)); - task.tf.command = on ? WIN_DOORLOCK : WIN_DOORUNLOCK; + task.tf.command = on ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK; task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; return ide_no_data_taskfile(drive, &task); @@ -1059,52 +998,28 @@ static int idedisk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } +static const struct ide_ioctl_devset ide_disk_ioctl_settings[] = { +{ HDIO_GET_ADDRESS, HDIO_SET_ADDRESS, &ide_devset_address }, +{ HDIO_GET_MULTCOUNT, HDIO_SET_MULTCOUNT, &ide_devset_multcount }, +{ HDIO_GET_NOWERR, HDIO_SET_NOWERR, &ide_devset_nowerr }, +{ HDIO_GET_WCACHE, HDIO_SET_WCACHE, &ide_devset_wcache }, +{ HDIO_GET_ACOUSTIC, HDIO_SET_ACOUSTIC, &ide_devset_acoustic }, +{ 0 } +}; + static int idedisk_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - unsigned long flags; struct block_device *bdev = inode->i_bdev; struct ide_disk_obj *idkp = ide_disk_g(bdev->bd_disk); ide_drive_t *drive = idkp->drive; - int err, (*setfunc)(ide_drive_t *, int); - u8 *val; - - switch (cmd) { - case HDIO_GET_ADDRESS: val = &drive->addressing; goto read_val; - case HDIO_GET_MULTCOUNT: val = &drive->mult_count; goto read_val; - case HDIO_GET_NOWERR: val = &drive->nowerr; goto read_val; - case HDIO_GET_WCACHE: val = &drive->wcache; goto read_val; - case HDIO_GET_ACOUSTIC: val = &drive->acoustic; goto read_val; - case HDIO_SET_ADDRESS: setfunc = set_lba_addressing; goto set_val; - case HDIO_SET_MULTCOUNT: setfunc = set_multcount; goto set_val; - case HDIO_SET_NOWERR: setfunc = set_nowerr; goto set_val; - case HDIO_SET_WCACHE: setfunc = write_cache; goto set_val; - case HDIO_SET_ACOUSTIC: setfunc = set_acoustic; goto set_val; - } + int err; - return generic_ide_ioctl(drive, file, bdev, cmd, arg); + err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings); + if (err != -EOPNOTSUPP) + return err; -read_val: - mutex_lock(&ide_setting_mtx); - spin_lock_irqsave(&ide_lock, flags); - err = *val; - spin_unlock_irqrestore(&ide_lock, flags); - mutex_unlock(&ide_setting_mtx); - return err >= 0 ? put_user(err, (long __user *)arg) : err; - -set_val: - if (bdev != bdev->bd_contains) - err = -EINVAL; - else { - if (!capable(CAP_SYS_ADMIN)) - err = -EACCES; - else { - mutex_lock(&ide_setting_mtx); - err = setfunc(drive, arg); - mutex_unlock(&ide_setting_mtx); - } - } - return err; + return generic_ide_ioctl(drive, file, bdev, cmd, arg); } static int idedisk_media_changed(struct gendisk *disk) @@ -1148,8 +1063,7 @@ static int ide_disk_probe(ide_drive_t *drive) /* strstr("foo", "") is non-NULL */ if (!strstr("ide-disk", drive->driver_req)) goto failed; - if (!drive->present) - goto failed; + if (drive->media != ide_disk) goto failed; @@ -1163,8 +1077,6 @@ static int ide_disk_probe(ide_drive_t *drive) ide_init_disk(g, drive); - ide_proc_register_driver(drive, &idedisk_driver); - kref_init(&idkp->kref); idkp->drive = drive; diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 3fa07c0aeaa..ef2f1504c0d 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -106,7 +106,7 @@ ide_startstop_t ide_dma_intr (ide_drive_t *drive) dma_stat = hwif->dma_ops->dma_end(drive); stat = hwif->tp_ops->read_status(hwif); - if (OK_STAT(stat,DRIVE_READY,drive->bad_wstat|DRQ_STAT)) { + if (OK_STAT(stat, DRIVE_READY, drive->bad_wstat | ATA_DRQ)) { if (!dma_stat) { struct request *rq = HWGROUP(drive)->rq; @@ -288,7 +288,7 @@ EXPORT_SYMBOL_GPL(ide_destroy_dmatable); static int config_drive_for_dma (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - struct hd_driveid *id = drive->id; + u16 *id = drive->id; if (drive->media != ide_disk) { if (hwif->host_flags & IDE_HFLAG_NO_ATAPI_DMA) @@ -299,16 +299,17 @@ static int config_drive_for_dma (ide_drive_t *drive) * Enable DMA on any drive that has * UltraDMA (mode 0/1/2/3/4/5/6) enabled */ - if ((id->field_valid & 4) && ((id->dma_ultra >> 8) & 0x7f)) + if ((id[ATA_ID_FIELD_VALID] & 4) && + ((id[ATA_ID_UDMA_MODES] >> 8) & 0x7f)) return 1; /* * Enable DMA on any drive that has mode2 DMA * (multi or single) enabled */ - if (id->field_valid & 2) /* regular DMA */ - if ((id->dma_mword & 0x404) == 0x404 || - (id->dma_1word & 0x404) == 0x404) + if (id[ATA_ID_FIELD_VALID] & 2) /* regular DMA */ + if ((id[ATA_ID_MWDMA_MODES] & 0x404) == 0x404 || + (id[ATA_ID_SWDMA_MODES] & 0x404) == 0x404) return 1; /* Consult the list of known "good" drives */ @@ -591,12 +592,12 @@ static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } int __ide_dma_bad_drive (ide_drive_t *drive) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; int blacklist = ide_in_drive_list(id, drive_blacklist); if (blacklist) { printk(KERN_WARNING "%s: Disabling (U)DMA for %s (blacklisted)\n", - drive->name, id->model); + drive->name, (char *)&id[ATA_ID_PROD]); return blacklist; } return 0; @@ -612,21 +613,21 @@ static const u8 xfer_mode_bases[] = { static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; ide_hwif_t *hwif = drive->hwif; const struct ide_port_ops *port_ops = hwif->port_ops; unsigned int mask = 0; switch(base) { case XFER_UDMA_0: - if ((id->field_valid & 4) == 0) + if ((id[ATA_ID_FIELD_VALID] & 4) == 0) break; if (port_ops && port_ops->udma_filter) mask = port_ops->udma_filter(drive); else mask = hwif->ultra_mask; - mask &= id->dma_ultra; + mask &= id[ATA_ID_UDMA_MODES]; /* * avoid false cable warning from eighty_ninty_three() @@ -637,19 +638,19 @@ static unsigned int ide_get_mode_mask(ide_drive_t *drive, u8 base, u8 req_mode) } break; case XFER_MW_DMA_0: - if ((id->field_valid & 2) == 0) + if ((id[ATA_ID_FIELD_VALID] & 2) == 0) break; if (port_ops && port_ops->mdma_filter) mask = port_ops->mdma_filter(drive); else mask = hwif->mwdma_mask; - mask &= id->dma_mword; + mask &= id[ATA_ID_MWDMA_MODES]; break; case XFER_SW_DMA_0: - if (id->field_valid & 2) { - mask = id->dma_1word & hwif->swdma_mask; - } else if (id->tDMA) { - u8 mode = id->tDMA; + if (id[ATA_ID_FIELD_VALID] & 2) { + mask = id[ATA_ID_SWDMA_MODES] & hwif->swdma_mask; + } else if (id[ATA_ID_OLD_DMA_MODES] >> 8) { + u8 mode = id[ATA_ID_OLD_DMA_MODES] >> 8; /* * if the mode is valid convert it to the mask @@ -706,7 +707,8 @@ u8 ide_find_dma_mode(ide_drive_t *drive, u8 req_mode) /* * is this correct? */ - if (ide_dma_good_drive(drive) && drive->id->eide_dma_time < 150) + if (ide_dma_good_drive(drive) && + drive->id[ATA_ID_EIDE_DMA_TIME] < 150) mode = XFER_MW_DMA_1; } @@ -725,7 +727,7 @@ static int ide_tune_dma(ide_drive_t *drive) ide_hwif_t *hwif = drive->hwif; u8 speed; - if (drive->nodma || (drive->id->capability & 1) == 0) + if (drive->nodma || ata_id_has_dma(drive->id) == 0) return 0; /* consult the list of known "bad" drives */ @@ -767,13 +769,15 @@ static int ide_dma_check(ide_drive_t *drive) int ide_id_dma_bug(ide_drive_t *drive) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; - if (id->field_valid & 4) { - if ((id->dma_ultra >> 8) && (id->dma_mword >> 8)) + if (id[ATA_ID_FIELD_VALID] & 4) { + if ((id[ATA_ID_UDMA_MODES] >> 8) && + (id[ATA_ID_MWDMA_MODES] >> 8)) goto err_out; - } else if (id->field_valid & 2) { - if ((id->dma_mword >> 8) && (id->dma_1word >> 8)) + } else if (id[ATA_ID_FIELD_VALID] & 2) { + if ((id[ATA_ID_MWDMA_MODES] >> 8) && + (id[ATA_ID_SWDMA_MODES] >> 8)) goto err_out; } return 0; diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index e9034c0125f..d36f155470a 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -15,6 +15,8 @@ * Documentation/ide/ChangeLog.ide-floppy.1996-2002 */ +#define DRV_NAME "ide-floppy" + #define IDEFLOPPY_VERSION "1.00" #include <linux/module.h> @@ -31,8 +33,10 @@ #include <linux/slab.h> #include <linux/cdrom.h> #include <linux/ide.h> +#include <linux/hdreg.h> #include <linux/bitops.h> #include <linux/mutex.h> +#include <linux/scatterlist.h> #include <scsi/scsi_ioctl.h> @@ -42,6 +46,8 @@ #include <linux/io.h> #include <asm/unaligned.h> +#include "ide-floppy.h" + /* define to see debug info */ #define IDEFLOPPY_DEBUG_LOG 0 @@ -55,102 +61,23 @@ #define debug_log(fmt, args...) do {} while (0) #endif - -/* Some drives require a longer irq timeout. */ -#define IDEFLOPPY_WAIT_CMD (5 * WAIT_CMD) - /* * After each failed packet command we issue a request sense command and retry * the packet command IDEFLOPPY_MAX_PC_RETRIES times. */ #define IDEFLOPPY_MAX_PC_RETRIES 3 -/* - * With each packet command, we allocate a buffer of IDEFLOPPY_PC_BUFFER_SIZE - * bytes. - */ -#define IDEFLOPPY_PC_BUFFER_SIZE 256 - -/* - * In various places in the driver, we need to allocate storage for packet - * commands and requests, which will remain valid while we leave the driver to - * wait for an interrupt or a timeout event. - */ -#define IDEFLOPPY_PC_STACK (10 + IDEFLOPPY_MAX_PC_RETRIES) - /* format capacities descriptor codes */ #define CAPACITY_INVALID 0x00 #define CAPACITY_UNFORMATTED 0x01 #define CAPACITY_CURRENT 0x02 #define CAPACITY_NO_CARTRIDGE 0x03 -/* - * Most of our global data which we need to save even as we leave the driver - * due to an interrupt or a timer event is stored in a variable of type - * idefloppy_floppy_t, defined below. - */ -typedef struct ide_floppy_obj { - ide_drive_t *drive; - ide_driver_t *driver; - struct gendisk *disk; - struct kref kref; - unsigned int openers; /* protected by BKL for now */ - - /* Current packet command */ - struct ide_atapi_pc *pc; - /* Last failed packet command */ - struct ide_atapi_pc *failed_pc; - /* Packet command stack */ - struct ide_atapi_pc pc_stack[IDEFLOPPY_PC_STACK]; - /* Next free packet command storage space */ - int pc_stack_index; - struct request rq_stack[IDEFLOPPY_PC_STACK]; - /* We implement a circular array */ - int rq_stack_index; - - /* Last error information */ - u8 sense_key, asc, ascq; - /* delay this long before sending packet command */ - u8 ticks; - int progress_indication; - - /* Device information */ - /* Current format */ - int blocks, block_size, bs_factor; - /* Last format capacity descriptor */ - u8 cap_desc[8]; - /* Copy of the flexible disk page */ - u8 flexible_disk_page[32]; - /* Write protect */ - int wp; - /* Supports format progress report */ - int srfp; -} idefloppy_floppy_t; - #define IDEFLOPPY_TICKS_DELAY HZ/20 /* default delay for ZIP 100 (50ms) */ -/* Defines for the MODE SENSE command */ -#define MODE_SENSE_CURRENT 0x00 -#define MODE_SENSE_CHANGEABLE 0x01 -#define MODE_SENSE_DEFAULT 0x02 -#define MODE_SENSE_SAVED 0x03 - -/* IOCTLs used in low-level formatting. */ -#define IDEFLOPPY_IOCTL_FORMAT_SUPPORTED 0x4600 -#define IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY 0x4601 -#define IDEFLOPPY_IOCTL_FORMAT_START 0x4602 -#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603 - /* Error code returned in rq->errors to the higher part of the driver. */ #define IDEFLOPPY_ERROR_GENERAL 101 -/* - * Pages of the SELECT SENSE / MODE SENSE packet commands. - * See SFF-8070i spec. - */ -#define IDEFLOPPY_CAPABILITIES_PAGE 0x1b -#define IDEFLOPPY_FLEXIBLE_DISK_PAGE 0x05 - static DEFINE_MUTEX(idefloppy_ref_mutex); #define to_ide_floppy(obj) container_of(obj, struct ide_floppy_obj, kref) @@ -219,44 +146,6 @@ static int idefloppy_end_request(ide_drive_t *drive, int uptodate, int nsecs) return 0; } -static void ide_floppy_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int direction) -{ - ide_hwif_t *hwif = drive->hwif; - struct request *rq = pc->rq; - struct req_iterator iter; - struct bio_vec *bvec; - unsigned long flags; - int count, done = 0; - char *data; - - rq_for_each_segment(bvec, rq, iter) { - if (!bcount) - break; - - count = min(bvec->bv_len, bcount); - - data = bvec_kmap_irq(bvec, &flags); - if (direction) - hwif->tp_ops->output_data(drive, NULL, data, count); - else - hwif->tp_ops->input_data(drive, NULL, data, count); - bvec_kunmap_irq(data, &flags); - - bcount -= count; - pc->b_count += count; - done += count; - } - - idefloppy_end_request(drive, 1, done >> 9); - - if (bcount) { - printk(KERN_ERR "%s: leftover data in %s, bcount == %d\n", - drive->name, __func__, bcount); - ide_pad_transfer(drive, direction, bcount); - } -} - static void idefloppy_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) { @@ -267,43 +156,6 @@ static void idefloppy_update_buffers(ide_drive_t *drive, idefloppy_end_request(drive, 1, 0); } -/* - * Generate a new packet command request in front of the request queue, before - * the current request so that it will be processed immediately, on the next - * pass through the driver. - */ -static void idefloppy_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc, - struct request *rq) -{ - struct ide_floppy_obj *floppy = drive->driver_data; - - blk_rq_init(NULL, rq); - rq->buffer = (char *) pc; - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_PREEMPT; - rq->rq_disk = floppy->disk; - memcpy(rq->cmd, pc->c, 12); - ide_do_drive_cmd(drive, rq); -} - -static struct ide_atapi_pc *idefloppy_next_pc_storage(ide_drive_t *drive) -{ - idefloppy_floppy_t *floppy = drive->driver_data; - - if (floppy->pc_stack_index == IDEFLOPPY_PC_STACK) - floppy->pc_stack_index = 0; - return (&floppy->pc_stack[floppy->pc_stack_index++]); -} - -static struct request *idefloppy_next_rq_storage(ide_drive_t *drive) -{ - idefloppy_floppy_t *floppy = drive->driver_data; - - if (floppy->rq_stack_index == IDEFLOPPY_PC_STACK) - floppy->rq_stack_index = 0; - return (&floppy->rq_stack[floppy->rq_stack_index++]); -} - static void ide_floppy_callback(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; @@ -341,16 +193,9 @@ static void ide_floppy_callback(ide_drive_t *drive) idefloppy_end_request(drive, uptodate, 0); } -static void idefloppy_init_pc(struct ide_atapi_pc *pc) -{ - memset(pc, 0, sizeof(*pc)); - pc->buf = pc->pc_buf; - pc->buf_size = IDEFLOPPY_PC_BUFFER_SIZE; -} - -static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc) +void ide_floppy_create_request_sense_cmd(struct ide_atapi_pc *pc) { - idefloppy_init_pc(pc); + ide_init_pc(pc); pc->c[0] = GPCMD_REQUEST_SENSE; pc->c[4] = 255; pc->req_xfer = 18; @@ -362,14 +207,13 @@ static void idefloppy_create_request_sense_cmd(struct ide_atapi_pc *pc) */ static void idefloppy_retry_pc(ide_drive_t *drive) { - struct ide_atapi_pc *pc; - struct request *rq; + struct ide_floppy_obj *floppy = drive->driver_data; + struct request *rq = &floppy->request_sense_rq; + struct ide_atapi_pc *pc = &floppy->request_sense_pc; (void)ide_read_error(drive); - pc = idefloppy_next_pc_storage(drive); - rq = idefloppy_next_rq_storage(drive); - idefloppy_create_request_sense_cmd(pc); - idefloppy_queue_pc_head(drive, pc, rq); + ide_floppy_create_request_sense_cmd(pc); + ide_queue_pc_head(drive, floppy->disk, pc, rq); } /* The usual interrupt handler called during a packet command. */ @@ -378,8 +222,8 @@ static ide_startstop_t idefloppy_pc_intr(ide_drive_t *drive) idefloppy_floppy_t *floppy = drive->driver_data; return ide_pc_intr(drive, floppy->pc, idefloppy_pc_intr, - IDEFLOPPY_WAIT_CMD, NULL, idefloppy_update_buffers, - idefloppy_retry_pc, NULL, ide_floppy_io_buffers); + WAIT_FLOPPY_CMD, NULL, idefloppy_update_buffers, + idefloppy_retry_pc, NULL, ide_io_buffers); } /* @@ -396,10 +240,9 @@ static int idefloppy_transfer_pc(ide_drive_t *drive) drive->hwif->tp_ops->output_data(drive, NULL, floppy->pc->c, 12); /* Timeout for the packet command */ - return IDEFLOPPY_WAIT_CMD; + return WAIT_FLOPPY_CMD; } - /* * Called as an interrupt (or directly). When the device says it's ready for a * packet, we schedule the packet transfer to occur about 2-3 ticks later in @@ -424,7 +267,7 @@ static ide_startstop_t idefloppy_start_pc_transfer(ide_drive_t *drive) timeout = floppy->ticks; expiry = &idefloppy_transfer_pc; } else { - timeout = IDEFLOPPY_WAIT_CMD; + timeout = WAIT_FLOPPY_CMD; expiry = NULL; } @@ -474,58 +317,27 @@ static ide_startstop_t idefloppy_issue_pc(ide_drive_t *drive, pc->retries++; return ide_issue_pc(drive, pc, idefloppy_start_pc_transfer, - IDEFLOPPY_WAIT_CMD, NULL); -} - -static void idefloppy_create_prevent_cmd(struct ide_atapi_pc *pc, int prevent) -{ - debug_log("creating prevent removal command, prevent = %d\n", prevent); - - idefloppy_init_pc(pc); - pc->c[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL; - pc->c[4] = prevent; + WAIT_FLOPPY_CMD, NULL); } -static void idefloppy_create_read_capacity_cmd(struct ide_atapi_pc *pc) +void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *pc) { - idefloppy_init_pc(pc); + ide_init_pc(pc); pc->c[0] = GPCMD_READ_FORMAT_CAPACITIES; pc->c[7] = 255; pc->c[8] = 255; pc->req_xfer = 255; } -static void idefloppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b, - int l, int flags) -{ - idefloppy_init_pc(pc); - pc->c[0] = GPCMD_FORMAT_UNIT; - pc->c[1] = 0x17; - - memset(pc->buf, 0, 12); - pc->buf[1] = 0xA2; - /* Default format list header, u8 1: FOV/DCRT/IMM bits set */ - - if (flags & 1) /* Verify bit on... */ - pc->buf[1] ^= 0x20; /* ... turn off DCRT bit */ - pc->buf[3] = 8; - - put_unaligned(cpu_to_be32(b), (unsigned int *)(&pc->buf[4])); - put_unaligned(cpu_to_be32(l), (unsigned int *)(&pc->buf[8])); - pc->buf_size = 12; - pc->flags |= PC_FLAG_WRITING; -} - /* A mode sense command is used to "sense" floppy parameters. */ -static void idefloppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, - u8 page_code, u8 type) +void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code) { u16 length = 8; /* sizeof(Mode Parameter Header) = 8 Bytes */ - idefloppy_init_pc(pc); + ide_init_pc(pc); pc->c[0] = GPCMD_MODE_SENSE_10; pc->c[1] = 0; - pc->c[2] = page_code + (type << 6); + pc->c[2] = page_code; switch (page_code) { case IDEFLOPPY_CAPABILITIES_PAGE: @@ -542,13 +354,6 @@ static void idefloppy_create_mode_sense_cmd(struct ide_atapi_pc *pc, pc->req_xfer = length; } -static void idefloppy_create_start_stop_cmd(struct ide_atapi_pc *pc, int start) -{ - idefloppy_init_pc(pc); - pc->c[0] = GPCMD_START_STOP_UNIT; - pc->c[4] = start; -} - static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, struct ide_atapi_pc *pc, struct request *rq, unsigned long sector) @@ -560,7 +365,7 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, debug_log("create_rw10_cmd: block == %d, blocks == %d\n", block, blocks); - idefloppy_init_pc(pc); + ide_init_pc(pc); pc->c[0] = cmd == READ ? GPCMD_READ_10 : GPCMD_WRITE_10; put_unaligned(cpu_to_be16(blocks), (unsigned short *)&pc->c[7]); put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[2]); @@ -568,7 +373,7 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, memcpy(rq->cmd, pc->c, 12); pc->rq = rq; - pc->b_count = cmd == READ ? 0 : rq->bio->bi_size; + pc->b_count = 0; if (rq->cmd_flags & REQ_RW) pc->flags |= PC_FLAG_WRITING; pc->buf = NULL; @@ -579,10 +384,10 @@ static void idefloppy_create_rw_cmd(idefloppy_floppy_t *floppy, static void idefloppy_blockpc_cmd(idefloppy_floppy_t *floppy, struct ide_atapi_pc *pc, struct request *rq) { - idefloppy_init_pc(pc); + ide_init_pc(pc); memcpy(pc->c, rq->cmd, sizeof(pc->c)); pc->rq = rq; - pc->b_count = rq->data_len; + pc->b_count = 0; if (rq->data_len && rq_data_dir(rq) == WRITE) pc->flags |= PC_FLAG_WRITING; pc->buf = rq->data; @@ -599,15 +404,17 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, struct request *rq, sector_t block_s) { idefloppy_floppy_t *floppy = drive->driver_data; + ide_hwif_t *hwif = drive->hwif; struct ide_atapi_pc *pc; unsigned long block = (unsigned long)block_s; - debug_log("dev: %s, cmd_type: %x, errors: %d\n", - rq->rq_disk ? rq->rq_disk->disk_name : "?", - rq->cmd_type, rq->errors); - debug_log("sector: %ld, nr_sectors: %ld, " - "current_nr_sectors: %d\n", (long)rq->sector, - rq->nr_sectors, rq->current_nr_sectors); + debug_log("%s: dev: %s, cmd: 0x%x, cmd_type: %x, errors: %d\n", + __func__, rq->rq_disk ? rq->rq_disk->disk_name : "?", + rq->cmd[0], rq->cmd_type, rq->errors); + + debug_log("%s: sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n", + __func__, (long)rq->sector, rq->nr_sectors, + rq->current_nr_sectors); if (rq->errors >= ERROR_MAX) { if (floppy->failed_pc) @@ -626,12 +433,12 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, idefloppy_end_request(drive, 0, 0); return ide_stopped; } - pc = idefloppy_next_pc_storage(drive); + pc = &floppy->queued_pc; idefloppy_create_rw_cmd(floppy, pc, rq, block); } else if (blk_special_request(rq)) { pc = (struct ide_atapi_pc *) rq->buffer; } else if (blk_pc_request(rq)) { - pc = idefloppy_next_pc_storage(drive); + pc = &floppy->queued_pc; idefloppy_blockpc_cmd(floppy, pc, rq); } else { blk_dump_rq_flags(rq, @@ -640,29 +447,15 @@ static ide_startstop_t idefloppy_do_request(ide_drive_t *drive, return ide_stopped; } - pc->rq = rq; + ide_init_sg_cmd(drive, rq); + ide_map_sg(drive, rq); - return idefloppy_issue_pc(drive, pc); -} + pc->sg = hwif->sg_table; + pc->sg_cnt = hwif->sg_nents; -/* - * Add a special packet command request to the tail of the request queue, - * and wait for it to be serviced. - */ -static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - struct ide_floppy_obj *floppy = drive->driver_data; - struct request *rq; - int error; - - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->buffer = (char *) pc; - rq->cmd_type = REQ_TYPE_SPECIAL; - memcpy(rq->cmd, pc->c, 12); - error = blk_execute_rq(drive->queue, floppy->disk, rq, 0); - blk_put_request(rq); + pc->rq = rq; - return error; + return idefloppy_issue_pc(drive, pc); } /* @@ -672,22 +465,28 @@ static int idefloppy_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc) static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; + struct gendisk *disk = floppy->disk; struct ide_atapi_pc pc; u8 *page; int capacity, lba_capacity; u16 transfer_rate, sector_size, cyls, rpm; u8 heads, sectors; - idefloppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE, - MODE_SENSE_CURRENT); + ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_FLEXIBLE_DISK_PAGE); - if (idefloppy_queue_pc_tail(drive, &pc)) { + if (ide_queue_pc_tail(drive, disk, &pc)) { printk(KERN_ERR "ide-floppy: Can't get flexible disk page" " parameters\n"); return 1; } - floppy->wp = !!(pc.buf[3] & 0x80); - set_disk_ro(floppy->disk, floppy->wp); + + if (pc.buf[3] & 0x80) + drive->atapi_flags |= IDE_AFLAG_WP; + else + drive->atapi_flags &= ~IDE_AFLAG_WP; + + set_disk_ro(disk, !!(drive->atapi_flags & IDE_AFLAG_WP)); + page = &pc.buf[8]; transfer_rate = be16_to_cpup((__be16 *)&pc.buf[8 + 2]); @@ -721,23 +520,6 @@ static int ide_floppy_get_flexible_disk_page(ide_drive_t *drive) return 0; } -static int idefloppy_get_sfrp_bit(ide_drive_t *drive) -{ - idefloppy_floppy_t *floppy = drive->driver_data; - struct ide_atapi_pc pc; - - floppy->srfp = 0; - idefloppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE, - MODE_SENSE_CURRENT); - - pc.flags |= PC_FLAG_SUPPRESS_ERROR; - if (idefloppy_queue_pc_tail(drive, &pc)) - return 1; - - floppy->srfp = pc.buf[8 + 2] & 0x40; - return (0); -} - /* * Determine if a media is present in the floppy drive, and if so, its LBA * capacity. @@ -745,6 +527,7 @@ static int idefloppy_get_sfrp_bit(ide_drive_t *drive) static int ide_floppy_get_capacity(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; + struct gendisk *disk = floppy->disk; struct ide_atapi_pc pc; u8 *cap_desc; u8 header_len, desc_cnt; @@ -756,8 +539,8 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) floppy->bs_factor = 1; set_capacity(floppy->disk, 0); - idefloppy_create_read_capacity_cmd(&pc); - if (idefloppy_queue_pc_tail(drive, &pc)) { + ide_floppy_create_read_capacity_cmd(&pc); + if (ide_queue_pc_tail(drive, disk, &pc)) { printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); return 1; } @@ -832,202 +615,55 @@ static int ide_floppy_get_capacity(ide_drive_t *drive) if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) (void) ide_floppy_get_flexible_disk_page(drive); - set_capacity(floppy->disk, floppy->blocks * floppy->bs_factor); + set_capacity(disk, floppy->blocks * floppy->bs_factor); + return rc; } -/* - * Obtain the list of formattable capacities. - * Very similar to ide_floppy_get_capacity, except that we push the capacity - * descriptors to userland, instead of our own structures. - * - * Userland gives us the following structure: - * - * struct idefloppy_format_capacities { - * int nformats; - * struct { - * int nblocks; - * int blocksize; - * } formats[]; - * }; - * - * userland initializes nformats to the number of allocated formats[] records. - * On exit we set nformats to the number of records we've actually initialized. - */ - -static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) +static sector_t idefloppy_capacity(ide_drive_t *drive) { - struct ide_atapi_pc pc; - u8 header_len, desc_cnt; - int i, blocks, length, u_array_size, u_index; - int __user *argp; - - if (get_user(u_array_size, arg)) - return (-EFAULT); - - if (u_array_size <= 0) - return (-EINVAL); - - idefloppy_create_read_capacity_cmd(&pc); - if (idefloppy_queue_pc_tail(drive, &pc)) { - printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); - return (-EIO); - } - header_len = pc.buf[3]; - desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */ - - u_index = 0; - argp = arg + 1; - - /* - * We always skip the first capacity descriptor. That's the current - * capacity. We are interested in the remaining descriptors, the - * formattable capacities. - */ - for (i = 1; i < desc_cnt; i++) { - unsigned int desc_start = 4 + i*8; - - if (u_index >= u_array_size) - break; /* User-supplied buffer too small */ - - blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); - length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); - - if (put_user(blocks, argp)) - return(-EFAULT); - ++argp; - - if (put_user(length, argp)) - return (-EFAULT); - ++argp; - - ++u_index; - } + idefloppy_floppy_t *floppy = drive->driver_data; + unsigned long capacity = floppy->blocks * floppy->bs_factor; - if (put_user(u_index, arg)) - return (-EFAULT); - return (0); + return capacity; } -/* - * Get ATAPI_FORMAT_UNIT progress indication. - * - * Userland gives a pointer to an int. The int is set to a progress - * indicator 0-65536, with 65536=100%. - * - * If the drive does not support format progress indication, we just check - * the dsc bit, and return either 0 or 65536. - */ +#ifdef CONFIG_IDE_PROC_FS +ide_devset_rw_field(bios_cyl, bios_cyl); +ide_devset_rw_field(bios_head, bios_head); +ide_devset_rw_field(bios_sect, bios_sect); -static int idefloppy_get_format_progress(ide_drive_t *drive, int __user *arg) +static int get_ticks(ide_drive_t *drive) { idefloppy_floppy_t *floppy = drive->driver_data; - struct ide_atapi_pc pc; - int progress_indication = 0x10000; - - if (floppy->srfp) { - idefloppy_create_request_sense_cmd(&pc); - if (idefloppy_queue_pc_tail(drive, &pc)) - return (-EIO); - - if (floppy->sense_key == 2 && - floppy->asc == 4 && - floppy->ascq == 4) - progress_indication = floppy->progress_indication; - - /* Else assume format_unit has finished, and we're at 0x10000 */ - } else { - ide_hwif_t *hwif = drive->hwif; - unsigned long flags; - u8 stat; - - local_irq_save(flags); - stat = hwif->tp_ops->read_status(hwif); - local_irq_restore(flags); - - progress_indication = ((stat & SEEK_STAT) == 0) ? 0 : 0x10000; - } - if (put_user(progress_indication, arg)) - return (-EFAULT); - - return (0); + return floppy->ticks; } -static sector_t idefloppy_capacity(ide_drive_t *drive) +static int set_ticks(ide_drive_t *drive, int arg) { idefloppy_floppy_t *floppy = drive->driver_data; - unsigned long capacity = floppy->blocks * floppy->bs_factor; - - return capacity; -} - -/* - * Check whether we can support a drive, based on the ATAPI IDENTIFY command - * results. - */ -static int idefloppy_identify_device(ide_drive_t *drive, struct hd_driveid *id) -{ - u8 gcw[2]; - u8 device_type, protocol, removable, drq_type, packet_size; - - *((u16 *) &gcw) = id->config; - - device_type = gcw[1] & 0x1F; - removable = (gcw[0] & 0x80) >> 7; - protocol = (gcw[1] & 0xC0) >> 6; - drq_type = (gcw[0] & 0x60) >> 5; - packet_size = gcw[0] & 0x03; - -#ifdef CONFIG_PPC - /* kludge for Apple PowerBook internal zip */ - if (device_type == 5 && - !strstr(id->model, "CD-ROM") && strstr(id->model, "ZIP")) - device_type = 0; -#endif - - if (protocol != 2) - printk(KERN_ERR "ide-floppy: Protocol (0x%02x) is not ATAPI\n", - protocol); - else if (device_type != 0) - printk(KERN_ERR "ide-floppy: Device type (0x%02x) is not set " - "to floppy\n", device_type); - else if (!removable) - printk(KERN_ERR "ide-floppy: The removable flag is not set\n"); - else if (drq_type == 3) - printk(KERN_ERR "ide-floppy: Sorry, DRQ type (0x%02x) not " - "supported\n", drq_type); - else if (packet_size != 0) - printk(KERN_ERR "ide-floppy: Packet size (0x%02x) is not 12 " - "bytes\n", packet_size); - else - return 1; + floppy->ticks = arg; return 0; } -#ifdef CONFIG_IDE_PROC_FS -static void idefloppy_add_settings(ide_drive_t *drive) -{ - idefloppy_floppy_t *floppy = drive->driver_data; +IDE_DEVSET(ticks, DS_SYNC, get_ticks, set_ticks); - ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 1023, 1, 1, - &drive->bios_cyl, NULL); - ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, - &drive->bios_head, NULL); - ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, - &drive->bios_sect, NULL); - ide_add_setting(drive, "ticks", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, - &floppy->ticks, NULL); -} -#else -static inline void idefloppy_add_settings(ide_drive_t *drive) { ; } +static const struct ide_proc_devset idefloppy_settings[] = { + IDE_PROC_DEVSET(bios_cyl, 0, 1023), + IDE_PROC_DEVSET(bios_head, 0, 255), + IDE_PROC_DEVSET(bios_sect, 0, 63), + IDE_PROC_DEVSET(ticks, 0, 255), + { 0 }, +}; #endif static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy) { + u16 *id = drive->id; u8 gcw[2]; - *((u16 *) &gcw) = drive->id->config; - floppy->pc = floppy->pc_stack; + *((u16 *)&gcw) = id[ATA_ID_CONFIG]; + drive->pc_callback = ide_floppy_callback; if (((gcw[0] & 0x60) >> 5) == 1) @@ -1041,7 +677,7 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy) * it. It should be fixed as of version 1.9, but to be on the safe side * we'll leave the limitation below for the 2.2.x tree. */ - if (!strncmp(drive->id->model, "IOMEGA ZIP 100 ATAPI", 20)) { + if (!strncmp((char *)&id[ATA_ID_PROD], "IOMEGA ZIP 100 ATAPI", 20)) { drive->atapi_flags |= IDE_AFLAG_ZIP_DRIVE; /* This value will be visible in the /proc/ide/hdx/settings */ floppy->ticks = IDEFLOPPY_TICKS_DELAY; @@ -1052,13 +688,16 @@ static void idefloppy_setup(ide_drive_t *drive, idefloppy_floppy_t *floppy) * Guess what? The IOMEGA Clik! drive also needs the above fix. It makes * nasty clicking noises without it, so please don't remove this. */ - if (strncmp(drive->id->model, "IOMEGA Clik!", 11) == 0) { + if (strncmp((char *)&id[ATA_ID_PROD], "IOMEGA Clik!", 11) == 0) { blk_queue_max_sectors(drive->queue, 64); drive->atapi_flags |= IDE_AFLAG_CLIK_DRIVE; + /* IOMEGA Clik! drives do not support lock/unlock commands */ + drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; } (void) ide_floppy_get_capacity(drive); - idefloppy_add_settings(drive); + + ide_proc_register_driver(drive, floppy->driver); } static void ide_floppy_remove(ide_drive_t *drive) @@ -1115,12 +754,12 @@ static ide_driver_t idefloppy_driver = { .remove = ide_floppy_remove, .version = IDEFLOPPY_VERSION, .media = ide_floppy, - .supports_dsc_overlap = 0, .do_request = idefloppy_do_request, .end_request = idefloppy_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idefloppy_proc, + .settings = idefloppy_settings, #endif }; @@ -1129,7 +768,6 @@ static int idefloppy_open(struct inode *inode, struct file *filp) struct gendisk *disk = inode->i_bdev->bd_disk; struct ide_floppy_obj *floppy; ide_drive_t *drive; - struct ide_atapi_pc pc; int ret = 0; debug_log("Reached %s\n", __func__); @@ -1146,13 +784,8 @@ static int idefloppy_open(struct inode *inode, struct file *filp) drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; /* Just in case */ - idefloppy_init_pc(&pc); - pc.c[0] = GPCMD_TEST_UNIT_READY; - - if (idefloppy_queue_pc_tail(drive, &pc)) { - idefloppy_create_start_stop_cmd(&pc, 1); - (void) idefloppy_queue_pc_tail(drive, &pc); - } + if (ide_do_test_unit_ready(drive, disk)) + ide_do_start_stop(drive, disk, 1); if (ide_floppy_get_capacity(drive) && (filp->f_flags & O_NDELAY) == 0 @@ -1166,16 +799,13 @@ static int idefloppy_open(struct inode *inode, struct file *filp) goto out_put_floppy; } - if (floppy->wp && (filp->f_mode & 2)) { + if ((drive->atapi_flags & IDE_AFLAG_WP) && (filp->f_mode & 2)) { ret = -EROFS; goto out_put_floppy; } + drive->atapi_flags |= IDE_AFLAG_MEDIA_CHANGED; - /* IOMEGA Clik! drives do not support lock/unlock commands */ - if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) { - idefloppy_create_prevent_cmd(&pc, 1); - (void) idefloppy_queue_pc_tail(drive, &pc); - } + ide_set_media_lock(drive, disk, 1); check_disk_change(inode->i_bdev); } else if (drive->atapi_flags & IDE_AFLAG_FORMAT_IN_PROGRESS) { ret = -EBUSY; @@ -1194,17 +824,11 @@ static int idefloppy_release(struct inode *inode, struct file *filp) struct gendisk *disk = inode->i_bdev->bd_disk; struct ide_floppy_obj *floppy = ide_floppy_g(disk); ide_drive_t *drive = floppy->drive; - struct ide_atapi_pc pc; debug_log("Reached %s\n", __func__); if (floppy->openers == 1) { - /* IOMEGA Clik! drives do not support lock/unlock commands */ - if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) { - idefloppy_create_prevent_cmd(&pc, 0); - (void) idefloppy_queue_pc_tail(drive, &pc); - } - + ide_set_media_lock(drive, disk, 0); drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; } @@ -1230,80 +854,20 @@ static int ide_floppy_lockdoor(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned long arg, unsigned int cmd) { idefloppy_floppy_t *floppy = drive->driver_data; + struct gendisk *disk = floppy->disk; + int prevent = (arg && cmd != CDROMEJECT) ? 1 : 0; if (floppy->openers > 1) return -EBUSY; - /* The IOMEGA Clik! Drive doesn't support this command - - * no room for an eject mechanism */ - if (!(drive->atapi_flags & IDE_AFLAG_CLIK_DRIVE)) { - int prevent = arg ? 1 : 0; - - if (cmd == CDROMEJECT) - prevent = 0; + ide_set_media_lock(drive, disk, prevent); - idefloppy_create_prevent_cmd(pc, prevent); - (void) idefloppy_queue_pc_tail(floppy->drive, pc); - } - - if (cmd == CDROMEJECT) { - idefloppy_create_start_stop_cmd(pc, 2); - (void) idefloppy_queue_pc_tail(floppy->drive, pc); - } + if (cmd == CDROMEJECT) + ide_do_start_stop(drive, disk, 2); return 0; } -static int ide_floppy_format_unit(idefloppy_floppy_t *floppy, - int __user *arg) -{ - struct ide_atapi_pc pc; - ide_drive_t *drive = floppy->drive; - int blocks, length, flags, err = 0; - - if (floppy->openers > 1) { - /* Don't format if someone is using the disk */ - drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; - return -EBUSY; - } - - drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS; - - /* - * Send ATAPI_FORMAT_UNIT to the drive. - * - * Userland gives us the following structure: - * - * struct idefloppy_format_command { - * int nblocks; - * int blocksize; - * int flags; - * } ; - * - * flags is a bitmask, currently, the only defined flag is: - * - * 0x01 - verify media after format. - */ - if (get_user(blocks, arg) || - get_user(length, arg+1) || - get_user(flags, arg+2)) { - err = -EFAULT; - goto out; - } - - (void) idefloppy_get_sfrp_bit(drive); - idefloppy_create_format_unit_cmd(&pc, blocks, length, flags); - - if (idefloppy_queue_pc_tail(drive, &pc)) - err = -EIO; - -out: - if (err) - drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; - return err; -} - - static int idefloppy_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { @@ -1314,23 +878,12 @@ static int idefloppy_ioctl(struct inode *inode, struct file *file, void __user *argp = (void __user *)arg; int err; - switch (cmd) { - case CDROMEJECT: - /* fall through */ - case CDROM_LOCKDOOR: + if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) return ide_floppy_lockdoor(drive, &pc, arg, cmd); - case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED: - return 0; - case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: - return ide_floppy_get_format_capacities(drive, argp); - case IDEFLOPPY_IOCTL_FORMAT_START: - if (!(file->f_mode & 2)) - return -EPERM; - - return ide_floppy_format_unit(floppy, (int __user *)arg); - case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: - return idefloppy_get_format_progress(drive, argp); - } + + err = ide_floppy_format_ioctl(drive, file, cmd, argp); + if (err != -ENOTTY) + return err; /* * skip SCSI_IOCTL_SEND_COMMAND (deprecated) @@ -1339,8 +892,6 @@ static int idefloppy_ioctl(struct inode *inode, struct file *file, if (cmd != CDROM_SEND_PACKET && cmd != SCSI_IOCTL_SEND_COMMAND) err = scsi_cmd_ioctl(file, bdev->bd_disk->queue, bdev->bd_disk, cmd, argp); - else - err = -ENOTTY; if (err == -ENOTTY) err = generic_ide_ioctl(drive, file, bdev, cmd, arg); @@ -1388,11 +939,11 @@ static int ide_floppy_probe(ide_drive_t *drive) if (!strstr("ide-floppy", drive->driver_req)) goto failed; - if (!drive->present) - goto failed; + if (drive->media != ide_floppy) goto failed; - if (!idefloppy_identify_device(drive, drive->id)) { + + if (!ide_check_atapi_device(drive, DRV_NAME)) { printk(KERN_ERR "ide-floppy: %s: not supported by this version" " of ide-floppy\n", drive->name); goto failed; @@ -1410,8 +961,6 @@ static int ide_floppy_probe(ide_drive_t *drive) ide_init_disk(g, drive); - ide_proc_register_driver(drive, &idefloppy_driver); - kref_init(&floppy->kref); floppy->drive = drive; @@ -1450,6 +999,7 @@ static int __init idefloppy_init(void) } MODULE_ALIAS("ide:*m-floppy*"); +MODULE_ALIAS("ide-floppy"); module_init(idefloppy_init); module_exit(idefloppy_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/ide/ide-floppy.h b/drivers/ide/ide-floppy.h new file mode 100644 index 00000000000..ecadc2bc322 --- /dev/null +++ b/drivers/ide/ide-floppy.h @@ -0,0 +1,63 @@ +#ifndef __IDE_FLOPPY_H +#define __IDE_FLOPPY_H + +/* + * Most of our global data which we need to save even as we leave the driver + * due to an interrupt or a timer event is stored in a variable of type + * idefloppy_floppy_t, defined below. + */ +typedef struct ide_floppy_obj { + ide_drive_t *drive; + ide_driver_t *driver; + struct gendisk *disk; + struct kref kref; + unsigned int openers; /* protected by BKL for now */ + + /* Current packet command */ + struct ide_atapi_pc *pc; + /* Last failed packet command */ + struct ide_atapi_pc *failed_pc; + /* used for blk_{fs,pc}_request() requests */ + struct ide_atapi_pc queued_pc; + + struct ide_atapi_pc request_sense_pc; + struct request request_sense_rq; + + /* Last error information */ + u8 sense_key, asc, ascq; + /* delay this long before sending packet command */ + u8 ticks; + int progress_indication; + + /* Device information */ + /* Current format */ + int blocks, block_size, bs_factor; + /* Last format capacity descriptor */ + u8 cap_desc[8]; + /* Copy of the flexible disk page */ + u8 flexible_disk_page[32]; +} idefloppy_floppy_t; + +/* + * Pages of the SELECT SENSE / MODE SENSE packet commands. + * See SFF-8070i spec. + */ +#define IDEFLOPPY_CAPABILITIES_PAGE 0x1b +#define IDEFLOPPY_FLEXIBLE_DISK_PAGE 0x05 + +/* IOCTLs used in low-level formatting. */ +#define IDEFLOPPY_IOCTL_FORMAT_SUPPORTED 0x4600 +#define IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY 0x4601 +#define IDEFLOPPY_IOCTL_FORMAT_START 0x4602 +#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603 + +/* ide-floppy.c */ +void ide_floppy_create_mode_sense_cmd(struct ide_atapi_pc *, u8); +void ide_floppy_create_read_capacity_cmd(struct ide_atapi_pc *); +void ide_floppy_create_request_sense_cmd(struct ide_atapi_pc *); + +/* ide-floppy_ioctl.c */ +int ide_floppy_format_ioctl(ide_drive_t *, struct file *, unsigned int, + void __user *); + +#endif /*__IDE_FLOPPY_H */ diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c new file mode 100644 index 00000000000..5ffc4512d14 --- /dev/null +++ b/drivers/ide/ide-floppy_ioctl.c @@ -0,0 +1,243 @@ +/* + * ide-floppy IOCTLs handling. + */ + +#include <linux/kernel.h> +#include <linux/ide.h> +#include <linux/cdrom.h> + +#include <asm/unaligned.h> + +#include <scsi/scsi_ioctl.h> + +#include "ide-floppy.h" + +/* + * Obtain the list of formattable capacities. + * Very similar to ide_floppy_get_capacity, except that we push the capacity + * descriptors to userland, instead of our own structures. + * + * Userland gives us the following structure: + * + * struct idefloppy_format_capacities { + * int nformats; + * struct { + * int nblocks; + * int blocksize; + * } formats[]; + * }; + * + * userland initializes nformats to the number of allocated formats[] records. + * On exit we set nformats to the number of records we've actually initialized. + */ + +static int ide_floppy_get_format_capacities(ide_drive_t *drive, int __user *arg) +{ + struct ide_floppy_obj *floppy = drive->driver_data; + struct ide_atapi_pc pc; + u8 header_len, desc_cnt; + int i, blocks, length, u_array_size, u_index; + int __user *argp; + + if (get_user(u_array_size, arg)) + return -EFAULT; + + if (u_array_size <= 0) + return -EINVAL; + + ide_floppy_create_read_capacity_cmd(&pc); + if (ide_queue_pc_tail(drive, floppy->disk, &pc)) { + printk(KERN_ERR "ide-floppy: Can't get floppy parameters\n"); + return -EIO; + } + + header_len = pc.buf[3]; + desc_cnt = header_len / 8; /* capacity descriptor of 8 bytes */ + + u_index = 0; + argp = arg + 1; + + /* + * We always skip the first capacity descriptor. That's the current + * capacity. We are interested in the remaining descriptors, the + * formattable capacities. + */ + for (i = 1; i < desc_cnt; i++) { + unsigned int desc_start = 4 + i*8; + + if (u_index >= u_array_size) + break; /* User-supplied buffer too small */ + + blocks = be32_to_cpup((__be32 *)&pc.buf[desc_start]); + length = be16_to_cpup((__be16 *)&pc.buf[desc_start + 6]); + + if (put_user(blocks, argp)) + return -EFAULT; + + ++argp; + + if (put_user(length, argp)) + return -EFAULT; + + ++argp; + + ++u_index; + } + + if (put_user(u_index, arg)) + return -EFAULT; + + return 0; +} + +static void ide_floppy_create_format_unit_cmd(struct ide_atapi_pc *pc, int b, + int l, int flags) +{ + ide_init_pc(pc); + pc->c[0] = GPCMD_FORMAT_UNIT; + pc->c[1] = 0x17; + + memset(pc->buf, 0, 12); + pc->buf[1] = 0xA2; + /* Default format list header, u8 1: FOV/DCRT/IMM bits set */ + + if (flags & 1) /* Verify bit on... */ + pc->buf[1] ^= 0x20; /* ... turn off DCRT bit */ + pc->buf[3] = 8; + + put_unaligned(cpu_to_be32(b), (unsigned int *)(&pc->buf[4])); + put_unaligned(cpu_to_be32(l), (unsigned int *)(&pc->buf[8])); + pc->buf_size = 12; + pc->flags |= PC_FLAG_WRITING; +} + +static int ide_floppy_get_sfrp_bit(ide_drive_t *drive) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + struct ide_atapi_pc pc; + + drive->atapi_flags &= ~IDE_AFLAG_SRFP; + + ide_floppy_create_mode_sense_cmd(&pc, IDEFLOPPY_CAPABILITIES_PAGE); + pc.flags |= PC_FLAG_SUPPRESS_ERROR; + + if (ide_queue_pc_tail(drive, floppy->disk, &pc)) + return 1; + + if (pc.buf[8 + 2] & 0x40) + drive->atapi_flags |= IDE_AFLAG_SRFP; + + return 0; +} + +static int ide_floppy_format_unit(ide_drive_t *drive, int __user *arg) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + struct ide_atapi_pc pc; + int blocks, length, flags, err = 0; + + if (floppy->openers > 1) { + /* Don't format if someone is using the disk */ + drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; + return -EBUSY; + } + + drive->atapi_flags |= IDE_AFLAG_FORMAT_IN_PROGRESS; + + /* + * Send ATAPI_FORMAT_UNIT to the drive. + * + * Userland gives us the following structure: + * + * struct idefloppy_format_command { + * int nblocks; + * int blocksize; + * int flags; + * } ; + * + * flags is a bitmask, currently, the only defined flag is: + * + * 0x01 - verify media after format. + */ + if (get_user(blocks, arg) || + get_user(length, arg+1) || + get_user(flags, arg+2)) { + err = -EFAULT; + goto out; + } + + (void)ide_floppy_get_sfrp_bit(drive); + ide_floppy_create_format_unit_cmd(&pc, blocks, length, flags); + + if (ide_queue_pc_tail(drive, floppy->disk, &pc)) + err = -EIO; + +out: + if (err) + drive->atapi_flags &= ~IDE_AFLAG_FORMAT_IN_PROGRESS; + return err; +} + +/* + * Get ATAPI_FORMAT_UNIT progress indication. + * + * Userland gives a pointer to an int. The int is set to a progress + * indicator 0-65536, with 65536=100%. + * + * If the drive does not support format progress indication, we just check + * the dsc bit, and return either 0 or 65536. + */ + +static int ide_floppy_get_format_progress(ide_drive_t *drive, int __user *arg) +{ + idefloppy_floppy_t *floppy = drive->driver_data; + struct ide_atapi_pc pc; + int progress_indication = 0x10000; + + if (drive->atapi_flags & IDE_AFLAG_SRFP) { + ide_floppy_create_request_sense_cmd(&pc); + if (ide_queue_pc_tail(drive, floppy->disk, &pc)) + return -EIO; + + if (floppy->sense_key == 2 && + floppy->asc == 4 && + floppy->ascq == 4) + progress_indication = floppy->progress_indication; + + /* Else assume format_unit has finished, and we're at 0x10000 */ + } else { + ide_hwif_t *hwif = drive->hwif; + unsigned long flags; + u8 stat; + + local_irq_save(flags); + stat = hwif->tp_ops->read_status(hwif); + local_irq_restore(flags); + + progress_indication = ((stat & ATA_DSC) == 0) ? 0 : 0x10000; + } + + if (put_user(progress_indication, arg)) + return -EFAULT; + + return 0; +} + +int ide_floppy_format_ioctl(ide_drive_t *drive, struct file *file, + unsigned int cmd, void __user *argp) +{ + switch (cmd) { + case IDEFLOPPY_IOCTL_FORMAT_SUPPORTED: + return 0; + case IDEFLOPPY_IOCTL_FORMAT_GET_CAPACITY: + return ide_floppy_get_format_capacities(drive, argp); + case IDEFLOPPY_IOCTL_FORMAT_START: + if (!(file->f_mode & 2)) + return -EPERM; + return ide_floppy_format_unit(drive, (int __user *)argp); + case IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS: + return ide_floppy_get_format_progress(drive, argp); + default: + return -ENOTTY; + } +} diff --git a/drivers/ide/ide-generic.c b/drivers/ide/ide-generic.c index 8fe8b5b9cf7..0a3cb0c33ae 100644 --- a/drivers/ide/ide-generic.c +++ b/drivers/ide/ide-generic.c @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/ide.h> +#include <linux/pci_ids.h> /* FIXME: convert m32r to use ide_platform host driver */ #ifdef CONFIG_M32R @@ -27,7 +28,7 @@ #define DRV_NAME "ide_generic" -static int probe_mask = 0x03; +static int probe_mask; module_param(probe_mask, int, 0); MODULE_PARM_DESC(probe_mask, "probe mask for legacy ISA IDE ports"); @@ -100,19 +101,65 @@ static const u16 legacy_bases[] = { 0x1f0, 0x170, 0x1e8, 0x168, 0x1e0, 0x160 }; static const int legacy_irqs[] = { 14, 15, 11, 10, 8, 12 }; #endif +static void ide_generic_check_pci_legacy_iobases(int *primary, int *secondary) +{ + struct pci_dev *p = NULL; + u16 val; + + for_each_pci_dev(p) { + + if (pci_resource_start(p, 0) == 0x1f0) + *primary = 1; + if (pci_resource_start(p, 2) == 0x170) + *secondary = 1; + + /* Cyrix CS55{1,2}0 pre SFF MWDMA ATA on the bridge */ + if (p->vendor == PCI_VENDOR_ID_CYRIX && + (p->device == PCI_DEVICE_ID_CYRIX_5510 || + p->device == PCI_DEVICE_ID_CYRIX_5520)) + *primary = *secondary = 1; + + /* Intel MPIIX - PIO ATA on non PCI side of bridge */ + if (p->vendor == PCI_VENDOR_ID_INTEL && + p->device == PCI_DEVICE_ID_INTEL_82371MX) { + + pci_read_config_word(p, 0x6C, &val); + if (val & 0x8000) { + /* ATA port enabled */ + if (val & 0x4000) + *secondary = 1; + else + *primary = 1; + } + } + } +} + static int __init ide_generic_init(void) { hw_regs_t hw[MAX_HWIFS], *hws[MAX_HWIFS]; struct ide_host *host; unsigned long io_addr; - int i, rc; + int i, rc, primary = 0, secondary = 0; #ifdef CONFIG_MIPS if (!ide_probe_legacy()) return -ENODEV; #endif - printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" module " - "parameter for probing all legacy ISA IDE ports\n"); + ide_generic_check_pci_legacy_iobases(&primary, &secondary); + + if (!probe_mask) { + printk(KERN_INFO DRV_NAME ": please use \"probe_mask=0x3f\" " + "module parameter for probing all legacy ISA IDE ports\n"); + + if (primary == 0) + probe_mask |= 0x1; + + if (secondary == 0) + probe_mask |= 0x2; + } else + printk(KERN_INFO DRV_NAME ": enforcing probing of I/O ports " + "upon user request\n"); memset(hws, 0, sizeof(hw_regs_t *) * MAX_HWIFS); diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index a896a283f27..1c51949833b 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -40,6 +40,7 @@ #include <linux/pci.h> #include <linux/delay.h> #include <linux/ide.h> +#include <linux/hdreg.h> #include <linux/completion.h> #include <linux/reboot.h> #include <linux/cdrom.h> @@ -183,18 +184,18 @@ static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request * if (drive->media != ide_disk) break; /* Not supported? Switch to next step now. */ - if (!drive->wcache || !ide_id_has_flush_cache(drive->id)) { + if (!drive->wcache || ata_id_flush_enabled(drive->id) == 0) { ide_complete_power_step(drive, rq, 0, 0); return ide_stopped; } - if (ide_id_has_flush_cache_ext(drive->id)) - args->tf.command = WIN_FLUSH_CACHE_EXT; + if (ata_id_flush_ext_enabled(drive->id)) + args->tf.command = ATA_CMD_FLUSH_EXT; else - args->tf.command = WIN_FLUSH_CACHE; + args->tf.command = ATA_CMD_FLUSH; goto out_do_tf; case idedisk_pm_standby: /* Suspend step 2 (standby) */ - args->tf.command = WIN_STANDBYNOW1; + args->tf.command = ATA_CMD_STANDBYNOW1; goto out_do_tf; case idedisk_pm_restore_pio: /* Resume step 1 (restore PIO) */ @@ -209,7 +210,7 @@ static ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request * return ide_stopped; case idedisk_pm_idle: /* Resume step 2 (idle) */ - args->tf.command = WIN_IDLEIMMEDIATE; + args->tf.command = ATA_CMD_IDLEIMMEDIATE; goto out_do_tf; case ide_pm_restore_dma: /* Resume step 3 (restore DMA) */ @@ -322,7 +323,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) ide_task_t *task = (ide_task_t *)rq->special; if (rq->errors == 0) - rq->errors = !OK_STAT(stat, READY_STAT, BAD_STAT); + rq->errors = !OK_STAT(stat, ATA_DRDY, BAD_STAT); if (task) { struct ide_taskfile *tf = &task->tf; @@ -373,29 +374,29 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8 { ide_hwif_t *hwif = drive->hwif; - if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { + if ((stat & ATA_BUSY) || ((stat & ATA_DF) && !drive->nowerr)) { /* other bits are useless when BUSY */ rq->errors |= ERROR_RESET; - } else if (stat & ERR_STAT) { + } else if (stat & ATA_ERR) { /* err has different meaning on cdrom and tape */ - if (err == ABRT_ERR) { + if (err == ATA_ABORTED) { if (drive->select.b.lba && - /* some newer drives don't support WIN_SPECIFY */ - hwif->tp_ops->read_status(hwif) == WIN_SPECIFY) + /* some newer drives don't support ATA_CMD_INIT_DEV_PARAMS */ + hwif->tp_ops->read_status(hwif) == ATA_CMD_INIT_DEV_PARAMS) return ide_stopped; } else if ((err & BAD_CRC) == BAD_CRC) { /* UDMA crc error, just retry the operation */ drive->crc_count++; - } else if (err & (BBD_ERR | ECC_ERR)) { + } else if (err & (ATA_BBK | ATA_UNC)) { /* retries won't help these */ rq->errors = ERROR_MAX; - } else if (err & TRK0_ERR) { + } else if (err & ATA_TRK0NF) { /* help it find track zero */ rq->errors |= ERROR_RECAL; } } - if ((stat & DRQ_STAT) && rq_data_dir(rq) == READ && + if ((stat & ATA_DRQ) && rq_data_dir(rq) == READ && (hwif->host_flags & IDE_HFLAG_ERROR_STOPS_FIFO) == 0) { int nsect = drive->mult_count ? drive->mult_count : 1; @@ -407,7 +408,7 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8 return ide_stopped; } - if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) + if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) rq->errors |= ERROR_RESET; if ((rq->errors & ERROR_RESET) == ERROR_RESET) { @@ -427,16 +428,16 @@ static ide_startstop_t ide_atapi_error(ide_drive_t *drive, struct request *rq, u { ide_hwif_t *hwif = drive->hwif; - if (stat & BUSY_STAT || ((stat & WRERR_STAT) && !drive->nowerr)) { + if ((stat & ATA_BUSY) || ((stat & ATA_DF) && !drive->nowerr)) { /* other bits are useless when BUSY */ rq->errors |= ERROR_RESET; } else { /* add decoding error stuff */ } - if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) + if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) /* force an abort */ - hwif->tp_ops->exec_command(hwif, WIN_IDLEIMMEDIATE); + hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); if (rq->errors >= ERROR_MAX) { ide_kill_rq(drive, rq); @@ -509,19 +510,19 @@ static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) tf->lbam = drive->cyl; tf->lbah = drive->cyl >> 8; tf->device = ((drive->head - 1) | drive->select.all) & ~ATA_LBA; - tf->command = WIN_SPECIFY; + tf->command = ATA_CMD_INIT_DEV_PARAMS; } static void ide_tf_set_restore_cmd(ide_drive_t *drive, struct ide_taskfile *tf) { tf->nsect = drive->sect; - tf->command = WIN_RESTORE; + tf->command = ATA_CMD_RESTORE; } static void ide_tf_set_setmult_cmd(ide_drive_t *drive, struct ide_taskfile *tf) { tf->nsect = drive->mult_req; - tf->command = WIN_SETMULT; + tf->command = ATA_CMD_SET_MULTI; } static ide_startstop_t ide_disk_special(ide_drive_t *drive) @@ -540,8 +541,6 @@ static ide_startstop_t ide_disk_special(ide_drive_t *drive) ide_tf_set_restore_cmd(drive, &args.tf); } else if (s->b.set_multmode) { s->b.set_multmode = 0; - if (drive->mult_req > drive->id->max_multsect) - drive->mult_req = drive->id->max_multsect; ide_tf_set_setmult_cmd(drive, &args.tf); } else if (s->all) { int special = s->all; @@ -586,9 +585,10 @@ static int set_pio_mode_abuse(ide_hwif_t *hwif, u8 req_pio) * do_special - issue some special commands * @drive: drive the command is for * - * do_special() is used to issue WIN_SPECIFY, WIN_RESTORE, and WIN_SETMULT - * commands to a drive. It used to do much more, but has been scaled - * back. + * do_special() is used to issue ATA_CMD_INIT_DEV_PARAMS, + * ATA_CMD_RESTORE and ATA_CMD_SET_MULTI commands to a drive. + * + * It used to do much more, but has been scaled back. */ static ide_startstop_t do_special (ide_drive_t *drive) @@ -716,9 +716,49 @@ static ide_startstop_t execute_drive_cmd (ide_drive_t *drive, return ide_stopped; } +int ide_devset_execute(ide_drive_t *drive, const struct ide_devset *setting, + int arg) +{ + struct request_queue *q = drive->queue; + struct request *rq; + int ret = 0; + + if (!(setting->flags & DS_SYNC)) + return setting->set(drive, arg); + + rq = blk_get_request(q, READ, GFP_KERNEL); + if (!rq) + return -ENOMEM; + + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_len = 5; + rq->cmd[0] = REQ_DEVSET_EXEC; + *(int *)&rq->cmd[1] = arg; + rq->special = setting->set; + + if (blk_execute_rq(q, NULL, rq, 0)) + ret = rq->errors; + blk_put_request(rq); + + return ret; +} +EXPORT_SYMBOL_GPL(ide_devset_execute); + static ide_startstop_t ide_special_rq(ide_drive_t *drive, struct request *rq) { switch (rq->cmd[0]) { + case REQ_DEVSET_EXEC: + { + int err, (*setfunc)(ide_drive_t *, int) = rq->special; + + err = setfunc(drive, *(int *)&rq->cmd[1]); + if (err) + rq->errors = err; + else + err = 1; + ide_end_request(drive, err, 0); + return ide_stopped; + } case REQ_DRIVE_RESET: return ide_do_reset(drive); default: @@ -766,9 +806,7 @@ static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) * start_request - start of I/O and command issuing for IDE * * start_request() initiates handling of a new I/O request. It - * accepts commands and I/O (read/write) requests. It also does - * the final remapping for weird stuff like EZDrive. Once - * device mapper can work sector level the EZDrive stuff can go away + * accepts commands and I/O (read/write) requests. * * FIXME: this function needs a rename */ @@ -776,7 +814,6 @@ static void ide_check_pm_state(ide_drive_t *drive, struct request *rq) static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - sector_t block; BUG_ON(!blk_rq_started(rq)); @@ -791,21 +828,12 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) goto kill_rq; } - block = rq->sector; - if (blk_fs_request(rq) && - (drive->media == ide_disk || drive->media == ide_floppy)) { - block += drive->sect0; - } - /* Yecch - this will shift the entire interval, - possibly killing some innocent following sector */ - if (block == 0 && drive->remap_0_to_1 == 1) - block = 1; /* redirect MBR access to EZ-Drive partn table */ - if (blk_pm_request(rq)) ide_check_pm_state(drive, rq); SELECT_DRIVE(drive); - if (ide_wait_stat(&startstop, drive, drive->ready_stat, BUSY_STAT|DRQ_STAT, WAIT_READY)) { + if (ide_wait_stat(&startstop, drive, drive->ready_stat, + ATA_BUSY | ATA_DRQ, WAIT_READY)) { printk(KERN_ERR "%s: drive not ready for command\n", drive->name); return startstop; } @@ -844,7 +872,8 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq) return ide_special_rq(drive, rq); drv = *(ide_driver_t **)rq->rq_disk->private_data; - return drv->do_request(drive, rq, block); + + return drv->do_request(drive, rq, rq->sector); } return do_special(drive); kill_rq: @@ -1325,7 +1354,7 @@ static void unexpected_intr (int irq, ide_hwgroup_t *hwgroup) if (hwif->irq == irq) { stat = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(stat, READY_STAT, BAD_STAT)) { + if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) { /* Try to not flood the console with msgs */ static unsigned long last_msgtime, count; ++count; diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c new file mode 100644 index 00000000000..cf01564901a --- /dev/null +++ b/drivers/ide/ide-ioctls.c @@ -0,0 +1,290 @@ +/* + * IDE ioctls handling. + */ + +#include <linux/hdreg.h> +#include <linux/ide.h> + +static const struct ide_ioctl_devset ide_ioctl_settings[] = { +{ HDIO_GET_32BIT, HDIO_SET_32BIT, &ide_devset_io_32bit }, +{ HDIO_GET_KEEPSETTINGS, HDIO_SET_KEEPSETTINGS, &ide_devset_keepsettings }, +{ HDIO_GET_UNMASKINTR, HDIO_SET_UNMASKINTR, &ide_devset_unmaskirq }, +{ HDIO_GET_DMA, HDIO_SET_DMA, &ide_devset_using_dma }, +{ -1, HDIO_SET_PIO_MODE, &ide_devset_pio_mode }, +{ 0 } +}; + +int ide_setting_ioctl(ide_drive_t *drive, struct block_device *bdev, + unsigned int cmd, unsigned long arg, + const struct ide_ioctl_devset *s) +{ + const struct ide_devset *ds; + unsigned long flags; + int err = -EOPNOTSUPP; + + for (; (ds = s->setting); s++) { + if (ds->get && s->get_ioctl == cmd) + goto read_val; + else if (ds->set && s->set_ioctl == cmd) + goto set_val; + } + + return err; + +read_val: + mutex_lock(&ide_setting_mtx); + spin_lock_irqsave(&ide_lock, flags); + err = ds->get(drive); + spin_unlock_irqrestore(&ide_lock, flags); + mutex_unlock(&ide_setting_mtx); + return err >= 0 ? put_user(err, (long __user *)arg) : err; + +set_val: + if (bdev != bdev->bd_contains) + err = -EINVAL; + else { + if (!capable(CAP_SYS_ADMIN)) + err = -EACCES; + else { + mutex_lock(&ide_setting_mtx); + err = ide_devset_execute(drive, ds, arg); + mutex_unlock(&ide_setting_mtx); + } + } + return err; +} +EXPORT_SYMBOL_GPL(ide_setting_ioctl); + +static int ide_get_identity_ioctl(ide_drive_t *drive, unsigned int cmd, + unsigned long arg) +{ + u16 *id = NULL; + int size = (cmd == HDIO_GET_IDENTITY) ? (ATA_ID_WORDS * 2) : 142; + int rc = 0; + + if (drive->id_read == 0) { + rc = -ENOMSG; + goto out; + } + + id = kmalloc(size, GFP_KERNEL); + if (id == NULL) { + rc = -ENOMEM; + goto out; + } + + memcpy(id, drive->id, size); + ata_id_to_hd_driveid(id); + + if (copy_to_user((void __user *)arg, id, size)) + rc = -EFAULT; + + kfree(id); +out: + return rc; +} + +static int ide_get_nice_ioctl(ide_drive_t *drive, unsigned long arg) +{ + return put_user((drive->dsc_overlap << IDE_NICE_DSC_OVERLAP) | + (drive->nice1 << IDE_NICE_1), (long __user *)arg); +} + +static int ide_set_nice_ioctl(ide_drive_t *drive, unsigned long arg) +{ + if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1)))) + return -EPERM; + + if (((arg >> IDE_NICE_DSC_OVERLAP) & 1) && + (drive->media == ide_disk || drive->media == ide_floppy || + drive->scsi)) + return -EPERM; + + drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1; + drive->nice1 = (arg >> IDE_NICE_1) & 1; + + return 0; +} + +static int ide_cmd_ioctl(ide_drive_t *drive, unsigned cmd, unsigned long arg) +{ + u8 *buf = NULL; + int bufsize = 0, err = 0; + u8 args[4], xfer_rate = 0; + ide_task_t tfargs; + struct ide_taskfile *tf = &tfargs.tf; + u16 *id = drive->id; + + if (NULL == (void *) arg) { + struct request *rq; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_ATA_TASKFILE; + err = blk_execute_rq(drive->queue, NULL, rq, 0); + blk_put_request(rq); + + return err; + } + + if (copy_from_user(args, (void __user *)arg, 4)) + return -EFAULT; + + memset(&tfargs, 0, sizeof(ide_task_t)); + tf->feature = args[2]; + if (args[0] == ATA_CMD_SMART) { + tf->nsect = args[3]; + tf->lbal = args[1]; + tf->lbam = 0x4f; + tf->lbah = 0xc2; + tfargs.tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_IN_NSECT; + } else { + tf->nsect = args[1]; + tfargs.tf_flags = IDE_TFLAG_OUT_FEATURE | + IDE_TFLAG_OUT_NSECT | IDE_TFLAG_IN_NSECT; + } + tf->command = args[0]; + tfargs.data_phase = args[3] ? TASKFILE_IN : TASKFILE_NO_DATA; + + if (args[3]) { + tfargs.tf_flags |= IDE_TFLAG_IO_16BIT; + bufsize = SECTOR_SIZE * args[3]; + buf = kzalloc(bufsize, GFP_KERNEL); + if (buf == NULL) + return -ENOMEM; + } + + if (tf->command == ATA_CMD_SET_FEATURES && + tf->feature == SETFEATURES_XFER && + tf->nsect >= XFER_SW_DMA_0 && + (id[ATA_ID_UDMA_MODES] || + id[ATA_ID_MWDMA_MODES] || + id[ATA_ID_SWDMA_MODES])) { + xfer_rate = args[1]; + if (tf->nsect > XFER_UDMA_2 && !eighty_ninty_three(drive)) { + printk(KERN_WARNING "%s: UDMA speeds >UDMA33 cannot " + "be set\n", drive->name); + goto abort; + } + } + + err = ide_raw_taskfile(drive, &tfargs, buf, args[3]); + + args[0] = tf->status; + args[1] = tf->error; + args[2] = tf->nsect; + + if (!err && xfer_rate) { + /* active-retuning-calls future */ + ide_set_xfer_rate(drive, xfer_rate); + ide_driveid_update(drive); + } +abort: + if (copy_to_user((void __user *)arg, &args, 4)) + err = -EFAULT; + if (buf) { + if (copy_to_user((void __user *)(arg + 4), buf, bufsize)) + err = -EFAULT; + kfree(buf); + } + return err; +} + +static int ide_task_ioctl(ide_drive_t *drive, unsigned cmd, unsigned long arg) +{ + void __user *p = (void __user *)arg; + int err = 0; + u8 args[7]; + ide_task_t task; + + if (copy_from_user(args, p, 7)) + return -EFAULT; + + memset(&task, 0, sizeof(task)); + memcpy(&task.tf_array[7], &args[1], 6); + task.tf.command = args[0]; + task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; + + err = ide_no_data_taskfile(drive, &task); + + args[0] = task.tf.command; + memcpy(&args[1], &task.tf_array[7], 6); + + if (copy_to_user(p, args, 7)) + err = -EFAULT; + + return err; +} + +static int generic_drive_reset(ide_drive_t *drive) +{ + struct request *rq; + int ret = 0; + + rq = blk_get_request(drive->queue, READ, __GFP_WAIT); + rq->cmd_type = REQ_TYPE_SPECIAL; + rq->cmd_len = 1; + rq->cmd[0] = REQ_DRIVE_RESET; + rq->cmd_flags |= REQ_SOFTBARRIER; + if (blk_execute_rq(drive->queue, NULL, rq, 1)) + ret = rq->errors; + blk_put_request(rq); + return ret; +} + +int generic_ide_ioctl(ide_drive_t *drive, struct file *file, + struct block_device *bdev, + unsigned int cmd, unsigned long arg) +{ + int err; + + err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_ioctl_settings); + if (err != -EOPNOTSUPP) + return err; + + switch (cmd) { + case HDIO_OBSOLETE_IDENTITY: + case HDIO_GET_IDENTITY: + if (bdev != bdev->bd_contains) + return -EINVAL; + return ide_get_identity_ioctl(drive, cmd, arg); + case HDIO_GET_NICE: + return ide_get_nice_ioctl(drive, arg); + case HDIO_SET_NICE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return ide_set_nice_ioctl(drive, arg); +#ifdef CONFIG_IDE_TASK_IOCTL + case HDIO_DRIVE_TASKFILE: + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + return -EACCES; + if (drive->media == ide_disk) + return ide_taskfile_ioctl(drive, cmd, arg); + return -ENOMSG; +#endif + case HDIO_DRIVE_CMD: + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + return ide_cmd_ioctl(drive, cmd, arg); + case HDIO_DRIVE_TASK: + if (!capable(CAP_SYS_RAWIO)) + return -EACCES; + return ide_task_ioctl(drive, cmd, arg); + case HDIO_DRIVE_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return generic_drive_reset(drive); + case HDIO_GET_BUSSTATE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (put_user(BUSSTATE_ON, (long __user *)arg)) + return -EFAULT; + return 0; + case HDIO_SET_BUSSTATE: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} +EXPORT_SYMBOL(generic_ide_ioctl); diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 2cbadffe922..0a2fd3b37ac 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -18,7 +18,6 @@ #include <linux/slab.h> #include <linux/pci.h> #include <linux/delay.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/bitops.h> #include <linux/nmi.h> @@ -400,97 +399,14 @@ const struct ide_tp_ops default_tp_ops = { .output_data = ide_output_data, }; -void ide_fix_driveid (struct hd_driveid *id) +void ide_fix_driveid(u16 *id) { #ifndef __LITTLE_ENDIAN # ifdef __BIG_ENDIAN int i; - u16 *stringcast; - - id->config = __le16_to_cpu(id->config); - id->cyls = __le16_to_cpu(id->cyls); - id->reserved2 = __le16_to_cpu(id->reserved2); - id->heads = __le16_to_cpu(id->heads); - id->track_bytes = __le16_to_cpu(id->track_bytes); - id->sector_bytes = __le16_to_cpu(id->sector_bytes); - id->sectors = __le16_to_cpu(id->sectors); - id->vendor0 = __le16_to_cpu(id->vendor0); - id->vendor1 = __le16_to_cpu(id->vendor1); - id->vendor2 = __le16_to_cpu(id->vendor2); - stringcast = (u16 *)&id->serial_no[0]; - for (i = 0; i < (20/2); i++) - stringcast[i] = __le16_to_cpu(stringcast[i]); - id->buf_type = __le16_to_cpu(id->buf_type); - id->buf_size = __le16_to_cpu(id->buf_size); - id->ecc_bytes = __le16_to_cpu(id->ecc_bytes); - stringcast = (u16 *)&id->fw_rev[0]; - for (i = 0; i < (8/2); i++) - stringcast[i] = __le16_to_cpu(stringcast[i]); - stringcast = (u16 *)&id->model[0]; - for (i = 0; i < (40/2); i++) - stringcast[i] = __le16_to_cpu(stringcast[i]); - id->dword_io = __le16_to_cpu(id->dword_io); - id->reserved50 = __le16_to_cpu(id->reserved50); - id->field_valid = __le16_to_cpu(id->field_valid); - id->cur_cyls = __le16_to_cpu(id->cur_cyls); - id->cur_heads = __le16_to_cpu(id->cur_heads); - id->cur_sectors = __le16_to_cpu(id->cur_sectors); - id->cur_capacity0 = __le16_to_cpu(id->cur_capacity0); - id->cur_capacity1 = __le16_to_cpu(id->cur_capacity1); - id->lba_capacity = __le32_to_cpu(id->lba_capacity); - id->dma_1word = __le16_to_cpu(id->dma_1word); - id->dma_mword = __le16_to_cpu(id->dma_mword); - id->eide_pio_modes = __le16_to_cpu(id->eide_pio_modes); - id->eide_dma_min = __le16_to_cpu(id->eide_dma_min); - id->eide_dma_time = __le16_to_cpu(id->eide_dma_time); - id->eide_pio = __le16_to_cpu(id->eide_pio); - id->eide_pio_iordy = __le16_to_cpu(id->eide_pio_iordy); - for (i = 0; i < 2; ++i) - id->words69_70[i] = __le16_to_cpu(id->words69_70[i]); - for (i = 0; i < 4; ++i) - id->words71_74[i] = __le16_to_cpu(id->words71_74[i]); - id->queue_depth = __le16_to_cpu(id->queue_depth); - for (i = 0; i < 4; ++i) - id->words76_79[i] = __le16_to_cpu(id->words76_79[i]); - id->major_rev_num = __le16_to_cpu(id->major_rev_num); - id->minor_rev_num = __le16_to_cpu(id->minor_rev_num); - id->command_set_1 = __le16_to_cpu(id->command_set_1); - id->command_set_2 = __le16_to_cpu(id->command_set_2); - id->cfsse = __le16_to_cpu(id->cfsse); - id->cfs_enable_1 = __le16_to_cpu(id->cfs_enable_1); - id->cfs_enable_2 = __le16_to_cpu(id->cfs_enable_2); - id->csf_default = __le16_to_cpu(id->csf_default); - id->dma_ultra = __le16_to_cpu(id->dma_ultra); - id->trseuc = __le16_to_cpu(id->trseuc); - id->trsEuc = __le16_to_cpu(id->trsEuc); - id->CurAPMvalues = __le16_to_cpu(id->CurAPMvalues); - id->mprc = __le16_to_cpu(id->mprc); - id->hw_config = __le16_to_cpu(id->hw_config); - id->acoustic = __le16_to_cpu(id->acoustic); - id->msrqs = __le16_to_cpu(id->msrqs); - id->sxfert = __le16_to_cpu(id->sxfert); - id->sal = __le16_to_cpu(id->sal); - id->spg = __le32_to_cpu(id->spg); - id->lba_capacity_2 = __le64_to_cpu(id->lba_capacity_2); - for (i = 0; i < 22; i++) - id->words104_125[i] = __le16_to_cpu(id->words104_125[i]); - id->last_lun = __le16_to_cpu(id->last_lun); - id->word127 = __le16_to_cpu(id->word127); - id->dlf = __le16_to_cpu(id->dlf); - id->csfo = __le16_to_cpu(id->csfo); - for (i = 0; i < 26; i++) - id->words130_155[i] = __le16_to_cpu(id->words130_155[i]); - id->word156 = __le16_to_cpu(id->word156); - for (i = 0; i < 3; i++) - id->words157_159[i] = __le16_to_cpu(id->words157_159[i]); - id->cfa_power = __le16_to_cpu(id->cfa_power); - for (i = 0; i < 15; i++) - id->words161_175[i] = __le16_to_cpu(id->words161_175[i]); - for (i = 0; i < 30; i++) - id->words176_205[i] = __le16_to_cpu(id->words176_205[i]); - for (i = 0; i < 49; i++) - id->words206_254[i] = __le16_to_cpu(id->words206_254[i]); - id->integrity_word = __le16_to_cpu(id->integrity_word); + + for (i = 0; i < 256; i++) + id[i] = __le16_to_cpu(id[i]); # else # error "Please fix <asm/byteorder.h>" # endif @@ -501,19 +417,21 @@ void ide_fix_driveid (struct hd_driveid *id) * ide_fixstring() cleans up and (optionally) byte-swaps a text string, * removing leading/trailing blanks and compressing internal blanks. * It is primarily used to tidy up the model name/number fields as - * returned by the WIN_[P]IDENTIFY commands. + * returned by the ATA_CMD_ID_ATA[PI] commands. */ void ide_fixstring (u8 *s, const int bytecount, const int byteswap) { - u8 *p = s, *end = &s[bytecount & ~1]; /* bytecount must be even */ + u8 *p, *end = &s[bytecount & ~1]; /* bytecount must be even */ if (byteswap) { /* convert from big-endian to host byte order */ - for (p = end ; p != s;) - be16_to_cpus((u16 *)(p -= 2)); + for (p = s ; p != end ; p += 2) + be16_to_cpus((u16 *) p); } + /* strip leading blanks */ + p = s; while (s != end && *s == ' ') ++s; /* compress internal blanks and strip trailing blanks */ @@ -556,7 +474,7 @@ int drive_is_ready (ide_drive_t *drive) /* Note: this may clear a pending IRQ!! */ stat = hwif->tp_ops->read_status(hwif); - if (stat & BUSY_STAT) + if (stat & ATA_BUSY) /* drive busy: definitely not interrupting */ return 0; @@ -588,10 +506,10 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti udelay(1); /* spec allows drive 400ns to assert "BUSY" */ stat = tp_ops->read_status(hwif); - if (stat & BUSY_STAT) { + if (stat & ATA_BUSY) { local_irq_set(flags); timeout += jiffies; - while ((stat = tp_ops->read_status(hwif)) & BUSY_STAT) { + while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) { if (time_after(jiffies, timeout)) { /* * One last read after the timeout in case @@ -599,7 +517,7 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti * progress during the timeout.. */ stat = tp_ops->read_status(hwif); - if (!(stat & BUSY_STAT)) + if ((stat & ATA_BUSY) == 0) break; local_irq_restore(flags); @@ -660,18 +578,18 @@ EXPORT_SYMBOL(ide_wait_stat); /** * ide_in_drive_list - look for drive in black/white list * @id: drive identifier - * @drive_table: list to inspect + * @table: list to inspect * * Look for a drive in the blacklist and the whitelist tables * Returns 1 if the drive is found in the table. */ -int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table) +int ide_in_drive_list(u16 *id, const struct drive_list_entry *table) { - for ( ; drive_table->id_model; drive_table++) - if ((!strcmp(drive_table->id_model, id->model)) && - (!drive_table->id_firmware || - strstr(id->fw_rev, drive_table->id_firmware))) + for ( ; table->id_model; table++) + if ((!strcmp(table->id_model, (char *)&id[ATA_ID_PROD])) && + (!table->id_firmware || + strstr((char *)&id[ATA_ID_FW_REV], table->id_firmware))) return 1; return 0; } @@ -702,7 +620,7 @@ static const struct drive_list_entry ivb_list[] = { u8 eighty_ninty_three (ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - struct hd_driveid *id = drive->id; + u16 *id = drive->id; int ivb = ide_in_drive_list(id, ivb_list); if (hwif->cbl == ATA_CBL_PATA40_SHORT) @@ -712,7 +630,7 @@ u8 eighty_ninty_three (ide_drive_t *drive) printk(KERN_DEBUG "%s: skipping word 93 validity check\n", drive->name); - if (ide_dev_is_sata(id) && !ivb) + if (ata_id_is_sata(id) && !ivb) return 1; if (hwif->cbl != ATA_CBL_PATA80 && !ivb) @@ -724,7 +642,8 @@ u8 eighty_ninty_three (ide_drive_t *drive) * - force bit13 (80c cable present) check also for !ivb devices * (unless the slave device is pre-ATA3) */ - if ((id->hw_config & 0x4000) || (ivb && (id->hw_config & 0x2000))) + if ((id[ATA_ID_HW_CONFIG] & 0x4000) || + (ivb && (id[ATA_ID_HW_CONFIG] & 0x2000))) return 1; no_80w: @@ -745,8 +664,8 @@ int ide_driveid_update(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; - struct hd_driveid *id; - unsigned long timeout, flags; + u16 *id; + unsigned long flags; u8 stat; /* @@ -757,29 +676,24 @@ int ide_driveid_update(ide_drive_t *drive) SELECT_MASK(drive, 1); tp_ops->set_irq(hwif, 0); msleep(50); - tp_ops->exec_command(hwif, WIN_IDENTIFY); - timeout = jiffies + WAIT_WORSTCASE; - do { - if (time_after(jiffies, timeout)) { - SELECT_MASK(drive, 0); - return 0; /* drive timed-out */ - } + tp_ops->exec_command(hwif, ATA_CMD_ID_ATA); - msleep(50); /* give drive a breather */ - stat = tp_ops->read_altstatus(hwif); - } while (stat & BUSY_STAT); + if (ide_busy_sleep(hwif, WAIT_WORSTCASE, 1)) { + SELECT_MASK(drive, 0); + return 0; + } - msleep(50); /* wait for IRQ and DRQ_STAT */ + msleep(50); /* wait for IRQ and ATA_DRQ */ stat = tp_ops->read_status(hwif); - if (!OK_STAT(stat, DRQ_STAT, BAD_R_STAT)) { + if (!OK_STAT(stat, ATA_DRQ, BAD_R_STAT)) { SELECT_MASK(drive, 0); printk("%s: CHECK for good STATUS\n", drive->name); return 0; } local_irq_save(flags); SELECT_MASK(drive, 0); - id = kmalloc(SECTOR_WORDS*4, GFP_ATOMIC); + id = kmalloc(SECTOR_SIZE, GFP_ATOMIC); if (!id) { local_irq_restore(flags); return 0; @@ -789,16 +703,16 @@ int ide_driveid_update(ide_drive_t *drive) local_irq_enable(); local_irq_restore(flags); ide_fix_driveid(id); - if (id) { - drive->id->dma_ultra = id->dma_ultra; - drive->id->dma_mword = id->dma_mword; - drive->id->dma_1word = id->dma_1word; - /* anything more ? */ - kfree(id); - - if (drive->using_dma && ide_id_dma_bug(drive)) - ide_dma_off(drive); - } + + drive->id[ATA_ID_UDMA_MODES] = id[ATA_ID_UDMA_MODES]; + drive->id[ATA_ID_MWDMA_MODES] = id[ATA_ID_MWDMA_MODES]; + drive->id[ATA_ID_SWDMA_MODES] = id[ATA_ID_SWDMA_MODES]; + /* anything more ? */ + + kfree(id); + + if (drive->using_dma && ide_id_dma_bug(drive)) + ide_dma_off(drive); return 1; } @@ -807,6 +721,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; + u16 *id = drive->id, i; int error = 0; u8 stat; ide_task_t task; @@ -817,7 +732,7 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) #endif /* Skip setting PIO flow-control modes on pre-EIDE drives */ - if ((speed & 0xf8) == XFER_PIO_0 && !(drive->id->capability & 0x08)) + if ((speed & 0xf8) == XFER_PIO_0 && ata_id_has_iordy(drive->id) == 0) goto skip; /* @@ -851,13 +766,13 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) tp_ops->tf_load(drive, &task); - tp_ops->exec_command(hwif, WIN_SETFEATURES); + tp_ops->exec_command(hwif, ATA_CMD_SET_FEATURES); if (drive->quirk_list == 2) tp_ops->set_irq(hwif, 1); error = __ide_wait_stat(drive, drive->ready_stat, - BUSY_STAT|DRQ_STAT|ERR_STAT, + ATA_BUSY | ATA_DRQ | ATA_ERR, WAIT_CMD, &stat); SELECT_MASK(drive, 0); @@ -869,9 +784,9 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) return error; } - drive->id->dma_ultra &= ~0xFF00; - drive->id->dma_mword &= ~0x0F00; - drive->id->dma_1word &= ~0x0F00; + id[ATA_ID_UDMA_MODES] &= ~0xFF00; + id[ATA_ID_MWDMA_MODES] &= ~0x0F00; + id[ATA_ID_SWDMA_MODES] &= ~0x0F00; skip: #ifdef CONFIG_BLK_DEV_IDEDMA @@ -881,23 +796,17 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) ide_dma_off_quietly(drive); #endif - switch(speed) { - case XFER_UDMA_7: drive->id->dma_ultra |= 0x8080; break; - case XFER_UDMA_6: drive->id->dma_ultra |= 0x4040; break; - case XFER_UDMA_5: drive->id->dma_ultra |= 0x2020; break; - case XFER_UDMA_4: drive->id->dma_ultra |= 0x1010; break; - case XFER_UDMA_3: drive->id->dma_ultra |= 0x0808; break; - case XFER_UDMA_2: drive->id->dma_ultra |= 0x0404; break; - case XFER_UDMA_1: drive->id->dma_ultra |= 0x0202; break; - case XFER_UDMA_0: drive->id->dma_ultra |= 0x0101; break; - case XFER_MW_DMA_2: drive->id->dma_mword |= 0x0404; break; - case XFER_MW_DMA_1: drive->id->dma_mword |= 0x0202; break; - case XFER_MW_DMA_0: drive->id->dma_mword |= 0x0101; break; - case XFER_SW_DMA_2: drive->id->dma_1word |= 0x0404; break; - case XFER_SW_DMA_1: drive->id->dma_1word |= 0x0202; break; - case XFER_SW_DMA_0: drive->id->dma_1word |= 0x0101; break; - default: break; + if (speed >= XFER_UDMA_0) { + i = 1 << (speed - XFER_UDMA_0); + id[ATA_ID_UDMA_MODES] |= (i << 8 | i); + } else if (speed >= XFER_MW_DMA_0) { + i = 1 << (speed - XFER_MW_DMA_0); + id[ATA_ID_MWDMA_MODES] |= (i << 8 | i); + } else if (speed >= XFER_SW_DMA_0) { + i = 1 << (speed - XFER_SW_DMA_0); + id[ATA_ID_SWDMA_MODES] |= (i << 8 | i); } + if (!drive->init_speed) drive->init_speed = speed; drive->current_speed = speed; @@ -977,7 +886,7 @@ void ide_execute_pkt_cmd(ide_drive_t *drive) unsigned long flags; spin_lock_irqsave(&ide_lock, flags); - hwif->tp_ops->exec_command(hwif, WIN_PACKETCMD); + hwif->tp_ops->exec_command(hwif, ATA_CMD_PACKET); ndelay(400); spin_unlock_irqrestore(&ide_lock, flags); } @@ -1010,7 +919,7 @@ static ide_startstop_t atapi_reset_pollfunc (ide_drive_t *drive) udelay (10); stat = hwif->tp_ops->read_status(hwif); - if (OK_STAT(stat, 0, BUSY_STAT)) + if (OK_STAT(stat, 0, ATA_BUSY)) printk("%s: ATAPI reset complete\n", drive->name); else { if (time_before(jiffies, hwgroup->poll_timeout)) { @@ -1056,7 +965,7 @@ static ide_startstop_t reset_pollfunc (ide_drive_t *drive) tmp = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(tmp, 0, BUSY_STAT)) { + if (!OK_STAT(tmp, 0, ATA_BUSY)) { if (time_before(jiffies, hwgroup->poll_timeout)) { ide_set_handler(drive, &reset_pollfunc, HZ/20, NULL); /* continue polling */ @@ -1102,7 +1011,7 @@ out: static void ide_disk_pre_reset(ide_drive_t *drive) { - int legacy = (drive->id->cfs_enable_2 & 0x0400) ? 0 : 1; + int legacy = (drive->id[ATA_ID_CFS_ENABLE_2] & 0x0400) ? 0 : 1; drive->special.all = 0; drive->special.b.set_geometry = legacy; @@ -1187,7 +1096,7 @@ static ide_startstop_t do_reset1 (ide_drive_t *drive, int do_not_try_atapi) pre_reset(drive); SELECT_DRIVE(drive); udelay (20); - tp_ops->exec_command(hwif, WIN_SRST); + tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); ndelay(400); hwgroup->poll_timeout = jiffies + WAIT_WORSTCASE; hwgroup->polling = 1; @@ -1270,7 +1179,7 @@ int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout) */ mdelay(1); stat = hwif->tp_ops->read_status(hwif); - if ((stat & BUSY_STAT) == 0) + if ((stat & ATA_BUSY) == 0) return 0; /* * Assume a value of 0xff means nothing is connected to diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 97fefabea8b..ed426dd0fdd 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -2,7 +2,6 @@ #include <linux/string.h> #include <linux/kernel.h> #include <linux/interrupt.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/bitops.h> @@ -90,29 +89,31 @@ static u8 ide_rate_filter(ide_drive_t *drive, u8 speed) u8 ide_get_best_pio_mode (ide_drive_t *drive, u8 mode_wanted, u8 max_mode) { - int pio_mode; - struct hd_driveid* id = drive->id; - int overridden = 0; + u16 *id = drive->id; + int pio_mode = -1, overridden = 0; if (mode_wanted != 255) return min_t(u8, mode_wanted, max_mode); - if ((drive->hwif->host_flags & IDE_HFLAG_PIO_NO_BLACKLIST) == 0 && - (pio_mode = ide_scan_pio_blacklist(id->model)) != -1) { + if ((drive->hwif->host_flags & IDE_HFLAG_PIO_NO_BLACKLIST) == 0) + pio_mode = ide_scan_pio_blacklist((char *)&id[ATA_ID_PROD]); + + if (pio_mode != -1) { printk(KERN_INFO "%s: is on PIO blacklist\n", drive->name); } else { - pio_mode = id->tPIO; + pio_mode = id[ATA_ID_OLD_PIO_MODES] >> 8; if (pio_mode > 2) { /* 2 is maximum allowed tPIO value */ pio_mode = 2; overridden = 1; } - if (id->field_valid & 2) { /* drive implements ATA2? */ - if (id->capability & 8) { /* IORDY supported? */ - if (id->eide_pio_modes & 7) { + + if (id[ATA_ID_FIELD_VALID] & 2) { /* ATA2? */ + if (ata_id_has_iordy(id)) { + if (id[ATA_ID_PIO_MODES] & 7) { overridden = 0; - if (id->eide_pio_modes & 4) + if (id[ATA_ID_PIO_MODES] & 4) pio_mode = 5; - else if (id->eide_pio_modes & 2) + else if (id[ATA_ID_PIO_MODES] & 2) pio_mode = 4; else pio_mode = 3; @@ -338,16 +339,16 @@ static void ide_dump_sector(ide_drive_t *drive) static void ide_dump_ata_error(ide_drive_t *drive, u8 err) { printk("{ "); - if (err & ABRT_ERR) printk("DriveStatusError "); - if (err & ICRC_ERR) - printk((err & ABRT_ERR) ? "BadCRC " : "BadSector "); - if (err & ECC_ERR) printk("UncorrectableError "); - if (err & ID_ERR) printk("SectorIdNotFound "); - if (err & TRK0_ERR) printk("TrackZeroNotFound "); - if (err & MARK_ERR) printk("AddrMarkNotFound "); + if (err & ATA_ABORTED) printk("DriveStatusError "); + if (err & ATA_ICRC) + printk((err & ATA_ABORTED) ? "BadCRC " : "BadSector "); + if (err & ATA_UNC) printk("UncorrectableError "); + if (err & ATA_IDNF) printk("SectorIdNotFound "); + if (err & ATA_TRK0NF) printk("TrackZeroNotFound "); + if (err & ATA_AMNF) printk("AddrMarkNotFound "); printk("}"); - if ((err & (BBD_ERR | ABRT_ERR)) == BBD_ERR || - (err & (ECC_ERR|ID_ERR|MARK_ERR))) { + if ((err & (ATA_BBK | ATA_ABORTED)) == ATA_BBK || + (err & (ATA_UNC | ATA_IDNF | ATA_AMNF))) { ide_dump_sector(drive); if (HWGROUP(drive) && HWGROUP(drive)->rq) printk(", sector=%llu", @@ -359,12 +360,12 @@ static void ide_dump_ata_error(ide_drive_t *drive, u8 err) static void ide_dump_atapi_error(ide_drive_t *drive, u8 err) { printk("{ "); - if (err & ILI_ERR) printk("IllegalLengthIndication "); - if (err & EOM_ERR) printk("EndOfMedia "); - if (err & ABRT_ERR) printk("AbortedCommand "); - if (err & MCR_ERR) printk("MediaChangeRequested "); - if (err & LFS_ERR) printk("LastFailedSense=0x%02x ", - (err & LFS_ERR) >> 4); + if (err & ATAPI_ILI) printk("IllegalLengthIndication "); + if (err & ATAPI_EOM) printk("EndOfMedia "); + if (err & ATA_ABORTED) printk("AbortedCommand "); + if (err & ATA_MCR) printk("MediaChangeRequested "); + if (err & ATAPI_LFS) printk("LastFailedSense=0x%02x ", + (err & ATAPI_LFS) >> 4); printk("}\n"); } @@ -386,19 +387,19 @@ u8 ide_dump_status(ide_drive_t *drive, const char *msg, u8 stat) local_irq_save(flags); printk("%s: %s: status=0x%02x { ", drive->name, msg, stat); - if (stat & BUSY_STAT) + if (stat & ATA_BUSY) printk("Busy "); else { - if (stat & READY_STAT) printk("DriveReady "); - if (stat & WRERR_STAT) printk("DeviceFault "); - if (stat & SEEK_STAT) printk("SeekComplete "); - if (stat & DRQ_STAT) printk("DataRequest "); - if (stat & ECC_STAT) printk("CorrectedError "); - if (stat & INDEX_STAT) printk("Index "); - if (stat & ERR_STAT) printk("Error "); + if (stat & ATA_DRDY) printk("DriveReady "); + if (stat & ATA_DF) printk("DeviceFault "); + if (stat & ATA_DSC) printk("SeekComplete "); + if (stat & ATA_DRQ) printk("DataRequest "); + if (stat & ATA_CORR) printk("CorrectedError "); + if (stat & ATA_IDX) printk("Index "); + if (stat & ATA_ERR) printk("Error "); } printk("}\n"); - if ((stat & (BUSY_STAT|ERR_STAT)) == ERR_STAT) { + if ((stat & (ATA_BUSY | ATA_ERR)) == ATA_ERR) { err = ide_read_error(drive); printk("%s: %s: error=0x%02x ", drive->name, msg, err); if (drive->media == ide_disk) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 70aa86c8807..06575a12b63 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -50,59 +50,54 @@ static void generic_id(ide_drive_t *drive) { - drive->id->cyls = drive->cyl; - drive->id->heads = drive->head; - drive->id->sectors = drive->sect; - drive->id->cur_cyls = drive->cyl; - drive->id->cur_heads = drive->head; - drive->id->cur_sectors = drive->sect; + u16 *id = drive->id; + + id[ATA_ID_CUR_CYLS] = id[ATA_ID_CYLS] = drive->cyl; + id[ATA_ID_CUR_HEADS] = id[ATA_ID_HEADS] = drive->head; + id[ATA_ID_CUR_SECTORS] = id[ATA_ID_SECTORS] = drive->sect; } static void ide_disk_init_chs(ide_drive_t *drive) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; /* Extract geometry if we did not already have one for the drive */ if (!drive->cyl || !drive->head || !drive->sect) { - drive->cyl = drive->bios_cyl = id->cyls; - drive->head = drive->bios_head = id->heads; - drive->sect = drive->bios_sect = id->sectors; + drive->cyl = drive->bios_cyl = id[ATA_ID_CYLS]; + drive->head = drive->bios_head = id[ATA_ID_HEADS]; + drive->sect = drive->bios_sect = id[ATA_ID_SECTORS]; } /* Handle logical geometry translation by the drive */ - if ((id->field_valid & 1) && id->cur_cyls && - id->cur_heads && (id->cur_heads <= 16) && id->cur_sectors) { - drive->cyl = id->cur_cyls; - drive->head = id->cur_heads; - drive->sect = id->cur_sectors; + if (ata_id_current_chs_valid(id)) { + drive->cyl = id[ATA_ID_CUR_CYLS]; + drive->head = id[ATA_ID_CUR_HEADS]; + drive->sect = id[ATA_ID_CUR_SECTORS]; } /* Use physical geometry if what we have still makes no sense */ - if (drive->head > 16 && id->heads && id->heads <= 16) { - drive->cyl = id->cyls; - drive->head = id->heads; - drive->sect = id->sectors; + if (drive->head > 16 && id[ATA_ID_HEADS] && id[ATA_ID_HEADS] <= 16) { + drive->cyl = id[ATA_ID_CYLS]; + drive->head = id[ATA_ID_HEADS]; + drive->sect = id[ATA_ID_SECTORS]; } } static void ide_disk_init_mult_count(ide_drive_t *drive) { - struct hd_driveid *id = drive->id; - - drive->mult_count = 0; - if (id->max_multsect) { -#ifdef CONFIG_IDEDISK_MULTI_MODE - id->multsect = ((id->max_multsect/2) > 1) ? id->max_multsect : 0; - id->multsect_valid = id->multsect ? 1 : 0; - drive->mult_req = id->multsect_valid ? id->max_multsect : 0; - drive->special.b.set_multmode = drive->mult_req ? 1 : 0; -#else /* original, pre IDE-NFG, per request of AC */ - drive->mult_req = 0; - if (drive->mult_req > id->max_multsect) - drive->mult_req = id->max_multsect; - if (drive->mult_req || ((id->multsect_valid & 1) && id->multsect)) + u16 *id = drive->id; + u8 max_multsect = id[ATA_ID_MAX_MULTSECT] & 0xff; + + if (max_multsect) { + if ((max_multsect / 2) > 1) + id[ATA_ID_MULTSECT] = max_multsect | 0x100; + else + id[ATA_ID_MULTSECT] &= ~0x1ff; + + drive->mult_req = id[ATA_ID_MULTSECT] & 0xff; + + if (drive->mult_req) drive->special.b.set_multmode = 1; -#endif } } @@ -119,10 +114,10 @@ static void ide_disk_init_mult_count(ide_drive_t *drive) static inline void do_identify (ide_drive_t *drive, u8 cmd) { ide_hwif_t *hwif = HWIF(drive); - int bswap = 1; - struct hd_driveid *id; + u16 *id = drive->id; + char *m = (char *)&id[ATA_ID_PROD]; + int bswap = 1, is_cfa; - id = drive->id; /* read 512 bytes of id info */ hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); @@ -135,27 +130,28 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) ide_fix_driveid(id); /* - * WIN_IDENTIFY returns little-endian info, - * WIN_PIDENTIFY *usually* returns little-endian info. + * ATA_CMD_ID_ATA returns little-endian info, + * ATA_CMD_ID_ATAPI *usually* returns little-endian info. */ - if (cmd == WIN_PIDENTIFY) { - if ((id->model[0] == 'N' && id->model[1] == 'E') /* NEC */ - || (id->model[0] == 'F' && id->model[1] == 'X') /* Mitsumi */ - || (id->model[0] == 'P' && id->model[1] == 'i'))/* Pioneer */ + if (cmd == ATA_CMD_ID_ATAPI) { + if ((m[0] == 'N' && m[1] == 'E') || /* NEC */ + (m[0] == 'F' && m[1] == 'X') || /* Mitsumi */ + (m[0] == 'P' && m[1] == 'i')) /* Pioneer */ /* Vertos drives may still be weird */ - bswap ^= 1; + bswap ^= 1; } - ide_fixstring(id->model, sizeof(id->model), bswap); - ide_fixstring(id->fw_rev, sizeof(id->fw_rev), bswap); - ide_fixstring(id->serial_no, sizeof(id->serial_no), bswap); + + ide_fixstring(m, ATA_ID_PROD_LEN, bswap); + ide_fixstring((char *)&id[ATA_ID_FW_REV], ATA_ID_FW_REV_LEN, bswap); + ide_fixstring((char *)&id[ATA_ID_SERNO], ATA_ID_SERNO_LEN, bswap); /* we depend on this a lot! */ - id->model[sizeof(id->model)-1] = '\0'; + m[ATA_ID_PROD_LEN - 1] = '\0'; - if (strstr(id->model, "E X A B Y T E N E S T")) + if (strstr(m, "E X A B Y T E N E S T")) goto err_misc; - printk(KERN_INFO "%s: %s, ", drive->name, id->model); + printk(KERN_INFO "%s: %s, ", drive->name, m); drive->present = 1; drive->dead = 0; @@ -163,16 +159,16 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) /* * Check for an ATAPI device */ - if (cmd == WIN_PIDENTIFY) { - u8 type = (id->config >> 8) & 0x1f; + if (cmd == ATA_CMD_ID_ATAPI) { + u8 type = (id[ATA_ID_CONFIG] >> 8) & 0x1f; printk(KERN_CONT "ATAPI "); switch (type) { case ide_floppy: - if (!strstr(id->model, "CD-ROM")) { - if (!strstr(id->model, "oppy") && - !strstr(id->model, "poyp") && - !strstr(id->model, "ZIP")) + if (!strstr(m, "CD-ROM")) { + if (!strstr(m, "oppy") && + !strstr(m, "poyp") && + !strstr(m, "ZIP")) printk(KERN_CONT "cdrom or floppy?, assuming "); if (drive->media != ide_cdrom) { printk(KERN_CONT "FLOPPY"); @@ -186,8 +182,7 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) drive->removable = 1; #ifdef CONFIG_PPC /* kludge for Apple PowerBook internal zip */ - if (!strstr(id->model, "CD-ROM") && - strstr(id->model, "ZIP")) { + if (!strstr(m, "CD-ROM") && strstr(m, "ZIP")) { printk(KERN_CONT "FLOPPY"); type = ide_floppy; break; @@ -217,18 +212,15 @@ static inline void do_identify (ide_drive_t *drive, u8 cmd) * Not an ATAPI device: looks like a "regular" hard disk */ - /* - * 0x848a = CompactFlash device - * These are *not* removable in Linux definition of the term - */ + is_cfa = ata_id_is_cfa(id); - if ((id->config != 0x848a) && (id->config & (1<<7))) + /* CF devices are *not* removable in Linux definition of the term */ + if (is_cfa == 0 && (id[ATA_ID_CONFIG] & (1 << 7))) drive->removable = 1; drive->media = ide_disk; - printk(KERN_CONT "%s DISK drive\n", - (id->config == 0x848a) ? "CFA" : "ATA"); + printk(KERN_CONT "%s DISK drive\n", is_cfa ? "CFA" : "ATA"); return; @@ -268,7 +260,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) if (io_ports->ctl_addr) { a = tp_ops->read_altstatus(hwif); s = tp_ops->read_status(hwif); - if ((a ^ s) & ~INDEX_STAT) + if ((a ^ s) & ~ATA_IDX) /* ancient Seagate drives, broken interfaces */ printk(KERN_INFO "%s: probing with STATUS(0x%02x) " "instead of ALTSTATUS(0x%02x)\n", @@ -281,7 +273,7 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) /* set features register for atapi * identify command to be sure of reply */ - if (cmd == WIN_PIDENTIFY) { + if (cmd == ATA_CMD_ID_ATAPI) { ide_task_t task; memset(&task, 0, sizeof(task)); @@ -294,24 +286,16 @@ static int actual_try_to_identify (ide_drive_t *drive, u8 cmd) /* ask drive for ID */ tp_ops->exec_command(hwif, cmd); - timeout = ((cmd == WIN_IDENTIFY) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - timeout += jiffies; - do { - if (time_after(jiffies, timeout)) { - /* drive timed-out */ - return 1; - } - /* give drive a breather */ - msleep(50); - s = use_altstatus ? tp_ops->read_altstatus(hwif) - : tp_ops->read_status(hwif); - } while (s & BUSY_STAT); + timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - /* wait for IRQ and DRQ_STAT */ + if (ide_busy_sleep(hwif, timeout, use_altstatus)) + return 1; + + /* wait for IRQ and ATA_DRQ */ msleep(50); s = tp_ops->read_status(hwif); - if (OK_STAT(s, DRQ_STAT, BAD_R_STAT)) { + if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) { unsigned long flags; /* local CPU only; some systems need this */ @@ -387,19 +371,21 @@ static int try_to_identify (ide_drive_t *drive, u8 cmd) return retval; } -static int ide_busy_sleep(ide_hwif_t *hwif) +int ide_busy_sleep(ide_hwif_t *hwif, unsigned long timeout, int altstatus) { - unsigned long timeout = jiffies + WAIT_WORSTCASE; u8 stat; + timeout += jiffies; + do { - msleep(50); - stat = hwif->tp_ops->read_status(hwif); - if ((stat & BUSY_STAT) == 0) + msleep(50); /* give drive a breather */ + stat = altstatus ? hwif->tp_ops->read_altstatus(hwif) + : hwif->tp_ops->read_status(hwif); + if ((stat & ATA_BUSY) == 0) return 0; } while (time_before(jiffies, timeout)); - return 1; + return 1; /* drive timed-out */ } static u8 ide_read_device(ide_drive_t *drive) @@ -444,13 +430,13 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (drive->present) { /* avoid waiting for inappropriate probes */ - if ((drive->media != ide_disk) && (cmd == WIN_IDENTIFY)) + if (drive->media != ide_disk && cmd == ATA_CMD_ID_ATA) return 4; } #ifdef DEBUG printk(KERN_INFO "probing for %s: present=%d, media=%d, probetype=%s\n", drive->name, drive->present, drive->media, - (cmd == WIN_IDENTIFY) ? "ATA" : "ATAPI"); + (cmd == ATA_CMD_ID_ATA) ? "ATA" : "ATAPI"); #endif /* needed for some systems @@ -464,7 +450,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (drive->select.b.unit != 0) { /* exit with drive0 selected */ SELECT_DRIVE(&hwif->drives[0]); - /* allow BUSY_STAT to assert & clear */ + /* allow ATA_BUSY to assert & clear */ msleep(50); } /* no i/f present: mmm.. this should be a 4 -ml */ @@ -473,8 +459,8 @@ static int do_probe (ide_drive_t *drive, u8 cmd) stat = tp_ops->read_status(hwif); - if (OK_STAT(stat, READY_STAT, BUSY_STAT) || - drive->present || cmd == WIN_PIDENTIFY) { + if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) || + drive->present || cmd == ATA_CMD_ID_ATAPI) { /* send cmd and wait */ if ((rc = try_to_identify(drive, cmd))) { /* failed: try again */ @@ -483,17 +469,17 @@ static int do_probe (ide_drive_t *drive, u8 cmd) stat = tp_ops->read_status(hwif); - if (stat == (BUSY_STAT | READY_STAT)) + if (stat == (ATA_BUSY | ATA_DRDY)) return 4; - if (rc == 1 && cmd == WIN_PIDENTIFY) { + if (rc == 1 && cmd == ATA_CMD_ID_ATAPI) { printk(KERN_ERR "%s: no response (status = 0x%02x), " "resetting drive\n", drive->name, stat); msleep(50); SELECT_DRIVE(drive); msleep(50); - tp_ops->exec_command(hwif, WIN_SRST); - (void)ide_busy_sleep(hwif); + tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); + (void)ide_busy_sleep(hwif, WAIT_WORSTCASE, 0); rc = try_to_identify(drive, cmd); } @@ -526,13 +512,14 @@ static void enable_nest (ide_drive_t *drive) const struct ide_tp_ops *tp_ops = hwif->tp_ops; u8 stat; - printk(KERN_INFO "%s: enabling %s -- ", hwif->name, drive->id->model); + printk(KERN_INFO "%s: enabling %s -- ", + hwif->name, (char *)&drive->id[ATA_ID_PROD]); SELECT_DRIVE(drive); msleep(50); - tp_ops->exec_command(hwif, EXABYTE_ENABLE_NEST); + tp_ops->exec_command(hwif, ATA_EXABYTE_ENABLE_NEST); - if (ide_busy_sleep(hwif)) { + if (ide_busy_sleep(hwif, WAIT_WORSTCASE, 0)) { printk(KERN_CONT "failed (timeout)\n"); return; } @@ -545,12 +532,6 @@ static void enable_nest (ide_drive_t *drive) printk(KERN_CONT "failed (status = 0x%02x)\n", stat); else printk(KERN_CONT "success\n"); - - /* if !(success||timed-out) */ - if (do_probe(drive, WIN_IDENTIFY) >= 2) { - /* look for ATAPI device */ - (void) do_probe(drive, WIN_PIDENTIFY); - } } /** @@ -567,6 +548,8 @@ static void enable_nest (ide_drive_t *drive) static inline u8 probe_for_drive (ide_drive_t *drive) { + char *m; + /* * In order to keep things simple we have an id * block for all drives at all times. If the device @@ -576,29 +559,34 @@ static inline u8 probe_for_drive (ide_drive_t *drive) * Also note that 0 everywhere means "can't do X" */ - drive->id = kzalloc(SECTOR_WORDS *4, GFP_KERNEL); + drive->id = kzalloc(SECTOR_SIZE, GFP_KERNEL); drive->id_read = 0; if(drive->id == NULL) { printk(KERN_ERR "ide: out of memory for id data.\n"); return 0; } - strcpy(drive->id->model, "UNKNOWN"); - + + m = (char *)&drive->id[ATA_ID_PROD]; + strcpy(m, "UNKNOWN"); + /* skip probing? */ - if (!drive->noprobe) - { + if (!drive->noprobe) { +retry: /* if !(success||timed-out) */ - if (do_probe(drive, WIN_IDENTIFY) >= 2) { + if (do_probe(drive, ATA_CMD_ID_ATA) >= 2) /* look for ATAPI device */ - (void) do_probe(drive, WIN_PIDENTIFY); - } + (void)do_probe(drive, ATA_CMD_ID_ATAPI); + if (!drive->present) /* drive not found */ return 0; - if (strstr(drive->id->model, "E X A B Y T E N E S T")) + + if (strstr(m, "E X A B Y T E N E S T")) { enable_nest(drive); - + goto retry; + } + /* identification failed? */ if (!drive->id_read) { if (drive->media == ide_disk) { @@ -740,36 +728,38 @@ out: /** * ide_undecoded_slave - look for bad CF adapters - * @drive1: drive + * @dev1: slave device * * Analyse the drives on the interface and attempt to decide if we * have the same drive viewed twice. This occurs with crap CF adapters * and PCMCIA sometimes. */ -void ide_undecoded_slave(ide_drive_t *drive1) +void ide_undecoded_slave(ide_drive_t *dev1) { - ide_drive_t *drive0 = &drive1->hwif->drives[0]; + ide_drive_t *dev0 = &dev1->hwif->drives[0]; - if ((drive1->dn & 1) == 0 || drive0->present == 0) + if ((dev1->dn & 1) == 0 || dev0->present == 0) return; /* If the models don't match they are not the same product */ - if (strcmp(drive0->id->model, drive1->id->model)) + if (strcmp((char *)&dev0->id[ATA_ID_PROD], + (char *)&dev1->id[ATA_ID_PROD])) return; /* Serial numbers do not match */ - if (strncmp(drive0->id->serial_no, drive1->id->serial_no, 20)) + if (strncmp((char *)&dev0->id[ATA_ID_SERNO], + (char *)&dev1->id[ATA_ID_SERNO], ATA_ID_SERNO_LEN)) return; /* No serial number, thankfully very rare for CF */ - if (drive0->id->serial_no[0] == 0) + if (*(char *)&dev0->id[ATA_ID_SERNO] == 0) return; /* Appears to be an IDE flash adapter with decode bugs */ printk(KERN_WARNING "ide-probe: ignoring undecoded slave\n"); - drive1->present = 0; + dev1->present = 0; } EXPORT_SYMBOL_GPL(ide_undecoded_slave); @@ -853,7 +843,7 @@ static void ide_port_tune_devices(ide_hwif_t *hwif) if (hwif->host_flags & IDE_HFLAG_NO_IO_32BIT) drive->no_io_32bit = 1; else - drive->no_io_32bit = drive->id->dword_io ? 1 : 0; + drive->no_io_32bit = drive->id[ATA_ID_DWORD_IO] ? 1 : 0; } } @@ -1037,11 +1027,6 @@ static int init_irq (ide_hwif_t *hwif) ide_hwgroup_t *hwgroup; ide_hwif_t *match = NULL; - - BUG_ON(in_interrupt()); - BUG_ON(irqs_disabled()); - BUG_ON(hwif == NULL); - mutex_lock(&ide_cfg_mtx); hwif->hwgroup = NULL; #if MAX_HWIFS > 1 @@ -1116,7 +1101,8 @@ static int init_irq (ide_hwif_t *hwif) sa = IRQF_SHARED; #endif /* __mc68000__ */ - if (IDE_CHIPSET_IS_PCI(hwif->chipset)) + if (hwif->chipset == ide_pci || hwif->chipset == ide_cmd646 || + hwif->chipset == ide_ali14xx) sa = IRQF_SHARED; if (io_ports->ctl_addr) @@ -1344,8 +1330,6 @@ static void hwif_register_devices(ide_hwif_t *hwif) if (!drive->present) continue; - ide_add_generic_settings(drive); - snprintf(dev->bus_id, BUS_ID_SIZE, "%u.%u", hwif->index, i); dev->parent = &hwif->gendev; dev->bus = &ide_bus_type; @@ -1602,8 +1586,10 @@ struct ide_host *ide_host_alloc_all(const struct ide_port_info *d, if (hws[0]) host->dev[0] = hws[0]->dev; - if (d) + if (d) { + host->init_chipset = d->init_chipset; host->host_flags = d->host_flags; + } return host; } diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index f66c9c3f6fc..e7030a49146 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -12,14 +12,6 @@ * "settings" files. e.g. "cat /proc/ide0/hda/settings" * To write a new value "val" into a specific setting "name", use: * echo "name:val" >/proc/ide/ide0/hda/settings - * - * Also useful, "cat /proc/ide0/hda/[identify, smart_values, - * smart_thresholds, capabilities]" will issue an IDENTIFY / - * PACKET_IDENTIFY / SMART_READ_VALUES / SMART_READ_THRESHOLDS / - * SENSE CAPABILITIES command to /dev/hda, and then dump out the - * returned data as 256 16-bit words. The "hdparm" utility will - * be updated someday soon to use this mechanism. - * */ #include <linux/module.h> @@ -31,7 +23,6 @@ #include <linux/mm.h> #include <linux/pci.h> #include <linux/ctype.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/seq_file.h> @@ -109,13 +100,14 @@ static int proc_ide_read_identify err = taskfile_lib_get_identify(drive, page); if (!err) { - char *out = ((char *)page) + (SECTOR_WORDS * 4); + char *out = (char *)page + SECTOR_SIZE; + page = out; do { out += sprintf(out, "%04x%c", le16_to_cpup(val), (++i & 7) ? ' ' : '\n'); val += 1; - } while (i < (SECTOR_WORDS * 2)); + } while (i < SECTOR_SIZE / 2); len = out - page; } } @@ -123,140 +115,25 @@ static int proc_ide_read_identify } /** - * __ide_add_setting - add an ide setting option - * @drive: drive to use - * @name: setting name - * @rw: true if the function is read write - * @data_type: type of data - * @min: range minimum - * @max: range maximum - * @mul_factor: multiplication scale - * @div_factor: divison scale - * @data: private data field - * @set: setting - * @auto_remove: setting auto removal flag - * - * Removes the setting named from the device if it is present. - * The function takes the settings_lock to protect against - * parallel changes. This function must not be called from IRQ - * context. Returns 0 on success or -1 on failure. - * - * BUGS: This code is seriously over-engineered. There is also - * magic about how the driver specific features are setup. If - * a driver is attached we assume the driver settings are auto - * remove. - */ - -static int __ide_add_setting(ide_drive_t *drive, const char *name, int rw, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set, int auto_remove) -{ - ide_settings_t **p = (ide_settings_t **) &drive->settings, *setting = NULL; - - mutex_lock(&ide_setting_mtx); - while ((*p) && strcmp((*p)->name, name) < 0) - p = &((*p)->next); - if ((setting = kzalloc(sizeof(*setting), GFP_KERNEL)) == NULL) - goto abort; - if ((setting->name = kmalloc(strlen(name) + 1, GFP_KERNEL)) == NULL) - goto abort; - strcpy(setting->name, name); - setting->rw = rw; - setting->data_type = data_type; - setting->min = min; - setting->max = max; - setting->mul_factor = mul_factor; - setting->div_factor = div_factor; - setting->data = data; - setting->set = set; - - setting->next = *p; - if (auto_remove) - setting->auto_remove = 1; - *p = setting; - mutex_unlock(&ide_setting_mtx); - return 0; -abort: - mutex_unlock(&ide_setting_mtx); - kfree(setting); - return -1; -} - -int ide_add_setting(ide_drive_t *drive, const char *name, int rw, int data_type, int min, int max, int mul_factor, int div_factor, void *data, ide_procset_t *set) -{ - return __ide_add_setting(drive, name, rw, data_type, min, max, mul_factor, div_factor, data, set, 1); -} - -EXPORT_SYMBOL(ide_add_setting); - -/** - * __ide_remove_setting - remove an ide setting option - * @drive: drive to use - * @name: setting name - * - * Removes the setting named from the device if it is present. - * The caller must hold the setting semaphore. - */ - -static void __ide_remove_setting(ide_drive_t *drive, char *name) -{ - ide_settings_t **p, *setting; - - p = (ide_settings_t **) &drive->settings; - - while ((*p) && strcmp((*p)->name, name)) - p = &((*p)->next); - setting = (*p); - if (setting == NULL) - return; - - (*p) = setting->next; - - kfree(setting->name); - kfree(setting); -} - -/** - * auto_remove_settings - remove driver specific settings - * @drive: drive - * - * Automatically remove all the driver specific settings for this - * drive. This function may not be called from IRQ context. The - * caller must hold ide_setting_mtx. - */ - -static void auto_remove_settings(ide_drive_t *drive) -{ - ide_settings_t *setting; -repeat: - setting = drive->settings; - while (setting) { - if (setting->auto_remove) { - __ide_remove_setting(drive, setting->name); - goto repeat; - } - setting = setting->next; - } -} - -/** - * ide_find_setting_by_name - find a drive specific setting - * @drive: drive to scan + * ide_find_setting - find a specific setting + * @st: setting table pointer * @name: setting name * - * Scan's the device setting table for a matching entry and returns + * Scan's the setting table for a matching entry and returns * this or NULL if no entry is found. The caller must hold the * setting semaphore */ -static ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name) +static +const struct ide_proc_devset *ide_find_setting(const struct ide_proc_devset *st, + char *name) { - ide_settings_t *setting = drive->settings; - - while (setting) { - if (strcmp(setting->name, name) == 0) + while (st->name) { + if (strcmp(st->name, name) == 0) break; - setting = setting->next; + st++; } - return setting; + return st->name ? st : NULL; } /** @@ -272,26 +149,20 @@ static ide_settings_t *ide_find_setting_by_name(ide_drive_t *drive, char *name) * be told apart */ -static int ide_read_setting(ide_drive_t *drive, ide_settings_t *setting) +static int ide_read_setting(ide_drive_t *drive, + const struct ide_proc_devset *setting) { - int val = -EINVAL; - unsigned long flags; + const struct ide_devset *ds = setting->setting; + int val = -EINVAL; + + if (ds->get) { + unsigned long flags; - if ((setting->rw & SETTING_READ)) { spin_lock_irqsave(&ide_lock, flags); - switch (setting->data_type) { - case TYPE_BYTE: - val = *((u8 *) setting->data); - break; - case TYPE_SHORT: - val = *((u16 *) setting->data); - break; - case TYPE_INT: - val = *((u32 *) setting->data); - break; - } + val = ds->get(drive); spin_unlock_irqrestore(&ide_lock, flags); } + return val; } @@ -313,33 +184,23 @@ static int ide_read_setting(ide_drive_t *drive, ide_settings_t *setting) * The current scheme of polling is kludgy, though safe enough. */ -static int ide_write_setting(ide_drive_t *drive, ide_settings_t *setting, int val) +static int ide_write_setting(ide_drive_t *drive, + const struct ide_proc_devset *setting, int val) { + const struct ide_devset *ds = setting->setting; + if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (setting->set) - return setting->set(drive, val); - if (!(setting->rw & SETTING_WRITE)) + if (!ds->set) return -EPERM; - if (val < setting->min || val > setting->max) + if ((ds->flags & DS_SYNC) + && (val < setting->min || val > setting->max)) return -EINVAL; - if (ide_spin_wait_hwgroup(drive)) - return -EBUSY; - switch (setting->data_type) { - case TYPE_BYTE: - *((u8 *) setting->data) = val; - break; - case TYPE_SHORT: - *((u16 *) setting->data) = val; - break; - case TYPE_INT: - *((u32 *) setting->data) = val; - break; - } - spin_unlock_irq(&ide_lock); - return 0; + return ide_devset_execute(drive, ds, val); } +ide_devset_get(xfer_rate, current_speed); + static int set_xfer_rate (ide_drive_t *drive, int arg) { ide_task_t task; @@ -349,7 +210,7 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) return -EINVAL; memset(&task, 0, sizeof(task)); - task.tf.command = WIN_SETFEATURES; + task.tf.command = ATA_CMD_SET_FEATURES; task.tf.feature = SETFEATURES_XFER; task.tf.nsect = (u8)arg; task.tf_flags = IDE_TFLAG_OUT_FEATURE | IDE_TFLAG_OUT_NSECT | @@ -364,29 +225,23 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) return err; } -/** - * ide_add_generic_settings - generic ide settings - * @drive: drive being configured - * - * Add the generic parts of the system settings to the /proc files. - * The caller must not be holding the ide_setting_mtx. - */ - -void ide_add_generic_settings (ide_drive_t *drive) -{ -/* - * drive setting name read/write access data type min max mul_factor div_factor data pointer set function - */ - __ide_add_setting(drive, "io_32bit", drive->no_io_32bit ? SETTING_READ : SETTING_RW, TYPE_BYTE, 0, 1 + (SUPPORT_VLB_SYNC << 1), 1, 1, &drive->io_32bit, set_io_32bit, 0); - __ide_add_setting(drive, "keepsettings", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->keep_settings, NULL, 0); - __ide_add_setting(drive, "nice1", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->nice1, NULL, 0); - __ide_add_setting(drive, "pio_mode", SETTING_WRITE, TYPE_BYTE, 0, 255, 1, 1, NULL, set_pio_mode, 0); - __ide_add_setting(drive, "unmaskirq", drive->no_unmask ? SETTING_READ : SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->unmask, NULL, 0); - __ide_add_setting(drive, "using_dma", SETTING_RW, TYPE_BYTE, 0, 1, 1, 1, &drive->using_dma, set_using_dma, 0); - __ide_add_setting(drive, "init_speed", SETTING_RW, TYPE_BYTE, 0, 70, 1, 1, &drive->init_speed, NULL, 0); - __ide_add_setting(drive, "current_speed", SETTING_RW, TYPE_BYTE, 0, 70, 1, 1, &drive->current_speed, set_xfer_rate, 0); - __ide_add_setting(drive, "number", SETTING_RW, TYPE_BYTE, 0, 3, 1, 1, &drive->dn, NULL, 0); -} +ide_devset_rw(current_speed, xfer_rate); +ide_devset_rw_field(init_speed, init_speed); +ide_devset_rw_field(nice1, nice1); +ide_devset_rw_field(number, dn); + +static const struct ide_proc_devset ide_generic_settings[] = { + IDE_PROC_DEVSET(current_speed, 0, 70), + IDE_PROC_DEVSET(init_speed, 0, 70), + IDE_PROC_DEVSET(io_32bit, 0, 1 + (SUPPORT_VLB_SYNC << 1)), + IDE_PROC_DEVSET(keepsettings, 0, 1), + IDE_PROC_DEVSET(nice1, 0, 1), + IDE_PROC_DEVSET(number, 0, 3), + IDE_PROC_DEVSET(pio_mode, 0, 255), + IDE_PROC_DEVSET(unmaskirq, 0, 1), + IDE_PROC_DEVSET(using_dma, 0, 1), + { 0 }, +}; static void proc_ide_settings_warn(void) { @@ -403,19 +258,32 @@ static void proc_ide_settings_warn(void) static int proc_ide_read_settings (char *page, char **start, off_t off, int count, int *eof, void *data) { + const struct ide_proc_devset *setting, *g, *d; + const struct ide_devset *ds; ide_drive_t *drive = (ide_drive_t *) data; - ide_settings_t *setting = (ide_settings_t *) drive->settings; char *out = page; int len, rc, mul_factor, div_factor; proc_ide_settings_warn(); mutex_lock(&ide_setting_mtx); + g = ide_generic_settings; + d = drive->settings; out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); - while (setting) { - mul_factor = setting->mul_factor; - div_factor = setting->div_factor; + while (g->name || (d && d->name)) { + /* read settings in the alphabetical order */ + if (g->name && d && d->name) { + if (strcmp(d->name, g->name) < 0) + setting = d++; + else + setting = g++; + } else if (d && d->name) { + setting = d++; + } else + setting = g++; + mul_factor = setting->mulf ? setting->mulf(drive) : 1; + div_factor = setting->divf ? setting->divf(drive) : 1; out += sprintf(out, "%-24s", setting->name); rc = ide_read_setting(drive, setting); if (rc >= 0) @@ -423,12 +291,12 @@ static int proc_ide_read_settings else out += sprintf(out, "%-16s", "write-only"); out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); - if (setting->rw & SETTING_READ) + ds = setting->setting; + if (ds->get) out += sprintf(out, "r"); - if (setting->rw & SETTING_WRITE) + if (ds->set) out += sprintf(out, "w"); out += sprintf(out, "\n"); - setting = setting->next; } len = out - page; mutex_unlock(&ide_setting_mtx); @@ -442,9 +310,10 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, { ide_drive_t *drive = (ide_drive_t *) data; char name[MAX_LEN + 1]; - int for_real = 0; + int for_real = 0, mul_factor, div_factor; unsigned long n; - ide_settings_t *setting; + + const struct ide_proc_devset *setting; char *buf, *s; if (!capable(CAP_SYS_ADMIN)) @@ -512,13 +381,21 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, } mutex_lock(&ide_setting_mtx); - setting = ide_find_setting_by_name(drive, name); + /* generic settings first, then driver specific ones */ + setting = ide_find_setting(ide_generic_settings, name); if (!setting) { - mutex_unlock(&ide_setting_mtx); - goto parse_error; + if (drive->settings) + setting = ide_find_setting(drive->settings, name); + if (!setting) { + mutex_unlock(&ide_setting_mtx); + goto parse_error; + } + } + if (for_real) { + mul_factor = setting->mulf ? setting->mulf(drive) : 1; + div_factor = setting->divf ? setting->divf(drive) : 1; + ide_write_setting(drive, setting, val * div_factor / mul_factor); } - if (for_real) - ide_write_setting(drive, setting, val * setting->div_factor / setting->mul_factor); mutex_unlock(&ide_setting_mtx); } } while (!for_real++); @@ -561,11 +438,10 @@ static int proc_ide_read_dmodel (char *page, char **start, off_t off, int count, int *eof, void *data) { ide_drive_t *drive = (ide_drive_t *) data; - struct hd_driveid *id = drive->id; + char *m = (char *)&drive->id[ATA_ID_PROD]; int len; - len = sprintf(page, "%.40s\n", - (id && id->model[0]) ? (char *)id->model : "(none)"); + len = sprintf(page, "%.40s\n", m[0] ? m : "(none)"); PROC_IDE_READ_RETURN(page, start, off, count, eof, len); } @@ -690,6 +566,10 @@ static void ide_remove_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { + mutex_lock(&ide_setting_mtx); + drive->settings = driver->settings; + mutex_unlock(&ide_setting_mtx); + ide_add_proc_entries(drive->proc, driver->proc, drive); } @@ -726,7 +606,7 @@ void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) * OTOH both ide_{read,write}_setting are only ever used under * ide_setting_mtx. */ - auto_remove_settings(drive); + drive->settings = NULL; spin_unlock_irqrestore(&ide_lock, flags); mutex_unlock(&ide_setting_mtx); } diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 3833189144e..f8c84df4a0b 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -15,6 +15,8 @@ * Documentation/ide/ChangeLog.ide-tape.1995-2002 */ +#define DRV_NAME "ide-tape" + #define IDETAPE_VERSION "1.20" #include <linux/module.h> @@ -54,8 +56,6 @@ enum { DBG_CHRDEV = (1 << 2), /* all remaining procedures */ DBG_PROCS = (1 << 3), - /* buffer alloc info (pc_stack & rq_stack) */ - DBG_PCRQ_STACK = (1 << 4), }; /* define to see debug info */ @@ -81,26 +81,6 @@ enum { #define IDETAPE_MAX_PC_RETRIES 3 /* - * With each packet command, we allocate a buffer of IDETAPE_PC_BUFFER_SIZE - * bytes. This is used for several packet commands (Not for READ/WRITE commands) - */ -#define IDETAPE_PC_BUFFER_SIZE 256 - -/* - * In various places in the driver, we need to allocate storage - * for packet commands and requests, which will remain valid while - * we leave the driver to wait for an interrupt or a timeout event. - */ -#define IDETAPE_PC_STACK (10 + IDETAPE_MAX_PC_RETRIES) - -/* - * Some drives (for example, Seagate STT3401A Travan) require a very long - * timeout, because they don't return an interrupt or clear their busy bit - * until after the command completes (even retension commands). - */ -#define IDETAPE_WAIT_CMD (900*HZ) - -/* * The following parameter is used to select the point in the internal tape fifo * in which we will start to refill the buffer. Decreasing the following * parameter will improve the system's latency and interactive response, while @@ -172,20 +152,6 @@ struct idetape_bh { #define IDETAPE_LU_RETENSION_MASK 2 #define IDETAPE_LU_EOT_MASK 4 -/* - * Special requests for our block device strategy routine. - * - * In order to service a character device command, we add special requests to - * the tail of our block device request queue and wait for their completion. - */ - -enum { - REQ_IDETAPE_PC1 = (1 << 0), /* packet command (first stage) */ - REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */ - REQ_IDETAPE_READ = (1 << 2), - REQ_IDETAPE_WRITE = (1 << 3), -}; - /* Error codes returned in rq->errors to the higher part of the driver. */ #define IDETAPE_ERROR_GENERAL 101 #define IDETAPE_ERROR_FILEMARK 102 @@ -206,13 +172,6 @@ typedef struct ide_tape_obj { struct kref kref; /* - * Since a typical character device operation requires more - * than one packet command, we provide here enough memory - * for the maximum of interconnected packet commands. - * The packet commands are stored in the circular array pc_stack. - * pc_stack_index points to the last used entry, and warps around - * to the start when we get to the last array entry. - * * pc points to the current processed packet command. * * failed_pc points to the last failed packet command, or contains @@ -224,13 +183,11 @@ typedef struct ide_tape_obj { struct ide_atapi_pc *pc; /* Last failed packet command */ struct ide_atapi_pc *failed_pc; - /* Packet command stack */ - struct ide_atapi_pc pc_stack[IDETAPE_PC_STACK]; - /* Next free packet command storage space */ - int pc_stack_index; - struct request rq_stack[IDETAPE_PC_STACK]; - /* We implement a circular array */ - int rq_stack_index; + /* used by REQ_IDETAPE_{READ,WRITE} requests */ + struct ide_atapi_pc queued_pc; + + struct ide_atapi_pc request_sense_pc; + struct request request_sense_rq; /* * DSC polling variables. @@ -451,47 +408,6 @@ static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc) } /* - * idetape_next_pc_storage returns a pointer to a place in which we can - * safely store a packet command, even though we intend to leave the - * driver. A storage space for a maximum of IDETAPE_PC_STACK packet - * commands is allocated at initialization time. - */ -static struct ide_atapi_pc *idetape_next_pc_storage(ide_drive_t *drive) -{ - idetape_tape_t *tape = drive->driver_data; - - debug_log(DBG_PCRQ_STACK, "pc_stack_index=%d\n", tape->pc_stack_index); - - if (tape->pc_stack_index == IDETAPE_PC_STACK) - tape->pc_stack_index = 0; - return (&tape->pc_stack[tape->pc_stack_index++]); -} - -/* - * idetape_next_rq_storage is used along with idetape_next_pc_storage. - * Since we queue packet commands in the request queue, we need to - * allocate a request, along with the allocation of a packet command. - */ - -/************************************************************** - * * - * This should get fixed to use kmalloc(.., GFP_ATOMIC) * - * followed later on by kfree(). -ml * - * * - **************************************************************/ - -static struct request *idetape_next_rq_storage(ide_drive_t *drive) -{ - idetape_tape_t *tape = drive->driver_data; - - debug_log(DBG_PCRQ_STACK, "rq_stack_index=%d\n", tape->rq_stack_index); - - if (tape->rq_stack_index == IDETAPE_PC_STACK) - tape->rq_stack_index = 0; - return (&tape->rq_stack[tape->rq_stack_index++]); -} - -/* * called on each failed packet command retry to analyze the request sense. We * currently do not utilize this information. */ @@ -667,61 +583,14 @@ static void ide_tape_callback(ide_drive_t *drive) idetape_end_request(drive, uptodate, 0); } -static void idetape_init_pc(struct ide_atapi_pc *pc) -{ - memset(pc->c, 0, 12); - pc->retries = 0; - pc->flags = 0; - pc->req_xfer = 0; - pc->buf = pc->pc_buf; - pc->buf_size = IDETAPE_PC_BUFFER_SIZE; - pc->bh = NULL; - pc->b_data = NULL; -} - static void idetape_create_request_sense_cmd(struct ide_atapi_pc *pc) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = REQUEST_SENSE; pc->c[4] = 20; pc->req_xfer = 20; } -static void idetape_init_rq(struct request *rq, u8 cmd) -{ - blk_rq_init(NULL, rq); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd[13] = cmd; -} - -/* - * Generate a new packet command request in front of the request queue, before - * the current request, so that it will be processed immediately, on the next - * pass through the driver. The function below is called from the request - * handling part of the driver (the "bottom" part). Safe storage for the request - * should be allocated with ide_tape_next_{pc,rq}_storage() prior to that. - * - * Memory for those requests is pre-allocated at initialization time, and is - * limited to IDETAPE_PC_STACK requests. We assume that we have enough space for - * the maximum possible number of inter-dependent packet commands. - * - * The higher level of the driver - The ioctl handler and the character device - * handling functions should queue request to the lower level part and wait for - * their completion using idetape_queue_pc_tail or idetape_queue_rw_tail. - */ -static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc, - struct request *rq) -{ - struct ide_tape_obj *tape = drive->driver_data; - - idetape_init_rq(rq, REQ_IDETAPE_PC1); - rq->cmd_flags |= REQ_PREEMPT; - rq->buffer = (char *) pc; - rq->rq_disk = tape->disk; - memcpy(rq->cmd, pc->c, 12); - ide_do_drive_cmd(drive, rq); -} - /* * idetape_retry_pc is called when an error was detected during the * last packet command. We queue a request sense packet command in @@ -729,15 +598,14 @@ static void idetape_queue_pc_head(ide_drive_t *drive, struct ide_atapi_pc *pc, */ static void idetape_retry_pc(ide_drive_t *drive) { - struct ide_atapi_pc *pc; - struct request *rq; + struct ide_tape_obj *tape = drive->driver_data; + struct request *rq = &tape->request_sense_rq; + struct ide_atapi_pc *pc = &tape->request_sense_pc; (void)ide_read_error(drive); - pc = idetape_next_pc_storage(drive); - rq = idetape_next_rq_storage(drive); idetape_create_request_sense_cmd(pc); set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags); - idetape_queue_pc_head(drive, pc, rq); + ide_queue_pc_head(drive, tape->disk, pc, rq); } /* @@ -766,13 +634,15 @@ static void ide_tape_handle_dsc(ide_drive_t *drive) idetape_postpone_request(drive); } -static void ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, +static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int bcount, int write) { if (write) idetape_output_buffers(drive, pc, bcount); else idetape_input_buffers(drive, pc, bcount); + + return bcount; } /* @@ -786,7 +656,7 @@ static ide_startstop_t idetape_pc_intr(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; - return ide_pc_intr(drive, tape->pc, idetape_pc_intr, IDETAPE_WAIT_CMD, + return ide_pc_intr(drive, tape->pc, idetape_pc_intr, WAIT_TAPE_CMD, NULL, idetape_update_buffers, idetape_retry_pc, ide_tape_handle_dsc, ide_tape_io_buffers); } @@ -832,7 +702,7 @@ static ide_startstop_t idetape_transfer_pc(ide_drive_t *drive) idetape_tape_t *tape = drive->driver_data; return ide_transfer_pc(drive, tape->pc, idetape_pc_intr, - IDETAPE_WAIT_CMD, NULL); + WAIT_TAPE_CMD, NULL); } static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, @@ -881,13 +751,13 @@ static ide_startstop_t idetape_issue_pc(ide_drive_t *drive, pc->retries++; return ide_issue_pc(drive, pc, idetape_transfer_pc, - IDETAPE_WAIT_CMD, NULL); + WAIT_TAPE_CMD, NULL); } /* A mode sense command is used to "sense" tape parameters. */ static void idetape_create_mode_sense_cmd(struct ide_atapi_pc *pc, u8 page_code) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = MODE_SENSE; if (page_code != IDETAPE_BLOCK_DESCRIPTOR) /* DBD = 1 - Don't return block descriptors */ @@ -920,8 +790,8 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive) stat = hwif->tp_ops->read_status(hwif); - if (stat & SEEK_STAT) { - if (stat & ERR_STAT) { + if (stat & ATA_DSC) { + if (stat & ATA_ERR) { /* Error detected */ if (pc->c[0] != TEST_UNIT_READY) printk(KERN_ERR "ide-tape: %s: I/O error, ", @@ -946,7 +816,7 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape, struct idetape_bh *bh = (struct idetape_bh *)rq->special; unsigned int length = rq->current_nr_sectors; - idetape_init_pc(pc); + ide_init_pc(pc); put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]); pc->c[1] = 1; pc->bh = bh; @@ -978,9 +848,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, struct request *postponed_rq = tape->postponed_rq; u8 stat; - debug_log(DBG_SENSE, "sector: %ld, nr_sectors: %ld," - " current_nr_sectors: %d\n", - rq->sector, rq->nr_sectors, rq->current_nr_sectors); + debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu," + " current_nr_sectors: %u\n", + (unsigned long long)rq->sector, rq->nr_sectors, + rq->current_nr_sectors); if (!blk_special_request(rq)) { /* We do not support buffer cache originated requests. */ @@ -1021,7 +892,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, } if (!test_and_clear_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags) && - (stat & SEEK_STAT) == 0) { + (stat & ATA_DSC) == 0) { if (postponed_rq == NULL) { tape->dsc_polling_start = jiffies; tape->dsc_poll_freq = tape->best_dsc_rw_freq; @@ -1043,12 +914,12 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, return ide_stopped; } if (rq->cmd[13] & REQ_IDETAPE_READ) { - pc = idetape_next_pc_storage(drive); + pc = &tape->queued_pc; ide_tape_create_rw_cmd(tape, pc, rq, READ_6); goto out; } if (rq->cmd[13] & REQ_IDETAPE_WRITE) { - pc = idetape_next_pc_storage(drive); + pc = &tape->queued_pc; ide_tape_create_rw_cmd(tape, pc, rq, WRITE_6); goto out; } @@ -1235,77 +1106,30 @@ static void idetape_init_merge_buffer(idetape_tape_t *tape) static void idetape_create_write_filemark_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc, int write_filemark) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = WRITE_FILEMARKS; pc->c[4] = write_filemark; pc->flags |= PC_FLAG_WAIT_FOR_DSC; } -static void idetape_create_test_unit_ready_cmd(struct ide_atapi_pc *pc) -{ - idetape_init_pc(pc); - pc->c[0] = TEST_UNIT_READY; -} - -/* - * We add a special packet command request to the tail of the request queue, and - * wait for it to be serviced. This is not to be called from within the request - * handling part of the driver! We allocate here data on the stack and it is - * valid until the request is finished. This is not the case for the bottom part - * of the driver, where we are always leaving the functions to wait for an - * interrupt or a timer event. - * - * From the bottom part of the driver, we should allocate safe memory using - * idetape_next_pc_storage() and ide_tape_next_rq_storage(), and add the request - * to the request list without waiting for it to be serviced! In that case, we - * usually use idetape_queue_pc_head(). - */ -static int idetape_queue_pc_tail(ide_drive_t *drive, struct ide_atapi_pc *pc) -{ - struct ide_tape_obj *tape = drive->driver_data; - struct request *rq; - int error; - - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd[13] = REQ_IDETAPE_PC1; - rq->buffer = (char *)pc; - memcpy(rq->cmd, pc->c, 12); - error = blk_execute_rq(drive->queue, tape->disk, rq, 0); - blk_put_request(rq); - return error; -} - -static void idetape_create_load_unload_cmd(ide_drive_t *drive, - struct ide_atapi_pc *pc, int cmd) -{ - idetape_init_pc(pc); - pc->c[0] = START_STOP; - pc->c[4] = cmd; - pc->flags |= PC_FLAG_WAIT_FOR_DSC; -} - static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout) { idetape_tape_t *tape = drive->driver_data; - struct ide_atapi_pc pc; + struct gendisk *disk = tape->disk; int load_attempted = 0; /* Wait for the tape to become ready */ set_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); timeout += jiffies; while (time_before(jiffies, timeout)) { - idetape_create_test_unit_ready_cmd(&pc); - if (!idetape_queue_pc_tail(drive, &pc)) + if (ide_do_test_unit_ready(drive, disk) == 0) return 0; if ((tape->sense_key == 2 && tape->asc == 4 && tape->ascq == 2) || (tape->asc == 0x3A)) { /* no media */ if (load_attempted) return -ENOMEDIUM; - idetape_create_load_unload_cmd(drive, &pc, - IDETAPE_LU_LOAD_MASK); - idetape_queue_pc_tail(drive, &pc); + ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK); load_attempted = 1; /* not about to be ready */ } else if (!(tape->sense_key == 2 && tape->asc == 4 && @@ -1318,11 +1142,12 @@ static int idetape_wait_ready(ide_drive_t *drive, unsigned long timeout) static int idetape_flush_tape_buffers(ide_drive_t *drive) { + struct ide_tape_obj *tape = drive->driver_data; struct ide_atapi_pc pc; int rc; idetape_create_write_filemark_cmd(drive, &pc, 0); - rc = idetape_queue_pc_tail(drive, &pc); + rc = ide_queue_pc_tail(drive, tape->disk, &pc); if (rc) return rc; idetape_wait_ready(drive, 60 * 5 * HZ); @@ -1331,7 +1156,7 @@ static int idetape_flush_tape_buffers(ide_drive_t *drive) static void idetape_create_read_position_cmd(struct ide_atapi_pc *pc) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = READ_POSITION; pc->req_xfer = 20; } @@ -1345,7 +1170,7 @@ static int idetape_read_position(ide_drive_t *drive) debug_log(DBG_PROCS, "Enter %s\n", __func__); idetape_create_read_position_cmd(&pc); - if (idetape_queue_pc_tail(drive, &pc)) + if (ide_queue_pc_tail(drive, tape->disk, &pc)) return -1; position = tape->first_frame; return position; @@ -1355,7 +1180,7 @@ static void idetape_create_locate_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc, unsigned int block, u8 partition, int skip) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = POSITION_TO_ELEMENT; pc->c[1] = 2; put_unaligned(cpu_to_be32(block), (unsigned int *) &pc->c[3]); @@ -1363,21 +1188,6 @@ static void idetape_create_locate_cmd(ide_drive_t *drive, pc->flags |= PC_FLAG_WAIT_FOR_DSC; } -static int idetape_create_prevent_cmd(ide_drive_t *drive, - struct ide_atapi_pc *pc, int prevent) -{ - idetape_tape_t *tape = drive->driver_data; - - /* device supports locking according to capabilities page */ - if (!(tape->caps[6] & 0x01)) - return 0; - - idetape_init_pc(pc); - pc->c[0] = ALLOW_MEDIUM_REMOVAL; - pc->c[4] = prevent; - return 1; -} - static void __ide_tape_discard_merge_buffer(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; @@ -1405,6 +1215,7 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block, u8 partition, int skip) { idetape_tape_t *tape = drive->driver_data; + struct gendisk *disk = tape->disk; int retval; struct ide_atapi_pc pc; @@ -1412,12 +1223,12 @@ static int idetape_position_tape(ide_drive_t *drive, unsigned int block, __ide_tape_discard_merge_buffer(drive); idetape_wait_ready(drive, 60 * 5 * HZ); idetape_create_locate_cmd(drive, &pc, block, partition, skip); - retval = idetape_queue_pc_tail(drive, &pc); + retval = ide_queue_pc_tail(drive, disk, &pc); if (retval) return (retval); idetape_create_read_position_cmd(&pc); - return (idetape_queue_pc_tail(drive, &pc)); + return ide_queue_pc_tail(drive, disk, &pc); } static void ide_tape_discard_merge_buffer(ide_drive_t *drive, @@ -1477,7 +1288,7 @@ static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks, static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = INQUIRY; pc->c[4] = 254; pc->req_xfer = 254; @@ -1486,14 +1297,14 @@ static void idetape_create_inquiry_cmd(struct ide_atapi_pc *pc) static void idetape_create_rewind_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = REZERO_UNIT; pc->flags |= PC_FLAG_WAIT_FOR_DSC; } static void idetape_create_erase_cmd(struct ide_atapi_pc *pc) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = ERASE; pc->c[1] = 1; pc->flags |= PC_FLAG_WAIT_FOR_DSC; @@ -1501,7 +1312,7 @@ static void idetape_create_erase_cmd(struct ide_atapi_pc *pc) static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd) { - idetape_init_pc(pc); + ide_init_pc(pc); pc->c[0] = SPACE; put_unaligned(cpu_to_be32(count), (unsigned int *) &pc->c[1]); pc->c[1] = cmd; @@ -1664,20 +1475,20 @@ static void idetape_pad_zeros(ide_drive_t *drive, int bcount) */ static int idetape_rewind_tape(ide_drive_t *drive) { + struct ide_tape_obj *tape = drive->driver_data; + struct gendisk *disk = tape->disk; int retval; struct ide_atapi_pc pc; - idetape_tape_t *tape; - tape = drive->driver_data; debug_log(DBG_SENSE, "Enter %s\n", __func__); idetape_create_rewind_cmd(drive, &pc); - retval = idetape_queue_pc_tail(drive, &pc); + retval = ide_queue_pc_tail(drive, disk, &pc); if (retval) return retval; idetape_create_read_position_cmd(&pc); - retval = idetape_queue_pc_tail(drive, &pc); + retval = ide_queue_pc_tail(drive, disk, &pc); if (retval) return retval; return 0; @@ -1720,6 +1531,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, int mt_count) { idetape_tape_t *tape = drive->driver_data; + struct gendisk *disk = tape->disk; struct ide_atapi_pc pc; int retval, count = 0; int sprev = !!(tape->caps[4] & 0x20); @@ -1744,7 +1556,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op, case MTBSF: idetape_create_space_cmd(&pc, mt_count - count, IDETAPE_SPACE_OVER_FILEMARK); - return idetape_queue_pc_tail(drive, &pc); + return ide_queue_pc_tail(drive, disk, &pc); case MTFSFM: case MTBSFM: if (!sprev) @@ -1933,11 +1745,12 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf, static int idetape_write_filemark(ide_drive_t *drive) { + struct ide_tape_obj *tape = drive->driver_data; struct ide_atapi_pc pc; /* Write a filemark */ idetape_create_write_filemark_cmd(drive, &pc, 1); - if (idetape_queue_pc_tail(drive, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc)) { printk(KERN_ERR "ide-tape: Couldn't write a filemark\n"); return -EIO; } @@ -1960,6 +1773,7 @@ static int idetape_write_filemark(ide_drive_t *drive) static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) { idetape_tape_t *tape = drive->driver_data; + struct gendisk *disk = tape->disk; struct ide_atapi_pc pc; int i, retval; @@ -1996,9 +1810,7 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) return 0; case MTLOAD: ide_tape_discard_merge_buffer(drive, 0); - idetape_create_load_unload_cmd(drive, &pc, - IDETAPE_LU_LOAD_MASK); - return idetape_queue_pc_tail(drive, &pc); + return ide_do_start_stop(drive, disk, IDETAPE_LU_LOAD_MASK); case MTUNLOAD: case MTOFFL: /* @@ -2006,14 +1818,11 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) * attempting to eject. */ if (tape->door_locked) { - if (idetape_create_prevent_cmd(drive, &pc, 0)) - if (!idetape_queue_pc_tail(drive, &pc)) - tape->door_locked = DOOR_UNLOCKED; + if (!ide_set_media_lock(drive, disk, 0)) + tape->door_locked = DOOR_UNLOCKED; } ide_tape_discard_merge_buffer(drive, 0); - idetape_create_load_unload_cmd(drive, &pc, - !IDETAPE_LU_LOAD_MASK); - retval = idetape_queue_pc_tail(drive, &pc); + retval = ide_do_start_stop(drive, disk, !IDETAPE_LU_LOAD_MASK); if (!retval) clear_bit(IDE_AFLAG_MEDIUM_PRESENT, &drive->atapi_flags); return retval; @@ -2022,16 +1831,15 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) return idetape_flush_tape_buffers(drive); case MTRETEN: ide_tape_discard_merge_buffer(drive, 0); - idetape_create_load_unload_cmd(drive, &pc, + return ide_do_start_stop(drive, disk, IDETAPE_LU_RETENSION_MASK | IDETAPE_LU_LOAD_MASK); - return idetape_queue_pc_tail(drive, &pc); case MTEOM: idetape_create_space_cmd(&pc, 0, IDETAPE_SPACE_TO_EOD); - return idetape_queue_pc_tail(drive, &pc); + return ide_queue_pc_tail(drive, disk, &pc); case MTERASE: (void)idetape_rewind_tape(drive); idetape_create_erase_cmd(&pc); - return idetape_queue_pc_tail(drive, &pc); + return ide_queue_pc_tail(drive, disk, &pc); case MTSETBLK: if (mt_count) { if (mt_count < tape->blk_size || @@ -2052,17 +1860,13 @@ static int idetape_mtioctop(ide_drive_t *drive, short mt_op, int mt_count) case MTFSR: case MTBSR: case MTLOCK: - if (!idetape_create_prevent_cmd(drive, &pc, 1)) - return 0; - retval = idetape_queue_pc_tail(drive, &pc); + retval = ide_set_media_lock(drive, disk, 1); if (retval) return retval; tape->door_locked = DOOR_EXPLICITLY_LOCKED; return 0; case MTUNLOCK: - if (!idetape_create_prevent_cmd(drive, &pc, 0)) - return 0; - retval = idetape_queue_pc_tail(drive, &pc); + retval = ide_set_media_lock(drive, disk, 0); if (retval) return retval; tape->door_locked = DOOR_UNLOCKED; @@ -2144,7 +1948,7 @@ static void ide_tape_get_bsize_from_bdesc(ide_drive_t *drive) struct ide_atapi_pc pc; idetape_create_mode_sense_cmd(&pc, IDETAPE_BLOCK_DESCRIPTOR); - if (idetape_queue_pc_tail(drive, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc)) { printk(KERN_ERR "ide-tape: Can't get block descriptor\n"); if (tape->blk_size == 0) { printk(KERN_WARNING "ide-tape: Cannot deal with zero " @@ -2164,7 +1968,6 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp) unsigned int minor = iminor(inode), i = minor & ~0xc0; ide_drive_t *drive; idetape_tape_t *tape; - struct ide_atapi_pc pc; int retval; if (i >= MAX_HWIFS * MAX_DRIVES) @@ -2227,11 +2030,9 @@ static int idetape_chrdev_open(struct inode *inode, struct file *filp) /* Lock the tape drive door so user can't eject. */ if (tape->chrdev_dir == IDETAPE_DIR_NONE) { - if (idetape_create_prevent_cmd(drive, &pc, 1)) { - if (!idetape_queue_pc_tail(drive, &pc)) { - if (tape->door_locked != DOOR_EXPLICITLY_LOCKED) - tape->door_locked = DOOR_LOCKED; - } + if (!ide_set_media_lock(drive, tape->disk, 1)) { + if (tape->door_locked != DOOR_EXPLICITLY_LOCKED) + tape->door_locked = DOOR_LOCKED; } } unlock_kernel(); @@ -2264,7 +2065,6 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp) { struct ide_tape_obj *tape = ide_tape_f(filp); ide_drive_t *drive = tape->drive; - struct ide_atapi_pc pc; unsigned int minor = iminor(inode); lock_kernel(); @@ -2283,10 +2083,8 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp) (void) idetape_rewind_tape(drive); if (tape->chrdev_dir == IDETAPE_DIR_NONE) { if (tape->door_locked == DOOR_LOCKED) { - if (idetape_create_prevent_cmd(drive, &pc, 0)) { - if (!idetape_queue_pc_tail(drive, &pc)) - tape->door_locked = DOOR_UNLOCKED; - } + if (!ide_set_media_lock(drive, tape->disk, 0)) + tape->door_locked = DOOR_UNLOCKED; } } clear_bit(IDE_AFLAG_BUSY, &drive->atapi_flags); @@ -2295,45 +2093,6 @@ static int idetape_chrdev_release(struct inode *inode, struct file *filp) return 0; } -/* - * check the contents of the ATAPI IDENTIFY command results. We return: - * - * 1 - If the tape can be supported by us, based on the information we have so - * far. - * - * 0 - If this tape driver is not currently supported by us. - */ -static int idetape_identify_device(ide_drive_t *drive) -{ - u8 gcw[2], protocol, device_type, removable, packet_size; - - if (drive->id_read == 0) - return 1; - - *((unsigned short *) &gcw) = drive->id->config; - - protocol = (gcw[1] & 0xC0) >> 6; - device_type = gcw[1] & 0x1F; - removable = !!(gcw[0] & 0x80); - packet_size = gcw[0] & 0x3; - - /* Check that we can support this device */ - if (protocol != 2) - printk(KERN_ERR "ide-tape: Protocol (0x%02x) is not ATAPI\n", - protocol); - else if (device_type != 1) - printk(KERN_ERR "ide-tape: Device type (0x%02x) is not set " - "to tape\n", device_type); - else if (!removable) - printk(KERN_ERR "ide-tape: The removable flag is not set\n"); - else if (packet_size != 0) { - printk(KERN_ERR "ide-tape: Packet size (0x%02x) is not 12" - " bytes\n", packet_size); - } else - return 1; - return 0; -} - static void idetape_get_inquiry_results(ide_drive_t *drive) { idetape_tape_t *tape = drive->driver_data; @@ -2341,7 +2100,7 @@ static void idetape_get_inquiry_results(ide_drive_t *drive) char fw_rev[4], vendor_id[8], product_id[16]; idetape_create_inquiry_cmd(&pc); - if (idetape_queue_pc_tail(drive, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc)) { printk(KERN_ERR "ide-tape: %s: can't get INQUIRY results\n", tape->name); return; @@ -2370,7 +2129,7 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) u8 speed, max_speed; idetape_create_mode_sense_cmd(&pc, IDETAPE_CAPABILITIES_PAGE); - if (idetape_queue_pc_tail(drive, &pc)) { + if (ide_queue_pc_tail(drive, tape->disk, &pc)) { printk(KERN_ERR "ide-tape: Can't get tape parameters - assuming" " some default values\n"); tape->blk_size = 512; @@ -2402,6 +2161,11 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) } memcpy(&tape->caps, caps, 20); + + /* device lacks locking support according to capabilities page */ + if ((caps[6] & 1) == 0) + drive->atapi_flags |= IDE_AFLAG_NO_DOORLOCK; + if (caps[7] & 0x02) tape->blk_size = 512; else if (caps[7] & 0x04) @@ -2409,28 +2173,56 @@ static void idetape_get_mode_sense_results(ide_drive_t *drive) } #ifdef CONFIG_IDE_PROC_FS -static void idetape_add_settings(ide_drive_t *drive) -{ - idetape_tape_t *tape = drive->driver_data; - - ide_add_setting(drive, "buffer", SETTING_READ, TYPE_SHORT, 0, 0xffff, - 1, 2, (u16 *)&tape->caps[16], NULL); - ide_add_setting(drive, "speed", SETTING_READ, TYPE_SHORT, 0, 0xffff, - 1, 1, (u16 *)&tape->caps[14], NULL); - ide_add_setting(drive, "buffer_size", SETTING_READ, TYPE_INT, 0, 0xffff, - 1, 1024, &tape->buffer_size, NULL); - ide_add_setting(drive, "tdsc", SETTING_RW, TYPE_INT, IDETAPE_DSC_RW_MIN, - IDETAPE_DSC_RW_MAX, 1000, HZ, &tape->best_dsc_rw_freq, - NULL); - ide_add_setting(drive, "dsc_overlap", SETTING_RW, TYPE_BYTE, 0, 1, 1, - 1, &drive->dsc_overlap, NULL); - ide_add_setting(drive, "avg_speed", SETTING_READ, TYPE_INT, 0, 0xffff, - 1, 1, &tape->avg_speed, NULL); - ide_add_setting(drive, "debug_mask", SETTING_RW, TYPE_INT, 0, 0xffff, 1, - 1, &tape->debug_mask, NULL); -} -#else -static inline void idetape_add_settings(ide_drive_t *drive) { ; } +#define ide_tape_devset_get(name, field) \ +static int get_##name(ide_drive_t *drive) \ +{ \ + idetape_tape_t *tape = drive->driver_data; \ + return tape->field; \ +} + +#define ide_tape_devset_set(name, field) \ +static int set_##name(ide_drive_t *drive, int arg) \ +{ \ + idetape_tape_t *tape = drive->driver_data; \ + tape->field = arg; \ + return 0; \ +} + +#define ide_tape_devset_rw_field(_name, _field) \ +ide_tape_devset_get(_name, _field) \ +ide_tape_devset_set(_name, _field) \ +IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name) + +#define ide_tape_devset_r_field(_name, _field) \ +ide_tape_devset_get(_name, _field) \ +IDE_DEVSET(_name, 0, get_##_name, NULL) + +static int mulf_tdsc(ide_drive_t *drive) { return 1000; } +static int divf_tdsc(ide_drive_t *drive) { return HZ; } +static int divf_buffer(ide_drive_t *drive) { return 2; } +static int divf_buffer_size(ide_drive_t *drive) { return 1024; } + +ide_devset_rw_field(dsc_overlap, dsc_overlap); + +ide_tape_devset_rw_field(debug_mask, debug_mask); +ide_tape_devset_rw_field(tdsc, best_dsc_rw_freq); + +ide_tape_devset_r_field(avg_speed, avg_speed); +ide_tape_devset_r_field(speed, caps[14]); +ide_tape_devset_r_field(buffer, caps[16]); +ide_tape_devset_r_field(buffer_size, buffer_size); + +static const struct ide_proc_devset idetape_settings[] = { + __IDE_PROC_DEVSET(avg_speed, 0, 0xffff, NULL, NULL), + __IDE_PROC_DEVSET(buffer, 0, 0xffff, NULL, divf_buffer), + __IDE_PROC_DEVSET(buffer_size, 0, 0xffff, NULL, divf_buffer_size), + __IDE_PROC_DEVSET(debug_mask, 0, 0xffff, NULL, NULL), + __IDE_PROC_DEVSET(dsc_overlap, 0, 1, NULL, NULL), + __IDE_PROC_DEVSET(speed, 0, 0xffff, NULL, NULL), + __IDE_PROC_DEVSET(tdsc, IDETAPE_DSC_RW_MIN, IDETAPE_DSC_RW_MAX, + mulf_tdsc, divf_tdsc), + { 0 }, +}; #endif /* @@ -2462,15 +2254,15 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) drive->dsc_overlap = 0; } /* Seagate Travan drives do not support DSC overlap. */ - if (strstr(drive->id->model, "Seagate STT3401")) + if (strstr((char *)&drive->id[ATA_ID_PROD], "Seagate STT3401")) drive->dsc_overlap = 0; tape->minor = minor; tape->name[0] = 'h'; tape->name[1] = 't'; tape->name[2] = '0' + minor; tape->chrdev_dir = IDETAPE_DIR_NONE; - tape->pc = tape->pc_stack; - *((unsigned short *) &gcw) = drive->id->config; + + *((u16 *)&gcw) = drive->id[ATA_ID_CONFIG]; /* Command packet DRQ type */ if (((gcw[0] & 0x60) >> 5) == 1) @@ -2512,7 +2304,7 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor) tape->best_dsc_rw_freq * 1000 / HZ, drive->using_dma ? ", DMA":""); - idetape_add_settings(drive); + ide_proc_register_driver(drive, tape->driver); } static void ide_tape_remove(ide_drive_t *drive) @@ -2577,12 +2369,12 @@ static ide_driver_t idetape_driver = { .remove = ide_tape_remove, .version = IDETAPE_VERSION, .media = ide_tape, - .supports_dsc_overlap = 1, .do_request = idetape_do_request, .end_request = idetape_end_request, .error = __ide_error, #ifdef CONFIG_IDE_PROC_FS .proc = idetape_proc, + .settings = idetape_settings, #endif }; @@ -2645,11 +2437,11 @@ static int ide_tape_probe(ide_drive_t *drive) if (!strstr("ide-tape", drive->driver_req)) goto failed; - if (!drive->present) - goto failed; + if (drive->media != ide_tape) goto failed; - if (!idetape_identify_device(drive)) { + + if (drive->id_read == 1 && !ide_check_atapi_device(drive, DRV_NAME)) { printk(KERN_ERR "ide-tape: %s: not supported by this version of" " the driver\n", drive->name); goto failed; @@ -2667,8 +2459,6 @@ static int ide_tape_probe(ide_drive_t *drive) ide_init_disk(g, drive); - ide_proc_register_driver(drive, &idetape_driver); - kref_init(&tape->kref); tape->drive = drive; diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 7fb6f1c8627..487b18b3eba 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -44,9 +44,9 @@ int taskfile_lib_get_identify (ide_drive_t *drive, u8 *buf) memset(&args, 0, sizeof(ide_task_t)); args.tf.nsect = 0x01; if (drive->media == ide_disk) - args.tf.command = WIN_IDENTIFY; + args.tf.command = ATA_CMD_ID_ATA; else - args.tf.command = WIN_PIDENTIFY; + args.tf.command = ATA_CMD_ID_ATAPI; args.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; args.data_phase = TASKFILE_IN; return ide_raw_taskfile(drive, &args, buf, 1); @@ -99,12 +99,17 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) case TASKFILE_NO_DATA: if (handler == NULL) handler = task_no_data_intr; - /* WIN_{SPECIFY,RESTORE,SETMULT} use custom handlers */ if (task->tf_flags & IDE_TFLAG_CUSTOM_HANDLER) { switch (tf->command) { - case WIN_SPECIFY: handler = set_geometry_intr; break; - case WIN_RESTORE: handler = recal_intr; break; - case WIN_SETMULT: handler = set_multmode_intr; break; + case ATA_CMD_INIT_DEV_PARAMS: + handler = set_geometry_intr; + break; + case ATA_CMD_RESTORE: + handler = recal_intr; + break; + case ATA_CMD_SET_MULTI: + handler = set_multmode_intr; + break; } } ide_execute_command(drive, tf->command, handler, @@ -121,7 +126,7 @@ ide_startstop_t do_rw_taskfile (ide_drive_t *drive, ide_task_t *task) EXPORT_SYMBOL_GPL(do_rw_taskfile); /* - * set_multmode_intr() is invoked on completion of a WIN_SETMULT cmd. + * set_multmode_intr() is invoked on completion of a ATA_CMD_SET_MULTI cmd. */ static ide_startstop_t set_multmode_intr(ide_drive_t *drive) { @@ -131,7 +136,7 @@ static ide_startstop_t set_multmode_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); stat = hwif->tp_ops->read_status(hwif); - if (OK_STAT(stat, READY_STAT, BAD_STAT)) + if (OK_STAT(stat, ATA_DRDY, BAD_STAT)) drive->mult_count = drive->mult_req; else { drive->mult_req = drive->mult_count = 0; @@ -142,7 +147,7 @@ static ide_startstop_t set_multmode_intr(ide_drive_t *drive) } /* - * set_geometry_intr() is invoked on completion of a WIN_SPECIFY cmd. + * set_geometry_intr() is invoked on completion of a ATA_CMD_INIT_DEV_PARAMS cmd. */ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) { @@ -154,15 +159,15 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) while (1) { stat = hwif->tp_ops->read_status(hwif); - if ((stat & BUSY_STAT) == 0 || retries-- == 0) + if ((stat & ATA_BUSY) == 0 || retries-- == 0) break; udelay(10); }; - if (OK_STAT(stat, READY_STAT, BAD_STAT)) + if (OK_STAT(stat, ATA_DRDY, BAD_STAT)) return ide_stopped; - if (stat & (ERR_STAT|DRQ_STAT)) + if (stat & (ATA_ERR | ATA_DRQ)) return ide_error(drive, "set_geometry_intr", stat); ide_set_handler(drive, &set_geometry_intr, WAIT_WORSTCASE, NULL); @@ -170,7 +175,7 @@ static ide_startstop_t set_geometry_intr(ide_drive_t *drive) } /* - * recal_intr() is invoked on completion of a WIN_RESTORE (recalibrate) cmd. + * recal_intr() is invoked on completion of a ATA_CMD_RESTORE (recalibrate) cmd. */ static ide_startstop_t recal_intr(ide_drive_t *drive) { @@ -180,7 +185,7 @@ static ide_startstop_t recal_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); stat = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(stat, READY_STAT, BAD_STAT)) + if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) return ide_error(drive, "recal_intr", stat); return ide_stopped; } @@ -197,7 +202,7 @@ static ide_startstop_t task_no_data_intr(ide_drive_t *drive) local_irq_enable_in_hardirq(); stat = hwif->tp_ops->read_status(hwif); - if (!OK_STAT(stat, READY_STAT, BAD_STAT)) + if (!OK_STAT(stat, ATA_DRDY, BAD_STAT)) return ide_error(drive, "task_no_data_intr", stat); /* calls ide_end_drive_cmd */ @@ -220,13 +225,13 @@ static u8 wait_drive_not_busy(ide_drive_t *drive) for (retries = 0; retries < 1000; retries++) { stat = hwif->tp_ops->read_status(hwif); - if (stat & BUSY_STAT) + if (stat & ATA_BUSY) udelay(10); else break; } - if (stat & BUSY_STAT) + if (stat & ATA_BUSY) printk(KERN_ERR "%s: drive still BUSY!\n", drive->name); return stat; @@ -385,7 +390,7 @@ void task_end_request(ide_drive_t *drive, struct request *rq, u8 stat) static ide_startstop_t task_in_unexpected(ide_drive_t *drive, struct request *rq, u8 stat) { /* Command all done? */ - if (OK_STAT(stat, READY_STAT, BUSY_STAT)) { + if (OK_STAT(stat, ATA_DRDY, ATA_BUSY)) { task_end_request(drive, rq, stat); return ide_stopped; } @@ -405,11 +410,11 @@ static ide_startstop_t task_in_intr(ide_drive_t *drive) u8 stat = hwif->tp_ops->read_status(hwif); /* Error? */ - if (stat & ERR_STAT) + if (stat & ATA_ERR) return task_error(drive, rq, __func__, stat); /* Didn't want any data? Odd. */ - if (!(stat & DRQ_STAT)) + if ((stat & ATA_DRQ) == 0) return task_in_unexpected(drive, rq, stat); ide_pio_datablock(drive, rq, 0); @@ -442,7 +447,7 @@ static ide_startstop_t task_out_intr (ide_drive_t *drive) return task_error(drive, rq, __func__, stat); /* Deal with unexpected ATA data phase. */ - if (((stat & DRQ_STAT) == 0) ^ !hwif->nleft) + if (((stat & ATA_DRQ) == 0) ^ !hwif->nleft) return task_error(drive, rq, __func__, stat); if (!hwif->nleft) { @@ -461,7 +466,7 @@ static ide_startstop_t pre_task_out_intr(ide_drive_t *drive, struct request *rq) { ide_startstop_t startstop; - if (ide_wait_stat(&startstop, drive, DRQ_STAT, + if (ide_wait_stat(&startstop, drive, ATA_DRQ, drive->bad_wstat, WAIT_DRQ)) { printk(KERN_ERR "%s: no DRQ after issuing %sWRITE%s\n", drive->name, @@ -721,110 +726,3 @@ abort: return err; } #endif - -int ide_cmd_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg) -{ - u8 *buf = NULL; - int bufsize = 0, err = 0; - u8 args[4], xfer_rate = 0; - ide_task_t tfargs; - struct ide_taskfile *tf = &tfargs.tf; - struct hd_driveid *id = drive->id; - - if (NULL == (void *) arg) { - struct request *rq; - - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_ATA_TASKFILE; - err = blk_execute_rq(drive->queue, NULL, rq, 0); - blk_put_request(rq); - - return err; - } - - if (copy_from_user(args, (void __user *)arg, 4)) - return -EFAULT; - - memset(&tfargs, 0, sizeof(ide_task_t)); - tf->feature = args[2]; - if (args[0] == WIN_SMART) { - tf->nsect = args[3]; - tf->lbal = args[1]; - tf->lbam = 0x4f; - tf->lbah = 0xc2; - tfargs.tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_IN_NSECT; - } else { - tf->nsect = args[1]; - tfargs.tf_flags = IDE_TFLAG_OUT_FEATURE | - IDE_TFLAG_OUT_NSECT | IDE_TFLAG_IN_NSECT; - } - tf->command = args[0]; - tfargs.data_phase = args[3] ? TASKFILE_IN : TASKFILE_NO_DATA; - - if (args[3]) { - tfargs.tf_flags |= IDE_TFLAG_IO_16BIT; - bufsize = SECTOR_WORDS * 4 * args[3]; - buf = kzalloc(bufsize, GFP_KERNEL); - if (buf == NULL) - return -ENOMEM; - } - - if (tf->command == WIN_SETFEATURES && - tf->feature == SETFEATURES_XFER && - tf->nsect >= XFER_SW_DMA_0 && - (id->dma_ultra || id->dma_mword || id->dma_1word)) { - xfer_rate = args[1]; - if (tf->nsect > XFER_UDMA_2 && !eighty_ninty_three(drive)) { - printk(KERN_WARNING "%s: UDMA speeds >UDMA33 cannot " - "be set\n", drive->name); - goto abort; - } - } - - err = ide_raw_taskfile(drive, &tfargs, buf, args[3]); - - args[0] = tf->status; - args[1] = tf->error; - args[2] = tf->nsect; - - if (!err && xfer_rate) { - /* active-retuning-calls future */ - ide_set_xfer_rate(drive, xfer_rate); - ide_driveid_update(drive); - } -abort: - if (copy_to_user((void __user *)arg, &args, 4)) - err = -EFAULT; - if (buf) { - if (copy_to_user((void __user *)(arg + 4), buf, bufsize)) - err = -EFAULT; - kfree(buf); - } - return err; -} - -int ide_task_ioctl (ide_drive_t *drive, unsigned int cmd, unsigned long arg) -{ - void __user *p = (void __user *)arg; - int err = 0; - u8 args[7]; - ide_task_t task; - - if (copy_from_user(args, p, 7)) - return -EFAULT; - - memset(&task, 0, sizeof(task)); - memcpy(&task.tf_array[7], &args[1], 6); - task.tf.command = args[0]; - task.tf_flags = IDE_TFLAG_TF | IDE_TFLAG_DEVICE; - - err = ide_no_data_taskfile(drive, &task); - - args[0] = task.tf.command; - memcpy(&args[1], &task.tf_array[7], 6); - - if (copy_to_user(p, args, 7)) - err = -EFAULT; - - return err; -} diff --git a/drivers/ide/ide-timings.c b/drivers/ide/ide-timings.c index 8c2f8327f48..81f527af8fa 100644 --- a/drivers/ide/ide-timings.c +++ b/drivers/ide/ide-timings.c @@ -22,7 +22,6 @@ */ #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/module.h> @@ -78,15 +77,15 @@ EXPORT_SYMBOL_GPL(ide_timing_find_mode); u16 ide_pio_cycle_time(ide_drive_t *drive, u8 pio) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; struct ide_timing *t = ide_timing_find_mode(XFER_PIO_0 + pio); u16 cycle = 0; - if (id->field_valid & 2) { - if (id->capability & 8) - cycle = id->eide_pio_iordy; + if (id[ATA_ID_FIELD_VALID] & 2) { + if (ata_id_has_iordy(drive->id)) + cycle = id[ATA_ID_EIDE_PIO_IORDY]; else - cycle = id->eide_pio; + cycle = id[ATA_ID_EIDE_PIO]; /* conservative "downgrade" for all pre-ATA2 drives */ if (pio < 3 && cycle < t->cycle) @@ -138,7 +137,7 @@ EXPORT_SYMBOL_GPL(ide_timing_merge); int ide_timing_compute(ide_drive_t *drive, u8 speed, struct ide_timing *t, int T, int UT) { - struct hd_driveid *id = drive->id; + u16 *id = drive->id; struct ide_timing *s, p; /* @@ -157,16 +156,15 @@ int ide_timing_compute(ide_drive_t *drive, u8 speed, * If the drive is an EIDE drive, it can tell us it needs extended * PIO/MWDMA cycle timing. */ - if (id && id->field_valid & 2) { /* EIDE drive */ - + if (id[ATA_ID_FIELD_VALID] & 2) { /* EIDE drive */ memset(&p, 0, sizeof(p)); if (speed <= XFER_PIO_2) - p.cycle = p.cyc8b = id->eide_pio; + p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO]; else if (speed <= XFER_PIO_5) - p.cycle = p.cyc8b = id->eide_pio_iordy; + p.cycle = p.cyc8b = id[ATA_ID_EIDE_PIO_IORDY]; else if (speed >= XFER_MW_DMA_0 && speed <= XFER_MW_DMA_2) - p.cycle = id->eide_dma_min; + p.cycle = id[ATA_ID_EIDE_DMA_MIN]; ide_timing_merge(&p, t, t, IDE_TIMING_CYCLE | IDE_TIMING_CYC8B); } diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index 772451600e4..9dcf5aed92c 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -44,8 +44,6 @@ * inspiration from lots of linux users, esp. hamish@zot.apana.org.au */ -#define _IDE_C /* Tell ide.h it's really us */ - #include <linux/module.h> #include <linux/types.h> #include <linux/string.h> @@ -58,6 +56,7 @@ #include <linux/init.h> #include <linux/pci.h> #include <linux/ide.h> +#include <linux/hdreg.h> #include <linux/completion.h> #include <linux/device.h> @@ -97,8 +96,6 @@ void ide_init_port_data(ide_hwif_t *hwif, unsigned int index) hwif->name[2] = 'e'; hwif->name[3] = '0' + index; - hwif->bus_state = BUSSTATE_ON; - init_completion(&hwif->gendev_rel_comp); hwif->tp_ops = &default_tp_ops; @@ -119,7 +116,7 @@ static void ide_port_init_devices_data(ide_hwif_t *hwif) drive->media = ide_disk; drive->select.all = (unit<<4)|0xa0; drive->hwif = hwif; - drive->ready_stat = READY_STAT; + drive->ready_stat = ATA_DRDY; drive->bad_wstat = BAD_W_STAT; drive->special.b.recalibrate = 1; drive->special.b.set_geometry = 1; @@ -253,42 +250,9 @@ void ide_init_port_hw(ide_hwif_t *hwif, hw_regs_t *hw) DEFINE_MUTEX(ide_setting_mtx); -EXPORT_SYMBOL_GPL(ide_setting_mtx); - -/** - * ide_spin_wait_hwgroup - wait for group - * @drive: drive in the group - * - * Wait for an IDE device group to go non busy and then return - * holding the ide_lock which guards the hwgroup->busy status - * and right to use it. - */ +ide_devset_get(io_32bit, io_32bit); -int ide_spin_wait_hwgroup (ide_drive_t *drive) -{ - ide_hwgroup_t *hwgroup = HWGROUP(drive); - unsigned long timeout = jiffies + (3 * HZ); - - spin_lock_irq(&ide_lock); - - while (hwgroup->busy) { - unsigned long lflags; - spin_unlock_irq(&ide_lock); - local_irq_set(lflags); - if (time_after(jiffies, timeout)) { - local_irq_restore(lflags); - printk(KERN_ERR "%s: channel busy\n", drive->name); - return -EBUSY; - } - local_irq_restore(lflags); - spin_lock_irq(&ide_lock); - } - return 0; -} - -EXPORT_SYMBOL(ide_spin_wait_hwgroup); - -int set_io_32bit(ide_drive_t *drive, int arg) +static int set_io_32bit(ide_drive_t *drive, int arg) { if (drive->no_io_32bit) return -EPERM; @@ -296,53 +260,39 @@ int set_io_32bit(ide_drive_t *drive, int arg) if (arg < 0 || arg > 1 + (SUPPORT_VLB_SYNC << 1)) return -EINVAL; - if (ide_spin_wait_hwgroup(drive)) - return -EBUSY; - drive->io_32bit = arg; - spin_unlock_irq(&ide_lock); - return 0; } +ide_devset_get(ksettings, keep_settings); + static int set_ksettings(ide_drive_t *drive, int arg) { if (arg < 0 || arg > 1) return -EINVAL; - if (ide_spin_wait_hwgroup(drive)) - return -EBUSY; drive->keep_settings = arg; - spin_unlock_irq(&ide_lock); return 0; } -int set_using_dma(ide_drive_t *drive, int arg) +ide_devset_get(using_dma, using_dma); + +static int set_using_dma(ide_drive_t *drive, int arg) { #ifdef CONFIG_BLK_DEV_IDEDMA - ide_hwif_t *hwif = drive->hwif; int err = -EPERM; if (arg < 0 || arg > 1) return -EINVAL; - if (!drive->id || !(drive->id->capability & 1)) + if (ata_id_has_dma(drive->id) == 0) goto out; - if (hwif->dma_ops == NULL) + if (drive->hwif->dma_ops == NULL) goto out; - err = -EBUSY; - if (ide_spin_wait_hwgroup(drive)) - goto out; - /* - * set ->busy flag, unlock and let it ride - */ - hwif->hwgroup->busy = 1; - spin_unlock_irq(&ide_lock); - err = 0; if (arg) { @@ -351,12 +301,6 @@ int set_using_dma(ide_drive_t *drive, int arg) } else ide_dma_off(drive); - /* - * lock, clear ->busy flag and unlock before leaving - */ - spin_lock_irq(&ide_lock); - hwif->hwgroup->busy = 0; - spin_unlock_irq(&ide_lock); out: return err; #else @@ -367,7 +311,7 @@ out: #endif } -int set_pio_mode(ide_drive_t *drive, int arg) +static int set_pio_mode(ide_drive_t *drive, int arg) { struct request *rq; ide_hwif_t *hwif = drive->hwif; @@ -395,6 +339,8 @@ int set_pio_mode(ide_drive_t *drive, int arg) return 0; } +ide_devset_get(unmaskirq, unmask); + static int set_unmaskirq(ide_drive_t *drive, int arg) { if (drive->no_unmask) @@ -403,14 +349,20 @@ static int set_unmaskirq(ide_drive_t *drive, int arg) if (arg < 0 || arg > 1) return -EINVAL; - if (ide_spin_wait_hwgroup(drive)) - return -EBUSY; drive->unmask = arg; - spin_unlock_irq(&ide_lock); return 0; } +#define ide_gen_devset_rw(_name, _func) \ +__IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func) + +ide_gen_devset_rw(io_32bit, io_32bit); +ide_gen_devset_rw(keepsettings, ksettings); +ide_gen_devset_rw(unmaskirq, unmaskirq); +ide_gen_devset_rw(using_dma, using_dma); +__IDE_DEVSET(pio_mode, 0, NULL, set_pio_mode); + static int generic_ide_suspend(struct device *dev, pm_message_t mesg) { ide_drive_t *drive = dev->driver_data; @@ -486,138 +438,6 @@ static int generic_ide_resume(struct device *dev) return err; } -static int generic_drive_reset(ide_drive_t *drive) -{ - struct request *rq; - int ret = 0; - - rq = blk_get_request(drive->queue, READ, __GFP_WAIT); - rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_len = 1; - rq->cmd[0] = REQ_DRIVE_RESET; - rq->cmd_flags |= REQ_SOFTBARRIER; - if (blk_execute_rq(drive->queue, NULL, rq, 1)) - ret = rq->errors; - blk_put_request(rq); - return ret; -} - -int generic_ide_ioctl(ide_drive_t *drive, struct file *file, struct block_device *bdev, - unsigned int cmd, unsigned long arg) -{ - unsigned long flags; - ide_driver_t *drv; - void __user *p = (void __user *)arg; - int err = 0, (*setfunc)(ide_drive_t *, int); - u8 *val; - - switch (cmd) { - case HDIO_GET_32BIT: val = &drive->io_32bit; goto read_val; - case HDIO_GET_KEEPSETTINGS: val = &drive->keep_settings; goto read_val; - case HDIO_GET_UNMASKINTR: val = &drive->unmask; goto read_val; - case HDIO_GET_DMA: val = &drive->using_dma; goto read_val; - case HDIO_SET_32BIT: setfunc = set_io_32bit; goto set_val; - case HDIO_SET_KEEPSETTINGS: setfunc = set_ksettings; goto set_val; - case HDIO_SET_PIO_MODE: setfunc = set_pio_mode; goto set_val; - case HDIO_SET_UNMASKINTR: setfunc = set_unmaskirq; goto set_val; - case HDIO_SET_DMA: setfunc = set_using_dma; goto set_val; - } - - switch (cmd) { - case HDIO_OBSOLETE_IDENTITY: - case HDIO_GET_IDENTITY: - if (bdev != bdev->bd_contains) - return -EINVAL; - if (drive->id_read == 0) - return -ENOMSG; - if (copy_to_user(p, drive->id, (cmd == HDIO_GET_IDENTITY) ? sizeof(*drive->id) : 142)) - return -EFAULT; - return 0; - - case HDIO_GET_NICE: - return put_user(drive->dsc_overlap << IDE_NICE_DSC_OVERLAP | - drive->atapi_overlap << IDE_NICE_ATAPI_OVERLAP | - drive->nice1 << IDE_NICE_1, - (long __user *) arg); -#ifdef CONFIG_IDE_TASK_IOCTL - case HDIO_DRIVE_TASKFILE: - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) - return -EACCES; - switch(drive->media) { - case ide_disk: - return ide_taskfile_ioctl(drive, cmd, arg); - default: - return -ENOMSG; - } -#endif /* CONFIG_IDE_TASK_IOCTL */ - - case HDIO_DRIVE_CMD: - if (!capable(CAP_SYS_RAWIO)) - return -EACCES; - return ide_cmd_ioctl(drive, cmd, arg); - - case HDIO_DRIVE_TASK: - if (!capable(CAP_SYS_RAWIO)) - return -EACCES; - return ide_task_ioctl(drive, cmd, arg); - case HDIO_SET_NICE: - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (arg != (arg & ((1 << IDE_NICE_DSC_OVERLAP) | (1 << IDE_NICE_1)))) - return -EPERM; - drive->dsc_overlap = (arg >> IDE_NICE_DSC_OVERLAP) & 1; - drv = *(ide_driver_t **)bdev->bd_disk->private_data; - if (drive->dsc_overlap && !drv->supports_dsc_overlap) { - drive->dsc_overlap = 0; - return -EPERM; - } - drive->nice1 = (arg >> IDE_NICE_1) & 1; - return 0; - case HDIO_DRIVE_RESET: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - - return generic_drive_reset(drive); - - case HDIO_GET_BUSSTATE: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - if (put_user(HWIF(drive)->bus_state, (long __user *)arg)) - return -EFAULT; - return 0; - - case HDIO_SET_BUSSTATE: - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; - return -EOPNOTSUPP; - default: - return -EINVAL; - } - -read_val: - mutex_lock(&ide_setting_mtx); - spin_lock_irqsave(&ide_lock, flags); - err = *val; - spin_unlock_irqrestore(&ide_lock, flags); - mutex_unlock(&ide_setting_mtx); - return err >= 0 ? put_user(err, (long __user *)arg) : err; - -set_val: - if (bdev != bdev->bd_contains) - err = -EINVAL; - else { - if (!capable(CAP_SYS_ADMIN)) - err = -EACCES; - else { - mutex_lock(&ide_setting_mtx); - err = setfunc(drive, arg); - mutex_unlock(&ide_setting_mtx); - } - } - return err; -} - -EXPORT_SYMBOL(generic_ide_ioctl); - /** * ide_device_get - get an additional reference to a ide_drive_t * @drive: device to get a reference to @@ -710,21 +530,21 @@ static ssize_t model_show(struct device *dev, struct device_attribute *attr, char *buf) { ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", drive->id->model); + return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_PROD]); } static ssize_t firmware_show(struct device *dev, struct device_attribute *attr, char *buf) { ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", drive->id->fw_rev); + return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_FW_REV]); } static ssize_t serial_show(struct device *dev, struct device_attribute *attr, char *buf) { ide_drive_t *drive = to_ide_device(dev); - return sprintf(buf, "%s\n", drive->id->serial_no); + return sprintf(buf, "%s\n", (char *)&drive->id[ATA_ID_SERNO]); } static struct device_attribute ide_dev_attrs[] = { @@ -841,7 +661,7 @@ MODULE_PARM_DESC(noprobe, "skip probing for a device"); static unsigned int ide_nowerr; module_param_call(nowerr, ide_set_dev_param_mask, NULL, &ide_nowerr, 0); -MODULE_PARM_DESC(nowerr, "ignore the WRERR_STAT bit for a device"); +MODULE_PARM_DESC(nowerr, "ignore the ATA_DF bit for a device"); static unsigned int ide_cdroms; @@ -906,7 +726,7 @@ static void ide_dev_apply_params(ide_drive_t *drive) drive->noprobe = 1; } if (ide_nowerr & (1 << i)) { - printk(KERN_INFO "ide: ignoring the WRERR_STAT bit for %s\n", + printk(KERN_INFO "ide: ignoring the ATA_DF bit for %s\n", drive->name); drive->bad_wstat = BAD_R_STAT; } @@ -927,7 +747,7 @@ static void ide_dev_apply_params(ide_drive_t *drive) drive->cyl, drive->head, drive->sect); drive->present = 1; drive->media = ide_disk; - drive->ready_stat = READY_STAT; + drive->ready_stat = ATA_DRDY; } } diff --git a/drivers/ide/legacy/ali14xx.c b/drivers/ide/legacy/ali14xx.c index 4ec19737f3c..7276c96aaa2 100644 --- a/drivers/ide/legacy/ali14xx.c +++ b/drivers/ide/legacy/ali14xx.c @@ -43,7 +43,6 @@ #include <linux/mm.h> #include <linux/ioport.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> diff --git a/drivers/ide/legacy/buddha.c b/drivers/ide/legacy/buddha.c index 7c2afa97f41..c5a3c9ef6a5 100644 --- a/drivers/ide/legacy/buddha.c +++ b/drivers/ide/legacy/buddha.c @@ -20,7 +20,6 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/zorro.h> #include <linux/ide.h> #include <linux/init.h> diff --git a/drivers/ide/legacy/dtc2278.c b/drivers/ide/legacy/dtc2278.c index af791a02a12..689b2e49341 100644 --- a/drivers/ide/legacy/dtc2278.c +++ b/drivers/ide/legacy/dtc2278.c @@ -10,7 +10,6 @@ #include <linux/mm.h> #include <linux/ioport.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c index 724f95073d8..39d500d84b0 100644 --- a/drivers/ide/legacy/falconide.c +++ b/drivers/ide/legacy/falconide.c @@ -13,7 +13,6 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c index 51ba085d7aa..69150688656 100644 --- a/drivers/ide/legacy/gayle.c +++ b/drivers/ide/legacy/gayle.c @@ -12,7 +12,6 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> #include <linux/zorro.h> diff --git a/drivers/ide/legacy/ht6560b.c b/drivers/ide/legacy/ht6560b.c index 98f7c95e39e..5123ea291d0 100644 --- a/drivers/ide/legacy/ht6560b.c +++ b/drivers/ide/legacy/ht6560b.c @@ -24,7 +24,6 @@ #include <linux/mm.h> #include <linux/ioport.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> diff --git a/drivers/ide/legacy/ide-cs.c b/drivers/ide/legacy/ide-cs.c index 21bfac13784..ee6fc30d5e2 100644 --- a/drivers/ide/legacy/ide-cs.c +++ b/drivers/ide/legacy/ide-cs.c @@ -38,7 +38,6 @@ #include <linux/timer.h> #include <linux/ioport.h> #include <linux/ide.h> -#include <linux/hdreg.h> #include <linux/major.h> #include <linux/delay.h> #include <asm/io.h> diff --git a/drivers/ide/legacy/macide.c b/drivers/ide/legacy/macide.c index a0bb167980e..43f97cc1d30 100644 --- a/drivers/ide/legacy/macide.c +++ b/drivers/ide/legacy/macide.c @@ -15,7 +15,6 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/delay.h> #include <linux/ide.h> diff --git a/drivers/ide/legacy/q40ide.c b/drivers/ide/legacy/q40ide.c index 4abd8fc7819..4af4a8ce4cd 100644 --- a/drivers/ide/legacy/q40ide.c +++ b/drivers/ide/legacy/q40ide.c @@ -14,8 +14,6 @@ #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> - #include <linux/ide.h> /* diff --git a/drivers/ide/legacy/qd65xx.c b/drivers/ide/legacy/qd65xx.c index 2338f344ea2..ec408b3a710 100644 --- a/drivers/ide/legacy/qd65xx.c +++ b/drivers/ide/legacy/qd65xx.c @@ -27,7 +27,6 @@ #include <linux/mm.h> #include <linux/ioport.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> #include <asm/system.h> @@ -151,12 +150,14 @@ static int qd_find_disk_type (ide_drive_t *drive, int *active_time, int *recovery_time) { struct qd65xx_timing_s *p; - char model[40]; + char *m = (char *)&drive->id[ATA_ID_PROD]; + char model[ATA_ID_PROD_LEN]; - if (!*drive->id->model) return 0; + if (*m == 0) + return 0; - strncpy(model,drive->id->model,40); - ide_fixstring(model,40,1); /* byte-swap */ + strncpy(model, m, ATA_ID_PROD_LEN); + ide_fixstring(model, ATA_ID_PROD_LEN, 1); /* byte-swap */ for (p = qd65xx_timing ; p->offset != -1 ; p++) { if (!strncmp(p->model, model+p->offset, 4)) { @@ -185,20 +186,20 @@ static void qd_set_timing (ide_drive_t *drive, u8 timing) static void qd6500_set_pio_mode(ide_drive_t *drive, const u8 pio) { + u16 *id = drive->id; int active_time = 175; int recovery_time = 415; /* worst case values from the dos driver */ /* * FIXME: use "pio" value */ - if (drive->id && !qd_find_disk_type(drive, &active_time, &recovery_time) - && drive->id->tPIO && (drive->id->field_valid & 0x02) - && drive->id->eide_pio >= 240) { - + if (!qd_find_disk_type(drive, &active_time, &recovery_time) && + (id[ATA_ID_OLD_PIO_MODES] & 0xff) && (id[ATA_ID_FIELD_VALID] & 2) && + id[ATA_ID_EIDE_PIO] >= 240) { printk(KERN_INFO "%s: PIO mode%d\n", drive->name, - drive->id->tPIO); + id[ATA_ID_OLD_PIO_MODES] & 0xff); active_time = 110; - recovery_time = drive->id->eide_pio - 120; + recovery_time = drive->id[ATA_ID_EIDE_PIO] - 120; } qd_set_timing(drive, qd6500_compute_timing(HWIF(drive), active_time, recovery_time)); diff --git a/drivers/ide/legacy/umc8672.c b/drivers/ide/legacy/umc8672.c index b54a14a5775..1da076e0c91 100644 --- a/drivers/ide/legacy/umc8672.c +++ b/drivers/ide/legacy/umc8672.c @@ -45,7 +45,6 @@ #include <linux/mm.h> #include <linux/ioport.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> diff --git a/drivers/ide/pci/aec62xx.c b/drivers/ide/pci/aec62xx.c index 3187215e8f8..e7475ba559c 100644 --- a/drivers/ide/pci/aec62xx.c +++ b/drivers/ide/pci/aec62xx.c @@ -7,7 +7,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -140,7 +139,7 @@ static void aec_set_pio_mode(ide_drive_t *drive, const u8 pio) drive->hwif->port_ops->set_dma_mode(drive, pio + XFER_PIO_0); } -static unsigned int __devinit init_chipset_aec62xx(struct pci_dev *dev) +static unsigned int init_chipset_aec62xx(struct pci_dev *dev) { /* These are necessary to get AEC6280 Macintosh cards to work */ if ((dev->device == PCI_DEVICE_ID_ARTOP_ATP865) || @@ -308,6 +307,8 @@ static struct pci_driver driver = { .id_table = aec62xx_pci_tbl, .probe = aec62xx_init_one, .remove = __devexit_p(aec62xx_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init aec62xx_ide_init(void) diff --git a/drivers/ide/pci/alim15x3.c b/drivers/ide/pci/alim15x3.c index d647526af55..053c7526391 100644 --- a/drivers/ide/pci/alim15x3.c +++ b/drivers/ide/pci/alim15x3.c @@ -31,7 +31,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> #include <linux/dmi.h> @@ -134,8 +133,8 @@ static u8 ali_udma_filter(ide_drive_t *drive) if (m5229_revision > 0x20 && m5229_revision < 0xC2) { if (drive->media != ide_disk) return 0; - if (chip_is_1543c_e && strstr(drive->id->model, "WDC ") && - wdc_udma == 0) + if (wdc_udma == 0 && chip_is_1543c_e && + strstr((char *)&drive->id[ATA_ID_PROD], "WDC ")) return 0; } @@ -214,7 +213,7 @@ static int ali15x3_dma_setup(ide_drive_t *drive) * appropriate also sets up the 1533 southbridge. */ -static unsigned int __devinit init_chipset_ali15x3(struct pci_dev *dev) +static unsigned int init_chipset_ali15x3(struct pci_dev *dev) { unsigned long flags; u8 tmpbyte; @@ -582,6 +581,8 @@ static struct pci_driver driver = { .id_table = alim15x3_pci_tbl, .probe = alim15x3_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init ali15x3_ide_init(void) diff --git a/drivers/ide/pci/amd74xx.c b/drivers/ide/pci/amd74xx.c index 1e66a960a96..824471f91bf 100644 --- a/drivers/ide/pci/amd74xx.c +++ b/drivers/ide/pci/amd74xx.c @@ -112,13 +112,13 @@ static void amd_set_pio_mode(ide_drive_t *drive, const u8 pio) amd_set_drive(drive, XFER_PIO_0 + pio); } -static void __devinit amd7409_cable_detect(struct pci_dev *dev) +static void amd7409_cable_detect(struct pci_dev *dev) { /* no host side cable detection */ amd_80w = 0x03; } -static void __devinit amd7411_cable_detect(struct pci_dev *dev) +static void amd7411_cable_detect(struct pci_dev *dev) { int i; u32 u = 0; @@ -140,7 +140,7 @@ static void __devinit amd7411_cable_detect(struct pci_dev *dev) * The initialization callback. Initialize drive independent registers. */ -static unsigned int __devinit init_chipset_amd74xx(struct pci_dev *dev) +static unsigned int init_chipset_amd74xx(struct pci_dev *dev) { u8 t = 0, offset = amd_offset(dev); @@ -324,6 +324,8 @@ static struct pci_driver driver = { .id_table = amd74xx_pci_tbl, .probe = amd74xx_probe, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init amd74xx_ide_init(void) diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c index 41f6cb6c163..e4437034dd0 100644 --- a/drivers/ide/pci/atiixp.c +++ b/drivers/ide/pci/atiixp.c @@ -7,7 +7,6 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -188,6 +187,8 @@ static struct pci_driver driver = { .id_table = atiixp_pci_tbl, .probe = atiixp_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init atiixp_ide_init(void) diff --git a/drivers/ide/pci/cmd640.c b/drivers/ide/pci/cmd640.c index e6c62006ca1..7f39cdb4141 100644 --- a/drivers/ide/pci/cmd640.c +++ b/drivers/ide/pci/cmd640.c @@ -103,7 +103,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/delay.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -375,6 +374,21 @@ static void cmd640_dump_regs(void) } #endif +static void __set_prefetch_mode(ide_drive_t *drive, int mode) +{ + if (mode) { /* want prefetch on? */ +#if CMD640_PREFETCH_MASKS + drive->no_unmask = 1; + drive->unmask = 0; +#endif + drive->no_io_32bit = 0; + } else { + drive->no_unmask = 0; + drive->no_io_32bit = 1; + drive->io_32bit = 0; + } +} + #ifndef CONFIG_BLK_DEV_CMD640_ENHANCED /* * Check whether prefetch is on for a drive, @@ -384,19 +398,10 @@ static void __init check_prefetch(ide_drive_t *drive, unsigned int index) { u8 b = get_cmd640_reg(prefetch_regs[index]); - if (b & prefetch_masks[index]) { /* is prefetch off? */ - drive->no_unmask = 0; - drive->no_io_32bit = 1; - drive->io_32bit = 0; - } else { -#if CMD640_PREFETCH_MASKS - drive->no_unmask = 1; - drive->unmask = 0; -#endif - drive->no_io_32bit = 0; - } + __set_prefetch_mode(drive, (b & prefetch_masks[index]) ? 0 : 1); } #else + /* * Sets prefetch mode for a drive. */ @@ -408,19 +413,11 @@ static void set_prefetch_mode(ide_drive_t *drive, unsigned int index, int mode) spin_lock_irqsave(&cmd640_lock, flags); b = __get_cmd640_reg(reg); - if (mode) { /* want prefetch on? */ -#if CMD640_PREFETCH_MASKS - drive->no_unmask = 1; - drive->unmask = 0; -#endif - drive->no_io_32bit = 0; + __set_prefetch_mode(drive, mode); + if (mode) b &= ~prefetch_masks[index]; /* enable prefetch */ - } else { - drive->no_unmask = 0; - drive->no_io_32bit = 1; - drive->io_32bit = 0; + else b |= prefetch_masks[index]; /* disable prefetch */ - } __put_cmd640_reg(reg, b); spin_unlock_irqrestore(&cmd640_lock, flags); } diff --git a/drivers/ide/pci/cmd64x.c b/drivers/ide/pci/cmd64x.c index e064398e03b..456dee18b66 100644 --- a/drivers/ide/pci/cmd64x.c +++ b/drivers/ide/pci/cmd64x.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -332,7 +331,7 @@ static int cmd646_1_dma_end(ide_drive_t *drive) return (dma_stat & 7) != 4; } -static unsigned int __devinit init_chipset_cmd64x(struct pci_dev *dev) +static unsigned int init_chipset_cmd64x(struct pci_dev *dev) { u8 mrdmode = 0; @@ -511,6 +510,8 @@ static struct pci_driver driver = { .id_table = cmd64x_pci_tbl, .probe = cmd64x_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init cmd64x_ide_init(void) diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c index 151844fcbb0..d6341f7c414 100644 --- a/drivers/ide/pci/cs5520.c +++ b/drivers/ide/pci/cs5520.c @@ -35,7 +35,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/init.h> #include <linux/pci.h> #include <linux/ide.h> @@ -150,6 +149,8 @@ static struct pci_driver driver = { .name = "Cyrix_IDE", .id_table = cs5520_pci_tbl, .probe = cs5520_init_one, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init cs5520_ide_init(void) diff --git a/drivers/ide/pci/cs5530.c b/drivers/ide/pci/cs5530.c index f235db8c678..da42fa7e9f9 100644 --- a/drivers/ide/pci/cs5530.c +++ b/drivers/ide/pci/cs5530.c @@ -15,7 +15,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/ide.h> @@ -81,17 +80,19 @@ static void cs5530_set_pio_mode(ide_drive_t *drive, const u8 pio) static u8 cs5530_udma_filter(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *mate = &hwif->drives[(drive->dn & 1) ^ 1]; - struct hd_driveid *mateid = mate->id; + ide_drive_t *mate = ide_get_pair_dev(drive); + u16 *mateid = mate->id; u8 mask = hwif->ultra_mask; - if (mate->present == 0) + if (mate == NULL) goto out; - if ((mateid->capability & 1) && __ide_dma_bad_drive(mate) == 0) { - if ((mateid->field_valid & 4) && (mateid->dma_ultra & 7)) + if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) { + if ((mateid[ATA_ID_FIELD_VALID] & 4) && + (mateid[ATA_ID_UDMA_MODES] & 7)) goto out; - if ((mateid->field_valid & 2) && (mateid->dma_mword & 7)) + if ((mateid[ATA_ID_FIELD_VALID] & 2) && + (mateid[ATA_ID_MWDMA_MODES] & 7)) mask = 0; } out: @@ -133,7 +134,7 @@ static void cs5530_set_dma_mode(ide_drive_t *drive, const u8 mode) * Initialize the cs5530 bridge for reliable IDE DMA operation. */ -static unsigned int __devinit init_chipset_cs5530(struct pci_dev *dev) +static unsigned int init_chipset_cs5530(struct pci_dev *dev) { struct pci_dev *master_0 = NULL, *cs5530_0 = NULL; @@ -271,6 +272,8 @@ static struct pci_driver driver = { .id_table = cs5530_pci_tbl, .probe = cs5530_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init cs5530_ide_init(void) diff --git a/drivers/ide/pci/cs5535.c b/drivers/ide/pci/cs5535.c index dd3dc23af99..1e5bc59ea2f 100644 --- a/drivers/ide/pci/cs5535.c +++ b/drivers/ide/pci/cs5535.c @@ -80,12 +80,12 @@ static void cs5535_set_speed(ide_drive_t *drive, const u8 speed) /* Set the PIO timings */ if (speed < XFER_SW_DMA_0) { - ide_drive_t *pair = ide_get_paired_drive(drive); + ide_drive_t *pair = ide_get_pair_dev(drive); u8 cmd, pioa; cmd = pioa = speed - XFER_PIO_0; - if (pair->present) { + if (pair) { u8 piob = ide_get_best_pio_mode(pair, 255, 4); if (piob < cmd) @@ -193,10 +193,12 @@ static const struct pci_device_id cs5535_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, cs5535_pci_tbl); static struct pci_driver driver = { - .name = "CS5535_IDE", - .id_table = cs5535_pci_tbl, - .probe = cs5535_init_one, - .remove = ide_pci_remove, + .name = "CS5535_IDE", + .id_table = cs5535_pci_tbl, + .probe = cs5535_init_one, + .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init cs5535_ide_init(void) diff --git a/drivers/ide/pci/cy82c693.c b/drivers/ide/pci/cy82c693.c index e6d8ee88d56..69820e9224d 100644 --- a/drivers/ide/pci/cy82c693.c +++ b/drivers/ide/pci/cy82c693.c @@ -332,7 +332,7 @@ static void cy82c693_set_pio_mode(ide_drive_t *drive, const u8 pio) /* * this function is called during init and is used to setup the cy82c693 chip */ -static unsigned int __devinit init_chipset_cy82c693(struct pci_dev *dev) +static unsigned int init_chipset_cy82c693(struct pci_dev *dev) { if (PCI_FUNC(dev->devfn) != 1) return 0; @@ -448,6 +448,8 @@ static struct pci_driver driver = { .id_table = cy82c693_pci_tbl, .probe = cy82c693_init_one, .remove = __devexit_p(cy82c693_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init cy82c693_ide_init(void) diff --git a/drivers/ide/pci/delkin_cb.c b/drivers/ide/pci/delkin_cb.c index f84bfb4f600..83b63b365e5 100644 --- a/drivers/ide/pci/delkin_cb.c +++ b/drivers/ide/pci/delkin_cb.c @@ -19,7 +19,6 @@ #include <linux/types.h> #include <linux/module.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> #include <linux/pci.h> diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c index b07d4f4273b..092b238cb25 100644 --- a/drivers/ide/pci/generic.c +++ b/drivers/ide/pci/generic.c @@ -22,7 +22,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/ide.h> #include <linux/init.h> @@ -172,6 +171,8 @@ static struct pci_driver driver = { .id_table = generic_pci_tbl, .probe = generic_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init generic_ide_init(void) diff --git a/drivers/ide/pci/hpt34x.c b/drivers/ide/pci/hpt34x.c index 6009b0b9655..644de29f8fe 100644 --- a/drivers/ide/pci/hpt34x.c +++ b/drivers/ide/pci/hpt34x.c @@ -27,7 +27,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/ioport.h> -#include <linux/hdreg.h> #include <linux/interrupt.h> #include <linux/pci.h> #include <linux/init.h> @@ -79,7 +78,7 @@ static void hpt34x_set_pio_mode(ide_drive_t *drive, const u8 pio) */ #define HPT34X_PCI_INIT_REG 0x80 -static unsigned int __devinit init_chipset_hpt34x(struct pci_dev *dev) +static unsigned int init_chipset_hpt34x(struct pci_dev *dev) { int i = 0; unsigned long hpt34xIoBase = pci_resource_start(dev, 4); @@ -172,6 +171,8 @@ static struct pci_driver driver = { .id_table = hpt34x_pci_tbl, .probe = hpt34x_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init hpt34x_ide_init(void) diff --git a/drivers/ide/pci/hpt366.c b/drivers/ide/pci/hpt366.c index c37ab174381..a194022b6a6 100644 --- a/drivers/ide/pci/hpt366.c +++ b/drivers/ide/pci/hpt366.c @@ -122,7 +122,6 @@ #include <linux/kernel.h> #include <linux/delay.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/interrupt.h> #include <linux/pci.h> #include <linux/init.h> @@ -605,10 +604,10 @@ static const struct hpt_info hpt371n __devinitdata = { static int check_in_drive_list(ide_drive_t *drive, const char **list) { - struct hd_driveid *id = drive->id; + char *m = (char *)&drive->id[ATA_ID_PROD]; while (*list) - if (!strcmp(*list++,id->model)) + if (!strcmp(*list++, m)) return 1; return 0; } @@ -655,7 +654,7 @@ static u8 hpt3xx_udma_filter(ide_drive_t *drive) case HPT372A: case HPT372N: case HPT374 : - if (ide_dev_is_sata(drive->id)) + if (ata_id_is_sata(drive->id)) mask &= ~0x0e; /* Fall thru */ default: @@ -675,7 +674,7 @@ static u8 hpt3xx_mdma_filter(ide_drive_t *drive) case HPT372A: case HPT372N: case HPT374 : - if (ide_dev_is_sata(drive->id)) + if (ata_id_is_sata(drive->id)) return 0x00; /* Fall thru */ default: @@ -731,11 +730,11 @@ static void hpt3xx_set_pio_mode(ide_drive_t *drive, const u8 pio) static void hpt3xx_quirkproc(ide_drive_t *drive) { - struct hd_driveid *id = drive->id; + char *m = (char *)&drive->id[ATA_ID_PROD]; const char **list = quirk_drives; while (*list) - if (strstr(id->model, *list++)) { + if (strstr(m, *list++)) { drive->quirk_list = 1; return; } @@ -944,7 +943,7 @@ static void hpt3xxn_rw_disk(ide_drive_t *drive, struct request *rq) * Perform a calibration cycle on the DPLL. * Returns 1 if this succeeds */ -static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high) +static int hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f_high) { u32 dpll = (f_high << 16) | f_low | 0x100; u8 scr2; @@ -972,7 +971,37 @@ static int __devinit hpt37x_calibrate_dpll(struct pci_dev *dev, u16 f_low, u16 f return 1; } -static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev) +static void hpt3xx_disable_fast_irq(struct pci_dev *dev, u8 mcr_addr) +{ + struct ide_host *host = pci_get_drvdata(dev); + struct hpt_info *info = host->host_priv + (&dev->dev == host->dev[1]); + u8 chip_type = info->chip_type; + u8 new_mcr, old_mcr = 0; + + /* + * Disable the "fast interrupt" prediction. Don't hold off + * on interrupts. (== 0x01 despite what the docs say) + */ + pci_read_config_byte(dev, mcr_addr + 1, &old_mcr); + + if (chip_type >= HPT374) + new_mcr = old_mcr & ~0x07; + else if (chip_type >= HPT370) { + new_mcr = old_mcr; + new_mcr &= ~0x02; +#ifdef HPT_DELAY_INTERRUPT + new_mcr &= ~0x01; +#else + new_mcr |= 0x01; +#endif + } else /* HPT366 and HPT368 */ + new_mcr = old_mcr & ~0x80; + + if (new_mcr != old_mcr) + pci_write_config_byte(dev, mcr_addr + 1, new_mcr); +} + +static unsigned int init_chipset_hpt366(struct pci_dev *dev) { unsigned long io_base = pci_resource_start(dev, 4); struct hpt_info *info = hpt3xx_get_info(&dev->dev); @@ -1209,9 +1238,11 @@ static unsigned int __devinit init_chipset_hpt366(struct pci_dev *dev) * NOTE: This register is only writeable via I/O space. */ if (chip_type == HPT371N && clock == ATA_CLOCK_66MHZ) - outb(inb(io_base + 0x9c) | 0x04, io_base + 0x9c); + hpt3xx_disable_fast_irq(dev, 0x50); + hpt3xx_disable_fast_irq(dev, 0x54); + return dev->irq; } @@ -1265,7 +1296,6 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) struct hpt_info *info = hpt3xx_get_info(hwif->dev); int serialize = HPT_SERIALIZE_IO; u8 chip_type = info->chip_type; - u8 new_mcr, old_mcr = 0; /* Cache the channel's MISC. control registers' offset */ hwif->select_data = hwif->channel ? 0x54 : 0x50; @@ -1288,29 +1318,6 @@ static void __devinit init_hwif_hpt366(ide_hwif_t *hwif) /* Serialize access to this device if needed */ if (serialize && hwif->mate) hwif->serialized = hwif->mate->serialized = 1; - - /* - * Disable the "fast interrupt" prediction. Don't hold off - * on interrupts. (== 0x01 despite what the docs say) - */ - pci_read_config_byte(dev, hwif->select_data + 1, &old_mcr); - - if (info->chip_type >= HPT374) - new_mcr = old_mcr & ~0x07; - else if (info->chip_type >= HPT370) { - new_mcr = old_mcr; - new_mcr &= ~0x02; - -#ifdef HPT_DELAY_INTERRUPT - new_mcr &= ~0x01; -#else - new_mcr |= 0x01; -#endif - } else /* HPT366 and HPT368 */ - new_mcr = old_mcr & ~0x80; - - if (new_mcr != old_mcr) - pci_write_config_byte(dev, hwif->select_data + 1, new_mcr); } static int __devinit init_dma_hpt366(ide_hwif_t *hwif, @@ -1620,6 +1627,8 @@ static struct pci_driver driver = { .id_table = hpt366_pci_tbl, .probe = hpt366_init_one, .remove = __devexit_p(hpt366_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init hpt366_ide_init(void) diff --git a/drivers/ide/pci/it8213.c b/drivers/ide/pci/it8213.c index 652e47dd7e8..0954ccd08d6 100644 --- a/drivers/ide/pci/it8213.c +++ b/drivers/ide/pci/it8213.c @@ -10,7 +10,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -195,6 +194,8 @@ static struct pci_driver driver = { .id_table = it8213_pci_tbl, .probe = it8213_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init it8213_ide_init(void) diff --git a/drivers/ide/pci/it821x.c b/drivers/ide/pci/it821x.c index 4a1508a707c..46edd083b34 100644 --- a/drivers/ide/pci/it821x.c +++ b/drivers/ide/pci/it821x.c @@ -63,7 +63,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -446,8 +445,7 @@ static u8 it821x_cable_detect(ide_hwif_t *hwif) static void it821x_quirkproc(ide_drive_t *drive) { struct it821x_dev *itdev = ide_get_hwifdata(drive->hwif); - struct hd_driveid *id = drive->id; - u16 *idbits = (u16 *)drive->id; + u16 *id = drive->id; if (!itdev->smart) { /* @@ -466,36 +464,36 @@ static void it821x_quirkproc(ide_drive_t *drive) */ /* Check for RAID v native */ - if(strstr(id->model, "Integrated Technology Express")) { + if (strstr((char *)&id[ATA_ID_PROD], + "Integrated Technology Express")) { /* In raid mode the ident block is slightly buggy We need to set the bits so that the IDE layer knows LBA28. LBA48 and DMA ar valid */ - id->capability |= 3; /* LBA28, DMA */ - id->command_set_2 |= 0x0400; /* LBA48 valid */ - id->cfs_enable_2 |= 0x0400; /* LBA48 on */ + id[ATA_ID_CAPABILITY] |= (3 << 8); /* LBA28, DMA */ + id[ATA_ID_COMMAND_SET_2] |= 0x0400; /* LBA48 valid */ + id[ATA_ID_CFS_ENABLE_2] |= 0x0400; /* LBA48 on */ /* Reporting logic */ printk(KERN_INFO "%s: IT8212 %sRAID %d volume", - drive->name, - idbits[147] ? "Bootable ":"", - idbits[129]); - if(idbits[129] != 1) - printk("(%dK stripe)", idbits[146]); - printk(".\n"); + drive->name, id[147] ? "Bootable " : "", + id[ATA_ID_CSFO]); + if (id[ATA_ID_CSFO] != 1) + printk(KERN_CONT "(%dK stripe)", id[146]); + printk(KERN_CONT ".\n"); } else { /* Non RAID volume. Fixups to stop the core code doing unsupported things */ - id->field_valid &= 3; - id->queue_depth = 0; - id->command_set_1 = 0; - id->command_set_2 &= 0xC400; - id->cfsse &= 0xC000; - id->cfs_enable_1 = 0; - id->cfs_enable_2 &= 0xC400; - id->csf_default &= 0xC000; - id->word127 = 0; - id->dlf = 0; - id->csfo = 0; - id->cfa_power = 0; + id[ATA_ID_FIELD_VALID] &= 3; + id[ATA_ID_QUEUE_DEPTH] = 0; + id[ATA_ID_COMMAND_SET_1] = 0; + id[ATA_ID_COMMAND_SET_2] &= 0xC400; + id[ATA_ID_CFSSE] &= 0xC000; + id[ATA_ID_CFS_ENABLE_1] = 0; + id[ATA_ID_CFS_ENABLE_2] &= 0xC400; + id[ATA_ID_CSF_DEFAULT] &= 0xC000; + id[127] = 0; + id[ATA_ID_DLF] = 0; + id[ATA_ID_CSFO] = 0; + id[ATA_ID_CFA_POWER] = 0; printk(KERN_INFO "%s: Performing identify fixups.\n", drive->name); } @@ -505,8 +503,8 @@ static void it821x_quirkproc(ide_drive_t *drive) * IDE core that DMA is supported (it821x hardware * takes care of DMA mode programming). */ - if (id->capability & 1) { - id->dma_mword |= 0x0101; + if (ata_id_has_dma(id)) { + id[ATA_ID_MWDMA_MODES] |= 0x0101; drive->current_speed = XFER_MW_DMA_0; } } @@ -588,7 +586,7 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif) hwif->mwdma_mask = ATA_MWDMA2; } -static void __devinit it8212_disable_raid(struct pci_dev *dev) +static void it8212_disable_raid(struct pci_dev *dev) { /* Reset local CPU, and set BIOS not ready */ pci_write_config_byte(dev, 0x5E, 0x01); @@ -605,7 +603,7 @@ static void __devinit it8212_disable_raid(struct pci_dev *dev) pci_write_config_byte(dev, PCI_LATENCY_TIMER, 0x20); } -static unsigned int __devinit init_chipset_it821x(struct pci_dev *dev) +static unsigned int init_chipset_it821x(struct pci_dev *dev) { u8 conf; static char *mode[2] = { "pass through", "smart" }; @@ -687,6 +685,8 @@ static struct pci_driver driver = { .id_table = it821x_pci_tbl, .probe = it821x_init_one, .remove = __devexit_p(it821x_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init it821x_ide_init(void) diff --git a/drivers/ide/pci/jmicron.c b/drivers/ide/pci/jmicron.c index bb9d09d8f19..acd64711064 100644 --- a/drivers/ide/pci/jmicron.c +++ b/drivers/ide/pci/jmicron.c @@ -8,7 +8,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -155,6 +154,8 @@ static struct pci_driver driver = { .id_table = jmicron_pci_tbl, .probe = jmicron_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init jmicron_ide_init(void) diff --git a/drivers/ide/pci/ns87415.c b/drivers/ide/pci/ns87415.c index ffefcd15196..53bd645736d 100644 --- a/drivers/ide/pci/ns87415.c +++ b/drivers/ide/pci/ns87415.c @@ -11,7 +11,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/interrupt.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/delay.h> #include <linux/ide.h> @@ -274,9 +273,9 @@ static void __devinit init_hwif_ns87415 (ide_hwif_t *hwif) do { udelay(50); stat = hwif->tp_ops->read_status(hwif); - if (stat == 0xff) - break; - } while ((stat & BUSY_STAT) && --timeout); + if (stat == 0xff) + break; + } while ((stat & ATA_BUSY) && --timeout); #endif } @@ -340,6 +339,8 @@ static struct pci_driver driver = { .id_table = ns87415_pci_tbl, .probe = ns87415_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init ns87415_ide_init(void) diff --git a/drivers/ide/pci/opti621.c b/drivers/ide/pci/opti621.c index e28e672ddaf..3de11ddcf86 100644 --- a/drivers/ide/pci/opti621.c +++ b/drivers/ide/pci/opti621.c @@ -85,7 +85,6 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <asm/io.h> @@ -137,7 +136,7 @@ static u8 read_reg(int reg) static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *pair = ide_get_paired_drive(drive); + ide_drive_t *pair = ide_get_pair_dev(drive); unsigned long flags; u8 tim, misc, addr_pio = pio, clk; @@ -153,7 +152,7 @@ static void opti621_set_pio_mode(ide_drive_t *drive, const u8 pio) drive->drive_data = XFER_PIO_0 + pio; - if (pair->present) { + if (pair) { if (pair->drive_data && pair->drive_data < drive->drive_data) addr_pio = pair->drive_data - XFER_PIO_0; } @@ -226,6 +225,8 @@ static struct pci_driver driver = { .id_table = opti621_pci_tbl, .probe = opti621_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init opti621_ide_init(void) diff --git a/drivers/ide/pci/pdc202xx_new.c b/drivers/ide/pci/pdc202xx_new.c index d477da6b585..9fc59962553 100644 --- a/drivers/ide/pci/pdc202xx_new.c +++ b/drivers/ide/pci/pdc202xx_new.c @@ -19,7 +19,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/delay.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/ide.h> @@ -203,10 +202,10 @@ static u8 pdcnew_cable_detect(ide_hwif_t *hwif) static void pdcnew_quirkproc(ide_drive_t *drive) { - const char **list, *model = drive->id->model; + const char **list, *m = (char *)&drive->id[ATA_ID_PROD]; for (list = pdc_quirk_drives; *list != NULL; list++) - if (strstr(model, *list) != NULL) { + if (strstr(m, *list) != NULL) { drive->quirk_list = 2; return; } @@ -227,7 +226,7 @@ static void pdcnew_reset(ide_drive_t *drive) * read_counter - Read the byte count registers * @dma_base: for the port address */ -static long __devinit read_counter(u32 dma_base) +static long read_counter(u32 dma_base) { u32 pri_dma_base = dma_base, sec_dma_base = dma_base + 0x08; u8 cnt0, cnt1, cnt2, cnt3; @@ -267,7 +266,7 @@ static long __devinit read_counter(u32 dma_base) * @dma_base: for the port address * E.g. 16949000 on 33 MHz PCI bus, i.e. half of the PCI clock. */ -static long __devinit detect_pll_input_clock(unsigned long dma_base) +static long detect_pll_input_clock(unsigned long dma_base) { struct timeval start_time, end_time; long start_count, end_count; @@ -310,7 +309,7 @@ static long __devinit detect_pll_input_clock(unsigned long dma_base) } #ifdef CONFIG_PPC_PMAC -static void __devinit apple_kiwi_init(struct pci_dev *pdev) +static void apple_kiwi_init(struct pci_dev *pdev) { struct device_node *np = pci_device_to_OF_node(pdev); u8 conf; @@ -326,7 +325,7 @@ static void __devinit apple_kiwi_init(struct pci_dev *pdev) } #endif /* CONFIG_PPC_PMAC */ -static unsigned int __devinit init_chipset_pdcnew(struct pci_dev *dev) +static unsigned int init_chipset_pdcnew(struct pci_dev *dev) { const char *name = DRV_NAME; unsigned long dma_base = pci_resource_start(dev, 4); @@ -567,6 +566,8 @@ static struct pci_driver driver = { .id_table = pdc202new_pci_tbl, .probe = pdc202new_init_one, .remove = __devexit_p(pdc202new_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init pdc202new_ide_init(void) diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c index de9a2740046..cb6d2a00c51 100644 --- a/drivers/ide/pci/pdc202xx_old.c +++ b/drivers/ide/pci/pdc202xx_old.c @@ -13,7 +13,6 @@ #include <linux/kernel.h> #include <linux/delay.h> #include <linux/blkdev.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/ide.h> @@ -86,7 +85,7 @@ static void pdc202xx_set_mode(ide_drive_t *drive, const u8 speed) * Prefetch_EN / IORDY_EN / PA[3:0] bits of register A */ AP &= ~0x3f; - if (drive->id->capability & 4) + if (ata_id_iordy_disable(drive->id)) AP |= 0x20; /* set IORDY_EN bit */ if (drive->media == ide_disk) AP |= 0x10; /* set Prefetch_EN bit */ @@ -154,10 +153,10 @@ static void pdc_old_disable_66MHz_clock(ide_hwif_t *hwif) static void pdc202xx_quirkproc(ide_drive_t *drive) { - const char **list, *model = drive->id->model; + const char **list, *m = (char *)&drive->id[ATA_ID_PROD]; for (list = pdc_quirk_drives; *list != NULL; list++) - if (strstr(model, *list) != NULL) { + if (strstr(m, *list) != NULL) { drive->quirk_list = 2; return; } @@ -265,7 +264,7 @@ static void pdc202xx_dma_timeout(ide_drive_t *drive) ide_dma_timeout(drive); } -static unsigned int __devinit init_chipset_pdc202xx(struct pci_dev *dev) +static unsigned int init_chipset_pdc202xx(struct pci_dev *dev) { unsigned long dmabase = pci_resource_start(dev, 4); u8 udma_speed_flag = 0, primary_mode = 0, secondary_mode = 0; @@ -432,6 +431,8 @@ static struct pci_driver driver = { .id_table = pdc202xx_pci_tbl, .probe = pdc202xx_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init pdc202xx_ide_init(void) diff --git a/drivers/ide/pci/piix.c b/drivers/ide/pci/piix.c index 30cfc815fe3..a06c03f8e29 100644 --- a/drivers/ide/pci/piix.c +++ b/drivers/ide/pci/piix.c @@ -48,7 +48,6 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -205,7 +204,7 @@ static void piix_set_dma_mode(ide_drive_t *drive, const u8 speed) * out to be nice and simple. */ -static unsigned int __devinit init_chipset_ich(struct pci_dev *dev) +static unsigned int init_chipset_ich(struct pci_dev *dev) { u32 extra = 0; @@ -450,6 +449,8 @@ static struct pci_driver driver = { .id_table = piix_pci_tbl, .probe = piix_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init piix_ide_init(void) diff --git a/drivers/ide/pci/rz1000.c b/drivers/ide/pci/rz1000.c index 8d11ee838a2..c117a068761 100644 --- a/drivers/ide/pci/rz1000.c +++ b/drivers/ide/pci/rz1000.c @@ -16,7 +16,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/ide.h> #include <linux/init.h> diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c index 8efaed16fea..bdc1fed4126 100644 --- a/drivers/ide/pci/sc1200.c +++ b/drivers/ide/pci/sc1200.c @@ -14,7 +14,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/ide.h> @@ -104,17 +103,19 @@ static void sc1200_tunepio(ide_drive_t *drive, u8 pio) static u8 sc1200_udma_filter(ide_drive_t *drive) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *mate = &hwif->drives[(drive->dn & 1) ^ 1]; - struct hd_driveid *mateid = mate->id; + ide_drive_t *mate = ide_get_pair_dev(drive); + u16 *mateid = mate->id; u8 mask = hwif->ultra_mask; - if (mate->present == 0) + if (mate == NULL) goto out; - if ((mateid->capability & 1) && __ide_dma_bad_drive(mate) == 0) { - if ((mateid->field_valid & 4) && (mateid->dma_ultra & 7)) + if (ata_id_has_dma(mateid) && __ide_dma_bad_drive(mate) == 0) { + if ((mateid[ATA_ID_FIELD_VALID] & 4) && + (mateid[ATA_ID_UDMA_MODES] & 7)) goto out; - if ((mateid->field_valid & 2) && (mateid->dma_mword & 7)) + if ((mateid[ATA_ID_FIELD_VALID] & 2) && + (mateid[ATA_ID_MWDMA_MODES] & 7)) mask = 0; } out: diff --git a/drivers/ide/pci/scc_pata.c b/drivers/ide/pci/scc_pata.c index 44cccd1e086..e92a874b31d 100644 --- a/drivers/ide/pci/scc_pata.c +++ b/drivers/ide/pci/scc_pata.c @@ -26,7 +26,6 @@ #include <linux/module.h> #include <linux/pci.h> #include <linux/delay.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -400,7 +399,7 @@ static int scc_dma_end(ide_drive_t *drive) /* errata A308 workaround: Step5 (check data loss) */ /* We don't check non ide_disk because it is limited to UDMA4 */ if (!(in_be32((void __iomem *)hwif->io_ports.ctl_addr) - & ERR_STAT) && + & ATA_ERR) && drive->media == ide_disk && drive->current_speed > XFER_UDMA_4) { reg = in_be32((void __iomem *)intsts_port); if (!(reg & INTSTS_ACTEINT)) { @@ -504,7 +503,7 @@ static int scc_dma_test_irq(ide_drive_t *drive) /* SCC errata A252,A308 workaround: Step4 */ if ((in_be32((void __iomem *)hwif->io_ports.ctl_addr) - & ERR_STAT) && + & ATA_ERR) && (int_stat & INTSTS_INTRQ)) return 1; diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c index c3bdc6e51a4..3dff2aea317 100644 --- a/drivers/ide/pci/serverworks.c +++ b/drivers/ide/pci/serverworks.c @@ -32,7 +32,6 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -57,8 +56,10 @@ static struct pci_dev *isa_dev; static int check_in_drive_lists (ide_drive_t *drive, const char **list) { + char *m = (char *)&drive->id[ATA_ID_PROD]; + while (*list) - if (!strcmp(*list++, drive->id->model)) + if (!strcmp(*list++, m)) return 1; return 0; } @@ -174,7 +175,7 @@ static void svwks_set_dma_mode(ide_drive_t *drive, const u8 speed) pci_write_config_byte(dev, 0x54, ultra_enable); } -static unsigned int __devinit init_chipset_svwks(struct pci_dev *dev) +static unsigned int init_chipset_svwks(struct pci_dev *dev) { unsigned int reg; u8 btr; @@ -447,6 +448,8 @@ static struct pci_driver driver = { .id_table = svwks_pci_tbl, .probe = svwks_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init svwks_ide_init(void) diff --git a/drivers/ide/pci/sgiioc4.c b/drivers/ide/pci/sgiioc4.c index 681306c9d79..1017fb4f631 100644 --- a/drivers/ide/pci/sgiioc4.c +++ b/drivers/ide/pci/sgiioc4.c @@ -22,7 +22,6 @@ #include <linux/types.h> #include <linux/pci.h> #include <linux/delay.h> -#include <linux/hdreg.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/ioport.h> diff --git a/drivers/ide/pci/siimage.c b/drivers/ide/pci/siimage.c index db2b88a369a..174a873b4c6 100644 --- a/drivers/ide/pci/siimage.c +++ b/drivers/ide/pci/siimage.c @@ -39,7 +39,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> #include <linux/io.h> @@ -223,7 +222,9 @@ static u8 sil_pata_udma_filter(ide_drive_t *drive) static u8 sil_sata_udma_filter(ide_drive_t *drive) { - return strstr(drive->id->model, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6; + char *m = (char *)&drive->id[ATA_ID_PROD]; + + return strstr(m, "Maxtor") ? ATA_UDMA5 : ATA_UDMA6; } /** @@ -243,7 +244,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) ide_hwif_t *hwif = HWIF(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); - ide_drive_t *pair = ide_get_paired_drive(drive); + ide_drive_t *pair = ide_get_pair_dev(drive); u32 speedt = 0; u16 speedp = 0; unsigned long addr = siimage_seldev(drive, 0x04); @@ -257,7 +258,7 @@ static void sil_set_pio_mode(ide_drive_t *drive, u8 pio) u8 unit = drive->select.b.unit; /* trim *taskfile* PIO to the slowest of the master/slave */ - if (pair->present) { + if (pair) { u8 pair_pio = ide_get_best_pio_mode(pair, 255, 4); if (pair_pio < tf_pio) @@ -462,7 +463,7 @@ static void sil_sata_pre_reset(ide_drive_t *drive) * to 133 MHz clocking if the system isn't already set up to do it. */ -static unsigned int __devinit init_chipset_siimage(struct pci_dev *dev) +static unsigned int init_chipset_siimage(struct pci_dev *dev) { struct ide_host *host = pci_get_drvdata(dev); void __iomem *ioaddr = host->host_priv; @@ -616,8 +617,8 @@ static void __devinit init_mmio_iops_siimage(ide_hwif_t *hwif) static int is_dev_seagate_sata(ide_drive_t *drive) { - const char *s = &drive->id->model[0]; - unsigned len = strnlen(s, sizeof(drive->id->model)); + const char *s = (const char *)&drive->id[ATA_ID_PROD]; + unsigned len = strnlen(s, ATA_ID_PROD_LEN); if ((len > 4) && (!memcmp(s, "ST", 2))) if ((!memcmp(s + len - 2, "AS", 2)) || @@ -833,6 +834,8 @@ static struct pci_driver driver = { .id_table = siimage_pci_tbl, .probe = siimage_init_one, .remove = __devexit_p(siimage_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init siimage_ide_init(void) diff --git a/drivers/ide/pci/sis5513.c b/drivers/ide/pci/sis5513.c index 5efe21d6ef9..734dd41f1f6 100644 --- a/drivers/ide/pci/sis5513.c +++ b/drivers/ide/pci/sis5513.c @@ -47,7 +47,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/init.h> #include <linux/ide.h> @@ -448,7 +447,7 @@ static int __devinit sis_find_family(struct pci_dev *dev) return chipset_family; } -static unsigned int __devinit init_chipset_sis5513(struct pci_dev *dev) +static unsigned int init_chipset_sis5513(struct pci_dev *dev) { /* Make general config ops here 1/ tell IDE channels to operate in Compatibility mode only @@ -611,6 +610,8 @@ static struct pci_driver driver = { .id_table = sis5513_pci_tbl, .probe = sis5513_init_one, .remove = __devexit_p(sis5513_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init sis5513_ide_init(void) diff --git a/drivers/ide/pci/sl82c105.c b/drivers/ide/pci/sl82c105.c index 73905bcc08f..37a6b7bdc04 100644 --- a/drivers/ide/pci/sl82c105.c +++ b/drivers/ide/pci/sl82c105.c @@ -17,7 +17,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/ide.h> @@ -62,7 +61,7 @@ static unsigned int get_pio_timings(ide_drive_t *drive, u8 pio) if (cmd_off == 0) cmd_off = 1; - if (pio > 2 || ide_dev_has_iordy(drive->id)) + if (pio > 2 || ata_id_has_iordy(drive->id)) iordy = 0x40; return (cmd_on - 1) << 8 | (cmd_off - 1) | iordy; @@ -272,7 +271,7 @@ static u8 sl82c105_bridge_revision(struct pci_dev *dev) * channel 0 here at least, but channel 1 has to be enabled by * firmware or arch code. We still set both to 16 bits mode. */ -static unsigned int __devinit init_chipset_sl82c105(struct pci_dev *dev) +static unsigned int init_chipset_sl82c105(struct pci_dev *dev) { u32 val; @@ -351,6 +350,8 @@ static struct pci_driver driver = { .id_table = sl82c105_pci_tbl, .probe = sl82c105_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init sl82c105_ide_init(void) diff --git a/drivers/ide/pci/slc90e66.c b/drivers/ide/pci/slc90e66.c index 866d6c65e3a..a9551a13ac5 100644 --- a/drivers/ide/pci/slc90e66.c +++ b/drivers/ide/pci/slc90e66.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> -#include <linux/hdreg.h> #include <linux/ide.h> #include <linux/init.h> @@ -160,6 +159,8 @@ static struct pci_driver driver = { .id_table = slc90e66_pci_tbl, .probe = slc90e66_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init slc90e66_ide_init(void) diff --git a/drivers/ide/pci/triflex.c b/drivers/ide/pci/triflex.c index b77ec35151b..be8715dcee0 100644 --- a/drivers/ide/pci/triflex.c +++ b/drivers/ide/pci/triflex.c @@ -28,7 +28,6 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/kernel.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/ide.h> #include <linux/init.h> @@ -120,6 +119,8 @@ static struct pci_driver driver = { .id_table = triflex_pci_tbl, .probe = triflex_init_one, .remove = ide_pci_remove, + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init triflex_ide_init(void) diff --git a/drivers/ide/pci/trm290.c b/drivers/ide/pci/trm290.c index fd28b49977f..4dfbc6a68b5 100644 --- a/drivers/ide/pci/trm290.c +++ b/drivers/ide/pci/trm290.c @@ -135,7 +135,6 @@ #include <linux/interrupt.h> #include <linux/blkdev.h> #include <linux/init.h> -#include <linux/hdreg.h> #include <linux/pci.h> #include <linux/ide.h> diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c index 94fb9ab3223..acacdaab69c 100644 --- a/drivers/ide/pci/via82cxxx.c +++ b/drivers/ide/pci/via82cxxx.c @@ -154,7 +154,7 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) static void via_set_drive(ide_drive_t *drive, const u8 speed) { ide_hwif_t *hwif = drive->hwif; - ide_drive_t *peer = hwif->drives + (~drive->dn & 1); + ide_drive_t *peer = ide_get_pair_dev(drive); struct pci_dev *dev = to_pci_dev(hwif->dev); struct ide_host *host = pci_get_drvdata(dev); struct via82cxxx_dev *vdev = host->host_priv; @@ -173,7 +173,7 @@ static void via_set_drive(ide_drive_t *drive, const u8 speed) ide_timing_compute(drive, speed, &t, T, UT); - if (peer->present) { + if (peer) { ide_timing_compute(peer, peer->current_speed, &p, T, UT); ide_timing_merge(&p, &t, &t, IDE_TIMING_8BIT); } @@ -215,7 +215,7 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa) /* * Check and handle 80-wire cable presence */ -static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) +static void via_cable_detect(struct via82cxxx_dev *vdev, u32 u) { int i; @@ -267,7 +267,7 @@ static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u) * and initialize its drive independent registers. */ -static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev) +static unsigned int init_chipset_via82cxxx(struct pci_dev *dev) { struct ide_host *host = pci_get_drvdata(dev); struct via82cxxx_dev *vdev = host->host_priv; @@ -492,6 +492,8 @@ static struct pci_driver driver = { .id_table = via_pci_tbl, .probe = via_init_one, .remove = __devexit_p(via_remove), + .suspend = ide_pci_suspend, + .resume = ide_pci_resume, }; static int __init via_ide_init(void) diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index fa2be26272d..c3432da78d5 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -669,9 +669,9 @@ static void set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2, u8 speed) { + u16 *id = drive->id; int cycleTime, accessTime = 0, recTime = 0; unsigned accessTicks, recTicks; - struct hd_driveid *id = drive->id; struct mdma_timings_t* tm = NULL; int i; @@ -686,8 +686,8 @@ set_timings_mdma(ide_drive_t *drive, int intf_type, u32 *timings, u32 *timings2, } /* Check if drive provides explicit DMA cycle time */ - if ((id->field_valid & 2) && id->eide_dma_time) - cycleTime = max_t(int, id->eide_dma_time, cycleTime); + if ((id[ATA_ID_FIELD_VALID] & 2) && id[ATA_ID_EIDE_DMA_TIME]) + cycleTime = max_t(int, id[ATA_ID_EIDE_DMA_TIME], cycleTime); /* OHare limits according to some old Apple sources */ if ((intf_type == controller_ohare) && (cycleTime < 150)) diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index a8e9e8a69a5..9f1f9163a13 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -659,3 +659,36 @@ void ide_pci_remove(struct pci_dev *dev) pci_disable_device(dev); } EXPORT_SYMBOL_GPL(ide_pci_remove); + +#ifdef CONFIG_PM +int ide_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + pci_save_state(dev); + pci_disable_device(dev); + pci_set_power_state(dev, pci_choose_state(dev, state)); + + return 0; +} +EXPORT_SYMBOL_GPL(ide_pci_suspend); + +int ide_pci_resume(struct pci_dev *dev) +{ + struct ide_host *host = pci_get_drvdata(dev); + int rc; + + pci_set_power_state(dev, PCI_D0); + + rc = pci_enable_device(dev); + if (rc) + return rc; + + pci_restore_state(dev); + pci_set_master(dev); + + if (host->init_chipset) + host->init_chipset(dev); + + return 0; +} +EXPORT_SYMBOL_GPL(ide_pci_resume); +#endif diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 3bafaede791..fac82152e4c 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -1047,6 +1047,11 @@ static int __devinit e1000_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_LLTX; + netdev->vlan_features |= NETIF_F_TSO; + netdev->vlan_features |= NETIF_F_TSO6; + netdev->vlan_features |= NETIF_F_HW_CSUM; + netdev->vlan_features |= NETIF_F_SG; + adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); /* initialize eeprom parameters */ diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c index 02cc064c2c8..3d19d00e8ee 100644 --- a/drivers/net/smc911x.c +++ b/drivers/net/smc911x.c @@ -722,6 +722,9 @@ static void smc911x_phy_detect(struct net_device *dev) break; } } + if (phyaddr < 32) + /* Found an external PHY */ + break; } default: /* Internal media only */ diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 81c16cba541..90212ac33be 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -40,7 +40,6 @@ #include <linux/ioport.h> #include <linux/blkdev.h> #include <linux/errno.h> -#include <linux/hdreg.h> #include <linux/slab.h> #include <linux/ide.h> #include <linux/scatterlist.h> @@ -131,50 +130,6 @@ static inline idescsi_scsi_t *drive_to_idescsi(ide_drive_t *ide_drive) return scsihost_to_idescsi(ide_drive->driver_data); } -/* - * PIO data transfer routine using the scatter gather table. - */ -static void ide_scsi_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc, - unsigned int bcount, int write) -{ - ide_hwif_t *hwif = drive->hwif; - const struct ide_tp_ops *tp_ops = hwif->tp_ops; - xfer_func_t *xf = write ? tp_ops->output_data : tp_ops->input_data; - char *buf; - int count; - - while (bcount) { - count = min(pc->sg->length - pc->b_count, bcount); - if (PageHighMem(sg_page(pc->sg))) { - unsigned long flags; - - local_irq_save(flags); - buf = kmap_atomic(sg_page(pc->sg), KM_IRQ0) + - pc->sg->offset; - xf(drive, NULL, buf + pc->b_count, count); - kunmap_atomic(buf - pc->sg->offset, KM_IRQ0); - local_irq_restore(flags); - } else { - buf = sg_virt(pc->sg); - xf(drive, NULL, buf + pc->b_count, count); - } - bcount -= count; pc->b_count += count; - if (pc->b_count == pc->sg->length) { - if (!--pc->sg_cnt) - break; - pc->sg = sg_next(pc->sg); - pc->b_count = 0; - } - } - - if (bcount) { - printk(KERN_ERR "%s: scatter gather table too small, %s\n", - drive->name, write ? "padding with zeros" - : "discarding data"); - ide_pad_transfer(drive, write, bcount); - } -} - static void ide_scsi_hex_dump(u8 *data, int len) { print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, data, len, 0); @@ -244,9 +199,9 @@ idescsi_atapi_error(ide_drive_t *drive, struct request *rq, u8 stat, u8 err) { ide_hwif_t *hwif = drive->hwif; - if (hwif->tp_ops->read_status(hwif) & (BUSY_STAT | DRQ_STAT)) + if (hwif->tp_ops->read_status(hwif) & (ATA_BUSY | ATA_DRQ)) /* force an abort */ - hwif->tp_ops->exec_command(hwif, WIN_IDLEIMMEDIATE); + hwif->tp_ops->exec_command(hwif, ATA_CMD_IDLEIMMEDIATE); rq->errors++; @@ -344,7 +299,7 @@ static ide_startstop_t idescsi_pc_intr (ide_drive_t *drive) return ide_pc_intr(drive, pc, idescsi_pc_intr, get_timeout(pc), idescsi_expiry, NULL, NULL, NULL, - ide_scsi_io_buffers); + ide_io_buffers); } static ide_startstop_t idescsi_transfer_pc(ide_drive_t *drive) @@ -430,21 +385,41 @@ static ide_startstop_t idescsi_do_request (ide_drive_t *drive, struct request *r } #ifdef CONFIG_IDE_PROC_FS -static void idescsi_add_settings(ide_drive_t *drive) -{ - idescsi_scsi_t *scsi = drive_to_idescsi(drive); - -/* - * drive setting name read/write data type min max mul_factor div_factor data pointer set function - */ - ide_add_setting(drive, "bios_cyl", SETTING_RW, TYPE_INT, 0, 1023, 1, 1, &drive->bios_cyl, NULL); - ide_add_setting(drive, "bios_head", SETTING_RW, TYPE_BYTE, 0, 255, 1, 1, &drive->bios_head, NULL); - ide_add_setting(drive, "bios_sect", SETTING_RW, TYPE_BYTE, 0, 63, 1, 1, &drive->bios_sect, NULL); - ide_add_setting(drive, "transform", SETTING_RW, TYPE_INT, 0, 3, 1, 1, &scsi->transform, NULL); - ide_add_setting(drive, "log", SETTING_RW, TYPE_INT, 0, 1, 1, 1, &scsi->log, NULL); -} -#else -static inline void idescsi_add_settings(ide_drive_t *drive) { ; } +#define ide_scsi_devset_get(name, field) \ +static int get_##name(ide_drive_t *drive) \ +{ \ + idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ + return scsi->field; \ +} + +#define ide_scsi_devset_set(name, field) \ +static int set_##name(ide_drive_t *drive, int arg) \ +{ \ + idescsi_scsi_t *scsi = drive_to_idescsi(drive); \ + scsi->field = arg; \ + return 0; \ +} + +#define ide_scsi_devset_rw_field(_name, _field) \ +ide_scsi_devset_get(_name, _field); \ +ide_scsi_devset_set(_name, _field); \ +IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name); + +ide_devset_rw_field(bios_cyl, bios_cyl); +ide_devset_rw_field(bios_head, bios_head); +ide_devset_rw_field(bios_sect, bios_sect); + +ide_scsi_devset_rw_field(transform, transform); +ide_scsi_devset_rw_field(log, log); + +static const struct ide_proc_devset idescsi_settings[] = { + IDE_PROC_DEVSET(bios_cyl, 0, 1023), + IDE_PROC_DEVSET(bios_head, 0, 255), + IDE_PROC_DEVSET(bios_sect, 0, 63), + IDE_PROC_DEVSET(log, 0, 1), + IDE_PROC_DEVSET(transform, 0, 3), + { 0 }, +}; #endif /* @@ -452,7 +427,7 @@ static inline void idescsi_add_settings(ide_drive_t *drive) { ; } */ static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi) { - if (drive->id && (drive->id->config & 0x0060) == 0x20) + if ((drive->id[ATA_ID_CONFIG] & 0x0060) == 0x20) set_bit(IDE_AFLAG_DRQ_INTERRUPT, &drive->atapi_flags); clear_bit(IDESCSI_SG_TRANSFORM, &scsi->transform); #if IDESCSI_DEBUG_LOG @@ -461,7 +436,7 @@ static void idescsi_setup (ide_drive_t *drive, idescsi_scsi_t *scsi) drive->pc_callback = ide_scsi_callback; - idescsi_add_settings(drive); + ide_proc_register_driver(drive, scsi->driver); } static void ide_scsi_remove(ide_drive_t *drive) @@ -503,12 +478,12 @@ static ide_driver_t idescsi_driver = { .remove = ide_scsi_remove, .version = IDESCSI_VERSION, .media = ide_scsi, - .supports_dsc_overlap = 0, .do_request = idescsi_do_request, .end_request = idescsi_end_request, .error = idescsi_atapi_error, #ifdef CONFIG_IDE_PROC_FS .proc = idescsi_proc, + .settings = idescsi_settings, #endif }; @@ -811,6 +786,7 @@ static int ide_scsi_probe(ide_drive_t *drive) struct gendisk *g; static int warned; int err = -ENOMEM; + u16 last_lun; if (!warned && drive->media == ide_cdrom) { printk(KERN_WARNING "ide-scsi is deprecated for cd burning! Use ide-cd and give dev=/dev/hdX as device\n"); @@ -821,7 +797,6 @@ static int ide_scsi_probe(ide_drive_t *drive) return -ENODEV; if (!strstr("ide-scsi", drive->driver_req) || - !drive->present || drive->media == ide_disk || !(host = scsi_host_alloc(&idescsi_template,sizeof(idescsi_scsi_t)))) return -ENODEV; @@ -836,12 +811,12 @@ static int ide_scsi_probe(ide_drive_t *drive) host->max_id = 1; - if (drive->id->last_lun) - debug_log("%s: id->last_lun=%u\n", drive->name, - drive->id->last_lun); + last_lun = drive->id[ATA_ID_LAST_LUN]; + if (last_lun) + debug_log("%s: last_lun=%u\n", drive->name, last_lun); - if ((drive->id->last_lun & 0x7) != 7) - host->max_lun = (drive->id->last_lun & 0x7) + 1; + if ((last_lun & 7) != 7) + host->max_lun = (last_lun & 7) + 1; else host->max_lun = 1; @@ -852,7 +827,6 @@ static int ide_scsi_probe(ide_drive_t *drive) idescsi->host = host; idescsi->disk = g; g->private_data = &idescsi->driver; - ide_proc_register_driver(drive, &idescsi_driver); err = 0; idescsi_setup(drive, idescsi); g->fops = &idescsi_ops; diff --git a/fs/Kconfig b/fs/Kconfig index abccb5dab9a..40183d94b68 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -136,37 +136,51 @@ config EXT3_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. -config EXT4DEV_FS - tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" - depends on EXPERIMENTAL +config EXT4_FS + tristate "The Extended 4 (ext4) filesystem" select JBD2 select CRC16 help - Ext4dev is a predecessor filesystem of the next generation - extended fs ext4, based on ext3 filesystem code. It will be - renamed ext4 fs later, once ext4dev is mature and stabilized. + This is the next generation of the ext3 filesystem. Unlike the change from ext2 filesystem to ext3 filesystem, - the on-disk format of ext4dev is not the same as ext3 any more: - it is based on extent maps and it supports 48-bit physical block - numbers. These combined on-disk format changes will allow - ext4dev/ext4 to handle more than 16 TB filesystem volumes -- - a hard limit that ext3 cannot overcome without changing the - on-disk format. - - Other than extent maps and 48-bit block numbers, ext4dev also is - likely to have other new features such as persistent preallocation, - high resolution time stamps, and larger file support etc. These - features will be added to ext4dev gradually. + the on-disk format of ext4 is not forwards compatible with + ext3; it is based on extent maps and it supports 48-bit + physical block numbers. The ext4 filesystem also supports delayed + allocation, persistent preallocation, high resolution time stamps, + and a number of other features to improve performance and speed + up fsck time. For more information, please see the web pages at + http://ext4.wiki.kernel.org. + + The ext4 filesystem will support mounting an ext3 + filesystem; while there will be some performance gains from + the delayed allocation and inode table readahead, the best + performance gains will require enabling ext4 features in the + filesystem, or formating a new filesystem as an ext4 + filesystem initially. To compile this file system support as a module, choose M here. The module will be called ext4dev. If unsure, say N. -config EXT4DEV_FS_XATTR - bool "Ext4dev extended attributes" - depends on EXT4DEV_FS +config EXT4DEV_COMPAT + bool "Enable ext4dev compatibility" + depends on EXT4_FS + help + Starting with 2.6.28, the name of the ext4 filesystem was + renamed from ext4dev to ext4. Unfortunately there are some + lagecy userspace programs (such as klibc's fstype) have + "ext4dev" hardcoded. + + To enable backwards compatibility so that systems that are + still expecting to mount ext4 filesystems using ext4dev, + chose Y here. This feature will go away by 2.6.31, so + please arrange to get your userspace programs fixed! + +config EXT4_FS_XATTR + bool "Ext4 extended attributes" + depends on EXT4_FS default y help Extended attributes are name:value pairs associated with inodes by @@ -175,11 +189,11 @@ config EXT4DEV_FS_XATTR If unsure, say N. - You need this for POSIX ACL support on ext4dev/ext4. + You need this for POSIX ACL support on ext4. -config EXT4DEV_FS_POSIX_ACL - bool "Ext4dev POSIX Access Control Lists" - depends on EXT4DEV_FS_XATTR +config EXT4_FS_POSIX_ACL + bool "Ext4 POSIX Access Control Lists" + depends on EXT4_FS_XATTR select FS_POSIX_ACL help POSIX Access Control Lists (ACLs) support permissions for users and @@ -190,14 +204,14 @@ config EXT4DEV_FS_POSIX_ACL If you don't know what Access Control Lists are, say N -config EXT4DEV_FS_SECURITY - bool "Ext4dev Security Labels" - depends on EXT4DEV_FS_XATTR +config EXT4_FS_SECURITY + bool "Ext4 Security Labels" + depends on EXT4_FS_XATTR help Security labels support alternative access control models implemented by security modules like SELinux. This option enables an extended attribute handler for file security - labels in the ext4dev/ext4 filesystem. + labels in the ext4 filesystem. If you are not using a security module that requires using extended attributes for file security labels, say N. @@ -240,22 +254,22 @@ config JBD2 help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. It is currently used by - the ext4dev/ext4 filesystem, but it could also be used to add + the ext4 filesystem, but it could also be used to add journal support to other file systems or block devices such as RAID or LVM. - If you are using ext4dev/ext4, you need to say Y here. If you are not - using ext4dev/ext4 then you will probably want to say N. + If you are using ext4, you need to say Y here. If you are not + using ext4 then you will probably want to say N. To compile this device as a module, choose M here. The module will be - called jbd2. If you are compiling ext4dev/ext4 into the kernel, + called jbd2. If you are compiling ext4 into the kernel, you cannot compile this code as a module. config JBD2_DEBUG - bool "JBD2 (ext4dev/ext4) debugging support" + bool "JBD2 (ext4) debugging support" depends on JBD2 && DEBUG_FS help - If you are using the ext4dev/ext4 journaled file system (or + If you are using the ext4 journaled file system (or potentially any other filesystem/device using JBD2), this option allows you to enable debugging output while the system is running, in order to help track down any problems you are having. @@ -270,9 +284,9 @@ config JBD2_DEBUG config FS_MBCACHE # Meta block cache for Extended Attributes (ext2/ext3/ext4) tristate - depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR - default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y - default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m + depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR + default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y + default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m config REISERFS_FS tristate "Reiserfs support" diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff1..de404b00eb0 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -69,7 +69,7 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 -obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev +obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev obj-$(CONFIG_JBD) += jbd/ obj-$(CONFIG_JBD2) += jbd2/ obj-$(CONFIG_EXT2_FS) += ext2/ diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 47d88da2d33..bae998c1e44 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *); extern int ext2_setattr (struct dentry *, struct iattr *); extern void ext2_set_inode_flags(struct inode *inode); extern void ext2_get_inode_flags(struct ext2_inode_info *); +extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); int __ext2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata); diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5f2fa9c3629..45ed0712218 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = { #endif .setattr = ext2_setattr, .permission = ext2_permission, + .fiemap = ext2_fiemap, }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 991d6dfeb51..7658b33e265 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -31,6 +31,7 @@ #include <linux/writeback.h> #include <linux/buffer_head.h> #include <linux/mpage.h> +#include <linux/fiemap.h> #include "ext2.h" #include "acl.h" #include "xip.h" @@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ } +int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, + ext2_get_block); +} + static int ext2_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, ext2_get_block, wbc); diff --git a/fs/ext3/file.c b/fs/ext3/file.c index acc4913d301..3be1e0689c9 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = { .removexattr = generic_removexattr, #endif .permission = ext3_permission, + .fiemap = ext3_fiemap, }; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b11..ebfec4d0148 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -36,6 +36,7 @@ #include <linux/mpage.h> #include <linux/uio.h> #include <linux/bio.h> +#include <linux/fiemap.h> #include "xattr.h" #include "acl.h" @@ -981,6 +982,13 @@ out: return ret; } +int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + return generic_block_fiemap(inode, fieinfo, start, len, + ext3_get_block); +} + /* * `handle' can be NULL if create is zero */ diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ac6fa8ca0a2..a8ff003a00f 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -2,12 +2,12 @@ # Makefile for the linux ext4-filesystem routines. # -obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o +obj-$(CONFIG_EXT4_FS) += ext4.o -ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ ext4_jbd2.o migrate.o mballoc.o -ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o -ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o +ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o +ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cd2b855a07d..cb45257a246 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size) } } -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl if the ACL has not been cached */ #define EXT4_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext4_permission (struct inode *, int); -extern int ext4_acl_chmod (struct inode *); -extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); +extern int ext4_permission(struct inode *, int); +extern int ext4_acl_chmod(struct inode *); +extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); -#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ +#else /* CONFIG_EXT4_FS_POSIX_ACL */ #include <linux/sched.h> #define ext4_permission NULL @@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) { return 0; } -#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ +#endif /* CONFIG_EXT4_FS_POSIX_ACL */ diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e9fa960ba6d..bd2ece22882 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, } return used_blocks; } + /* Initializes an uninitialized block bitmap if given, and returns the * number of blocks free in the group. */ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, @@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, */ group_blocks = ext4_blocks_count(sbi->s_es) - le32_to_cpu(sbi->s_es->s_first_data_block) - - (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); + (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); } else { group_blocks = EXT4_BLOCKS_PER_GROUP(sb); } @@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, * @bh: pointer to the buffer head to store the block * group descriptor */ -struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, +struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t block_group, - struct buffer_head ** bh) + struct buffer_head **bh) { unsigned long group_desc; unsigned long offset; - struct ext4_group_desc * desc; + struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); if (block_group >= sbi->s_groups_count) { - ext4_error (sb, "ext4_get_group_desc", - "block_group >= groups_count - " - "block_group = %lu, groups_count = %lu", - block_group, sbi->s_groups_count); + ext4_error(sb, "ext4_get_group_desc", + "block_group >= groups_count - " + "block_group = %lu, groups_count = %lu", + block_group, sbi->s_groups_count); return NULL; } @@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext4_error (sb, "ext4_get_group_desc", - "Group descriptor not loaded - " - "block_group = %lu, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); + ext4_error(sb, "ext4_get_group_desc", + "Group descriptor not loaded - " + "block_group = %lu, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); return NULL; } @@ -302,8 +303,8 @@ err_out: struct buffer_head * ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) { - struct ext4_group_desc * desc; - struct buffer_head * bh = NULL; + struct ext4_group_desc *desc; + struct buffer_head *bh = NULL; ext4_fsblk_t bitmap_blk; desc = ext4_get_group_desc(sb, block_group, NULL); @@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } - if (bh_uptodate_or_lock(bh)) + if (buffer_uptodate(bh) && + !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) return bh; + lock_buffer(bh); spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); @@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) */ return bh; } -/* - * The reservation window structure operations - * -------------------------------------------- - * Operations include: - * dump, find, add, remove, is_empty, find_next_reservable_window, etc. - * - * We use a red-black tree to represent per-filesystem reservation - * windows. - * - */ - -/** - * __rsv_window_dump() -- Dump the filesystem block allocation reservation map - * @rb_root: root of per-filesystem reservation rb tree - * @verbose: verbose mode - * @fn: function which wishes to dump the reservation map - * - * If verbose is turned on, it will print the whole block reservation - * windows(start, end). Otherwise, it will only print out the "bad" windows, - * those windows that overlap with their immediate neighbors. - */ -#if 1 -static void __rsv_window_dump(struct rb_root *root, int verbose, - const char *fn) -{ - struct rb_node *n; - struct ext4_reserve_window_node *rsv, *prev; - int bad; - -restart: - n = rb_first(root); - bad = 0; - prev = NULL; - - printk("Block Allocation Reservation Windows Map (%s):\n", fn); - while (n) { - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); - if (verbose) - printk("reservation window 0x%p " - "start: %llu, end: %llu\n", - rsv, rsv->rsv_start, rsv->rsv_end); - if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { - printk("Bad reservation %p (start >= end)\n", - rsv); - bad = 1; - } - if (prev && prev->rsv_end >= rsv->rsv_start) { - printk("Bad reservation %p (prev->end >= start)\n", - rsv); - bad = 1; - } - if (bad) { - if (!verbose) { - printk("Restarting reservation walk in verbose mode\n"); - verbose = 1; - goto restart; - } - } - n = rb_next(n); - prev = rsv; - } - printk("Window map complete.\n"); - BUG_ON(bad); -} -#define rsv_window_dump(root, verbose) \ - __rsv_window_dump((root), (verbose), __func__) -#else -#define rsv_window_dump(root, verbose) do {} while (0) -#endif - -/** - * goal_in_my_reservation() - * @rsv: inode's reservation window - * @grp_goal: given goal block relative to the allocation block group - * @group: the current allocation block group - * @sb: filesystem super block - * - * Test if the given goal block (group relative) is within the file's - * own block reservation window range. - * - * If the reservation window is outside the goal allocation group, return 0; - * grp_goal (given goal block) could be -1, which means no specific - * goal block. In this case, always return 1. - * If the goal block is within the reservation window, return 1; - * otherwise, return 0; - */ -static int -goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, - ext4_group_t group, struct super_block *sb) -{ - ext4_fsblk_t group_first_block, group_last_block; - - group_first_block = ext4_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - - if ((rsv->_rsv_start > group_last_block) || - (rsv->_rsv_end < group_first_block)) - return 0; - if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) - || (grp_goal + group_first_block > rsv->_rsv_end))) - return 0; - return 1; -} - -/** - * search_reserve_window() - * @rb_root: root of reservation tree - * @goal: target allocation block - * - * Find the reserved window which includes the goal, or the previous one - * if the goal is not in any window. - * Returns NULL if there are no windows or if all windows start after the goal. - */ -static struct ext4_reserve_window_node * -search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) -{ - struct rb_node *n = root->rb_node; - struct ext4_reserve_window_node *rsv; - - if (!n) - return NULL; - - do { - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); - - if (goal < rsv->rsv_start) - n = n->rb_left; - else if (goal > rsv->rsv_end) - n = n->rb_right; - else - return rsv; - } while (n); - /* - * We've fallen off the end of the tree: the goal wasn't inside - * any particular node. OK, the previous node must be to one - * side of the interval containing the goal. If it's the RHS, - * we need to back up one. - */ - if (rsv->rsv_start > goal) { - n = rb_prev(&rsv->rsv_node); - rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); - } - return rsv; -} - -/** - * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. - * @sb: super block - * @rsv: reservation window to add - * - * Must be called with rsv_lock hold. - */ -void ext4_rsv_window_add(struct super_block *sb, - struct ext4_reserve_window_node *rsv) -{ - struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; - struct rb_node *node = &rsv->rsv_node; - ext4_fsblk_t start = rsv->rsv_start; - - struct rb_node ** p = &root->rb_node; - struct rb_node * parent = NULL; - struct ext4_reserve_window_node *this; - - while (*p) - { - parent = *p; - this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); - - if (start < this->rsv_start) - p = &(*p)->rb_left; - else if (start > this->rsv_end) - p = &(*p)->rb_right; - else { - rsv_window_dump(root, 1); - BUG(); - } - } - - rb_link_node(node, parent, p); - rb_insert_color(node, root); -} - -/** - * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree - * @sb: super block - * @rsv: reservation window to remove - * - * Mark the block reservation window as not allocated, and unlink it - * from the filesystem reservation window rb tree. Must be called with - * rsv_lock hold. - */ -static void rsv_window_remove(struct super_block *sb, - struct ext4_reserve_window_node *rsv) -{ - rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_alloc_hit = 0; - rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); -} - -/* - * rsv_is_empty() -- Check if the reservation window is allocated. - * @rsv: given reservation window to check - * - * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. - */ -static inline int rsv_is_empty(struct ext4_reserve_window *rsv) -{ - /* a valid reservation end block could not be 0 */ - return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; -} - -/** - * ext4_init_block_alloc_info() - * @inode: file inode structure - * - * Allocate and initialize the reservation window structure, and - * link the window to the ext4 inode structure at last - * - * The reservation window structure is only dynamically allocated - * and linked to ext4 inode the first time the open file - * needs a new block. So, before every ext4_new_block(s) call, for - * regular files, we should check whether the reservation window - * structure exists or not. In the latter case, this function is called. - * Fail to do so will result in block reservation being turned off for that - * open file. - * - * This function is called from ext4_get_blocks_handle(), also called - * when setting the reservation window size through ioctl before the file - * is open for write (needs block allocation). - * - * Needs down_write(i_data_sem) protection prior to call this function. - */ -void ext4_init_block_alloc_info(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; - struct super_block *sb = inode->i_sb; - - block_i = kmalloc(sizeof(*block_i), GFP_NOFS); - if (block_i) { - struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; - - rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - - /* - * if filesystem is mounted with NORESERVATION, the goal - * reservation window size is set to zero to indicate - * block reservation is off - */ - if (!test_opt(sb, RESERVATION)) - rsv->rsv_goal_size = 0; - else - rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; - rsv->rsv_alloc_hit = 0; - block_i->last_alloc_logical_block = 0; - block_i->last_alloc_physical_block = 0; - } - ei->i_block_alloc_info = block_i; -} - -/** - * ext4_discard_reservation() - * @inode: inode - * - * Discard(free) block reservation window on last file close, or truncate - * or at last iput(). - * - * It is being called in three cases: - * ext4_release_file(): last writer close the file - * ext4_clear_inode(): last iput(), when nobody link to this file. - * ext4_truncate(): when the block indirect map is about to change. - * - */ -void ext4_discard_reservation(struct inode *inode) -{ - struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; - struct ext4_reserve_window_node *rsv; - spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; - - ext4_mb_discard_inode_preallocations(inode); - - if (!block_i) - return; - - rsv = &block_i->rsv_window_node; - if (!rsv_is_empty(&rsv->rsv_window)) { - spin_lock(rsv_lock); - if (!rsv_is_empty(&rsv->rsv_window)) - rsv_window_remove(inode->i_sb, rsv); - spin_unlock(rsv_lock); - } -} /** * ext4_free_blocks_sb() -- Free given blocks and update quota @@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode) * @block: start physcial block to free * @count: number of blocks to free * @pdquot_freed_blocks: pointer to quota + * + * XXX This function is only used by the on-line resizing code, which + * should probably be fixed up to call the mballoc variant. There + * this needs to be cleaned up later; in fact, I'm not convinced this + * is 100% correct in the face of the mballoc code. The online resizing + * code needs to be fixed up to more tightly (and correctly) interlock + * with the mballoc code. */ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, ext4_fsblk_t block, unsigned long count, @@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, ext4_grpblk_t bit; unsigned long i; unsigned long overflow; - struct ext4_group_desc * desc; - struct ext4_super_block * es; + struct ext4_group_desc *desc; + struct ext4_super_block *es; struct ext4_sb_info *sbi; int err = 0, ret; ext4_grpblk_t group_freed; @@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || block + count > ext4_blocks_count(es)) { - ext4_error (sb, "ext4_free_blocks", - "Freeing blocks not in datazone - " - "block = %llu, count = %lu", block, count); + ext4_error(sb, "ext4_free_blocks", + "Freeing blocks not in datazone - " + "block = %llu, count = %lu", block, count); goto error_return; } - ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); + ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); do_more: overflow = 0; @@ -694,7 +409,7 @@ do_more: bitmap_bh = ext4_read_block_bitmap(sb, block_group); if (!bitmap_bh) goto error_return; - desc = ext4_get_group_desc (sb, block_group, &gd_bh); + desc = ext4_get_group_desc(sb, block_group, &gd_bh); if (!desc) goto error_return; @@ -703,10 +418,10 @@ do_more: in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || in_range(block + count - 1, ext4_inode_table(sb, desc), sbi->s_itb_per_group)) { - ext4_error (sb, "ext4_free_blocks", - "Freeing blocks in system zones - " - "Block = %llu, count = %lu", - block, count); + ext4_error(sb, "ext4_free_blocks", + "Freeing blocks in system zones - " + "Block = %llu, count = %lu", + block, count); goto error_return; } @@ -848,7 +563,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count, int metadata) { - struct super_block * sb; + struct super_block *sb; unsigned long dquot_freed_blocks; /* this isn't the right place to decide whether block is metadata @@ -859,748 +574,52 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, sb = inode->i_sb; - if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) - ext4_free_blocks_sb(handle, sb, block, count, - &dquot_freed_blocks); - else - ext4_mb_free_blocks(handle, inode, block, count, - metadata, &dquot_freed_blocks); + ext4_mb_free_blocks(handle, inode, block, count, + metadata, &dquot_freed_blocks); if (dquot_freed_blocks) DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); return; } -/** - * ext4_test_allocatable() - * @nr: given allocation block group - * @bh: bufferhead contains the bitmap of the given block group - * - * For ext4 allocations, we must not reuse any blocks which are - * allocated in the bitmap buffer's "last committed data" copy. This - * prevents deletes from freeing up the page for reuse until we have - * committed the delete transaction. - * - * If we didn't do this, then deleting something and reallocating it as - * data would allow the old block to be overwritten before the - * transaction committed (because we force data to disk before commit). - * This would lead to corruption if we crashed between overwriting the - * data and committing the delete. - * - * @@@ We may want to make this allocation behaviour conditional on - * data-writes at some point, and disable it for metadata allocations or - * sync-data inodes. - */ -static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) -{ - int ret; - struct journal_head *jh = bh2jh(bh); - - if (ext4_test_bit(nr, bh->b_data)) - return 0; - - jbd_lock_bh_state(bh); - if (!jh->b_committed_data) - ret = 1; - else - ret = !ext4_test_bit(nr, jh->b_committed_data); - jbd_unlock_bh_state(bh); - return ret; -} - -/** - * bitmap_search_next_usable_block() - * @start: the starting block (group relative) of the search - * @bh: bufferhead contains the block group bitmap - * @maxblocks: the ending block (group relative) of the reservation - * - * The bitmap search --- search forward alternately through the actual - * bitmap on disk and the last-committed copy in journal, until we find a - * bit free in both bitmaps. - */ -static ext4_grpblk_t -bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, - ext4_grpblk_t maxblocks) +int ext4_claim_free_blocks(struct ext4_sb_info *sbi, + s64 nblocks) { - ext4_grpblk_t next; - struct journal_head *jh = bh2jh(bh); - - while (start < maxblocks) { - next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); - if (next >= maxblocks) - return -1; - if (ext4_test_allocatable(next, bh)) - return next; - jbd_lock_bh_state(bh); - if (jh->b_committed_data) - start = ext4_find_next_zero_bit(jh->b_committed_data, - maxblocks, next); - jbd_unlock_bh_state(bh); - } - return -1; -} + s64 free_blocks, dirty_blocks; + s64 root_blocks = 0; + struct percpu_counter *fbc = &sbi->s_freeblocks_counter; + struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; -/** - * find_next_usable_block() - * @start: the starting block (group relative) to find next - * allocatable block in bitmap. - * @bh: bufferhead contains the block group bitmap - * @maxblocks: the ending block (group relative) for the search - * - * Find an allocatable block in a bitmap. We honor both the bitmap and - * its last-committed copy (if that exists), and perform the "most - * appropriate allocation" algorithm of looking for a free block near - * the initial goal; then for a free byte somewhere in the bitmap; then - * for any free bit in the bitmap. - */ -static ext4_grpblk_t -find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, - ext4_grpblk_t maxblocks) -{ - ext4_grpblk_t here, next; - char *p, *r; - - if (start > 0) { - /* - * The goal was occupied; search forward for a free - * block within the next XX blocks. - * - * end_goal is more or less random, but it has to be - * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the - * next 64-bit boundary is simple.. - */ - ext4_grpblk_t end_goal = (start + 63) & ~63; - if (end_goal > maxblocks) - end_goal = maxblocks; - here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); - if (here < end_goal && ext4_test_allocatable(here, bh)) - return here; - ext4_debug("Bit not found near goal\n"); - } - - here = start; - if (here < 0) - here = 0; - - p = ((char *)bh->b_data) + (here >> 3); - r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); - next = (r - ((char *)bh->b_data)) << 3; - - if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) - return next; - - /* - * The bitmap search --- search forward alternately through the actual - * bitmap and the last-committed copy until we find a bit free in - * both - */ - here = bitmap_search_next_usable_block(here, bh, maxblocks); - return here; -} - -/** - * claim_block() - * @block: the free block (group relative) to allocate - * @bh: the bufferhead containts the block group bitmap - * - * We think we can allocate this block in this bitmap. Try to set the bit. - * If that succeeds then check that nobody has allocated and then freed the - * block since we saw that is was not marked in b_committed_data. If it _was_ - * allocated and freed then clear the bit in the bitmap again and return - * zero (failure). - */ -static inline int -claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) -{ - struct journal_head *jh = bh2jh(bh); - int ret; - - if (ext4_set_bit_atomic(lock, block, bh->b_data)) - return 0; - jbd_lock_bh_state(bh); - if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { - ext4_clear_bit_atomic(lock, block, bh->b_data); - ret = 0; - } else { - ret = 1; - } - jbd_unlock_bh_state(bh); - return ret; -} + free_blocks = percpu_counter_read_positive(fbc); + dirty_blocks = percpu_counter_read_positive(dbc); -/** - * ext4_try_to_allocate() - * @sb: superblock - * @handle: handle to this transaction - * @group: given allocation block group - * @bitmap_bh: bufferhead holds the block bitmap - * @grp_goal: given target block within the group - * @count: target number of blocks to allocate - * @my_rsv: reservation window - * - * Attempt to allocate blocks within a give range. Set the range of allocation - * first, then find the first free bit(s) from the bitmap (within the range), - * and at last, allocate the blocks by claiming the found free bit as allocated. - * - * To set the range of this allocation: - * if there is a reservation window, only try to allocate block(s) from the - * file's own reservation window; - * Otherwise, the allocation range starts from the give goal block, ends at - * the block group's last block. - * - * If we failed to allocate the desired block then we may end up crossing to a - * new bitmap. In that case we must release write access to the old one via - * ext4_journal_release_buffer(), else we'll run out of credits. - */ -static ext4_grpblk_t -ext4_try_to_allocate(struct super_block *sb, handle_t *handle, - ext4_group_t group, struct buffer_head *bitmap_bh, - ext4_grpblk_t grp_goal, unsigned long *count, - struct ext4_reserve_window *my_rsv) -{ - ext4_fsblk_t group_first_block; - ext4_grpblk_t start, end; - unsigned long num = 0; - - /* we do allocation within the reservation window if we have a window */ - if (my_rsv) { - group_first_block = ext4_group_first_block_no(sb, group); - if (my_rsv->_rsv_start >= group_first_block) - start = my_rsv->_rsv_start - group_first_block; - else - /* reservation window cross group boundary */ - start = 0; - end = my_rsv->_rsv_end - group_first_block + 1; - if (end > EXT4_BLOCKS_PER_GROUP(sb)) - /* reservation window crosses group boundary */ - end = EXT4_BLOCKS_PER_GROUP(sb); - if ((start <= grp_goal) && (grp_goal < end)) - start = grp_goal; - else - grp_goal = -1; - } else { - if (grp_goal > 0) - start = grp_goal; - else - start = 0; - end = EXT4_BLOCKS_PER_GROUP(sb); - } - - BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); - -repeat: - if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { - grp_goal = find_next_usable_block(start, bitmap_bh, end); - if (grp_goal < 0) - goto fail_access; - if (!my_rsv) { - int i; - - for (i = 0; i < 7 && grp_goal > start && - ext4_test_allocatable(grp_goal - 1, - bitmap_bh); - i++, grp_goal--) - ; - } - } - start = grp_goal; - - if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), - grp_goal, bitmap_bh)) { - /* - * The block was allocated by another thread, or it was - * allocated and then freed by another thread - */ - start++; - grp_goal++; - if (start >= end) - goto fail_access; - goto repeat; - } - num++; - grp_goal++; - while (num < *count && grp_goal < end - && ext4_test_allocatable(grp_goal, bitmap_bh) - && claim_block(sb_bgl_lock(EXT4_SB(sb), group), - grp_goal, bitmap_bh)) { - num++; - grp_goal++; - } - *count = num; - return grp_goal - num; -fail_access: - *count = num; - return -1; -} - -/** - * find_next_reservable_window(): - * find a reservable space within the given range. - * It does not allocate the reservation window for now: - * alloc_new_reservation() will do the work later. - * - * @search_head: the head of the searching list; - * This is not necessarily the list head of the whole filesystem - * - * We have both head and start_block to assist the search - * for the reservable space. The list starts from head, - * but we will shift to the place where start_block is, - * then start from there, when looking for a reservable space. - * - * @size: the target new reservation window size - * - * @group_first_block: the first block we consider to start - * the real search from - * - * @last_block: - * the maximum block number that our goal reservable space - * could start from. This is normally the last block in this - * group. The search will end when we found the start of next - * possible reservable space is out of this boundary. - * This could handle the cross boundary reservation window - * request. - * - * basically we search from the given range, rather than the whole - * reservation double linked list, (start_block, last_block) - * to find a free region that is of my size and has not - * been reserved. - * - */ -static int find_next_reservable_window( - struct ext4_reserve_window_node *search_head, - struct ext4_reserve_window_node *my_rsv, - struct super_block * sb, - ext4_fsblk_t start_block, - ext4_fsblk_t last_block) -{ - struct rb_node *next; - struct ext4_reserve_window_node *rsv, *prev; - ext4_fsblk_t cur; - int size = my_rsv->rsv_goal_size; - - /* TODO: make the start of the reservation window byte-aligned */ - /* cur = *start_block & ~7;*/ - cur = start_block; - rsv = search_head; - if (!rsv) - return -1; - - while (1) { - if (cur <= rsv->rsv_end) - cur = rsv->rsv_end + 1; - - /* TODO? - * in the case we could not find a reservable space - * that is what is expected, during the re-search, we could - * remember what's the largest reservable space we could have - * and return that one. - * - * For now it will fail if we could not find the reservable - * space with expected-size (or more)... - */ - if (cur > last_block) - return -1; /* fail */ - - prev = rsv; - next = rb_next(&rsv->rsv_node); - rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); + if (!capable(CAP_SYS_RESOURCE) && + sbi->s_resuid != current->fsuid && + (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) + root_blocks = ext4_r_blocks_count(sbi->s_es); - /* - * Reached the last reservation, we can just append to the - * previous one. - */ - if (!next) - break; - - if (cur + size <= rsv->rsv_start) { - /* - * Found a reserveable space big enough. We could - * have a reservation across the group boundary here - */ - break; + if (free_blocks - (nblocks + root_blocks + dirty_blocks) < + EXT4_FREEBLOCKS_WATERMARK) { + free_blocks = percpu_counter_sum(fbc); + dirty_blocks = percpu_counter_sum(dbc); + if (dirty_blocks < 0) { + printk(KERN_CRIT "Dirty block accounting " + "went wrong %lld\n", + dirty_blocks); } } - /* - * we come here either : - * when we reach the end of the whole list, - * and there is empty reservable space after last entry in the list. - * append it to the end of the list. - * - * or we found one reservable space in the middle of the list, - * return the reservation window that we could append to. - * succeed. + /* Check whether we have space after + * accounting for current dirty blocks */ + if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) + /* we don't have free space */ + return -ENOSPC; - if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) - rsv_window_remove(sb, my_rsv); - - /* - * Let's book the whole avaliable window for now. We will check the - * disk bitmap later and then, if there are free blocks then we adjust - * the window size if it's larger than requested. - * Otherwise, we will remove this node from the tree next time - * call find_next_reservable_window. - */ - my_rsv->rsv_start = cur; - my_rsv->rsv_end = cur + size - 1; - my_rsv->rsv_alloc_hit = 0; - - if (prev != my_rsv) - ext4_rsv_window_add(sb, my_rsv); - + /* Add the blocks to nblocks */ + percpu_counter_add(dbc, nblocks); return 0; } /** - * alloc_new_reservation()--allocate a new reservation window - * - * To make a new reservation, we search part of the filesystem - * reservation list (the list that inside the group). We try to - * allocate a new reservation window near the allocation goal, - * or the beginning of the group, if there is no goal. - * - * We first find a reservable space after the goal, then from - * there, we check the bitmap for the first free block after - * it. If there is no free block until the end of group, then the - * whole group is full, we failed. Otherwise, check if the free - * block is inside the expected reservable space, if so, we - * succeed. - * If the first free block is outside the reservable space, then - * start from the first free block, we search for next available - * space, and go on. - * - * on succeed, a new reservation will be found and inserted into the list - * It contains at least one free block, and it does not overlap with other - * reservation windows. - * - * failed: we failed to find a reservation window in this group - * - * @rsv: the reservation - * - * @grp_goal: The goal (group-relative). It is where the search for a - * free reservable space should start from. - * if we have a grp_goal(grp_goal >0 ), then start from there, - * no grp_goal(grp_goal = -1), we start from the first block - * of the group. - * - * @sb: the super block - * @group: the group we are trying to allocate in - * @bitmap_bh: the block group block bitmap - * - */ -static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, - ext4_grpblk_t grp_goal, struct super_block *sb, - ext4_group_t group, struct buffer_head *bitmap_bh) -{ - struct ext4_reserve_window_node *search_head; - ext4_fsblk_t group_first_block, group_end_block, start_block; - ext4_grpblk_t first_free_block; - struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; - unsigned long size; - int ret; - spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; - - group_first_block = ext4_group_first_block_no(sb, group); - group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - - if (grp_goal < 0) - start_block = group_first_block; - else - start_block = grp_goal + group_first_block; - - size = my_rsv->rsv_goal_size; - - if (!rsv_is_empty(&my_rsv->rsv_window)) { - /* - * if the old reservation is cross group boundary - * and if the goal is inside the old reservation window, - * we will come here when we just failed to allocate from - * the first part of the window. We still have another part - * that belongs to the next group. In this case, there is no - * point to discard our window and try to allocate a new one - * in this group(which will fail). we should - * keep the reservation window, just simply move on. - * - * Maybe we could shift the start block of the reservation - * window to the first block of next group. - */ - - if ((my_rsv->rsv_start <= group_end_block) && - (my_rsv->rsv_end > group_end_block) && - (start_block >= my_rsv->rsv_start)) - return -1; - - if ((my_rsv->rsv_alloc_hit > - (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { - /* - * if the previously allocation hit ratio is - * greater than 1/2, then we double the size of - * the reservation window the next time, - * otherwise we keep the same size window - */ - size = size * 2; - if (size > EXT4_MAX_RESERVE_BLOCKS) - size = EXT4_MAX_RESERVE_BLOCKS; - my_rsv->rsv_goal_size= size; - } - } - - spin_lock(rsv_lock); - /* - * shift the search start to the window near the goal block - */ - search_head = search_reserve_window(fs_rsv_root, start_block); - - /* - * find_next_reservable_window() simply finds a reservable window - * inside the given range(start_block, group_end_block). - * - * To make sure the reservation window has a free bit inside it, we - * need to check the bitmap after we found a reservable window. - */ -retry: - ret = find_next_reservable_window(search_head, my_rsv, sb, - start_block, group_end_block); - - if (ret == -1) { - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - spin_unlock(rsv_lock); - return -1; - } - - /* - * On success, find_next_reservable_window() returns the - * reservation window where there is a reservable space after it. - * Before we reserve this reservable space, we need - * to make sure there is at least a free block inside this region. - * - * searching the first free bit on the block bitmap and copy of - * last committed bitmap alternatively, until we found a allocatable - * block. Search start from the start block of the reservable space - * we just found. - */ - spin_unlock(rsv_lock); - first_free_block = bitmap_search_next_usable_block( - my_rsv->rsv_start - group_first_block, - bitmap_bh, group_end_block - group_first_block + 1); - - if (first_free_block < 0) { - /* - * no free block left on the bitmap, no point - * to reserve the space. return failed. - */ - spin_lock(rsv_lock); - if (!rsv_is_empty(&my_rsv->rsv_window)) - rsv_window_remove(sb, my_rsv); - spin_unlock(rsv_lock); - return -1; /* failed */ - } - - start_block = first_free_block + group_first_block; - /* - * check if the first free block is within the - * free space we just reserved - */ - if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) - return 0; /* success */ - /* - * if the first free bit we found is out of the reservable space - * continue search for next reservable space, - * start from where the free block is, - * we also shift the list head to where we stopped last time - */ - search_head = my_rsv; - spin_lock(rsv_lock); - goto retry; -} - -/** - * try_to_extend_reservation() - * @my_rsv: given reservation window - * @sb: super block - * @size: the delta to extend - * - * Attempt to expand the reservation window large enough to have - * required number of free blocks - * - * Since ext4_try_to_allocate() will always allocate blocks within - * the reservation window range, if the window size is too small, - * multiple blocks allocation has to stop at the end of the reservation - * window. To make this more efficient, given the total number of - * blocks needed and the current size of the window, we try to - * expand the reservation window size if necessary on a best-effort - * basis before ext4_new_blocks() tries to allocate blocks, - */ -static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, - struct super_block *sb, int size) -{ - struct ext4_reserve_window_node *next_rsv; - struct rb_node *next; - spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; - - if (!spin_trylock(rsv_lock)) - return; - - next = rb_next(&my_rsv->rsv_node); - - if (!next) - my_rsv->rsv_end += size; - else { - next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); - - if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) - my_rsv->rsv_end += size; - else - my_rsv->rsv_end = next_rsv->rsv_start - 1; - } - spin_unlock(rsv_lock); -} - -/** - * ext4_try_to_allocate_with_rsv() - * @sb: superblock - * @handle: handle to this transaction - * @group: given allocation block group - * @bitmap_bh: bufferhead holds the block bitmap - * @grp_goal: given target block within the group - * @count: target number of blocks to allocate - * @my_rsv: reservation window - * @errp: pointer to store the error code - * - * This is the main function used to allocate a new block and its reservation - * window. - * - * Each time when a new block allocation is need, first try to allocate from - * its own reservation. If it does not have a reservation window, instead of - * looking for a free bit on bitmap first, then look up the reservation list to - * see if it is inside somebody else's reservation window, we try to allocate a - * reservation window for it starting from the goal first. Then do the block - * allocation within the reservation window. - * - * This will avoid keeping on searching the reservation list again and - * again when somebody is looking for a free block (without - * reservation), and there are lots of free blocks, but they are all - * being reserved. - * - * We use a red-black tree for the per-filesystem reservation list. - * - */ -static ext4_grpblk_t -ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, - ext4_group_t group, struct buffer_head *bitmap_bh, - ext4_grpblk_t grp_goal, - struct ext4_reserve_window_node * my_rsv, - unsigned long *count, int *errp) -{ - ext4_fsblk_t group_first_block, group_last_block; - ext4_grpblk_t ret = 0; - int fatal; - unsigned long num = *count; - - *errp = 0; - - /* - * Make sure we use undo access for the bitmap, because it is critical - * that we do the frozen_data COW on bitmap buffers in all cases even - * if the buffer is in BJ_Forget state in the committing transaction. - */ - BUFFER_TRACE(bitmap_bh, "get undo access for new block"); - fatal = ext4_journal_get_undo_access(handle, bitmap_bh); - if (fatal) { - *errp = fatal; - return -1; - } - - /* - * we don't deal with reservation when - * filesystem is mounted without reservation - * or the file is not a regular file - * or last attempt to allocate a block with reservation turned on failed - */ - if (my_rsv == NULL ) { - ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, - grp_goal, count, NULL); - goto out; - } - /* - * grp_goal is a group relative block number (if there is a goal) - * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) - * first block is a filesystem wide block number - * first block is the block number of the first block in this group - */ - group_first_block = ext4_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - - /* - * Basically we will allocate a new block from inode's reservation - * window. - * - * We need to allocate a new reservation window, if: - * a) inode does not have a reservation window; or - * b) last attempt to allocate a block from existing reservation - * failed; or - * c) we come here with a goal and with a reservation window - * - * We do not need to allocate a new reservation window if we come here - * at the beginning with a goal and the goal is inside the window, or - * we don't have a goal but already have a reservation window. - * then we could go to allocate from the reservation window directly. - */ - while (1) { - if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || - !goal_in_my_reservation(&my_rsv->rsv_window, - grp_goal, group, sb)) { - if (my_rsv->rsv_goal_size < *count) - my_rsv->rsv_goal_size = *count; - ret = alloc_new_reservation(my_rsv, grp_goal, sb, - group, bitmap_bh); - if (ret < 0) - break; /* failed */ - - if (!goal_in_my_reservation(&my_rsv->rsv_window, - grp_goal, group, sb)) - grp_goal = -1; - } else if (grp_goal >= 0) { - int curr = my_rsv->rsv_end - - (grp_goal + group_first_block) + 1; - - if (curr < *count) - try_to_extend_reservation(my_rsv, sb, - *count - curr); - } - - if ((my_rsv->rsv_start > group_last_block) || - (my_rsv->rsv_end < group_first_block)) { - rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); - BUG(); - } - ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, - grp_goal, &num, &my_rsv->rsv_window); - if (ret >= 0) { - my_rsv->rsv_alloc_hit += num; - *count = num; - break; /* succeed */ - } - num = *count; - } -out: - if (ret >= 0) { - BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " - "bitmap block"); - fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); - if (fatal) { - *errp = fatal; - return -1; - } - return ret; - } - - BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); - ext4_journal_release_buffer(handle, bitmap_bh); - return ret; -} - -/** * ext4_has_free_blocks() * @sbi: in-core super block structure. * @nblocks: number of neeed blocks @@ -1610,29 +629,34 @@ out: * On success, return nblocks */ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, - ext4_fsblk_t nblocks) + s64 nblocks) { - ext4_fsblk_t free_blocks; - ext4_fsblk_t root_blocks = 0; + s64 free_blocks, dirty_blocks; + s64 root_blocks = 0; + struct percpu_counter *fbc = &sbi->s_freeblocks_counter; + struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; - free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + free_blocks = percpu_counter_read_positive(fbc); + dirty_blocks = percpu_counter_read_positive(dbc); if (!capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) root_blocks = ext4_r_blocks_count(sbi->s_es); -#ifdef CONFIG_SMP - if (free_blocks - root_blocks < FBC_BATCH) - free_blocks = - percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); -#endif - if (free_blocks <= root_blocks) + + if (free_blocks - (nblocks + root_blocks + dirty_blocks) < + EXT4_FREEBLOCKS_WATERMARK) { + free_blocks = percpu_counter_sum(fbc); + dirty_blocks = percpu_counter_sum(dbc); + } + if (free_blocks <= (root_blocks + dirty_blocks)) /* we don't have free space */ return 0; - if (free_blocks - root_blocks < nblocks) - return free_blocks - root_blocks; + + if (free_blocks - (root_blocks + dirty_blocks) < nblocks) + return free_blocks - (root_blocks + dirty_blocks); return nblocks; - } +} /** @@ -1657,303 +681,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); } -/** - * ext4_old_new_blocks() -- core block bitmap based block allocation function - * - * @handle: handle to this transaction - * @inode: file inode - * @goal: given target block(filesystem wide) - * @count: target number of blocks to allocate - * @errp: error code - * - * ext4_old_new_blocks uses a goal block to assist allocation and look up - * the block bitmap directly to do block allocation. It tries to - * allocate block(s) from the block group contains the goal block first. If - * that fails, it will try to allocate block(s) from other block groups - * without any specific goal block. - * - * This function is called when -o nomballoc mount option is enabled - * - */ -ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp) -{ - struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gdp_bh; - ext4_group_t group_no; - ext4_group_t goal_group; - ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ - ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ - ext4_fsblk_t ret_block; /* filesyetem-wide allocated block */ - ext4_group_t bgi; /* blockgroup iteration index */ - int fatal = 0, err; - int performed_allocation = 0; - ext4_grpblk_t free_blocks; /* number of free blocks in a group */ - struct super_block *sb; - struct ext4_group_desc *gdp; - struct ext4_super_block *es; - struct ext4_sb_info *sbi; - struct ext4_reserve_window_node *my_rsv = NULL; - struct ext4_block_alloc_info *block_i; - unsigned short windowsz = 0; - ext4_group_t ngroups; - unsigned long num = *count; - - sb = inode->i_sb; - if (!sb) { - *errp = -ENODEV; - printk("ext4_new_block: nonexistent device"); - return 0; - } - - sbi = EXT4_SB(sb); - if (!EXT4_I(inode)->i_delalloc_reserved_flag) { - /* - * With delalloc we already reserved the blocks - */ - *count = ext4_has_free_blocks(sbi, *count); - } - if (*count == 0) { - *errp = -ENOSPC; - return 0; /*return with ENOSPC error */ - } - num = *count; - - /* - * Check quota for allocation of this block. - */ - if (DQUOT_ALLOC_BLOCK(inode, num)) { - *errp = -EDQUOT; - return 0; - } - - sbi = EXT4_SB(sb); - es = EXT4_SB(sb)->s_es; - ext4_debug("goal=%llu.\n", goal); - /* - * Allocate a block from reservation only when - * filesystem is mounted with reservation(default,-o reservation), and - * it's a regular file, and - * the desired window size is greater than 0 (One could use ioctl - * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off - * reservation on that particular file) - */ - block_i = EXT4_I(inode)->i_block_alloc_info; - if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) - my_rsv = &block_i->rsv_window_node; - - /* - * First, test whether the goal block is free. - */ - if (goal < le32_to_cpu(es->s_first_data_block) || - goal >= ext4_blocks_count(es)) - goal = le32_to_cpu(es->s_first_data_block); - ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); - goal_group = group_no; -retry_alloc: - gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) - goto io_error; - - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); - /* - * if there is not enough free blocks to make a new resevation - * turn off reservation for this allocation - */ - if (my_rsv && (free_blocks < windowsz) - && (rsv_is_empty(&my_rsv->rsv_window))) - my_rsv = NULL; - - if (free_blocks > 0) { - bitmap_bh = ext4_read_block_bitmap(sb, group_no); - if (!bitmap_bh) - goto io_error; - grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, - group_no, bitmap_bh, grp_target_blk, - my_rsv, &num, &fatal); - if (fatal) - goto out; - if (grp_alloc_blk >= 0) - goto allocated; - } - - ngroups = EXT4_SB(sb)->s_groups_count; - smp_rmb(); - - /* - * Now search the rest of the groups. We assume that - * group_no and gdp correctly point to the last group visited. - */ - for (bgi = 0; bgi < ngroups; bgi++) { - group_no++; - if (group_no >= ngroups) - group_no = 0; - gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); - if (!gdp) - goto io_error; - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); - /* - * skip this group if the number of - * free blocks is less than half of the reservation - * window size. - */ - if (free_blocks <= (windowsz/2)) - continue; - - brelse(bitmap_bh); - bitmap_bh = ext4_read_block_bitmap(sb, group_no); - if (!bitmap_bh) - goto io_error; - /* - * try to allocate block(s) from this group, without a goal(-1). - */ - grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, - group_no, bitmap_bh, -1, my_rsv, - &num, &fatal); - if (fatal) - goto out; - if (grp_alloc_blk >= 0) - goto allocated; - } - /* - * We may end up a bogus ealier ENOSPC error due to - * filesystem is "full" of reservations, but - * there maybe indeed free blocks avaliable on disk - * In this case, we just forget about the reservations - * just do block allocation as without reservations. - */ - if (my_rsv) { - my_rsv = NULL; - windowsz = 0; - group_no = goal_group; - goto retry_alloc; - } - /* No space left on the device */ - *errp = -ENOSPC; - goto out; - -allocated: - - ext4_debug("using block group %lu(%d)\n", - group_no, gdp->bg_free_blocks_count); - - BUFFER_TRACE(gdp_bh, "get_write_access"); - fatal = ext4_journal_get_write_access(handle, gdp_bh); - if (fatal) - goto out; - - ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); - - if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || - in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || - in_range(ret_block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group) || - in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { - ext4_error(sb, "ext4_new_block", - "Allocating block in system zone - " - "blocks from %llu, length %lu", - ret_block, num); - /* - * claim_block marked the blocks we allocated - * as in use. So we may want to selectively - * mark some of the blocks as free - */ - goto retry_alloc; - } - - performed_allocation = 1; - -#ifdef CONFIG_JBD2_DEBUG - { - struct buffer_head *debug_bh; - - /* Record bitmap buffer state in the newly allocated block */ - debug_bh = sb_find_get_block(sb, ret_block); - if (debug_bh) { - BUFFER_TRACE(debug_bh, "state when allocated"); - BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); - brelse(debug_bh); - } - } - jbd_lock_bh_state(bitmap_bh); - spin_lock(sb_bgl_lock(sbi, group_no)); - if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { - int i; - - for (i = 0; i < num; i++) { - if (ext4_test_bit(grp_alloc_blk+i, - bh2jh(bitmap_bh)->b_committed_data)) { - printk("%s: block was unexpectedly set in " - "b_committed_data\n", __func__); - } - } - } - ext4_debug("found bit %d\n", grp_alloc_blk); - spin_unlock(sb_bgl_lock(sbi, group_no)); - jbd_unlock_bh_state(bitmap_bh); -#endif - - if (ret_block + num - 1 >= ext4_blocks_count(es)) { - ext4_error(sb, "ext4_new_block", - "block(%llu) >= blocks count(%llu) - " - "block_group = %lu, es == %p ", ret_block, - ext4_blocks_count(es), group_no, es); - goto out; - } - - /* - * It is up to the caller to add the new buffer to a journal - * list of some description. We don't know in advance whether - * the caller wants to use it as metadata or data. - */ - spin_lock(sb_bgl_lock(sbi, group_no)); - if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) - gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); - le16_add_cpu(&gdp->bg_free_blocks_count, -num); - gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); - spin_unlock(sb_bgl_lock(sbi, group_no)); - if (!EXT4_I(inode)->i_delalloc_reserved_flag) - percpu_counter_sub(&sbi->s_freeblocks_counter, num); - - if (sbi->s_log_groups_per_flex) { - ext4_group_t flex_group = ext4_flex_group(sbi, group_no); - spin_lock(sb_bgl_lock(sbi, flex_group)); - sbi->s_flex_groups[flex_group].free_blocks -= num; - spin_unlock(sb_bgl_lock(sbi, flex_group)); - } - - BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); - err = ext4_journal_dirty_metadata(handle, gdp_bh); - if (!fatal) - fatal = err; - - sb->s_dirt = 1; - if (fatal) - goto out; - - *errp = 0; - brelse(bitmap_bh); - DQUOT_FREE_BLOCK(inode, *count-num); - *count = num; - return ret_block; - -io_error: - *errp = -EIO; -out: - if (fatal) { - *errp = fatal; - ext4_std_error(sb, fatal); - } - /* - * Undo the block allocation - */ - if (!performed_allocation) - DQUOT_FREE_BLOCK(inode, *count); - brelse(bitmap_bh); - return 0; -} - #define EXT4_META_BLOCK 0x1 static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, @@ -1963,10 +690,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, struct ext4_allocation_request ar; ext4_fsblk_t ret; - if (!test_opt(inode->i_sb, MBALLOC)) { - return ext4_old_new_blocks(handle, inode, goal, count, errp); - } - memset(&ar, 0, sizeof(ar)); /* Fill with neighbour allocated blocks */ @@ -2008,7 +731,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, /* * Account for the allocated meta blocks */ - if (!(*errp)) { + if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { spin_lock(&EXT4_I(inode)->i_block_reservation_lock); EXT4_I(inode)->i_allocated_meta_blocks += *count; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); @@ -2093,10 +816,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) bitmap_count += x; } brelse(bitmap_bh); - printk("ext4_count_free_blocks: stored = %llu" - ", computed = %llu, %llu\n", - ext4_free_blocks_count(es), - desc_count, bitmap_count); + printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" + ", computed = %llu, %llu\n", ext4_free_blocks_count(es), + desc_count, bitmap_count); return bitmap_count; #else desc_count = 0; @@ -2183,8 +905,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || metagroup < first_meta_bg) - return ext4_bg_num_gdb_nometa(sb,group); + return ext4_bg_num_gdb_nometa(sb, group); return ext4_bg_num_gdb_meta(sb,group); } + diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index d37ea675045..0a7a6663c19 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -15,17 +15,17 @@ static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) +unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) { unsigned int i; unsigned long sum = 0; if (!map) - return (0); + return 0; for (i = 0; i < numchars; i++) sum += nibblemap[map->b_data[i] & 0xf] + nibblemap[(map->b_data[i] >> 4) & 0xf]; - return (sum); + return sum; } #endif /* EXT4FS_DEBUG */ diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ec8e33b4521..3ca6a2b7632 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = { }; static int ext4_readdir(struct file *, void *, filldir_t); -static int ext4_dx_readdir(struct file * filp, - void * dirent, filldir_t filldir); -static int ext4_release_dir (struct inode * inode, - struct file * filp); +static int ext4_dx_readdir(struct file *filp, + void *dirent, filldir_t filldir); +static int ext4_release_dir(struct inode *inode, + struct file *filp); const struct file_operations ext4_dir_operations = { .llseek = generic_file_llseek, @@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) } -int ext4_check_dir_entry (const char * function, struct inode * dir, - struct ext4_dir_entry_2 * de, - struct buffer_head * bh, - unsigned long offset) +int ext4_check_dir_entry(const char *function, struct inode *dir, + struct ext4_dir_entry_2 *de, + struct buffer_head *bh, + unsigned long offset) { - const char * error_msg = NULL; + const char *error_msg = NULL; const int rlen = ext4_rec_len_from_disk(de->rec_len); if (rlen < EXT4_DIR_REC_LEN(1)) @@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, error_msg = "inode out of bounds"; if (error_msg != NULL) - ext4_error (dir->i_sb, function, + ext4_error(dir->i_sb, function, "bad entry in directory #%lu: %s - " "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", dir->i_ino, error_msg, offset, @@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, return error_msg == NULL ? 1 : 0; } -static int ext4_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int ext4_readdir(struct file *filp, + void *dirent, filldir_t filldir) { int error = 0; unsigned long offset; @@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp, int err; struct inode *inode = filp->f_path.dentry->d_inode; int ret = 0; + int dir_has_error = 0; sb = inode->i_sb; @@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp, * of recovering data when there's a bad sector */ if (!bh) { - ext4_error (sb, "ext4_readdir", - "directory #%lu contains a hole at offset %lu", - inode->i_ino, (unsigned long)filp->f_pos); + if (!dir_has_error) { + ext4_error(sb, __func__, "directory #%lu " + "contains a hole at offset %Lu", + inode->i_ino, + (unsigned long long) filp->f_pos); + dir_has_error = 1; + } /* corrupt size? Maybe no more blocks to read */ if (filp->f_pos > inode->i_blocks << 9) break; @@ -187,14 +192,14 @@ revalidate: while (!error && filp->f_pos < inode->i_size && offset < sb->s_blocksize) { de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); - if (!ext4_check_dir_entry ("ext4_readdir", inode, de, - bh, offset)) { + if (!ext4_check_dir_entry("ext4_readdir", inode, de, + bh, offset)) { /* * On error, skip the f_pos to the next block */ filp->f_pos = (filp->f_pos | (sb->s_blocksize - 1)) + 1; - brelse (bh); + brelse(bh); ret = stored; goto out; } @@ -218,12 +223,12 @@ revalidate: break; if (version != filp->f_version) goto revalidate; - stored ++; + stored++; } filp->f_pos += ext4_rec_len_from_disk(de->rec_len); } offset = 0; - brelse (bh); + brelse(bh); } out: return ret; @@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root) parent = rb_parent(n); fname = rb_entry(n, struct fname, rb_hash); while (fname) { - struct fname * old = fname; + struct fname *old = fname; fname = fname->next; - kfree (old); + kfree(old); } if (!parent) root->rb_node = NULL; @@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, struct ext4_dir_entry_2 *dirent) { struct rb_node **p, *parent = NULL; - struct fname * fname, *new_fn; + struct fname *fname, *new_fn; struct dir_private_info *info; int len; @@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, * for all entres on the fname linked list. (Normally there is only * one entry on the linked list, unless there are 62 bit hash collisions.) */ -static int call_filldir(struct file * filp, void * dirent, +static int call_filldir(struct file *filp, void *dirent, filldir_t filldir, struct fname *fname) { struct dir_private_info *info = filp->private_data; loff_t curr_pos; struct inode *inode = filp->f_path.dentry->d_inode; - struct super_block * sb; + struct super_block *sb; int error; sb = inode->i_sb; if (!fname) { - printk("call_filldir: called with null fname?!?\n"); + printk(KERN_ERR "ext4: call_filldir: called with " + "null fname?!?\n"); return 0; } curr_pos = hash2pos(fname->hash, fname->minor_hash); @@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent, return 0; } -static int ext4_dx_readdir(struct file * filp, - void * dirent, filldir_t filldir) +static int ext4_dx_readdir(struct file *filp, + void *dirent, filldir_t filldir) { struct dir_private_info *info = filp->private_data; struct inode *inode = filp->f_path.dentry->d_inode; @@ -511,7 +517,7 @@ finished: return 0; } -static int ext4_release_dir (struct inode * inode, struct file * filp) +static int ext4_release_dir(struct inode *inode, struct file *filp) { if (filp->private_data) ext4_htree_free_dir_info(filp->private_data); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 295003241d3..f46a513a515 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -44,9 +44,9 @@ #ifdef EXT4FS_DEBUG #define ext4_debug(f, a...) \ do { \ - printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ + printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ __FILE__, __LINE__, __func__); \ - printk (KERN_DEBUG f, ## a); \ + printk(KERN_DEBUG f, ## a); \ } while (0) #else #define ext4_debug(f, a...) do {} while (0) @@ -128,7 +128,7 @@ struct ext4_allocation_request { #else # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) #endif -#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) #ifdef __KERNEL__ # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) #else @@ -245,7 +245,7 @@ struct flex_groups { #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ /* * Inode dynamic state flags @@ -291,8 +291,6 @@ struct ext4_new_group_data { #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS #define EXT4_IOC_GETVERSION _IOR('f', 3, long) #define EXT4_IOC_SETVERSION _IOW('f', 4, long) -#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION #ifdef CONFIG_JBD2_DEBUG @@ -300,7 +298,10 @@ struct ext4_new_group_data { #endif #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) -#define EXT4_IOC_MIGRATE _IO('f', 7) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) +#define EXT4_IOC_MIGRATE _IO('f', 9) + /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ /* * ioctl commands in 32 bit emulation @@ -538,7 +539,6 @@ do { \ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ -#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -667,7 +667,7 @@ struct ext4_super_block { }; #ifdef __KERNEL__ -static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) +static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) { return sb->s_fs_info; } @@ -725,11 +725,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) */ #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) + (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) + (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ - ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) + (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ @@ -789,6 +789,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_DEF_RESUID 0 #define EXT4_DEF_RESGID 0 +#define EXT4_DEF_INODE_READAHEAD_BLKS 32 + /* * Default mount options */ @@ -954,6 +956,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, unsigned long *blockgrpp, ext4_grpblk_t *offsetp); +extern struct proc_dir_entry *ext4_proc_root; + +#ifdef CONFIG_PROC_FS +extern const struct file_operations ext4_ui_proc_fops; + +#define EXT4_PROC_HANDLER(name, var) \ +do { \ + proc = proc_create_data(name, mode, sbi->s_proc, \ + &ext4_ui_proc_fops, &sbi->s_##var); \ + if (proc == NULL) { \ + printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ + goto err_out; \ + } \ +} while (0) +#else +#define EXT4_PROC_HANDLER(name, var) +#endif + /* * Function prototypes */ @@ -981,23 +1001,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, ext4_fsblk_t goal, unsigned long *count, int *errp); -extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, - ext4_fsblk_t goal, unsigned long *count, int *errp); +extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, - ext4_fsblk_t nblocks); -extern void ext4_free_blocks (handle_t *handle, struct inode *inode, + s64 nblocks); +extern void ext4_free_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count, int metadata); -extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, - ext4_fsblk_t block, unsigned long count, +extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, unsigned long *pdquot_freed_blocks); -extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); -extern void ext4_check_blocks_bitmap (struct super_block *); +extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); +extern void ext4_check_blocks_bitmap(struct super_block *); extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, ext4_group_t block_group, struct buffer_head ** bh); extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); -extern void ext4_init_block_alloc_info(struct inode *); -extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); /* dir.c */ extern int ext4_check_dir_entry(const char *, struct inode *, @@ -1009,20 +1026,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, extern void ext4_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext4_sync_file (struct file *, struct dentry *, int); +extern int ext4_sync_file(struct file *, struct dentry *, int); /* hash.c */ extern int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo); /* ialloc.c */ -extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); -extern void ext4_free_inode (handle_t *, struct inode *); -extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); -extern unsigned long ext4_count_free_inodes (struct super_block *); -extern unsigned long ext4_count_dirs (struct super_block *); -extern void ext4_check_inodes_bitmap (struct super_block *); -extern unsigned long ext4_count_free (struct buffer_head *, unsigned); +extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); +extern void ext4_free_inode(handle_t *, struct inode *); +extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); +extern unsigned long ext4_count_free_inodes(struct super_block *); +extern unsigned long ext4_count_dirs(struct super_block *); +extern void ext4_check_inodes_bitmap(struct super_block *); +extern unsigned long ext4_count_free(struct buffer_head *, unsigned); /* mballoc.c */ extern long ext4_mb_stats; @@ -1032,7 +1049,7 @@ extern int ext4_mb_release(struct super_block *); extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, struct ext4_allocation_request *, int *); extern int ext4_mb_reserve_blocks(struct super_block *, int); -extern void ext4_mb_discard_inode_preallocations(struct inode *); +extern void ext4_discard_preallocations(struct inode *); extern int __init init_ext4_mballoc(void); extern void exit_ext4_mballoc(void); extern void ext4_mb_free_blocks(handle_t *, struct inode *, @@ -1050,24 +1067,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int, int *); struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int, int *); +int ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create); int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int extend_disksize); extern struct inode *ext4_iget(struct super_block *, unsigned long); -extern int ext4_write_inode (struct inode *, int); -extern int ext4_setattr (struct dentry *, struct iattr *); +extern int ext4_write_inode(struct inode *, int); +extern int ext4_setattr(struct dentry *, struct iattr *); extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -extern void ext4_delete_inode (struct inode *); -extern int ext4_sync_inode (handle_t *, struct inode *); -extern void ext4_discard_reservation (struct inode *); +extern void ext4_delete_inode(struct inode *); +extern int ext4_sync_inode(handle_t *, struct inode *); extern void ext4_dirty_inode(struct inode *); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); -extern void ext4_truncate (struct inode *); +extern void ext4_truncate(struct inode *); extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); @@ -1080,11 +1098,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); -extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); +extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); /* migrate.c */ -extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, - unsigned long); +extern int ext4_ext_migrate(struct inode *); /* namei.c */ extern int ext4_orphan_add(handle_t *, struct inode *); extern int ext4_orphan_del(handle_t *, struct inode *); @@ -1099,14 +1116,14 @@ extern int ext4_group_extend(struct super_block *sb, ext4_fsblk_t n_blocks_count); /* super.c */ -extern void ext4_error (struct super_block *, const char *, const char *, ...) +extern void ext4_error(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void __ext4_std_error (struct super_block *, const char *, int); -extern void ext4_abort (struct super_block *, const char *, const char *, ...) +extern void __ext4_std_error(struct super_block *, const char *, int); +extern void ext4_abort(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext4_warning (struct super_block *, const char *, const char *, ...) +extern void ext4_warning(struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext4_update_dynamic_rev (struct super_block *sb); +extern void ext4_update_dynamic_rev(struct super_block *sb); extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, __u32 compat); extern int ext4_update_rocompat_feature(handle_t *handle, @@ -1179,7 +1196,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, - ext4_group_t group) + ext4_group_t group) { struct ext4_group_info ***grp_info; long indexv, indexh; @@ -1207,6 +1224,28 @@ do { \ __ext4_std_error((sb), __func__, (errno)); \ } while (0) +#ifdef CONFIG_SMP +/* Each CPU can accumulate FBC_BATCH blocks in their local + * counters. So we need to make sure we have free blocks more + * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. + */ +#define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) +#else +#define EXT4_FREEBLOCKS_WATERMARK 0 +#endif + +static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) +{ + /* + * XXX: replace with spinlock if seen contended -bzzz + */ + down_write(&EXT4_I(inode)->i_data_sem); + if (newsize > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = newsize; + up_write(&EXT4_I(inode)->i_data_sem); + return ; +} + /* * Inodes and files operations */ diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index d33dc56d698..bec7ce59fc0 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -124,6 +124,19 @@ struct ext4_ext_path { #define EXT4_EXT_CACHE_GAP 1 #define EXT4_EXT_CACHE_EXTENT 2 +/* + * to be called by ext4_ext_walk_space() + * negative retcode - error + * positive retcode - signal for ext4_ext_walk_space(), see below + * callback must return valid extent (passed or newly created) + */ +typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, + struct ext4_ext_cache *, + struct ext4_extent *, void *); + +#define EXT_CONTINUE 0 +#define EXT_BREAK 1 +#define EXT_REPEAT 2 #define EXT_MAX_BLOCK 0xffffffff @@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode, struct ext4_extent *); extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); +extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, + ext_prepare_callback, void *); extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, struct ext4_ext_path *); extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index ef7409f0e7e..5c124c0ac6d 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h @@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t; /* data type for block group number */ typedef unsigned long ext4_group_t; -struct ext4_reserve_window { - ext4_fsblk_t _rsv_start; /* First byte reserved */ - ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ -}; - -struct ext4_reserve_window_node { - struct rb_node rsv_node; - __u32 rsv_goal_size; - __u32 rsv_alloc_hit; - struct ext4_reserve_window rsv_window; -}; - -struct ext4_block_alloc_info { - /* information about reservation window */ - struct ext4_reserve_window_node rsv_window_node; - /* - * was i_next_alloc_block in ext4_inode_info - * is the logical (file-relative) number of the - * most-recently-allocated block in this file. - * We use this for detecting linearly ascending allocation requests. - */ - ext4_lblk_t last_alloc_logical_block; - /* - * Was i_next_alloc_goal in ext4_inode_info - * is the *physical* companion to i_next_alloc_block. - * it the physical block number of the block which was most-recentl - * allocated to this file. This give us the goal (target) for the next - * allocation when we detect linearly ascending requests. - */ - ext4_fsblk_t last_alloc_physical_block; -}; - #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end @@ -97,11 +65,8 @@ struct ext4_inode_info { ext4_group_t i_block_group; __u32 i_state; /* Dynamic state flags for ext4 */ - /* block reservation info */ - struct ext4_block_alloc_info *i_block_alloc_info; - ext4_lblk_t i_dir_start_lookup; -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR /* * Extended attributes can be read independently of the main file * data. Taking i_mutex even when reading would cause contention @@ -111,7 +76,7 @@ struct ext4_inode_info { */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6300226d553..6a0b40d4326 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h @@ -40,8 +40,8 @@ struct ext4_sb_info { unsigned long s_blocks_last; /* Last seen block count */ loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ - struct buffer_head ** s_group_desc; + struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ + struct buffer_head **s_group_desc; unsigned long s_mount_opt; ext4_fsblk_t s_sb_block; uid_t s_resuid; @@ -52,6 +52,7 @@ struct ext4_sb_info { int s_desc_per_block_bits; int s_inode_size; int s_first_ino; + unsigned int s_inode_readahead_blks; spinlock_t s_next_gen_lock; u32 s_next_generation; u32 s_hash_seed[4]; @@ -59,16 +60,17 @@ struct ext4_sb_info { struct percpu_counter s_freeblocks_counter; struct percpu_counter s_freeinodes_counter; struct percpu_counter s_dirs_counter; + struct percpu_counter s_dirtyblocks_counter; struct blockgroup_lock s_blockgroup_lock; + struct proc_dir_entry *s_proc; /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; - struct ext4_reserve_window_node s_rsv_window_head; /* Journaling */ - struct inode * s_journal_inode; - struct journal_s * s_journal; + struct inode *s_journal_inode; + struct journal_s *s_journal; struct list_head s_orphan; unsigned long s_commit_interval; struct block_device *journal_bdev; @@ -106,12 +108,12 @@ struct ext4_sb_info { /* tunables */ unsigned long s_stripe; - unsigned long s_mb_stream_request; - unsigned long s_mb_max_to_scan; - unsigned long s_mb_min_to_scan; - unsigned long s_mb_stats; - unsigned long s_mb_order2_reqs; - unsigned long s_mb_group_prealloc; + unsigned int s_mb_stream_request; + unsigned int s_mb_max_to_scan; + unsigned int s_mb_min_to_scan; + unsigned int s_mb_stats; + unsigned int s_mb_order2_reqs; + unsigned int s_mb_group_prealloc; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; unsigned long s_mb_last_start; @@ -121,7 +123,6 @@ struct ext4_sb_info { int s_mb_history_cur; int s_mb_history_max; int s_mb_history_num; - struct proc_dir_entry *s_mb_proc; spinlock_t s_mb_history_lock; int s_mb_history_filter; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b24d3c53f20..ea2ce3c0ae6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -40,6 +40,7 @@ #include <linux/slab.h> #include <linux/falloc.h> #include <asm/uaccess.h> +#include <linux/fiemap.h> #include "ext4_jbd2.h" #include "ext4_extents.h" @@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) ext_debug("\n"); } #else -#define ext4_ext_show_path(inode,path) -#define ext4_ext_show_leaf(inode,path) +#define ext4_ext_show_path(inode, path) +#define ext4_ext_show_leaf(inode, path) #endif void ext4_ext_drop_refs(struct ext4_ext_path *path) @@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode, for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { if (k != 0 && le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { - printk("k=%d, ix=0x%p, first=0x%p\n", k, - ix, EXT_FIRST_INDEX(eh)); - printk("%u <= %u\n", + printk(KERN_DEBUG "k=%d, ix=0x%p, " + "first=0x%p\n", k, + ix, EXT_FIRST_INDEX(eh)); + printk(KERN_DEBUG "%u <= %u\n", le32_to_cpu(ix->ei_block), le32_to_cpu(ix[-1].ei_block)); } @@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *newext) { - struct ext4_extent_header * eh; + struct ext4_extent_header *eh; struct ext4_extent *ex, *fex; struct ext4_extent *nearex; /* nearest extent */ struct ext4_ext_path *npath = NULL; @@ -1625,6 +1627,113 @@ cleanup: return err; } +int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, + ext4_lblk_t num, ext_prepare_callback func, + void *cbdata) +{ + struct ext4_ext_path *path = NULL; + struct ext4_ext_cache cbex; + struct ext4_extent *ex; + ext4_lblk_t next, start = 0, end = 0; + ext4_lblk_t last = block + num; + int depth, exists, err = 0; + + BUG_ON(func == NULL); + BUG_ON(inode == NULL); + + while (block < last && block != EXT_MAX_BLOCK) { + num = last - block; + /* find extent for this block */ + path = ext4_ext_find_extent(inode, block, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + break; + } + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + ex = path[depth].p_ext; + next = ext4_ext_next_allocated_block(path); + + exists = 0; + if (!ex) { + /* there is no extent yet, so try to allocate + * all requested space */ + start = block; + end = block + num; + } else if (le32_to_cpu(ex->ee_block) > block) { + /* need to allocate space before found extent */ + start = block; + end = le32_to_cpu(ex->ee_block); + if (block + num < end) + end = block + num; + } else if (block >= le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex)) { + /* need to allocate space after found extent */ + start = block; + end = block + num; + if (end >= next) + end = next; + } else if (block >= le32_to_cpu(ex->ee_block)) { + /* + * some part of requested space is covered + * by found extent + */ + start = block; + end = le32_to_cpu(ex->ee_block) + + ext4_ext_get_actual_len(ex); + if (block + num < end) + end = block + num; + exists = 1; + } else { + BUG(); + } + BUG_ON(end <= start); + + if (!exists) { + cbex.ec_block = start; + cbex.ec_len = end - start; + cbex.ec_start = 0; + cbex.ec_type = EXT4_EXT_CACHE_GAP; + } else { + cbex.ec_block = le32_to_cpu(ex->ee_block); + cbex.ec_len = ext4_ext_get_actual_len(ex); + cbex.ec_start = ext_pblock(ex); + cbex.ec_type = EXT4_EXT_CACHE_EXTENT; + } + + BUG_ON(cbex.ec_len == 0); + err = func(inode, path, &cbex, ex, cbdata); + ext4_ext_drop_refs(path); + + if (err < 0) + break; + + if (err == EXT_REPEAT) + continue; + else if (err == EXT_BREAK) { + err = 0; + break; + } + + if (ext_depth(inode) != depth) { + /* depth was changed. we have to realloc path */ + kfree(path); + path = NULL; + } + + block = cbex.ec_block + cbex.ec_len; + } + + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + + return err; +} + static void ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, __u32 len, ext4_fsblk_t start, int type) @@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb) */ if (test_opt(sb, EXTENTS)) { - printk("EXT4-fs: file extents enabled"); + printk(KERN_INFO "EXT4-fs: file extents enabled"); #ifdef AGGRESSIVE_TEST printk(", aggressive tests"); #endif @@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, goto out2; } /* - * Okay, we need to do block allocation. Lazily initialize the block - * allocation info here if necessary. + * Okay, we need to do block allocation. */ - if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) - ext4_init_block_alloc_info(inode); /* find neighbour allocated blocks */ ar.lleft = iblock; @@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, /* free data blocks we just allocated */ /* not a good idea to call discard here directly, * but otherwise we'd need to call it every free() */ - ext4_mb_discard_inode_preallocations(inode); + ext4_discard_preallocations(inode); ext4_free_blocks(handle, inode, ext_pblock(&newex), ext4_ext_get_actual_len(&newex), 0); goto out2; @@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); ext4_ext_invalidate_cache(inode); - ext4_discard_reservation(inode); + ext4_discard_preallocations(inode); /* * TODO: optimization is possible here. @@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode, * Update only when preallocation was requested beyond * the file size. */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - new_size > i_size_read(inode)) { - i_size_write(inode, new_size); - EXT4_I(inode)->i_disksize = new_size; + if (!(mode & FALLOC_FL_KEEP_SIZE)) { + if (new_size > i_size_read(inode)) + i_size_write(inode, new_size); + if (new_size > EXT4_I(inode)->i_disksize) + ext4_update_i_disksize(inode, new_size); } } @@ -2972,3 +3079,143 @@ retry: mutex_unlock(&inode->i_mutex); return ret > 0 ? ret2 : ret; } + +/* + * Callback function called for each extent to gather FIEMAP information. + */ +int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, + struct ext4_ext_cache *newex, struct ext4_extent *ex, + void *data) +{ + struct fiemap_extent_info *fieinfo = data; + unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; + __u64 logical; + __u64 physical; + __u64 length; + __u32 flags = 0; + int error; + + logical = (__u64)newex->ec_block << blksize_bits; + + if (newex->ec_type == EXT4_EXT_CACHE_GAP) { + pgoff_t offset; + struct page *page; + struct buffer_head *bh = NULL; + + offset = logical >> PAGE_SHIFT; + page = find_get_page(inode->i_mapping, offset); + if (!page || !page_has_buffers(page)) + return EXT_CONTINUE; + + bh = page_buffers(page); + + if (!bh) + return EXT_CONTINUE; + + if (buffer_delay(bh)) { + flags |= FIEMAP_EXTENT_DELALLOC; + page_cache_release(page); + } else { + page_cache_release(page); + return EXT_CONTINUE; + } + } + + physical = (__u64)newex->ec_start << blksize_bits; + length = (__u64)newex->ec_len << blksize_bits; + + if (ex && ext4_ext_is_uninitialized(ex)) + flags |= FIEMAP_EXTENT_UNWRITTEN; + + /* + * If this extent reaches EXT_MAX_BLOCK, it must be last. + * + * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, + * this also indicates no more allocated blocks. + * + * XXX this might miss a single-block extent at EXT_MAX_BLOCK + */ + if (logical + length - 1 == EXT_MAX_BLOCK || + ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) + flags |= FIEMAP_EXTENT_LAST; + + error = fiemap_fill_next_extent(fieinfo, logical, physical, + length, flags); + if (error < 0) + return error; + if (error == 1) + return EXT_BREAK; + + return EXT_CONTINUE; +} + +/* fiemap flags we can handle specified here */ +#define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) + +int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) +{ + __u64 physical = 0; + __u64 length; + __u32 flags = FIEMAP_EXTENT_LAST; + int blockbits = inode->i_sb->s_blocksize_bits; + int error = 0; + + /* in-inode? */ + if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { + struct ext4_iloc iloc; + int offset; /* offset of xattr in inode */ + + error = ext4_get_inode_loc(inode, &iloc); + if (error) + return error; + physical = iloc.bh->b_blocknr << blockbits; + offset = EXT4_GOOD_OLD_INODE_SIZE + + EXT4_I(inode)->i_extra_isize; + physical += offset; + length = EXT4_SB(inode->i_sb)->s_inode_size - offset; + flags |= FIEMAP_EXTENT_DATA_INLINE; + } else { /* external block */ + physical = EXT4_I(inode)->i_file_acl << blockbits; + length = inode->i_sb->s_blocksize; + } + + if (physical) + error = fiemap_fill_next_extent(fieinfo, 0, physical, + length, flags); + return (error < 0 ? error : 0); +} + +int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) +{ + ext4_lblk_t start_blk; + ext4_lblk_t len_blks; + int error = 0; + + /* fallback to generic here if not in extents fmt */ + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + return generic_block_fiemap(inode, fieinfo, start, len, + ext4_get_block); + + if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) + return -EBADR; + + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { + error = ext4_xattr_fiemap(inode, fieinfo); + } else { + start_blk = start >> inode->i_sb->s_blocksize_bits; + len_blks = len >> inode->i_sb->s_blocksize_bits; + + /* + * Walk the extent tree gathering extent information. + * ext4_ext_fiemap_cb will push extents back to user. + */ + down_write(&EXT4_I(inode)->i_data_sem); + error = ext4_ext_walk_space(inode, start_blk, len_blks, + ext4_ext_fiemap_cb, fieinfo); + up_write(&EXT4_I(inode)->i_data_sem); + } + + return error; +} + diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 430eb7978db..6bd11fba71f 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -31,14 +31,14 @@ * from ext4_file_open: open gets called at every open, but release * gets called only when /all/ the files are closed. */ -static int ext4_release_file (struct inode * inode, struct file * filp) +static int ext4_release_file(struct inode *inode, struct file *filp) { /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) { down_write(&EXT4_I(inode)->i_data_sem); - ext4_discard_reservation(inode); + ext4_discard_preallocations(inode); up_write(&EXT4_I(inode)->i_data_sem); } if (is_dx(inode) && filp->private_data) @@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } +extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); + const struct file_operations ext4_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, @@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = { .truncate = ext4_truncate, .setattr = ext4_setattr, .getattr = ext4_getattr, -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, @@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = { #endif .permission = ext4_permission, .fallocate = ext4_fallocate, + .fiemap = ext4_fiemap, }; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a45c3737ad3..5afe4370840 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -28,6 +28,7 @@ #include <linux/writeback.h> #include <linux/jbd2.h> #include <linux/blkdev.h> +#include <linux/marker.h> #include "ext4.h" #include "ext4_jbd2.h" @@ -43,7 +44,7 @@ * inode to disk. */ -int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) +int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; @@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) J_ASSERT(ext4_journal_current_handle() == NULL); + trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", + inode->i_sb->s_id, datasync, inode->i_ino, + dentry->d_parent->d_inode->i_ino); + /* * data=writeback: * The caller's filemap_fdatawrite()/wait will sync the data. diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1d6329dbe39..556ca8eba3d 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) sum += DELTA; b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); - } while(--n); + } while (--n); buf[0] += b0; buf[1] += b1; @@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) /* The old legacy hash */ -static __u32 dx_hack_hash (const char *name, int len) +static __u32 dx_hack_hash(const char *name, int len) { __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; while (len--) { @@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) val = pad; if (len > num*4) len = num * 4; - for (i=0; i < len; i++) { + for (i = 0; i < len; i++) { if ((i % 4) == 0) val = pad; val = msg[i] + (val << 8); @@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) /* Check to see if the seed is all zero's */ if (hinfo->seed) { - for (i=0; i < 4; i++) { + for (i = 0; i < 4; i++) { if (hinfo->seed[i]) break; } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f344834bbf5..fe34d74cfb1 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) block_group, bitmap_blk); return NULL; } - if (bh_uptodate_or_lock(bh)) + if (buffer_uptodate(bh) && + !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) return bh; + lock_buffer(bh); spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { ext4_init_inode_bitmap(sb, bh, block_group, desc); @@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) * though), and then we'd have two inodes sharing the * same inode number and space on the harddisk. */ -void ext4_free_inode (handle_t *handle, struct inode * inode) +void ext4_free_inode(handle_t *handle, struct inode *inode) { - struct super_block * sb = inode->i_sb; + struct super_block *sb = inode->i_sb; int is_directory; unsigned long ino; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; ext4_group_t block_group; unsigned long bit; - struct ext4_group_desc * gdp; - struct ext4_super_block * es; + struct ext4_group_desc *gdp; + struct ext4_super_block *es; struct ext4_sb_info *sbi; int fatal = 0, err; ext4_group_t flex_group; if (atomic_read(&inode->i_count) > 1) { - printk ("ext4_free_inode: inode has count=%d\n", - atomic_read(&inode->i_count)); + printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", + atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { - printk ("ext4_free_inode: inode has nlink=%d\n", - inode->i_nlink); + printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", + inode->i_nlink); return; } if (!sb) { - printk("ext4_free_inode: inode on nonexistent device\n"); + printk(KERN_ERR "ext4_free_inode: inode on " + "nonexistent device\n"); return; } sbi = EXT4_SB(sb); ino = inode->i_ino; - ext4_debug ("freeing inode %lu\n", ino); + ext4_debug("freeing inode %lu\n", ino); /* * Note: we must free any quota before locking the superblock, @@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) is_directory = S_ISDIR(inode->i_mode); /* Do this BEFORE marking the inode not in use or returning an error */ - clear_inode (inode); + clear_inode(inode); es = EXT4_SB(sb)->s_es; if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext4_error (sb, "ext4_free_inode", - "reserved or nonexistent inode %lu", ino); + ext4_error(sb, "ext4_free_inode", + "reserved or nonexistent inode %lu", ino); goto error_return; } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); @@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) /* Ok, now we can actually update the inode bitmaps.. */ if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), bit, bitmap_bh->b_data)) - ext4_error (sb, "ext4_free_inode", - "bit already cleared for inode %lu", ino); + ext4_error(sb, "ext4_free_inode", + "bit already cleared for inode %lu", ino); else { - gdp = ext4_get_group_desc (sb, block_group, &bh2); + gdp = ext4_get_group_desc(sb, block_group, &bh2); BUFFER_TRACE(bh2, "get_write_access"); fatal = ext4_journal_get_write_access(handle, bh2); @@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, avefreei = freei / ngroups; for (group = 0; group < ngroups; group++) { - desc = ext4_get_group_desc (sb, group, NULL); + desc = ext4_get_group_desc(sb, group, NULL); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) @@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent, * For other inodes, search forward from the parent directory's block * group to find a free inode. */ -struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) +struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) { struct super_block *sb; struct buffer_head *bitmap_bh = NULL; struct buffer_head *bh2; ext4_group_t group = 0; unsigned long ino = 0; - struct inode * inode; - struct ext4_group_desc * gdp = NULL; - struct ext4_super_block * es; + struct inode *inode; + struct ext4_group_desc *gdp = NULL; + struct ext4_super_block *es; struct ext4_inode_info *ei; struct ext4_sb_info *sbi; int ret2, err = 0; @@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) } if (S_ISDIR(mode)) { - if (test_opt (sb, OLDALLOC)) + if (test_opt(sb, OLDALLOC)) ret2 = find_group_dir(sb, dir, &group); else ret2 = find_group_orlov(sb, dir, &group); @@ -783,7 +786,7 @@ got: } inode->i_uid = current->fsuid; - if (test_opt (sb, GRPID)) + if (test_opt(sb, GRPID)) inode->i_gid = dir->i_gid; else if (dir->i_mode & S_ISGID) { inode->i_gid = dir->i_gid; @@ -816,7 +819,6 @@ got: ei->i_flags &= ~EXT4_DIRSYNC_FL; ei->i_file_acl = 0; ei->i_dtime = 0; - ei->i_block_alloc_info = NULL; ei->i_block_group = group; ext4_set_inode_flags(inode); @@ -832,7 +834,7 @@ got: ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; ret = inode; - if(DQUOT_ALLOC_INODE(inode)) { + if (DQUOT_ALLOC_INODE(inode)) { err = -EDQUOT; goto fail_drop; } @@ -841,7 +843,7 @@ got: if (err) goto fail_free_drop; - err = ext4_init_security(handle,inode, dir); + err = ext4_init_security(handle, inode, dir); if (err) goto fail_free_drop; @@ -959,7 +961,7 @@ error: return ERR_PTR(err); } -unsigned long ext4_count_free_inodes (struct super_block * sb) +unsigned long ext4_count_free_inodes(struct super_block *sb) { unsigned long desc_count; struct ext4_group_desc *gdp; @@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) bitmap_count = 0; gdp = NULL; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - gdp = ext4_get_group_desc (sb, i, NULL); + gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); @@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) bitmap_count += x; } brelse(bitmap_bh); - printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", - le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); + printk(KERN_DEBUG "ext4_count_free_inodes: " + "stored = %u, computed = %lu, %lu\n", + le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); return desc_count; #else desc_count = 0; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - gdp = ext4_get_group_desc (sb, i, NULL); + gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); @@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) } /* Called at mount-time, super-block is locked */ -unsigned long ext4_count_dirs (struct super_block * sb) +unsigned long ext4_count_dirs(struct super_block * sb) { unsigned long count = 0; ext4_group_t i; for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { - struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); + struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; count += le16_to_cpu(gdp->bg_used_dirs_count); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7e91913e325..9b4ec9decfd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) /* * Called at the last iput() if i_nlink is zero. */ -void ext4_delete_inode (struct inode * inode) +void ext4_delete_inode(struct inode *inode) { handle_t *handle; int err; @@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode, int final = 0; if (i_block < 0) { - ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); + ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); } else if (i_block < direct_blocks) { offsets[n++] = i_block; final = direct_blocks; - } else if ( (i_block -= direct_blocks) < indirect_blocks) { + } else if ((i_block -= direct_blocks) < indirect_blocks) { offsets[n++] = EXT4_IND_BLOCK; offsets[n++] = i_block; final = ptrs; @@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, *err = 0; /* i_data is not going away, no lock needed */ - add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); + add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); if (!p->key) goto no_block; while (--depth) { bh = sb_bread(sb, le32_to_cpu(p->key)); if (!bh) goto failure; - add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); /* Reader: end */ if (!p->key) goto no_block; @@ -443,7 +443,7 @@ no_block: static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) { struct ext4_inode_info *ei = EXT4_I(inode); - __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; + __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; __le32 *p; ext4_fsblk_t bg_start; ext4_fsblk_t last_block; @@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, Indirect *partial) { - struct ext4_block_alloc_info *block_i; - - block_i = EXT4_I(inode)->i_block_alloc_info; - /* - * try the heuristic for sequential allocation, - * failing that at least try to get decent locality. + * XXX need to get goal block from mballoc's data structures */ - if (block_i && (block == block_i->last_alloc_logical_block + 1) - && (block_i->last_alloc_physical_block != 0)) { - return block_i->last_alloc_physical_block + 1; - } return ext4_find_near(inode, partial); } @@ -630,7 +621,7 @@ allocated: *err = 0; return ret; failed_out: - for (i = 0; i <index; i++) + for (i = 0; i < index; i++) ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); return ret; } @@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, branch[n].p = (__le32 *) bh->b_data + offsets[n]; branch[n].key = cpu_to_le32(new_blocks[n]); *branch[n].p = branch[n].key; - if ( n == indirect_blks) { + if (n == indirect_blks) { current_block = new_blocks[n]; /* * End of chain, update the last new metablock of @@ -730,7 +721,7 @@ failed: BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); ext4_journal_forget(handle, branch[i].bh); } - for (i = 0; i <indirect_blks; i++) + for (i = 0; i < indirect_blks; i++) ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); ext4_free_blocks(handle, inode, new_blocks[i], num, 0); @@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, { int i; int err = 0; - struct ext4_block_alloc_info *block_i; ext4_fsblk_t current_block; - block_i = EXT4_I(inode)->i_block_alloc_info; /* * If we're splicing into a [td]indirect block (as opposed to the * inode) then we need to get write access to the [td]indirect block @@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, if (num == 0 && blks > 1) { current_block = le32_to_cpu(where->key) + 1; for (i = 1; i < blks; i++) - *(where->p + i ) = cpu_to_le32(current_block++); - } - - /* - * update the most recently allocated logical & physical block - * in i_block_alloc_info, to assist find the proper goal block for next - * allocation - */ - if (block_i) { - block_i->last_alloc_logical_block = block + blks - 1; - block_i->last_alloc_physical_block = - le32_to_cpu(where[num].key) + blks - 1; + *(where->p + i) = cpu_to_le32(current_block++); } /* We are done with atomic stuff, now do the rest of housekeeping */ @@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, goto cleanup; /* - * Okay, we need to do block allocation. Lazily initialize the block - * allocation info here if necessary + * Okay, we need to do block allocation. */ - if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) - ext4_init_block_alloc_info(inode); - goal = ext4_find_goal(inode, iblock, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ @@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - /* Account for allocated meta_blocks */ - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; + if (mdb_free) { + /* Account for allocated meta_blocks */ + mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; - /* update fs free blocks counter for truncate case */ - percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); + /* update fs dirty blocks counter */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); + EXT4_I(inode)->i_allocated_meta_blocks = 0; + EXT4_I(inode)->i_reserved_meta_blocks = mdb; + } /* update per-inode reservations */ BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); EXT4_I(inode)->i_reserved_data_blocks -= used; - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - EXT4_I(inode)->i_reserved_meta_blocks = mdb; - EXT4_I(inode)->i_allocated_meta_blocks = 0; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); } @@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 -static int ext4_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) +int ext4_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) { handle_t *handle = ext4_journal_current_handle(); int ret = 0, started = 0; @@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, BUFFER_TRACE(bh, "call get_create_access"); fatal = ext4_journal_get_create_access(handle, bh); if (!fatal && !buffer_uptodate(bh)) { - memset(bh->b_data,0,inode->i_sb->s_blocksize); + memset(bh->b_data, 0, inode->i_sb->s_blocksize); set_buffer_uptodate(bh); } unlock_buffer(bh); @@ -1266,7 +1241,7 @@ err: struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, ext4_lblk_t block, int create, int *err) { - struct buffer_head * bh; + struct buffer_head *bh; bh = ext4_getblk(handle, inode, block, create, err); if (!bh) @@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return NULL; } -static int walk_page_buffers( handle_t *handle, - struct buffer_head *head, - unsigned from, - unsigned to, - int *partial, - int (*fn)( handle_t *handle, - struct buffer_head *bh)) +static int walk_page_buffers(handle_t *handle, + struct buffer_head *head, + unsigned from, + unsigned to, + int *partial, + int (*fn)(handle_t *handle, + struct buffer_head *bh)) { struct buffer_head *bh; unsigned block_start, block_end; @@ -1296,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle, int err, ret = 0; struct buffer_head *next; - for ( bh = head, block_start = 0; - ret == 0 && (bh != head || !block_start); - block_start = block_end, bh = next) + for (bh = head, block_start = 0; + ret == 0 && (bh != head || !block_start); + block_start = block_end, bh = next) { next = bh->b_this_page; block_end = block_start + blocksize; @@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - struct inode *inode = mapping->host; + struct inode *inode = mapping->host; int ret, needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; - struct page *page; + struct page *page; pgoff_t index; - unsigned from, to; + unsigned from, to; index = pos >> PAGE_CACHE_SHIFT; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; + from = pos & (PAGE_CACHE_SIZE - 1); + to = from + len; retry: - handle = ext4_journal_start(inode, needed_blocks); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; + handle = ext4_journal_start(inode, needed_blocks); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; } page = __grab_cache_page(mapping, index); @@ -1387,9 +1362,16 @@ retry: } if (ret) { - unlock_page(page); + unlock_page(page); ext4_journal_stop(handle); - page_cache_release(page); + page_cache_release(page); + /* + * block_write_begin may have instantiated a few blocks + * outside i_size. Trim these off again. Don't need + * i_size_read because we hold i_mutex. + */ + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file, ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { - /* - * generic_write_end() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. - */ loff_t new_i_size; new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = new_i_size; + if (new_i_size > EXT4_I(inode)->i_disksize) { + ext4_update_i_disksize(inode, new_i_size); + /* We need to mark inode dirty even if + * new_i_size is less that inode->i_size + * bu greater than i_disksize.(hint delalloc) + */ + ext4_mark_inode_dirty(handle, inode); + } + ret2 = generic_write_end(file, mapping, pos, len, copied, page, fsdata); copied = ret2; @@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file, loff_t new_i_size; new_i_size = pos + copied; - if (new_i_size > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = new_i_size; + if (new_i_size > EXT4_I(inode)->i_disksize) { + ext4_update_i_disksize(inode, new_i_size); + /* We need to mark inode dirty even if + * new_i_size is less that inode->i_size + * bu greater than i_disksize.(hint delalloc) + */ + ext4_mark_inode_dirty(handle, inode); + } ret2 = generic_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file, int ret = 0, ret2; int partial = 0; unsigned from, to; + loff_t new_i_size; from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file, to, &partial, write_end_fn); if (!partial) SetPageUptodate(page); - if (pos+copied > inode->i_size) + new_i_size = pos + copied; + if (new_i_size > inode->i_size) i_size_write(inode, pos+copied); EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; - if (inode->i_size > EXT4_I(inode)->i_disksize) { - EXT4_I(inode)->i_disksize = inode->i_size; + if (new_i_size > EXT4_I(inode)->i_disksize) { + ext4_update_i_disksize(inode, new_i_size); ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file, static int ext4_da_reserve_space(struct inode *inode, int nrblocks) { + int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); unsigned long md_needed, mdblocks, total = 0; @@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) * in order to allocate nrblocks * worse case is one extent per block */ +repeat: spin_lock(&EXT4_I(inode)->i_block_reservation_lock); total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; mdblocks = ext4_calc_metadata_amount(inode, total); @@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; total = md_needed + nrblocks; - if (ext4_has_free_blocks(sbi, total) < total) { + if (ext4_claim_free_blocks(sbi, total)) { spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + if (ext4_should_retry_alloc(inode->i_sb, &retries)) { + yield(); + goto repeat; + } return -ENOSPC; } - /* reduce fs free blocks counter */ - percpu_counter_sub(&sbi->s_freeblocks_counter, total); - EXT4_I(inode)->i_reserved_data_blocks += nrblocks; EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; @@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) release = to_free + mdb_free; - /* update fs free blocks counter for truncate case */ - percpu_counter_add(&sbi->s_freeblocks_counter, release); + /* update fs dirty blocks counter for truncate case */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); /* update per-inode reservations */ BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); @@ -1630,6 +1625,7 @@ struct mpage_da_data { struct writeback_control *wbc; int io_done; long pages_written; + int retval; }; /* @@ -1783,6 +1779,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode, unmap_underlying_metadata(bdev, bh->b_blocknr + i); } +static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, + sector_t logical, long blk_cnt) +{ + int nr_pages, i; + pgoff_t index, end; + struct pagevec pvec; + struct inode *inode = mpd->inode; + struct address_space *mapping = inode->i_mapping; + + index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); + end = (logical + blk_cnt - 1) >> + (PAGE_CACHE_SHIFT - inode->i_blkbits); + while (index <= end) { + nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); + if (nr_pages == 0) + break; + for (i = 0; i < nr_pages; i++) { + struct page *page = pvec.pages[i]; + index = page->index; + if (index > end) + break; + index++; + + BUG_ON(!PageLocked(page)); + BUG_ON(PageWriteback(page)); + block_invalidatepage(page, 0); + ClearPageUptodate(page); + unlock_page(page); + } + } + return; +} + +static void ext4_print_free_blocks(struct inode *inode) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + printk(KERN_EMERG "Total free blocks count %lld\n", + ext4_count_free_blocks(inode->i_sb)); + printk(KERN_EMERG "Free/Dirty block details\n"); + printk(KERN_EMERG "free_blocks=%lld\n", + percpu_counter_sum(&sbi->s_freeblocks_counter)); + printk(KERN_EMERG "dirty_blocks=%lld\n", + percpu_counter_sum(&sbi->s_dirtyblocks_counter)); + printk(KERN_EMERG "Block reservation details\n"); + printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", + EXT4_I(inode)->i_reserved_data_blocks); + printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", + EXT4_I(inode)->i_reserved_meta_blocks); + return; +} + /* * mpage_da_map_blocks - go through given space * @@ -1792,32 +1839,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode, * The function skips space we know is already mapped to disk blocks. * */ -static void mpage_da_map_blocks(struct mpage_da_data *mpd) +static int mpage_da_map_blocks(struct mpage_da_data *mpd) { int err = 0; - struct buffer_head *lbh = &mpd->lbh; - sector_t next = lbh->b_blocknr; struct buffer_head new; + struct buffer_head *lbh = &mpd->lbh; + sector_t next; /* * We consider only non-mapped and non-allocated blocks */ if (buffer_mapped(lbh) && !buffer_delay(lbh)) - return; - + return 0; new.b_state = lbh->b_state; new.b_blocknr = 0; new.b_size = lbh->b_size; - + next = lbh->b_blocknr; /* * If we didn't accumulate anything * to write simply return */ if (!new.b_size) - return; + return 0; err = mpd->get_block(mpd->inode, next, &new, 1); - if (err) - return; + if (err) { + + /* If get block returns with error + * we simply return. Later writepage + * will redirty the page and writepages + * will find the dirty page again + */ + if (err == -EAGAIN) + return 0; + + if (err == -ENOSPC && + ext4_count_free_blocks(mpd->inode->i_sb)) { + mpd->retval = err; + return 0; + } + + /* + * get block failure will cause us + * to loop in writepages. Because + * a_ops->writepage won't be able to + * make progress. The page will be redirtied + * by writepage and writepages will again + * try to write the same. + */ + printk(KERN_EMERG "%s block allocation failed for inode %lu " + "at logical offset %llu with max blocks " + "%zd with error %d\n", + __func__, mpd->inode->i_ino, + (unsigned long long)next, + lbh->b_size >> mpd->inode->i_blkbits, err); + printk(KERN_EMERG "This should not happen.!! " + "Data will be lost\n"); + if (err == -ENOSPC) { + ext4_print_free_blocks(mpd->inode); + } + /* invlaidate all the pages */ + ext4_da_block_invalidatepages(mpd, next, + lbh->b_size >> mpd->inode->i_blkbits); + return err; + } BUG_ON(new.b_size == 0); if (buffer_new(&new)) @@ -1830,7 +1914,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) if (buffer_delay(lbh) || buffer_unwritten(lbh)) mpage_put_bnr_to_bhs(mpd, next, &new); - return; + return 0; } #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ @@ -1899,8 +1983,8 @@ flush_it: * We couldn't merge the block to our extent, so we * need to flush current extent and start new one */ - mpage_da_map_blocks(mpd); - mpage_da_submit_io(mpd); + if (mpage_da_map_blocks(mpd) == 0) + mpage_da_submit_io(mpd); mpd->io_done = 1; return; } @@ -1942,8 +2026,8 @@ static int __mpage_da_writepage(struct page *page, * and start IO on them using writepage() */ if (mpd->next_page != mpd->first_page) { - mpage_da_map_blocks(mpd); - mpage_da_submit_io(mpd); + if (mpage_da_map_blocks(mpd) == 0) + mpage_da_submit_io(mpd); /* * skip rest of the page in the page_vec */ @@ -2018,39 +2102,36 @@ static int __mpage_da_writepage(struct page *page, */ static int mpage_da_writepages(struct address_space *mapping, struct writeback_control *wbc, - get_block_t get_block) + struct mpage_da_data *mpd) { - struct mpage_da_data mpd; long to_write; int ret; - if (!get_block) + if (!mpd->get_block) return generic_writepages(mapping, wbc); - mpd.wbc = wbc; - mpd.inode = mapping->host; - mpd.lbh.b_size = 0; - mpd.lbh.b_state = 0; - mpd.lbh.b_blocknr = 0; - mpd.first_page = 0; - mpd.next_page = 0; - mpd.get_block = get_block; - mpd.io_done = 0; - mpd.pages_written = 0; + mpd->lbh.b_size = 0; + mpd->lbh.b_state = 0; + mpd->lbh.b_blocknr = 0; + mpd->first_page = 0; + mpd->next_page = 0; + mpd->io_done = 0; + mpd->pages_written = 0; + mpd->retval = 0; to_write = wbc->nr_to_write; - ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); + ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); /* * Handle last extent of pages */ - if (!mpd.io_done && mpd.next_page != mpd.first_page) { - mpage_da_map_blocks(&mpd); - mpage_da_submit_io(&mpd); + if (!mpd->io_done && mpd->next_page != mpd->first_page) { + if (mpage_da_map_blocks(mpd) == 0) + mpage_da_submit_io(mpd); } - wbc->nr_to_write = to_write - mpd.pages_written; + wbc->nr_to_write = to_write - mpd->pages_written; return ret; } @@ -2103,18 +2184,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, handle_t *handle = NULL; handle = ext4_journal_current_handle(); - if (!handle) { - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, - bh_result, 0, 0, 0); - BUG_ON(!ret); - } else { - ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, - bh_result, create, 0, EXT4_DELALLOC_RSVED); - } - + BUG_ON(!handle); + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, + bh_result, create, 0, EXT4_DELALLOC_RSVED); if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + if (ext4_should_order_data(inode)) { + int retval; + retval = ext4_jbd2_file_inode(handle, inode); + if (retval) + /* + * Failed to add inode for ordered + * mode. Don't update file size + */ + return retval; + } + /* * Update on-disk size along with block allocation * we don't use 'extend_disksize' as size may change @@ -2124,18 +2211,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, if (disksize > i_size_read(inode)) disksize = i_size_read(inode); if (disksize > EXT4_I(inode)->i_disksize) { - /* - * XXX: replace with spinlock if seen contended -bzzz - */ - down_write(&EXT4_I(inode)->i_data_sem); - if (disksize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = disksize; - up_write(&EXT4_I(inode)->i_data_sem); - - if (EXT4_I(inode)->i_disksize == disksize) { - ret = ext4_mark_inode_dirty(handle, inode); - return ret; - } + ext4_update_i_disksize(inode, disksize); + ret = ext4_mark_inode_dirty(handle, inode); + return ret; } ret = 0; } @@ -2284,6 +2362,7 @@ static int ext4_da_writepages(struct address_space *mapping, { handle_t *handle = NULL; loff_t range_start = 0; + struct mpage_da_data mpd; struct inode *inode = mapping->host; int needed_blocks, ret = 0, nr_to_writebump = 0; long to_write, pages_skipped = 0; @@ -2317,6 +2396,9 @@ static int ext4_da_writepages(struct address_space *mapping, range_start = wbc->range_start; pages_skipped = wbc->pages_skipped; + mpd.wbc = wbc; + mpd.inode = mapping->host; + restart_loop: to_write = wbc->nr_to_write; while (!ret && to_write > 0) { @@ -2340,23 +2422,17 @@ restart_loop: dump_stack(); goto out_writepages; } - if (ext4_should_order_data(inode)) { - /* - * With ordered mode we need to add - * the inode to the journal handl - * when we do block allocation. - */ - ret = ext4_jbd2_file_inode(handle, inode); - if (ret) { - ext4_journal_stop(handle); - goto out_writepages; - } - } - to_write -= wbc->nr_to_write; - ret = mpage_da_writepages(mapping, wbc, - ext4_da_get_block_write); + + mpd.get_block = ext4_da_get_block_write; + ret = mpage_da_writepages(mapping, wbc, &mpd); + ext4_journal_stop(handle); + + if (mpd.retval == -ENOSPC) + jbd2_journal_force_commit_nested(sbi->s_journal); + + /* reset the retry count */ if (ret == MPAGE_DA_EXTENT_TAIL) { /* * got one extent now try with @@ -2391,6 +2467,33 @@ out_writepages: return ret; } +#define FALL_BACK_TO_NONDELALLOC 1 +static int ext4_nonda_switch(struct super_block *sb) +{ + s64 free_blocks, dirty_blocks; + struct ext4_sb_info *sbi = EXT4_SB(sb); + + /* + * switch to non delalloc mode if we are running low + * on free block. The free block accounting via percpu + * counters can get slightly wrong with FBC_BATCH getting + * accumulated on each CPU without updating global counters + * Delalloc need an accurate free block accounting. So switch + * to non delalloc when we are near to error range. + */ + free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); + if (2 * free_blocks < 3 * dirty_blocks || + free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { + /* + * free block count is less that 150% of dirty blocks + * or free blocks is less that watermark + */ + return 1; + } + return 0; +} + static int ext4_da_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -2406,6 +2509,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; + if (ext4_nonda_switch(inode->i_sb)) { + *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; + return ext4_write_begin(file, mapping, pos, + len, flags, pagep, fsdata); + } + *fsdata = (void *)0; retry: /* * With delayed allocation, we don't log the i_disksize update @@ -2433,6 +2542,13 @@ retry: unlock_page(page); ext4_journal_stop(handle); page_cache_release(page); + /* + * block_write_begin may have instantiated a few blocks + * outside i_size. Trim these off again. Don't need + * i_size_read because we hold i_mutex. + */ + if (pos + len > inode->i_size) + vmtruncate(inode, inode->i_size); } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) @@ -2456,7 +2572,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, bh = page_buffers(page); idx = offset >> inode->i_blkbits; - for (i=0; i < idx; i++) + for (i = 0; i < idx; i++) bh = bh->b_this_page; if (!buffer_mapped(bh) || (buffer_delay(bh))) @@ -2474,9 +2590,22 @@ static int ext4_da_write_end(struct file *file, handle_t *handle = ext4_journal_current_handle(); loff_t new_i_size; unsigned long start, end; + int write_mode = (int)(unsigned long)fsdata; + + if (write_mode == FALL_BACK_TO_NONDELALLOC) { + if (ext4_should_order_data(inode)) { + return ext4_ordered_write_end(file, mapping, pos, + len, copied, page, fsdata); + } else if (ext4_should_writeback_data(inode)) { + return ext4_writeback_write_end(file, mapping, pos, + len, copied, page, fsdata); + } else { + BUG(); + } + } start = pos & (PAGE_CACHE_SIZE - 1); - end = start + copied -1; + end = start + copied - 1; /* * generic_write_end() will run mark_inode_dirty() if i_size @@ -2500,6 +2629,11 @@ static int ext4_da_write_end(struct file *file, EXT4_I(inode)->i_disksize = new_i_size; } up_write(&EXT4_I(inode)->i_data_sem); + /* We need to mark inode dirty even if + * new_i_size is less that inode->i_size + * bu greater than i_disksize.(hint delalloc) + */ + ext4_mark_inode_dirty(handle, inode); } } ret2 = generic_write_end(file, mapping, pos, len, copied, @@ -2591,7 +2725,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) return 0; } - return generic_block_bmap(mapping,block,ext4_get_block); + return generic_block_bmap(mapping, block, ext4_get_block); } static int bget_one(handle_t *handle, struct buffer_head *bh) @@ -3197,7 +3331,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, if (!partial->key && *partial->p) /* Writer: end */ goto no_top; - for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) + for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) ; /* * OK, we've found the last block that must survive. The rest of our @@ -3216,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, } /* Writer: end */ - while(partial > p) { + while (partial > p) { brelse(partial->bh); partial--; } @@ -3408,9 +3542,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, /* This zaps the entire block. Bottom up. */ BUFFER_TRACE(bh, "free child branches"); ext4_free_branches(handle, inode, bh, - (__le32*)bh->b_data, - (__le32*)bh->b_data + addr_per_block, - depth); + (__le32 *) bh->b_data, + (__le32 *) bh->b_data + addr_per_block, + depth); /* * We've probably journalled the indirect block several @@ -3578,7 +3712,7 @@ void ext4_truncate(struct inode *inode) */ down_write(&ei->i_data_sem); - ext4_discard_reservation(inode); + ext4_discard_preallocations(inode); /* * The orphan list entry will now protect us from any crash which @@ -3673,41 +3807,6 @@ out_stop: ext4_journal_stop(handle); } -static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, - unsigned long ino, struct ext4_iloc *iloc) -{ - ext4_group_t block_group; - unsigned long offset; - ext4_fsblk_t block; - struct ext4_group_desc *gdp; - - if (!ext4_valid_inum(sb, ino)) { - /* - * This error is already checked for in namei.c unless we are - * looking at an NFS filehandle, in which case no error - * report is needed - */ - return 0; - } - - block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); - gdp = ext4_get_group_desc(sb, block_group, NULL); - if (!gdp) - return 0; - - /* - * Figure out the offset within the block group inode table - */ - offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * - EXT4_INODE_SIZE(sb); - block = ext4_inode_table(sb, gdp) + - (offset >> EXT4_BLOCK_SIZE_BITS(sb)); - - iloc->block_group = block_group; - iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); - return block; -} - /* * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. If 'in_mem' is true, we have all @@ -3717,19 +3816,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, static int __ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc, int in_mem) { - ext4_fsblk_t block; - struct buffer_head *bh; + struct ext4_group_desc *gdp; + struct buffer_head *bh; + struct super_block *sb = inode->i_sb; + ext4_fsblk_t block; + int inodes_per_block, inode_offset; + + iloc->bh = 0; + if (!ext4_valid_inum(sb, inode->i_ino)) + return -EIO; - block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); - if (!block) + iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); + gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); + if (!gdp) return -EIO; - bh = sb_getblk(inode->i_sb, block); + /* + * Figure out the offset within the block group inode table + */ + inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); + inode_offset = ((inode->i_ino - 1) % + EXT4_INODES_PER_GROUP(sb)); + block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); + iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); + + bh = sb_getblk(sb, block); if (!bh) { - ext4_error (inode->i_sb, "ext4_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%llu", - inode->i_ino, block); + ext4_error(sb, "ext4_get_inode_loc", "unable to read " + "inode block - inode=%lu, block=%llu", + inode->i_ino, block); return -EIO; } if (!buffer_uptodate(bh)) { @@ -3757,28 +3872,12 @@ static int __ext4_get_inode_loc(struct inode *inode, */ if (in_mem) { struct buffer_head *bitmap_bh; - struct ext4_group_desc *desc; - int inodes_per_buffer; - int inode_offset, i; - ext4_group_t block_group; - int start; - - block_group = (inode->i_ino - 1) / - EXT4_INODES_PER_GROUP(inode->i_sb); - inodes_per_buffer = bh->b_size / - EXT4_INODE_SIZE(inode->i_sb); - inode_offset = ((inode->i_ino - 1) % - EXT4_INODES_PER_GROUP(inode->i_sb)); - start = inode_offset & ~(inodes_per_buffer - 1); + int i, start; - /* Is the inode bitmap in cache? */ - desc = ext4_get_group_desc(inode->i_sb, - block_group, NULL); - if (!desc) - goto make_io; + start = inode_offset & ~(inodes_per_block - 1); - bitmap_bh = sb_getblk(inode->i_sb, - ext4_inode_bitmap(inode->i_sb, desc)); + /* Is the inode bitmap in cache? */ + bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); if (!bitmap_bh) goto make_io; @@ -3791,14 +3890,14 @@ static int __ext4_get_inode_loc(struct inode *inode, brelse(bitmap_bh); goto make_io; } - for (i = start; i < start + inodes_per_buffer; i++) { + for (i = start; i < start + inodes_per_block; i++) { if (i == inode_offset) continue; if (ext4_test_bit(i, bitmap_bh->b_data)) break; } brelse(bitmap_bh); - if (i == start + inodes_per_buffer) { + if (i == start + inodes_per_block) { /* all other inodes are free, so skip I/O */ memset(bh->b_data, 0, bh->b_size); set_buffer_uptodate(bh); @@ -3809,6 +3908,36 @@ static int __ext4_get_inode_loc(struct inode *inode, make_io: /* + * If we need to do any I/O, try to pre-readahead extra + * blocks from the inode table. + */ + if (EXT4_SB(sb)->s_inode_readahead_blks) { + ext4_fsblk_t b, end, table; + unsigned num; + + table = ext4_inode_table(sb, gdp); + /* Make sure s_inode_readahead_blks is a power of 2 */ + while (EXT4_SB(sb)->s_inode_readahead_blks & + (EXT4_SB(sb)->s_inode_readahead_blks-1)) + EXT4_SB(sb)->s_inode_readahead_blks = + (EXT4_SB(sb)->s_inode_readahead_blks & + (EXT4_SB(sb)->s_inode_readahead_blks-1)); + b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); + if (table > b) + b = table; + end = b + EXT4_SB(sb)->s_inode_readahead_blks; + num = EXT4_INODES_PER_GROUP(sb); + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) + num -= le16_to_cpu(gdp->bg_itable_unused); + table += num / inodes_per_block; + if (end > table) + end = table; + while (b <= end) + sb_breadahead(sb, b++); + } + + /* * There are other valid inodes in the buffer, this inode * has in-inode xattrs, or we don't have this inode in memory. * Read the block from disk. @@ -3818,10 +3947,9 @@ make_io: submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext4_error(inode->i_sb, "ext4_get_inode_loc", - "unable to read inode block - " - "inode=%lu, block=%llu", - inode->i_ino, block); + ext4_error(sb, __func__, + "unable to read inode block - inode=%lu, " + "block=%llu", inode->i_ino, block); brelse(bh); return -EIO; } @@ -3913,11 +4041,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) return inode; ei = EXT4_I(inode); -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif - ei->i_block_alloc_info = NULL; ret = __ext4_get_inode_loc(inode, &iloc, 0); if (ret < 0) @@ -3927,7 +4054,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); - if(!(test_opt (inode->i_sb, NO_UID32))) { + if (!(test_opt(inode->i_sb, NO_UID32))) { inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; } @@ -3945,7 +4072,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) if (inode->i_mode == 0 || !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ - brelse (bh); + brelse(bh); ret = -ESTALE; goto bad_inode; } @@ -3978,7 +4105,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > EXT4_INODE_SIZE(inode->i_sb)) { - brelse (bh); + brelse(bh); ret = -EIO; goto bad_inode; } @@ -4031,7 +4158,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) init_special_inode(inode, inode->i_mode, new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } - brelse (iloc.bh); + brelse(iloc.bh); ext4_set_inode_flags(inode); unlock_new_inode(inode); return inode; @@ -4113,14 +4240,14 @@ static int ext4_do_update_inode(handle_t *handle, ext4_get_inode_flags(ei); raw_inode->i_mode = cpu_to_le16(inode->i_mode); - if(!(test_opt(inode->i_sb, NO_UID32))) { + if (!(test_opt(inode->i_sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); /* * Fix up interoperability with old kernels. Otherwise, old inodes get * re-used with the upper 16 bits of the uid/gid intact */ - if(!ei->i_dtime) { + if (!ei->i_dtime) { raw_inode->i_uid_high = cpu_to_le16(high_16_bits(inode->i_uid)); raw_inode->i_gid_high = @@ -4208,7 +4335,7 @@ static int ext4_do_update_inode(handle_t *handle, ei->i_state &= ~EXT4_STATE_NEW; out_brelse: - brelse (bh); + brelse(bh); ext4_std_error(inode->i_sb, err); return err; } @@ -4811,6 +4938,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) loff_t size; unsigned long len; int ret = -EINVAL; + void *fsdata; struct file *file = vma->vm_file; struct inode *inode = file->f_path.dentry->d_inode; struct address_space *mapping = inode->i_mapping; @@ -4849,11 +4977,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) * on the same page though */ ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), - len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); + len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); if (ret < 0) goto out_unlock; ret = mapping->a_ops->write_end(file, mapping, page_offset(page), - len, len, page, NULL); + len, len, page, fsdata); if (ret < 0) goto out_unlock; ret = 0; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7a6c2f1faba..ea27eaa0cfe 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct inode *inode = filp->f_dentry->d_inode; struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; - unsigned short rsv_window_size; - ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); + ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { case EXT4_IOC_GETFLAGS: @@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return put_user(flags, (int __user *) arg); case EXT4_IOC_SETFLAGS: { handle_t *handle = NULL; - int err; + int err, migrate = 0; struct ext4_iloc iloc; unsigned int oldflags; unsigned int jflag; @@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_RESOURCE)) goto flags_out; } + if (oldflags & EXT4_EXTENTS_FL) { + /* We don't support clearning extent flags */ + if (!(flags & EXT4_EXTENTS_FL)) { + err = -EOPNOTSUPP; + goto flags_out; + } + } else if (flags & EXT4_EXTENTS_FL) { + /* migrate the file */ + migrate = 1; + flags &= ~EXT4_EXTENTS_FL; + } handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { @@ -109,6 +119,10 @@ flags_err: if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) err = ext4_change_inode_journal_flag(inode, jflag); + if (err) + goto flags_out; + if (migrate) + err = ext4_ext_migrate(inode); flags_out: mutex_unlock(&inode->i_mutex); mnt_drop_write(filp->f_path.mnt); @@ -175,49 +189,6 @@ setversion_out: return ret; } #endif - case EXT4_IOC_GETRSVSZ: - if (test_opt(inode->i_sb, RESERVATION) - && S_ISREG(inode->i_mode) - && ei->i_block_alloc_info) { - rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; - return put_user(rsv_window_size, (int __user *)arg); - } - return -ENOTTY; - case EXT4_IOC_SETRSVSZ: { - int err; - - if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) - return -ENOTTY; - - if (!is_owner_or_cap(inode)) - return -EACCES; - - if (get_user(rsv_window_size, (int __user *)arg)) - return -EFAULT; - - err = mnt_want_write(filp->f_path.mnt); - if (err) - return err; - - if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) - rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; - - /* - * need to allocate reservation structure for this inode - * before set the window size - */ - down_write(&ei->i_data_sem); - if (!ei->i_block_alloc_info) - ext4_init_block_alloc_info(inode); - - if (ei->i_block_alloc_info){ - struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; - rsv->rsv_goal_size = rsv_window_size; - } - up_write(&ei->i_data_sem); - mnt_drop_write(filp->f_path.mnt); - return 0; - } case EXT4_IOC_GROUP_EXTEND: { ext4_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; @@ -267,7 +238,26 @@ setversion_out: } case EXT4_IOC_MIGRATE: - return ext4_ext_migrate(inode, filp, cmd, arg); + { + int err; + if (!is_owner_or_cap(inode)) + return -EACCES; + + err = mnt_want_write(filp->f_path.mnt); + if (err) + return err; + /* + * inode_mutex prevent write and truncate on the file. + * Read still goes through. We take i_data_sem in + * ext4_ext_swap_inode_data before we switch the + * inode format to prevent read. + */ + mutex_lock(&(inode->i_mutex)); + err = ext4_ext_migrate(inode); + mutex_unlock(&(inode->i_mutex)); + mnt_drop_write(filp->f_path.mnt); + return err; + } default: return -ENOTTY; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e0e3a5eb1dd..b580714f0d8 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) b2 = (unsigned char *) bitmap; for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { if (b1[i] != b2[i]) { - printk("corruption in group %lu at byte %u(%u):" - " %x in copy != %x on disk/prealloc\n", - e4b->bd_group, i, i * 8, b1[i], b2[i]); + printk(KERN_ERR "corruption in group %lu " + "at byte %u(%u): %x in copy != %x " + "on disk/prealloc\n", + e4b->bd_group, i, i * 8, b1[i], b2[i]); BUG(); } } @@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, void *buddy; void *buddy2; - if (!test_opt(sb, MBALLOC)) - return 0; - { static int mb_check_counter; if (mb_check_counter++ % 100 != 0) @@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) if (bh[i] == NULL) goto out; - if (bh_uptodate_or_lock(bh[i])) + if (buffer_uptodate(bh[i]) && + !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) continue; + lock_buffer(bh[i]); spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh[i], @@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - remove_proc_entry("mb_groups", sbi->s_mb_proc); - remove_proc_entry("mb_history", sbi->s_mb_proc); - + if (sbi->s_proc != NULL) { + remove_proc_entry("mb_groups", sbi->s_proc); + remove_proc_entry("mb_history", sbi->s_proc); + } kfree(sbi->s_mb_history); } @@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int i; - if (sbi->s_mb_proc != NULL) { - proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, + if (sbi->s_proc != NULL) { + proc_create_data("mb_history", S_IRUGO, sbi->s_proc, &ext4_mb_seq_history_fops, sb); - proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, + proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, &ext4_mb_seq_groups_fops, sb); } @@ -2485,19 +2486,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) unsigned max; int ret; - if (!test_opt(sb, MBALLOC)) - return 0; - i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_offsets == NULL) { - clear_opt(sbi->s_mount_opt, MBALLOC); return -ENOMEM; } sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); if (sbi->s_mb_maxs == NULL) { - clear_opt(sbi->s_mount_opt, MBALLOC); kfree(sbi->s_mb_maxs); return -ENOMEM; } @@ -2520,7 +2516,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) /* init file for buddy data */ ret = ext4_mb_init_backend(sb); if (ret != 0) { - clear_opt(sbi->s_mount_opt, MBALLOC); kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); return ret; @@ -2540,17 +2535,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; - i = sizeof(struct ext4_locality_group) * nr_cpu_ids; - sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); + sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); if (sbi->s_locality_groups == NULL) { - clear_opt(sbi->s_mount_opt, MBALLOC); kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); return -ENOMEM; } - for (i = 0; i < nr_cpu_ids; i++) { + for_each_possible_cpu(i) { struct ext4_locality_group *lg; - lg = &sbi->s_locality_groups[i]; + lg = per_cpu_ptr(sbi->s_locality_groups, i); mutex_init(&lg->lg_mutex); for (j = 0; j < PREALLOC_TB_SIZE; j++) INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); @@ -2560,7 +2553,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) ext4_mb_init_per_dev_proc(sb); ext4_mb_history_init(sb); - printk("EXT4-fs: mballoc enabled\n"); + printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); return 0; } @@ -2589,9 +2582,6 @@ int ext4_mb_release(struct super_block *sb) struct ext4_group_info *grinfo; struct ext4_sb_info *sbi = EXT4_SB(sb); - if (!test_opt(sb, MBALLOC)) - return 0; - /* release freed, non-committed blocks */ spin_lock(&sbi->s_md_lock); list_splice_init(&sbi->s_closed_transaction, @@ -2647,8 +2637,7 @@ int ext4_mb_release(struct super_block *sb) atomic_read(&sbi->s_mb_discarded)); } - kfree(sbi->s_locality_groups); - + free_percpu(sbi->s_locality_groups); ext4_mb_history_release(sb); ext4_mb_destroy_per_dev_proc(sb); @@ -2721,118 +2710,46 @@ ext4_mb_free_committed_blocks(struct super_block *sb) #define EXT4_MB_STREAM_REQ "stream_req" #define EXT4_MB_GROUP_PREALLOC "group_prealloc" - - -#define MB_PROC_FOPS(name) \ -static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ -{ \ - struct ext4_sb_info *sbi = m->private; \ - \ - seq_printf(m, "%ld\n", sbi->s_mb_##name); \ - return 0; \ -} \ - \ -static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ -{ \ - return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ -} \ - \ -static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ - const char __user *buf, size_t cnt, loff_t *ppos) \ -{ \ - struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ - char str[32]; \ - long value; \ - if (cnt >= sizeof(str)) \ - return -EINVAL; \ - if (copy_from_user(str, buf, cnt)) \ - return -EFAULT; \ - value = simple_strtol(str, NULL, 0); \ - if (value <= 0) \ - return -ERANGE; \ - sbi->s_mb_##name = value; \ - return cnt; \ -} \ - \ -static const struct file_operations ext4_mb_##name##_proc_fops = { \ - .owner = THIS_MODULE, \ - .open = ext4_mb_##name##_proc_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ - .write = ext4_mb_##name##_proc_write, \ -}; - -MB_PROC_FOPS(stats); -MB_PROC_FOPS(max_to_scan); -MB_PROC_FOPS(min_to_scan); -MB_PROC_FOPS(order2_reqs); -MB_PROC_FOPS(stream_request); -MB_PROC_FOPS(group_prealloc); - -#define MB_PROC_HANDLER(name, var) \ -do { \ - proc = proc_create_data(name, mode, sbi->s_mb_proc, \ - &ext4_mb_##var##_proc_fops, sbi); \ - if (proc == NULL) { \ - printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ - goto err_out; \ - } \ -} while (0) - static int ext4_mb_init_per_dev_proc(struct super_block *sb) { mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; struct ext4_sb_info *sbi = EXT4_SB(sb); struct proc_dir_entry *proc; - char devname[64]; - if (proc_root_ext4 == NULL) { - sbi->s_mb_proc = NULL; + if (sbi->s_proc == NULL) return -EINVAL; - } - bdevname(sb->s_bdev, devname); - sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); - - MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); - MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); - MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); - MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); - MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); - MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); + EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); + EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); + EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); + EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); + EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); + EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); return 0; err_out: - printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); - remove_proc_entry(devname, proc_root_ext4); - sbi->s_mb_proc = NULL; - + remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); + remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); + remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); + remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); + remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); + remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); return -ENOMEM; } static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char devname[64]; - if (sbi->s_mb_proc == NULL) + if (sbi->s_proc == NULL) return -EINVAL; - bdevname(sb->s_bdev, devname); - remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); - remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); - remove_proc_entry(devname, proc_root_ext4); + remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); + remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); + remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); + remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); + remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); + remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); return 0; } @@ -2854,11 +2771,6 @@ int __init init_ext4_mballoc(void) kmem_cache_destroy(ext4_pspace_cachep); return -ENOMEM; } -#ifdef CONFIG_PROC_FS - proc_root_ext4 = proc_mkdir("fs/ext4", NULL); - if (proc_root_ext4 == NULL) - printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); -#endif return 0; } @@ -2867,9 +2779,6 @@ void exit_ext4_mballoc(void) /* XXX: synchronize_rcu(); */ kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_ac_cachep); -#ifdef CONFIG_PROC_FS - remove_proc_entry("fs/ext4", NULL); -#endif } @@ -2879,7 +2788,7 @@ void exit_ext4_mballoc(void) */ static noinline_for_stack int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, - handle_t *handle) + handle_t *handle, unsigned long reserv_blks) { struct buffer_head *bitmap_bh = NULL; struct ext4_super_block *es; @@ -2968,15 +2877,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); - + percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); /* - * free blocks account has already be reduced/reserved - * at write_begin() time for delayed allocation - * do not double accounting + * Now reduce the dirty block count also. Should not go negative */ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) - percpu_counter_sub(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); + /* release all the reserved blocks if non delalloc */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); + else + percpu_counter_sub(&sbi->s_dirtyblocks_counter, + ac->ac_b_ex.fe_len); if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, @@ -3884,7 +3794,7 @@ out: * * FIXME!! Make sure it is valid at all the call sites */ -void ext4_mb_discard_inode_preallocations(struct inode *inode) +void ext4_discard_preallocations(struct inode *inode) { struct ext4_inode_info *ei = EXT4_I(inode); struct super_block *sb = inode->i_sb; @@ -3896,7 +3806,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) struct ext4_buddy e4b; int err; - if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { + if (!S_ISREG(inode->i_mode)) { /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ return; } @@ -4094,8 +4004,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) * per cpu locality group is to reduce the contention between block * request from multiple CPUs. */ - ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; - put_cpu(); + ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); /* we're going to use group allocation */ ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; @@ -4369,33 +4278,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, struct ext4_allocation_request *ar, int *errp) { + int freed; struct ext4_allocation_context *ac = NULL; struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block = 0; - int freed; - int inquota; + unsigned long inquota; + unsigned long reserv_blks = 0; sb = ar->inode->i_sb; sbi = EXT4_SB(sb); - if (!test_opt(sb, MBALLOC)) { - block = ext4_old_new_blocks(handle, ar->inode, ar->goal, - &(ar->len), errp); - return block; - } if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { /* * With delalloc we already reserved the blocks */ - ar->len = ext4_has_free_blocks(sbi, ar->len); - } - - if (ar->len == 0) { - *errp = -ENOSPC; - return 0; + while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { + /* let others to free the space */ + yield(); + ar->len = ar->len >> 1; + } + if (!ar->len) { + *errp = -ENOSPC; + return 0; + } + reserv_blks = ar->len; } - while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { ar->flags |= EXT4_MB_HINT_NOPREALLOC; ar->len--; @@ -4441,7 +4349,7 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { - *errp = ext4_mb_mark_diskspace_used(ac, handle); + *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); if (*errp == -EAGAIN) { ac->ac_b_ex.fe_group = 0; ac->ac_b_ex.fe_start = 0; diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c7c9906c2a7..b3b4828f8b8 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -257,7 +257,6 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) -static struct proc_dir_entry *proc_root_ext4; struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 46fc0b5b12b..f2a9cf498ec 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) } -int ext4_ext_migrate(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +int ext4_ext_migrate(struct inode *inode) { handle_t *handle; int retval = 0, i; @@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, * when we add extents we extent the journal */ /* - * inode_mutex prevent write and truncate on the file. Read still goes - * through. We take i_data_sem in ext4_ext_swap_inode_data before we - * switch the inode format to prevent read. - */ - mutex_lock(&(inode->i_mutex)); - /* * Even though we take i_mutex we can still cause block allocation * via mmap write to holes. If we have allocated new blocks we fail * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. @@ -623,7 +616,6 @@ err_out: tmp_inode->i_nlink = 0; ext4_journal_stop(handle); - mutex_unlock(&(inode->i_mutex)); if (tmp_inode) iput(tmp_inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 387ad98350c..92db9e94514 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -151,34 +151,36 @@ struct dx_map_entry static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); -static inline unsigned dx_get_hash (struct dx_entry *entry); -static void dx_set_hash (struct dx_entry *entry, unsigned value); -static unsigned dx_get_count (struct dx_entry *entries); -static unsigned dx_get_limit (struct dx_entry *entries); -static void dx_set_count (struct dx_entry *entries, unsigned value); -static void dx_set_limit (struct dx_entry *entries, unsigned value); -static unsigned dx_root_limit (struct inode *dir, unsigned infosize); -static unsigned dx_node_limit (struct inode *dir); -static struct dx_frame *dx_probe(struct dentry *dentry, +static inline unsigned dx_get_hash(struct dx_entry *entry); +static void dx_set_hash(struct dx_entry *entry, unsigned value); +static unsigned dx_get_count(struct dx_entry *entries); +static unsigned dx_get_limit(struct dx_entry *entries); +static void dx_set_count(struct dx_entry *entries, unsigned value); +static void dx_set_limit(struct dx_entry *entries, unsigned value); +static unsigned dx_root_limit(struct inode *dir, unsigned infosize); +static unsigned dx_node_limit(struct inode *dir); +static struct dx_frame *dx_probe(const struct qstr *d_name, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame, int *err); -static void dx_release (struct dx_frame *frames); -static int dx_make_map (struct ext4_dir_entry_2 *de, int size, - struct dx_hash_info *hinfo, struct dx_map_entry map[]); +static void dx_release(struct dx_frame *frames); +static int dx_make_map(struct ext4_dir_entry_2 *de, int size, + struct dx_hash_info *hinfo, struct dx_map_entry map[]); static void dx_sort_map(struct dx_map_entry *map, unsigned count); -static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, +static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, struct dx_map_entry *offsets, int count); -static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); +static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block); static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, __u32 *start_hash); -static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, - struct ext4_dir_entry_2 **res_dir, int *err); +static struct buffer_head * ext4_dx_find_entry(struct inode *dir, + const struct qstr *d_name, + struct ext4_dir_entry_2 **res_dir, + int *err); static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); @@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) entry->block = cpu_to_le32(value); } -static inline unsigned dx_get_hash (struct dx_entry *entry) +static inline unsigned dx_get_hash(struct dx_entry *entry) { return le32_to_cpu(entry->hash); } -static inline void dx_set_hash (struct dx_entry *entry, unsigned value) +static inline void dx_set_hash(struct dx_entry *entry, unsigned value) { entry->hash = cpu_to_le32(value); } -static inline unsigned dx_get_count (struct dx_entry *entries) +static inline unsigned dx_get_count(struct dx_entry *entries) { return le16_to_cpu(((struct dx_countlimit *) entries)->count); } -static inline unsigned dx_get_limit (struct dx_entry *entries) +static inline unsigned dx_get_limit(struct dx_entry *entries) { return le16_to_cpu(((struct dx_countlimit *) entries)->limit); } -static inline void dx_set_count (struct dx_entry *entries, unsigned value) +static inline void dx_set_count(struct dx_entry *entries, unsigned value) { ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); } -static inline void dx_set_limit (struct dx_entry *entries, unsigned value) +static inline void dx_set_limit(struct dx_entry *entries, unsigned value) { ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); } -static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) +static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - EXT4_DIR_REC_LEN(2) - infosize; return entry_space / sizeof(struct dx_entry); } -static inline unsigned dx_node_limit (struct inode *dir) +static inline unsigned dx_node_limit(struct inode *dir) { unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); return entry_space / sizeof(struct dx_entry); @@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir) * Debug */ #ifdef DX_DEBUG -static void dx_show_index (char * label, struct dx_entry *entries) +static void dx_show_index(char * label, struct dx_entry *entries) { int i, n = dx_get_count (entries); - printk("%s index ", label); + printk(KERN_DEBUG "%s index ", label); for (i = 0; i < n; i++) { - printk("%x->%lu ", i? dx_get_hash(entries + i) : + printk("%x->%lu ", i ? dx_get_hash(entries + i) : 0, (unsigned long)dx_get_block(entries + i)); } printk("\n"); @@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, struct dx_entry *entries, int levels) { unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count = dx_get_count (entries), names = 0, space = 0, i; + unsigned count = dx_get_count(entries), names = 0, space = 0, i; unsigned bcount = 0; struct buffer_head *bh; int err; @@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, names += stats.names; space += stats.space; bcount += stats.bcount; - brelse (bh); + brelse(bh); } if (bcount) - printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", - names, space/bcount,(space/bcount)*100/blocksize); + printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", + levels ? "" : " ", names, space/bcount, + (space/bcount)*100/blocksize); return (struct stats) { names, space, bcount}; } #endif /* DX_DEBUG */ @@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, * back to userspace. */ static struct dx_frame * -dx_probe(struct dentry *dentry, struct inode *dir, +dx_probe(const struct qstr *d_name, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) { unsigned count, indirect; @@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, u32 hash; frame->bh = NULL; - if (dentry) - dir = dentry->d_parent->d_inode; if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) goto fail; root = (struct dx_root *) bh->b_data; @@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, } hinfo->hash_version = root->info.hash_version; hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; - if (dentry) - ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); + if (d_name) + ext4fs_dirhash(d_name->name, d_name->len, hinfo); hash = hinfo->hash; if (root->info.unused_flags & 1) { @@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, goto fail; } - dxtrace (printk("Look up %x", hash)); + dxtrace(printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); @@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, 0, &err))) return err; /* Failure */ p++; - brelse (p->bh); + brelse(p->bh); p->bh = bh; p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; } @@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, /* On error, skip the f_pos to the next block. */ dir_file->f_pos = (dir_file->f_pos | (dir->i_sb->s_blocksize - 1)) + 1; - brelse (bh); + brelse(bh); return count; } ext4fs_dirhash(de->name, de->name_len, hinfo); @@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, int ret, err; __u32 hashval; - dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, - start_minor_hash)); + dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", + start_hash, start_minor_hash)); dir = dir_file->f_path.dentry->d_inode; if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; @@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, } hinfo.hash = start_hash; hinfo.minor_hash = 0; - frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); + frame = dx_probe(NULL, dir, &hinfo, frames, &err); if (!frame) return err; @@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, break; } dx_release(frames); - dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", - count, *next_hash)); + dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " + "next hash: %x\n", count, *next_hash)); return count; errout: dx_release(frames); @@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name, /* * Returns 0 if not found, -1 on failure, and 1 on success */ -static inline int search_dirblock(struct buffer_head * bh, +static inline int search_dirblock(struct buffer_head *bh, struct inode *dir, - struct dentry *dentry, + const struct qstr *d_name, unsigned long offset, struct ext4_dir_entry_2 ** res_dir) { struct ext4_dir_entry_2 * de; char * dlimit; int de_len; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; + const char *name = d_name->name; + int namelen = d_name->len; de = (struct ext4_dir_entry_2 *) bh->b_data; dlimit = bh->b_data + dir->i_sb->s_blocksize; @@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh, * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ -static struct buffer_head * ext4_find_entry (struct dentry *dentry, +static struct buffer_head * ext4_find_entry (struct inode *dir, + const struct qstr *d_name, struct ext4_dir_entry_2 ** res_dir) { - struct super_block * sb; - struct buffer_head * bh_use[NAMEI_RA_SIZE]; - struct buffer_head * bh, *ret = NULL; + struct super_block *sb; + struct buffer_head *bh_use[NAMEI_RA_SIZE]; + struct buffer_head *bh, *ret = NULL; ext4_lblk_t start, block, b; int ra_max = 0; /* Number of bh's in the readahead buffer, bh_use[] */ @@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, int num = 0; ext4_lblk_t nblocks; int i, err; - struct inode *dir = dentry->d_parent->d_inode; int namelen; *res_dir = NULL; sb = dir->i_sb; - namelen = dentry->d_name.len; + namelen = d_name->len; if (namelen > EXT4_NAME_LEN) return NULL; if (is_dx(dir)) { - bh = ext4_dx_find_entry(dentry, res_dir, &err); + bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the @@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, */ if (bh || (err != ERR_BAD_DX_DIR)) return bh; - dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); + dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " + "falling back\n")); } nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); start = EXT4_I(dir)->i_dir_start_lookup; @@ -926,7 +928,7 @@ restart: brelse(bh); goto next; } - i = search_dirblock(bh, dir, dentry, + i = search_dirblock(bh, dir, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT4_I(dir)->i_dir_start_lookup = block; @@ -956,11 +958,11 @@ restart: cleanup_and_exit: /* Clean up the read-ahead blocks */ for (; ra_ptr < ra_max; ra_ptr++) - brelse (bh_use[ra_ptr]); + brelse(bh_use[ra_ptr]); return ret; } -static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, +static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, struct ext4_dir_entry_2 **res_dir, int *err) { struct super_block * sb; @@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, struct buffer_head *bh; ext4_lblk_t block; int retval; - int namelen = dentry->d_name.len; - const u8 *name = dentry->d_name.name; - struct inode *dir = dentry->d_parent->d_inode; + int namelen = d_name->len; + const u8 *name = d_name->name; sb = dir->i_sb; /* NFS may look up ".." - look at dx_root directory block */ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ - if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) + if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) return NULL; } else { frame = frames; @@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, return bh; } } - brelse (bh); + brelse(bh); /* Check to see if we should continue to search */ retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); @@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, *err = -ENOENT; errout: - dxtrace(printk("%s not found\n", name)); + dxtrace(printk(KERN_DEBUG "%s not found\n", name)); dx_release (frames); return NULL; } -static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { - struct inode * inode; - struct ext4_dir_entry_2 * de; - struct buffer_head * bh; + struct inode *inode; + struct ext4_dir_entry_2 *de; + struct buffer_head *bh; if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - bh = ext4_find_entry(dentry, &de); + bh = ext4_find_entry(dir, &dentry->d_name, &de); inode = NULL; if (bh) { unsigned long ino = le32_to_cpu(de->inode); - brelse (bh); + brelse(bh); if (!ext4_valid_inum(dir->i_sb, ino)) { ext4_error(dir->i_sb, "ext4_lookup", "bad inode number: %lu", ino); @@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child) unsigned long ino; struct dentry *parent; struct inode *inode; - struct dentry dotdot; + static const struct qstr dotdot = { + .name = "..", + .len = 2, + }; struct ext4_dir_entry_2 * de; struct buffer_head *bh; - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - dotdot.d_parent = child; /* confusing, isn't it! */ - - bh = ext4_find_entry(&dotdot, &de); + bh = ext4_find_entry(child->d_inode, &dotdot, &de); inode = NULL; if (!bh) return ERR_PTR(-ENOENT); @@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* create map in the end of data2 block */ map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map ((struct ext4_dir_entry_2 *) data1, + count = dx_make_map((struct ext4_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - dx_sort_map (map, count); + dx_sort_map(map, count); /* Split the existing block in the middle, size-wise */ size = 0; move = 0; @@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* Fancy dance to stay within two buffers */ de2 = dx_move_dirents(data1, data2, map + split, count - split); - de = dx_pack_dirents(data1,blocksize); + de = dx_pack_dirents(data1, blocksize); de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); @@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, swap(*bh, bh2); de = de2; } - dx_insert_block (frame, hash2 + continued, newblock); - err = ext4_journal_dirty_metadata (handle, bh2); + dx_insert_block(frame, hash2 + continued, newblock); + err = ext4_journal_dirty_metadata(handle, bh2); if (err) goto journal_error; - err = ext4_journal_dirty_metadata (handle, frame->bh); + err = ext4_journal_dirty_metadata(handle, frame->bh); if (err) goto journal_error; - brelse (bh2); - dxtrace(dx_show_index ("frame", frame->entries)); + brelse(bh2); + dxtrace(dx_show_index("frame", frame->entries)); return de; journal_error: @@ -1271,7 +1271,7 @@ errout: */ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, struct inode *inode, struct ext4_dir_entry_2 *de, - struct buffer_head * bh) + struct buffer_head *bh) { struct inode *dir = dentry->d_parent->d_inode; const char *name = dentry->d_name.name; @@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, while ((char *) de <= top) { if (!ext4_check_dir_entry("ext4_add_entry", dir, de, bh, offset)) { - brelse (bh); + brelse(bh); return -EIO; } - if (ext4_match (namelen, name, de)) { - brelse (bh); + if (ext4_match(namelen, name, de)) { + brelse(bh); return -EEXIST; } nlen = EXT4_DIR_REC_LEN(de->name_len); @@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, } else de->inode = 0; de->name_len = namelen; - memcpy (de->name, name, namelen); + memcpy(de->name, name, namelen); /* * XXX shouldn't update any times until successful * completion of syscall, but too many callers depend @@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct fake_dirent *fde; blocksize = dir->i_sb->s_blocksize; - dxtrace(printk("Creating index\n")); + dxtrace(printk(KERN_DEBUG "Creating index\n")); retval = ext4_journal_get_write_access(handle, bh); if (retval) { ext4_std_error(dir->i_sb, retval); @@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, } root = (struct dx_root *) bh->b_data; - bh2 = ext4_append (handle, dir, &block, &retval); + bh2 = ext4_append(handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); return retval; @@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, root->info.info_length = sizeof(root->info); root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; entries = root->entries; - dx_set_block (entries, 1); - dx_set_count (entries, 1); - dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); + dx_set_block(entries, 1); + dx_set_count(entries, 1); + dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); /* Initialize as for dx_probe */ hinfo.hash_version = root->info.hash_version; @@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, * may not sleep between calling this and putting something into * the entry, as someone else might have used it while you slept. */ -static int ext4_add_entry (handle_t *handle, struct dentry *dentry, - struct inode *inode) +static int ext4_add_entry(handle_t *handle, struct dentry *dentry, + struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; unsigned long offset; - struct buffer_head * bh; + struct buffer_head *bh; struct ext4_dir_entry_2 *de; - struct super_block * sb; + struct super_block *sb; int retval; int dx_fallback=0; unsigned blocksize; @@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct dx_frame frames[2], *frame; struct dx_entry *entries, *at; struct dx_hash_info hinfo; - struct buffer_head * bh; + struct buffer_head *bh; struct inode *dir = dentry->d_parent->d_inode; - struct super_block * sb = dir->i_sb; + struct super_block *sb = dir->i_sb; struct ext4_dir_entry_2 *de; int err; - frame = dx_probe(dentry, NULL, &hinfo, frames, &err); + frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); if (!frame) return err; entries = frame->entries; @@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, } /* Block full, should compress but for now just split */ - dxtrace(printk("using %u of %u node entries\n", + dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", dx_get_count(entries), dx_get_limit(entries))); /* Need to split index? */ if (dx_get_count(entries) == dx_get_limit(entries)) { @@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels) { unsigned icount1 = icount/2, icount2 = icount - icount1; unsigned hash2 = dx_get_hash(entries + icount1); - dxtrace(printk("Split index %i/%i\n", icount1, icount2)); + dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", + icount1, icount2)); BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ err = ext4_journal_get_write_access(handle, @@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, if (err) goto journal_error; - memcpy ((char *) entries2, (char *) (entries + icount1), - icount2 * sizeof(struct dx_entry)); - dx_set_count (entries, icount1); - dx_set_count (entries2, icount2); - dx_set_limit (entries2, dx_node_limit(dir)); + memcpy((char *) entries2, (char *) (entries + icount1), + icount2 * sizeof(struct dx_entry)); + dx_set_count(entries, icount1); + dx_set_count(entries2, icount2); + dx_set_limit(entries2, dx_node_limit(dir)); /* Which index block gets the new entry? */ if (at - entries >= icount1) { @@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, frame->entries = entries = entries2; swap(frame->bh, bh2); } - dx_insert_block (frames + 0, hash2, newblock); - dxtrace(dx_show_index ("node", frames[1].entries)); - dxtrace(dx_show_index ("node", + dx_insert_block(frames + 0, hash2, newblock); + dxtrace(dx_show_index("node", frames[1].entries)); + dxtrace(dx_show_index("node", ((struct dx_node *) bh2->b_data)->entries)); err = ext4_journal_dirty_metadata(handle, bh2); if (err) goto journal_error; brelse (bh2); } else { - dxtrace(printk("Creating second level index...\n")); + dxtrace(printk(KERN_DEBUG + "Creating second level index...\n")); memcpy((char *) entries2, (char *) entries, icount * sizeof(struct dx_entry)); dx_set_limit(entries2, dx_node_limit(dir)); @@ -1630,12 +1632,12 @@ cleanup: * ext4_delete_entry deletes a directory entry by merging it with the * previous entry */ -static int ext4_delete_entry (handle_t *handle, - struct inode * dir, - struct ext4_dir_entry_2 * de_del, - struct buffer_head * bh) +static int ext4_delete_entry(handle_t *handle, + struct inode *dir, + struct ext4_dir_entry_2 *de_del, + struct buffer_head *bh) { - struct ext4_dir_entry_2 * de, * pde; + struct ext4_dir_entry_2 *de, *pde; int i; i = 0; @@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle, * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, - struct nameidata *nd) +static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) { handle_t *handle; - struct inode * inode; + struct inode *inode; int err, retries = 0; retry: @@ -1747,8 +1749,8 @@ retry: return err; } -static int ext4_mknod (struct inode * dir, struct dentry *dentry, - int mode, dev_t rdev) +static int ext4_mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t rdev) { handle_t *handle; struct inode *inode; @@ -1767,11 +1769,11 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext4_new_inode (handle, dir, mode); + inode = ext4_new_inode(handle, dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR inode->i_op = &ext4_special_inode_operations; #endif err = ext4_add_nondir(handle, dentry, inode); @@ -1782,12 +1784,12 @@ retry: return err; } -static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) +static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) { handle_t *handle; - struct inode * inode; - struct buffer_head * dir_block; - struct ext4_dir_entry_2 * de; + struct inode *inode; + struct buffer_head *dir_block; + struct ext4_dir_entry_2 *de; int err, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) @@ -1803,7 +1805,7 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext4_new_inode (handle, dir, S_IFDIR | mode); + inode = ext4_new_inode(handle, dir, S_IFDIR | mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; @@ -1811,7 +1813,7 @@ retry: inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext4_bread (handle, inode, 0, 1, &err); + dir_block = ext4_bread(handle, inode, 0, 1, &err); if (!dir_block) goto out_clear_inode; BUFFER_TRACE(dir_block, "get_write_access"); @@ -1820,26 +1822,26 @@ retry: de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); - strcpy (de->name, "."); + strcpy(de->name, "."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); de = ext4_next_entry(de); de->inode = cpu_to_le32(dir->i_ino); de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1)); de->name_len = 2; - strcpy (de->name, ".."); + strcpy(de->name, ".."); ext4_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); ext4_journal_dirty_metadata(handle, dir_block); - brelse (dir_block); + brelse(dir_block); ext4_mark_inode_dirty(handle, inode); - err = ext4_add_entry (handle, dentry, inode); + err = ext4_add_entry(handle, dentry, inode); if (err) { out_clear_inode: clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); - iput (inode); + iput(inode); goto out_stop; } ext4_inc_count(handle, dir); @@ -1856,17 +1858,17 @@ out_stop: /* * routine to check that the specified directory is empty (for rmdir) */ -static int empty_dir (struct inode * inode) +static int empty_dir(struct inode *inode) { unsigned long offset; - struct buffer_head * bh; - struct ext4_dir_entry_2 * de, * de1; - struct super_block * sb; + struct buffer_head *bh; + struct ext4_dir_entry_2 *de, *de1; + struct super_block *sb; int err = 0; sb = inode->i_sb; if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || - !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { + !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { if (err) ext4_error(inode->i_sb, __func__, "error %d reading directory #%lu offset 0", @@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode) de1 = ext4_next_entry(de); if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || - strcmp (".", de->name) || - strcmp ("..", de1->name)) { - ext4_warning (inode->i_sb, "empty_dir", - "bad directory (dir #%lu) - no `.' or `..'", - inode->i_ino); - brelse (bh); + strcmp(".", de->name) || + strcmp("..", de1->name)) { + ext4_warning(inode->i_sb, "empty_dir", + "bad directory (dir #%lu) - no `.' or `..'", + inode->i_ino); + brelse(bh); return 1; } offset = ext4_rec_len_from_disk(de->rec_len) + ext4_rec_len_from_disk(de1->rec_len); de = ext4_next_entry(de1); - while (offset < inode->i_size ) { + while (offset < inode->i_size) { if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { err = 0; - brelse (bh); - bh = ext4_bread (NULL, inode, + brelse(bh); + bh = ext4_bread(NULL, inode, offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) @@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode) continue; } if (le32_to_cpu(de->inode)) { - brelse (bh); + brelse(bh); return 0; } offset += ext4_rec_len_from_disk(de->rec_len); de = ext4_next_entry(de); } - brelse (bh); + brelse(bh); return 1; } @@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. */ - J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || - S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); + J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); @@ -2069,12 +2071,12 @@ out_brelse: goto out_err; } -static int ext4_rmdir (struct inode * dir, struct dentry *dentry) +static int ext4_rmdir(struct inode *dir, struct dentry *dentry) { int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext4_dir_entry_2 * de; + struct inode *inode; + struct buffer_head *bh; + struct ext4_dir_entry_2 *de; handle_t *handle; /* Initialize quotas before so that eventual writes go in @@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) return PTR_ERR(handle); retval = -ENOENT; - bh = ext4_find_entry (dentry, &de); + bh = ext4_find_entry(dir, &dentry->d_name, &de); if (!bh) goto end_rmdir; @@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) goto end_rmdir; retval = -ENOTEMPTY; - if (!empty_dir (inode)) + if (!empty_dir(inode)) goto end_rmdir; retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_rmdir; if (!EXT4_DIR_LINK_EMPTY(inode)) - ext4_warning (inode->i_sb, "ext4_rmdir", - "empty directory has too many links (%d)", - inode->i_nlink); + ext4_warning(inode->i_sb, "ext4_rmdir", + "empty directory has too many links (%d)", + inode->i_nlink); inode->i_version++; clear_nlink(inode); /* There's no need to set i_disksize: the fact that i_nlink is @@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) end_rmdir: ext4_journal_stop(handle); - brelse (bh); + brelse(bh); return retval; } -static int ext4_unlink(struct inode * dir, struct dentry *dentry) +static int ext4_unlink(struct inode *dir, struct dentry *dentry) { int retval; - struct inode * inode; - struct buffer_head * bh; - struct ext4_dir_entry_2 * de; + struct inode *inode; + struct buffer_head *bh; + struct ext4_dir_entry_2 *de; handle_t *handle; /* Initialize quotas before so that eventual writes go @@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) handle->h_sync = 1; retval = -ENOENT; - bh = ext4_find_entry (dentry, &de); + bh = ext4_find_entry(dir, &dentry->d_name, &de); if (!bh) goto end_unlink; @@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) goto end_unlink; if (!inode->i_nlink) { - ext4_warning (inode->i_sb, "ext4_unlink", - "Deleting nonexistent file (%lu), %d", - inode->i_ino, inode->i_nlink); + ext4_warning(inode->i_sb, "ext4_unlink", + "Deleting nonexistent file (%lu), %d", + inode->i_ino, inode->i_nlink); inode->i_nlink = 1; } retval = ext4_delete_entry(handle, dir, de, bh); @@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) end_unlink: ext4_journal_stop(handle); - brelse (bh); + brelse(bh); return retval; } -static int ext4_symlink (struct inode * dir, - struct dentry *dentry, const char * symname) +static int ext4_symlink(struct inode *dir, + struct dentry *dentry, const char *symname) { handle_t *handle; - struct inode * inode; + struct inode *inode; int l, err, retries = 0; l = strlen(symname)+1; @@ -2203,12 +2205,12 @@ retry: if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); + inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; - if (l > sizeof (EXT4_I(inode)->i_data)) { + if (l > sizeof(EXT4_I(inode)->i_data)) { inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); /* @@ -2221,14 +2223,14 @@ retry: if (err) { clear_nlink(inode); ext4_mark_inode_dirty(handle, inode); - iput (inode); + iput(inode); goto out_stop; } } else { /* clear the extent format for fast symlink */ EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; inode->i_op = &ext4_fast_symlink_inode_operations; - memcpy((char*)&EXT4_I(inode)->i_data,symname,l); + memcpy((char *)&EXT4_I(inode)->i_data, symname, l); inode->i_size = l-1; } EXT4_I(inode)->i_disksize = inode->i_size; @@ -2240,8 +2242,8 @@ out_stop: return err; } -static int ext4_link (struct dentry * old_dentry, - struct inode * dir, struct dentry *dentry) +static int ext4_link(struct dentry *old_dentry, + struct inode *dir, struct dentry *dentry) { handle_t *handle; struct inode *inode = old_dentry->d_inode; @@ -2284,13 +2286,13 @@ retry: * Anybody can rename anything with this: the permission checks are left to the * higher-level routines. */ -static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, - struct inode * new_dir,struct dentry *new_dentry) +static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) { handle_t *handle; - struct inode * old_inode, * new_inode; - struct buffer_head * old_bh, * new_bh, * dir_bh; - struct ext4_dir_entry_2 * old_de, * new_de; + struct inode *old_inode, *new_inode; + struct buffer_head *old_bh, *new_bh, *dir_bh; + struct ext4_dir_entry_2 *old_de, *new_de; int retval; old_bh = new_bh = dir_bh = NULL; @@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) handle->h_sync = 1; - old_bh = ext4_find_entry (old_dentry, &old_de); + old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, goto end_rename; new_inode = new_dentry->d_inode; - new_bh = ext4_find_entry (new_dentry, &new_de); + new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); if (new_bh) { if (!new_inode) { - brelse (new_bh); + brelse(new_bh); new_bh = NULL; } } if (S_ISDIR(old_inode->i_mode)) { if (new_inode) { retval = -ENOTEMPTY; - if (!empty_dir (new_inode)) + if (!empty_dir(new_inode)) goto end_rename; } retval = -EIO; - dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); + dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) goto end_rename; retval = -EMLINK; - if (!new_inode && new_dir!=old_dir && + if (!new_inode && new_dir != old_dir && new_dir->i_nlink >= EXT4_LINK_MAX) goto end_rename; } if (!new_bh) { - retval = ext4_add_entry (handle, new_dentry, old_inode); + retval = ext4_add_entry(handle, new_dentry, old_inode); if (retval) goto end_rename; } else { @@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, struct buffer_head *old_bh2; struct ext4_dir_entry_2 *old_de2; - old_bh2 = ext4_find_entry(old_dentry, &old_de2); + old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); if (old_bh2) { retval = ext4_delete_entry(handle, old_dir, old_de2, old_bh2); @@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, retval = 0; end_rename: - brelse (dir_bh); - brelse (old_bh); - brelse (new_bh); + brelse(dir_bh); + brelse(old_bh); + brelse(new_bh); ext4_journal_stop(handle); return retval; } @@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = { .mknod = ext4_mknod, .rename = ext4_rename, .setattr = ext4_setattr, -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, @@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = { const struct inode_operations ext4_special_inode_operations = { .setattr = ext4_setattr, -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b3d35604ea1..b6ec1843a01 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); - /* - * If we are not using the primary superblock/GDT copy don't resize, + /* + * If we are not using the primary superblock/GDT copy don't resize, * because the user tools have no way of handling this. Probably a * bad time to do it anyways. */ @@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * We can allocate memory for mb_alloc based on the new group * descriptor */ - if (test_opt(sb, MBALLOC)) { - err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); - if (err) - goto exit_journal; - } + err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); + if (err) + goto exit_journal; + /* * Make the new blocks and inodes valid next. We do this before * increasing the group count so that once the group is enabled, @@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) percpu_counter_add(&sbi->s_freeinodes_counter, EXT4_INODES_PER_GROUP(sb)); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { + ext4_group_t flex_group; + flex_group = ext4_flex_group(sbi, input->group); + sbi->s_flex_groups[flex_group].free_blocks += + input->free_blocks_count; + sbi->s_flex_groups[flex_group].free_inodes += + EXT4_INODES_PER_GROUP(sb); + } + ext4_journal_dirty_metadata(handle, sbi->s_sbh); sb->s_dirt = 1; @@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_group_t o_groups_count; ext4_grpblk_t last; ext4_grpblk_t add; - struct buffer_head * bh; + struct buffer_head *bh; handle_t *handle; int err; unsigned long freed_blocks; @@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* * Mark mballoc pages as not up to date so that they will be updated * next time they are loaded by ext4_mb_load_buddy. + * + * XXX Bad, Bad, BAD!!! We should not be overloading the + * Uptodate flag, particularly on thte bitmap bh, as way of + * hinting to ext4_mb_load_buddy() that it needs to be + * overloaded. A user could take a LVM snapshot, then do an + * on-line fsck, and clear the uptodate flag, and this would + * not be a bug in userspace, but a bug in the kernel. FIXME!!! */ - if (test_opt(sb, MBALLOC)) { + { struct ext4_sb_info *sbi = EXT4_SB(sb); struct inode *inode = sbi->s_buddy_cache; int blocks_per_page; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 566344b926b..0e661c56966 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -34,6 +34,8 @@ #include <linux/namei.h> #include <linux/quotaops.h> #include <linux/seq_file.h> +#include <linux/proc_fs.h> +#include <linux/marker.h> #include <linux/log2.h> #include <linux/crc16.h> #include <asm/uaccess.h> @@ -45,6 +47,8 @@ #include "namei.h" #include "group.h" +struct proc_dir_entry *ext4_proc_root; + static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_create_journal(struct super_block *, struct ext4_super_block *, @@ -508,10 +512,12 @@ static void ext4_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); - BUFFER_TRACE(sbi->s_sbh, "marking dirty"); - mark_buffer_dirty(sbi->s_sbh); ext4_commit_super(sb, es, 1); } + if (sbi->s_proc) { + remove_proc_entry("inode_readahead_blks", sbi->s_proc); + remove_proc_entry(sb->s_id, ext4_proc_root); + } for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); @@ -520,6 +526,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); + percpu_counter_destroy(&sbi->s_dirtyblocks_counter); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) @@ -562,11 +569,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL ei->i_acl = EXT4_ACL_NOT_CACHED; ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif - ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; ei->vfs_inode.i_data.writeback_index = 0; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); @@ -599,7 +605,7 @@ static void init_once(void *foo) struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; INIT_LIST_HEAD(&ei->i_orphan); -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR init_rwsem(&ei->xattr_sem); #endif init_rwsem(&ei->i_data_sem); @@ -625,8 +631,7 @@ static void destroy_inodecache(void) static void ext4_clear_inode(struct inode *inode) { - struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL if (EXT4_I(inode)->i_acl && EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { posix_acl_release(EXT4_I(inode)->i_acl); @@ -638,10 +643,7 @@ static void ext4_clear_inode(struct inode *inode) EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; } #endif - ext4_discard_reservation(inode); - EXT4_I(inode)->i_block_alloc_info = NULL; - if (unlikely(rsv)) - kfree(rsv); + ext4_discard_preallocations(inode); jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, &EXT4_I(inode)->jinode); } @@ -654,7 +656,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_jquota_fmt) seq_printf(seq, ",jqfmt=%s", - (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); + (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); if (sbi->s_qf_names[USRQUOTA]) seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); @@ -718,7 +720,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",debug"); if (test_opt(sb, OLDALLOC)) seq_puts(seq, ",oldalloc"); -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR if (test_opt(sb, XATTR_USER) && !(def_mount_opts & EXT4_DEFM_XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -727,7 +729,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nouser_xattr"); } #endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) seq_puts(seq, ",acl"); if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) @@ -752,8 +754,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_puts(seq, ",nobh"); if (!test_opt(sb, EXTENTS)) seq_puts(seq, ",noextents"); - if (!test_opt(sb, MBALLOC)) - seq_puts(seq, ",nomballoc"); if (test_opt(sb, I_VERSION)) seq_puts(seq, ",i_version"); if (!test_opt(sb, DELALLOC)) @@ -773,6 +773,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) seq_puts(seq, ",data=writeback"); + if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) + seq_printf(seq, ",inode_readahead_blks=%u", + sbi->s_inode_readahead_blks); + ext4_show_quota_options(seq, sb); return 0; } @@ -822,7 +826,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, } #ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") +#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) static int ext4_dquot_initialize(struct inode *inode, int type); @@ -907,6 +911,7 @@ enum { Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, + Opt_inode_readahead_blks }; static match_table_t tokens = { @@ -967,6 +972,7 @@ static match_table_t tokens = { {Opt_resize, "resize"}, {Opt_delalloc, "delalloc"}, {Opt_nodelalloc, "nodelalloc"}, + {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, {Opt_err, NULL}, }; @@ -981,7 +987,7 @@ static ext4_fsblk_t get_sb_block(void **data) /*todo: use simple_strtoll with >32bit ext4 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - printk("EXT4-fs: Invalid sb specification: %s\n", + printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", (char *) *data); return 1; } @@ -1072,7 +1078,7 @@ static int parse_options(char *options, struct super_block *sb, case Opt_orlov: clear_opt(sbi->s_mount_opt, OLDALLOC); break; -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR case Opt_user_xattr: set_opt(sbi->s_mount_opt, XATTR_USER); break; @@ -1082,10 +1088,11 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_user_xattr: case Opt_nouser_xattr: - printk("EXT4 (no)user_xattr options not supported\n"); + printk(KERN_ERR "EXT4 (no)user_xattr options " + "not supported\n"); break; #endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL case Opt_acl: set_opt(sbi->s_mount_opt, POSIX_ACL); break; @@ -1095,7 +1102,8 @@ static int parse_options(char *options, struct super_block *sb, #else case Opt_acl: case Opt_noacl: - printk("EXT4 (no)acl options not supported\n"); + printk(KERN_ERR "EXT4 (no)acl options " + "not supported\n"); break; #endif case Opt_reservation: @@ -1189,8 +1197,8 @@ set_qf_name: sb_any_quota_suspended(sb)) && !sbi->s_qf_names[qtype]) { printk(KERN_ERR - "EXT4-fs: Cannot change journaled " - "quota options when quota turned on.\n"); + "EXT4-fs: Cannot change journaled " + "quota options when quota turned on.\n"); return 0; } qname = match_strdup(&args[0]); @@ -1357,12 +1365,6 @@ set_qf_format: case Opt_nodelalloc: clear_opt(sbi->s_mount_opt, DELALLOC); break; - case Opt_mballoc: - set_opt(sbi->s_mount_opt, MBALLOC); - break; - case Opt_nomballoc: - clear_opt(sbi->s_mount_opt, MBALLOC); - break; case Opt_stripe: if (match_int(&args[0], &option)) return 0; @@ -1373,6 +1375,13 @@ set_qf_format: case Opt_delalloc: set_opt(sbi->s_mount_opt, DELALLOC); break; + case Opt_inode_readahead_blks: + if (match_int(&args[0], &option)) + return 0; + if (option < 0 || option > (1 << 30)) + return 0; + sbi->s_inode_readahead_blks = option; + break; default: printk(KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -1473,15 +1482,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt); - printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); - if (EXT4_SB(sb)->s_journal->j_inode == NULL) { - char b[BDEVNAME_SIZE]; - - printk("external journal on %s\n", - bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); - } else { - printk("internal journal\n"); - } + printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", + sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : + "external", EXT4_SB(sb)->s_journal->j_devname); return res; } @@ -1504,8 +1507,11 @@ static int ext4_fill_flex_info(struct super_block *sb) sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; groups_per_flex = 1 << sbi->s_log_groups_per_flex; - flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / - groups_per_flex; + /* We allocate both existing and potentially added groups */ + flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + + ((sbi->s_es->s_reserved_gdt_blocks +1 ) << + EXT4_DESC_PER_BLOCK_BITS(sb))) / + groups_per_flex; sbi->s_flex_groups = kzalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL); if (sbi->s_flex_groups == NULL) { @@ -1584,7 +1590,7 @@ static int ext4_check_descriptors(struct super_block *sb) if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) flexbg_flag = 1; - ext4_debug ("Checking group descriptors"); + ext4_debug("Checking group descriptors"); for (i = 0; i < sbi->s_groups_count; i++) { struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); @@ -1623,8 +1629,10 @@ static int ext4_check_descriptors(struct super_block *sb) "Checksum for group %lu failed (%u!=%u)\n", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, gdp)), le16_to_cpu(gdp->bg_checksum)); - if (!(sb->s_flags & MS_RDONLY)) + if (!(sb->s_flags & MS_RDONLY)) { + spin_unlock(sb_bgl_lock(sbi, i)); return 0; + } } spin_unlock(sb_bgl_lock(sbi, i)); if (!flexbg_flag) @@ -1714,9 +1722,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, DQUOT_INIT(inode); if (inode->i_nlink) { printk(KERN_DEBUG - "%s: truncating inode %lu to %Ld bytes\n", + "%s: truncating inode %lu to %lld bytes\n", __func__, inode->i_ino, inode->i_size); - jbd_debug(2, "truncating inode %lu to %Ld bytes\n", + jbd_debug(2, "truncating inode %lu to %lld bytes\n", inode->i_ino, inode->i_size); ext4_truncate(inode); nr_truncates++; @@ -1914,6 +1922,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) unsigned long journal_devnum = 0; unsigned long def_mount_opts; struct inode *root; + char *cp; int ret = -EINVAL; int blocksize; int db_count; @@ -1930,10 +1939,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_mount_opt = 0; sbi->s_resuid = EXT4_DEF_RESUID; sbi->s_resgid = EXT4_DEF_RESGID; + sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; sbi->s_sb_block = sb_block; unlock_kernel(); + /* Cleanup superblock name */ + for (cp = sb->s_id; (cp = strchr(cp, '/'));) + *cp = '!'; + blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) { printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); @@ -1973,11 +1987,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, GRPID); if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR if (def_mount_opts & EXT4_DEFM_XATTR_USER) set_opt(sbi->s_mount_opt, XATTR_USER); #endif -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL if (def_mount_opts & EXT4_DEFM_ACL) set_opt(sbi->s_mount_opt, POSIX_ACL); #endif @@ -2012,11 +2026,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_warning(sb, __func__, "extents feature not enabled on this filesystem, " "use tune2fs.\n"); - /* - * turn on mballoc code by default in ext4 filesystem - * Use -o nomballoc to turn it off - */ - set_opt(sbi->s_mount_opt, MBALLOC); /* * enable delayed allocation by default @@ -2041,16 +2050,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "running e2fsck is recommended\n"); /* - * Since ext4 is still considered development code, we require - * that the TEST_FILESYS flag in s->flags be set. - */ - if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { - printk(KERN_WARNING "EXT4-fs: %s: not marked " - "OK to use with test code.\n", sb->s_id); - goto failed_mount; - } - - /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, * so there is a chance incompat flags are set on a rev 0 filesystem. @@ -2219,6 +2218,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } + if (ext4_proc_root) + sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); + + if (sbi->s_proc) + proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, + &ext4_ui_proc_fops, + &sbi->s_inode_readahead_blks); + bgl_lock_init(&sbi->s_blockgroup_lock); for (i = 0; i < db_count; i++) { @@ -2257,24 +2264,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) err = percpu_counter_init(&sbi->s_dirs_counter, ext4_count_dirs(sb)); } + if (!err) { + err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); + } if (err) { printk(KERN_ERR "EXT4-fs: insufficient memory\n"); goto failed_mount3; } - /* per fileystem reservation list head & lock */ - spin_lock_init(&sbi->s_rsv_window_lock); - sbi->s_rsv_window_root = RB_ROOT; - /* Add a single, static dummy reservation to the start of the - * reservation window list --- it gives us a placeholder for - * append-at-start-of-list which makes the allocation logic - * _much_ simpler. */ - sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; - sbi->s_rsv_window_head.rsv_alloc_hit = 0; - sbi->s_rsv_window_head.rsv_goal_size = 0; - ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); - sbi->s_stripe = ext4_get_stripe_size(sbi); /* @@ -2471,7 +2468,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); ext4_ext_init(sb); - ext4_mb_init(sb, needs_recovery); + err = ext4_mb_init(sb, needs_recovery); + if (err) { + printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", + err); + goto failed_mount4; + } lock_kernel(); return 0; @@ -2489,11 +2491,16 @@ failed_mount3: percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); + percpu_counter_destroy(&sbi->s_dirtyblocks_counter); failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); failed_mount: + if (sbi->s_proc) { + remove_proc_entry("inode_readahead_blks", sbi->s_proc); + remove_proc_entry(sb->s_id, ext4_proc_root); + } #ifdef CONFIG_QUOTA for (i = 0; i < MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); @@ -2552,7 +2559,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, return NULL; } - jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", + jbd_debug(2, "Journal inode found at %p: %lld bytes\n", journal_inode, journal_inode->i_size); if (!S_ISREG(journal_inode->i_mode)) { printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); @@ -2715,6 +2722,11 @@ static int ext4_load_journal(struct super_block *sb, return -EINVAL; } + if (journal->j_flags & JBD2_BARRIER) + printk(KERN_INFO "EXT4-fs: barriers enabled\n"); + else + printk(KERN_INFO "EXT4-fs: barriers disabled\n"); + if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = jbd2_journal_update_format(journal); if (err) { @@ -2799,13 +2811,34 @@ static void ext4_commit_super(struct super_block *sb, if (!sbh) return; + if (buffer_write_io_error(sbh)) { + /* + * Oh, dear. A previous attempt to write the + * superblock failed. This could happen because the + * USB device was yanked out. Or it could happen to + * be a transient write error and maybe the block will + * be remapped. Nothing we can do but to retry the + * write and hope for the best. + */ + printk(KERN_ERR "ext4: previous I/O error to " + "superblock detected for %s.\n", sb->s_id); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + } es->s_wtime = cpu_to_le32(get_seconds()); ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); - if (sync) + if (sync) { sync_dirty_buffer(sbh); + if (buffer_write_io_error(sbh)) { + printk(KERN_ERR "ext4: I/O error while writing " + "superblock for %s.\n", sb->s_id); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + } + } } @@ -2907,6 +2940,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) { tid_t target; + trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); sb->s_dirt = 0; if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { if (wait) @@ -3162,7 +3196,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; - buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); + buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - + percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); ext4_free_blocks_count_set(es, buf->f_bfree); buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); if (buf->f_bfree < ext4_r_blocks_count(es)) @@ -3432,7 +3467,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, handle_t *handle = journal_current_handle(); if (!handle) { - printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" + printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started.\n", (unsigned long long)off, (unsigned long long)len); return -EIO; @@ -3493,18 +3528,82 @@ static int ext4_get_sb(struct file_system_type *fs_type, return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); } +#ifdef CONFIG_PROC_FS +static int ext4_ui_proc_show(struct seq_file *m, void *v) +{ + unsigned int *p = m->private; + + seq_printf(m, "%u\n", *p); + return 0; +} + +static int ext4_ui_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ext4_ui_proc_show, PDE(inode)->data); +} + +static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, + size_t cnt, loff_t *ppos) +{ + unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; + char str[32]; + unsigned long value; + + if (cnt >= sizeof(str)) + return -EINVAL; + if (copy_from_user(str, buf, cnt)) + return -EFAULT; + value = simple_strtol(str, NULL, 0); + if (value < 0) + return -ERANGE; + *p = value; + return cnt; +} + +const struct file_operations ext4_ui_proc_fops = { + .owner = THIS_MODULE, + .open = ext4_ui_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ext4_ui_proc_write, +}; +#endif + +static struct file_system_type ext4_fs_type = { + .owner = THIS_MODULE, + .name = "ext4", + .get_sb = ext4_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +#ifdef CONFIG_EXT4DEV_COMPAT +static int ext4dev_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + printk(KERN_WARNING "EXT4-fs: Update your userspace programs " + "to mount using ext4\n"); + printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " + "will go away by 2.6.31\n"); + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); +} + static struct file_system_type ext4dev_fs_type = { .owner = THIS_MODULE, .name = "ext4dev", - .get_sb = ext4_get_sb, + .get_sb = ext4dev_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; +MODULE_ALIAS("ext4dev"); +#endif static int __init init_ext4_fs(void) { int err; + ext4_proc_root = proc_mkdir("fs/ext4", NULL); err = init_ext4_mballoc(); if (err) return err; @@ -3515,9 +3614,16 @@ static int __init init_ext4_fs(void) err = init_inodecache(); if (err) goto out1; - err = register_filesystem(&ext4dev_fs_type); + err = register_filesystem(&ext4_fs_type); if (err) goto out; +#ifdef CONFIG_EXT4DEV_COMPAT + err = register_filesystem(&ext4dev_fs_type); + if (err) { + unregister_filesystem(&ext4_fs_type); + goto out; + } +#endif return 0; out: destroy_inodecache(); @@ -3530,10 +3636,14 @@ out2: static void __exit exit_ext4_fs(void) { + unregister_filesystem(&ext4_fs_type); +#ifdef CONFIG_EXT4DEV_COMPAT unregister_filesystem(&ext4dev_fs_type); +#endif destroy_inodecache(); exit_ext4_xattr(); exit_ext4_mballoc(); + remove_proc_entry("fs/ext4", NULL); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e9178643dc0..00740cb32be 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -23,10 +23,10 @@ #include "ext4.h" #include "xattr.h" -static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) { struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); - nd_set_link(nd, (char*)ei->i_data); + nd_set_link(nd, (char *) ei->i_data); return NULL; } @@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, @@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = { const struct inode_operations ext4_fast_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = ext4_follow_link, -#ifdef CONFIG_EXT4DEV_FS_XATTR +#ifdef CONFIG_EXT4_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8954208b489..80626d516fe 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache; static struct xattr_handler *ext4_xattr_handler_map[] = { [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, #endif [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, -#ifdef CONFIG_EXT4DEV_FS_SECURITY +#ifdef CONFIG_EXT4_FS_SECURITY [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, #endif }; @@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { struct xattr_handler *ext4_xattr_handlers[] = { &ext4_xattr_user_handler, &ext4_xattr_trusted_handler, -#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL +#ifdef CONFIG_EXT4_FS_POSIX_ACL &ext4_xattr_acl_access_handler, &ext4_xattr_acl_default_handler, #endif -#ifdef CONFIG_EXT4DEV_FS_SECURITY +#ifdef CONFIG_EXT4_FS_SECURITY &ext4_xattr_security_handler, #endif NULL @@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; + unsigned long no_expand; int error; if (!name) @@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (strlen(name) > 255) return -ERANGE; down_write(&EXT4_I(inode)->xattr_sem); + no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; + EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; + error = ext4_get_inode_loc(inode, &is.iloc); if (error) goto cleanup; @@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: brelse(is.iloc.bh); brelse(bs.bh); + if (no_expand == 0) + EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; up_write(&EXT4_I(inode)->xattr_sem); return error; } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 5992fe979bb..8ede88b18c2 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -51,8 +51,8 @@ struct ext4_xattr_entry { (((name_len) + EXT4_XATTR_ROUND + \ sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) #define EXT4_XATTR_NEXT(entry) \ - ( (struct ext4_xattr_entry *)( \ - (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) + ((struct ext4_xattr_entry *)( \ + (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) #define EXT4_XATTR_SIZE(size) \ (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) @@ -63,7 +63,7 @@ struct ext4_xattr_entry { EXT4_I(inode)->i_extra_isize)) #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) -# ifdef CONFIG_EXT4DEV_FS_XATTR +# ifdef CONFIG_EXT4_FS_XATTR extern struct xattr_handler ext4_xattr_user_handler; extern struct xattr_handler ext4_xattr_trusted_handler; @@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void); extern struct xattr_handler *ext4_xattr_handlers[]; -# else /* CONFIG_EXT4DEV_FS_XATTR */ +# else /* CONFIG_EXT4_FS_XATTR */ static inline int ext4_xattr_get(struct inode *inode, int name_index, const char *name, @@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, #define ext4_xattr_handlers NULL -# endif /* CONFIG_EXT4DEV_FS_XATTR */ +# endif /* CONFIG_EXT4_FS_XATTR */ -#ifdef CONFIG_EXT4DEV_FS_SECURITY +#ifdef CONFIG_EXT4_FS_SECURITY extern int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir); #else diff --git a/fs/ioctl.c b/fs/ioctl.c index 7db32b3382d..33a6b7ecb8b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -13,9 +13,14 @@ #include <linux/security.h> #include <linux/module.h> #include <linux/uaccess.h> +#include <linux/writeback.h> +#include <linux/buffer_head.h> #include <asm/ioctls.h> +/* So that the fiemap access checks can't overflow on 32 bit machines. */ +#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) + /** * vfs_ioctl - call filesystem specific ioctl methods * @filp: open file to invoke ioctl method on @@ -71,6 +76,272 @@ static int ioctl_fibmap(struct file *filp, int __user *p) return put_user(res, p); } +/** + * fiemap_fill_next_extent - Fiemap helper function + * @fieinfo: Fiemap context passed into ->fiemap + * @logical: Extent logical start offset, in bytes + * @phys: Extent physical start offset, in bytes + * @len: Extent length, in bytes + * @flags: FIEMAP_EXTENT flags that describe this extent + * + * Called from file system ->fiemap callback. Will populate extent + * info as passed in via arguments and copy to user memory. On + * success, extent count on fieinfo is incremented. + * + * Returns 0 on success, -errno on error, 1 if this was the last + * extent that will fit in user array. + */ +#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) +#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) +#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) +int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, + u64 phys, u64 len, u32 flags) +{ + struct fiemap_extent extent; + struct fiemap_extent *dest = fieinfo->fi_extents_start; + + /* only count the extents */ + if (fieinfo->fi_extents_max == 0) { + fieinfo->fi_extents_mapped++; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; + } + + if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) + return 1; + + if (flags & SET_UNKNOWN_FLAGS) + flags |= FIEMAP_EXTENT_UNKNOWN; + if (flags & SET_NO_UNMOUNTED_IO_FLAGS) + flags |= FIEMAP_EXTENT_ENCODED; + if (flags & SET_NOT_ALIGNED_FLAGS) + flags |= FIEMAP_EXTENT_NOT_ALIGNED; + + memset(&extent, 0, sizeof(extent)); + extent.fe_logical = logical; + extent.fe_physical = phys; + extent.fe_length = len; + extent.fe_flags = flags; + + dest += fieinfo->fi_extents_mapped; + if (copy_to_user(dest, &extent, sizeof(extent))) + return -EFAULT; + + fieinfo->fi_extents_mapped++; + if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) + return 1; + return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; +} +EXPORT_SYMBOL(fiemap_fill_next_extent); + +/** + * fiemap_check_flags - check validity of requested flags for fiemap + * @fieinfo: Fiemap context passed into ->fiemap + * @fs_flags: Set of fiemap flags that the file system understands + * + * Called from file system ->fiemap callback. This will compute the + * intersection of valid fiemap flags and those that the fs supports. That + * value is then compared against the user supplied flags. In case of bad user + * flags, the invalid values will be written into the fieinfo structure, and + * -EBADR is returned, which tells ioctl_fiemap() to return those values to + * userspace. For this reason, a return code of -EBADR should be preserved. + * + * Returns 0 on success, -EBADR on bad flags. + */ +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) +{ + u32 incompat_flags; + + incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); + if (incompat_flags) { + fieinfo->fi_flags = incompat_flags; + return -EBADR; + } + return 0; +} +EXPORT_SYMBOL(fiemap_check_flags); + +static int fiemap_check_ranges(struct super_block *sb, + u64 start, u64 len, u64 *new_len) +{ + *new_len = len; + + if (len == 0) + return -EINVAL; + + if (start > sb->s_maxbytes) + return -EFBIG; + + /* + * Shrink request scope to what the fs can actually handle. + */ + if ((len > sb->s_maxbytes) || + (sb->s_maxbytes - len) < start) + *new_len = sb->s_maxbytes - start; + + return 0; +} + +static int ioctl_fiemap(struct file *filp, unsigned long arg) +{ + struct fiemap fiemap; + struct fiemap_extent_info fieinfo = { 0, }; + struct inode *inode = filp->f_path.dentry->d_inode; + struct super_block *sb = inode->i_sb; + u64 len; + int error; + + if (!inode->i_op->fiemap) + return -EOPNOTSUPP; + + if (copy_from_user(&fiemap, (struct fiemap __user *)arg, + sizeof(struct fiemap))) + return -EFAULT; + + if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) + return -EINVAL; + + error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, + &len); + if (error) + return error; + + fieinfo.fi_flags = fiemap.fm_flags; + fieinfo.fi_extents_max = fiemap.fm_extent_count; + fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); + + if (fiemap.fm_extent_count != 0 && + !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, + fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) + return -EFAULT; + + if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) + filemap_write_and_wait(inode->i_mapping); + + error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); + fiemap.fm_flags = fieinfo.fi_flags; + fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; + if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) + error = -EFAULT; + + return error; +} + +#define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) +#define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); + +/* + * @inode - the inode to map + * @arg - the pointer to userspace where we copy everything to + * @get_block - the fs's get_block function + * + * This does FIEMAP for block based inodes. Basically it will just loop + * through get_block until we hit the number of extents we want to map, or we + * go past the end of the file and hit a hole. + * + * If it is possible to have data blocks beyond a hole past @inode->i_size, then + * please do not use this function, it will stop at the first unmapped block + * beyond i_size + */ +int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len, get_block_t *get_block) +{ + struct buffer_head tmp; + unsigned int start_blk; + long long length = 0, map_len = 0; + u64 logical = 0, phys = 0, size = 0; + u32 flags = FIEMAP_EXTENT_MERGED; + int ret = 0; + + if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) + return ret; + + start_blk = logical_to_blk(inode, start); + + /* guard against change */ + mutex_lock(&inode->i_mutex); + + length = (long long)min_t(u64, len, i_size_read(inode)); + map_len = length; + + do { + /* + * we set b_size to the total size we want so it will map as + * many contiguous blocks as possible at once + */ + memset(&tmp, 0, sizeof(struct buffer_head)); + tmp.b_size = map_len; + + ret = get_block(inode, start_blk, &tmp, 0); + if (ret) + break; + + /* HOLE */ + if (!buffer_mapped(&tmp)) { + /* + * first hole after going past the EOF, this is our + * last extent + */ + if (length <= 0) { + flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, + flags); + break; + } + + length -= blk_to_logical(inode, 1); + + /* if we have holes up to/past EOF then we're done */ + if (length <= 0) + break; + + start_blk++; + } else { + if (length <= 0 && size) { + ret = fiemap_fill_next_extent(fieinfo, logical, + phys, size, + flags); + if (ret) + break; + } + + logical = blk_to_logical(inode, start_blk); + phys = blk_to_logical(inode, tmp.b_blocknr); + size = tmp.b_size; + flags = FIEMAP_EXTENT_MERGED; + + length -= tmp.b_size; + start_blk += logical_to_blk(inode, size); + + /* + * if we are past the EOF we need to loop again to see + * if there is a hole so we can mark this extent as the + * last one, and if not keep mapping things until we + * find a hole, or we run out of slots in the extent + * array + */ + if (length <= 0) + continue; + + ret = fiemap_fill_next_extent(fieinfo, logical, phys, + size, flags); + if (ret) + break; + } + cond_resched(); + } while (1); + + mutex_unlock(&inode->i_mutex); + + /* if ret is 1 then we just hit the end of the extent array */ + if (ret == 1) + ret = 0; + + return ret; +} +EXPORT_SYMBOL(generic_block_fiemap); + static int file_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { @@ -80,6 +351,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd, switch (cmd) { case FIBMAP: return ioctl_fibmap(filp, p); + case FS_IOC_FIEMAP: + return ioctl_fiemap(filp, arg); case FIGETBSZ: return put_user(inode->i_sb->s_blocksize, p); case FIONREAD: diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8..42895d36945 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -20,6 +20,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> +#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> @@ -126,14 +127,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we - * were waiting for the checkpoint lock + * were waiting for the checkpoint lock. If there are no + * outstanding transactions there is nothing to checkpoint and + * we can't make progress. Abort the journal in this case. */ spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); if (__jbd2_log_space_left(journal) < nblocks) { + int chkpt = journal->j_checkpoint_transactions != NULL; + + spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); - jbd2_log_do_checkpoint(journal); + if (chkpt) { + jbd2_log_do_checkpoint(journal); + } else { + printk(KERN_ERR "%s: no transactions\n", + __func__); + jbd2_journal_abort(journal, 0); + } + spin_lock(&journal->j_state_lock); + } else { + spin_unlock(&journal->j_list_lock); } mutex_unlock(&journal->j_checkpoint_mutex); } @@ -313,6 +329,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); + trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", + journal->j_devname, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f2ad061e95e..0d3814a35ed 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -16,6 +16,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> +#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> @@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "submit commit block"); lock_buffer(bh); - get_bh(bh); - set_buffer_dirty(bh); + clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; @@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, * to remember if we sent a barrier request */ if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", journal->j_devname); spin_lock(&journal->j_state_lock); journal->j_flags &= ~JBD2_BARRIER; spin_unlock(&journal->j_state_lock); @@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, /* And try again, without the barrier */ lock_buffer(bh); set_buffer_uptodate(bh); - set_buffer_dirty(bh); + clear_buffer_dirty(bh); ret = submit_bh(WRITE, bh); } *cbh = bh; @@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); + trace_mark(jbd2_start_commit, "dev %s transaction %d", + journal->j_devname, commit_transaction->t_tid); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); @@ -681,11 +680,9 @@ start_journal_io: */ err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { - char b[BDEVNAME_SIZE]; - printk(KERN_WARNING "JBD2: Detected IO errors while flushing file data " - "on %s\n", bdevname(journal->j_fs_dev, b)); + "on %s\n", journal->j_devname); err = 0; } @@ -990,6 +987,9 @@ restart_loop: } spin_unlock(&journal->j_list_lock); + trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", + journal->j_devname, commit_transaction->t_tid, + journal->j_tail_sequence); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8207a01c4ed..01c3901c3a0 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, if (ret) *retp = ret; else { - char b[BDEVNAME_SIZE]; - printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", - __func__, - blocknr, - bdevname(journal->j_dev, b)); + __func__, blocknr, journal->j_devname); err = -EIO; __journal_abort_soft(journal, err); } @@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats; static void jbd2_stats_proc_init(journal_t *journal) { - char name[BDEVNAME_SIZE]; - - bdevname(journal->j_dev, name); - journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); + journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); if (journal->j_proc_entry) { proc_create_data("history", S_IRUGO, journal->j_proc_entry, &jbd2_seq_history_fops, journal); @@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal) static void jbd2_stats_proc_exit(journal_t *journal) { - char name[BDEVNAME_SIZE]; - - bdevname(journal->j_dev, name); remove_proc_entry("info", journal->j_proc_entry); remove_proc_entry("history", journal->j_proc_entry); - remove_proc_entry(name, proc_jbd2_stats); + remove_proc_entry(journal->j_devname, proc_jbd2_stats); } static void journal_init_stats(journal_t *journal) @@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, { journal_t *journal = journal_init_common(); struct buffer_head *bh; + char *p; int n; if (!journal) @@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; journal->j_maxlen = len; + bdevname(journal->j_dev, journal->j_devname); + p = journal->j_devname; + while ((p = strchr(p, '/'))) + *p = '!'; jbd2_stats_proc_init(journal); bh = __getblk(journal->j_dev, start, journal->j_blocksize); @@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) { struct buffer_head *bh; journal_t *journal = journal_init_common(); + char *p; int err; int n; unsigned long long blocknr; @@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; journal->j_inode = inode; + bdevname(journal->j_dev, journal->j_devname); + p = journal->j_devname; + while ((p = strchr(p, '/'))) + *p = '!'; + p = journal->j_devname + strlen(journal->j_devname); + sprintf(p, ":%lu", journal->j_inode->i_ino); jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", journal, inode->i_sb->s_id, inode->i_ino, @@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) goto out; } + if (buffer_write_io_error(bh)) { + /* + * Oh, dear. A previous attempt to write the journal + * superblock failed. This could happen because the + * USB device was yanked out. Or it could happen to + * be a transient write error and maybe the block will + * be remapped. Nothing we can do but to retry the + * write and hope for the best. + */ + printk(KERN_ERR "JBD2: previous I/O error detected " + "for journal superblock update for %s.\n", + journal->j_devname); + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + } + spin_lock(&journal->j_state_lock); jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); @@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) BUFFER_TRACE(bh, "marking dirty"); mark_buffer_dirty(bh); - if (wait) + if (wait) { sync_dirty_buffer(bh); - else + if (buffer_write_io_error(bh)) { + printk(KERN_ERR "JBD2: I/O error detected " + "when updating journal superblock for %s.\n", + journal->j_devname); + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + } + } else ll_rw_block(SWRITE, 1, &bh); out: @@ -1761,23 +1786,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) } /* - * journal_dev_name: format a character string to describe on what - * device this journal is present. - */ - -static const char *journal_dev_name(journal_t *journal, char *buffer) -{ - struct block_device *bdev; - - if (journal->j_inode) - bdev = journal->j_inode->i_sb->s_bdev; - else - bdev = journal->j_dev; - - return bdevname(bdev, buffer); -} - -/* * Journal abort has very specific semantics, which we describe * for journal abort. * @@ -1793,13 +1801,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) void __jbd2_journal_abort_hard(journal_t *journal) { transaction_t *transaction; - char b[BDEVNAME_SIZE]; if (journal->j_flags & JBD2_ABORT) return; printk(KERN_ERR "Aborting journal on device %s.\n", - journal_dev_name(journal, b)); + journal->j_devname); spin_lock(&journal->j_state_lock); journal->j_flags |= JBD2_ABORT; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 10bfb466e06..29ff57ec5d1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -990,15 +990,6 @@ out: } /* - * This is only valid for leaf nodes, which are the only ones that can - * have empty extents anyway. - */ -static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) -{ - return !rec->e_leaf_clusters; -} - -/* * This function will discard the rightmost extent record. */ static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 42ff94bd801..60cd3d59230 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -146,4 +146,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el, return le16_to_cpu(rec->e_leaf_clusters); } +/* + * This is only valid for leaf nodes, which are the only ones that can + * have empty extents anyway. + */ +static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) +{ + return !rec->e_leaf_clusters; +} + #endif /* OCFS2_ALLOC_H */ diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index c58668a326f..aed268e80b4 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -25,6 +25,7 @@ #include <linux/fs.h> #include <linux/init.h> #include <linux/types.h> +#include <linux/fiemap.h> #define MLOG_MASK_PREFIX ML_EXTENT_MAP #include <cluster/masklog.h> @@ -32,6 +33,7 @@ #include "ocfs2.h" #include "alloc.h" +#include "dlmglue.h" #include "extent_map.h" #include "inode.h" #include "super.h" @@ -282,6 +284,51 @@ out: kfree(new_emi); } +static int ocfs2_last_eb_is_empty(struct inode *inode, + struct ocfs2_dinode *di) +{ + int ret, next_free; + u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); + struct buffer_head *eb_bh = NULL; + struct ocfs2_extent_block *eb; + struct ocfs2_extent_list *el; + + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk, + &eb_bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } + + eb = (struct ocfs2_extent_block *) eb_bh->b_data; + el = &eb->h_list; + + if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { + ret = -EROFS; + OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); + goto out; + } + + if (el->l_tree_depth) { + ocfs2_error(inode->i_sb, + "Inode %lu has non zero tree depth in " + "leaf block %llu\n", inode->i_ino, + (unsigned long long)eb_bh->b_blocknr); + ret = -EROFS; + goto out; + } + + next_free = le16_to_cpu(el->l_next_free_rec); + + if (next_free == 0 || + (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) + ret = 1; + +out: + brelse(eb_bh); + return ret; +} + /* * Return the 1st index within el which contains an extent start * larger than v_cluster. @@ -373,42 +420,28 @@ out: return ret; } -int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, - u32 *p_cluster, u32 *num_clusters, - unsigned int *extent_flags) +static int ocfs2_get_clusters_nocache(struct inode *inode, + struct buffer_head *di_bh, + u32 v_cluster, unsigned int *hole_len, + struct ocfs2_extent_rec *ret_rec, + unsigned int *is_last) { - int ret, i; - unsigned int flags = 0; - struct buffer_head *di_bh = NULL; - struct buffer_head *eb_bh = NULL; + int i, ret, tree_height, len; struct ocfs2_dinode *di; - struct ocfs2_extent_block *eb; + struct ocfs2_extent_block *uninitialized_var(eb); struct ocfs2_extent_list *el; struct ocfs2_extent_rec *rec; - u32 coff; - - if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - ret = -ERANGE; - mlog_errno(ret); - goto out; - } - - ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, - num_clusters, extent_flags); - if (ret == 0) - goto out; + struct buffer_head *eb_bh = NULL; - ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, - &di_bh, OCFS2_BH_CACHED, inode); - if (ret) { - mlog_errno(ret); - goto out; - } + memset(ret_rec, 0, sizeof(*ret_rec)); + if (is_last) + *is_last = 0; di = (struct ocfs2_dinode *) di_bh->b_data; el = &di->id2.i_list; + tree_height = le16_to_cpu(el->l_tree_depth); - if (el->l_tree_depth) { + if (tree_height > 0) { ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); if (ret) { mlog_errno(ret); @@ -431,46 +464,143 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, i = ocfs2_search_extent_list(el, v_cluster); if (i == -1) { /* - * A hole was found. Return some canned values that - * callers can key on. If asked for, num_clusters will - * be populated with the size of the hole. + * Holes can be larger than the maximum size of an + * extent, so we return their lengths in a seperate + * field. */ - *p_cluster = 0; - if (num_clusters) { + if (hole_len) { ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, - v_cluster, - num_clusters); + v_cluster, &len); if (ret) { mlog_errno(ret); goto out; } + + *hole_len = len; } - } else { - rec = &el->l_recs[i]; + goto out_hole; + } - BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); + rec = &el->l_recs[i]; - if (!rec->e_blkno) { - ocfs2_error(inode->i_sb, "Inode %lu has bad extent " - "record (%u, %u, 0)", inode->i_ino, - le32_to_cpu(rec->e_cpos), - ocfs2_rec_clusters(el, rec)); - ret = -EROFS; - goto out; + BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); + + if (!rec->e_blkno) { + ocfs2_error(inode->i_sb, "Inode %lu has bad extent " + "record (%u, %u, 0)", inode->i_ino, + le32_to_cpu(rec->e_cpos), + ocfs2_rec_clusters(el, rec)); + ret = -EROFS; + goto out; + } + + *ret_rec = *rec; + + /* + * Checking for last extent is potentially expensive - we + * might have to look at the next leaf over to see if it's + * empty. + * + * The first two checks are to see whether the caller even + * cares for this information, and if the extent is at least + * the last in it's list. + * + * If those hold true, then the extent is last if any of the + * additional conditions hold true: + * - Extent list is in-inode + * - Extent list is right-most + * - Extent list is 2nd to rightmost, with empty right-most + */ + if (is_last) { + if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { + if (tree_height == 0) + *is_last = 1; + else if (eb->h_blkno == di->i_last_eb_blk) + *is_last = 1; + else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { + ret = ocfs2_last_eb_is_empty(inode, di); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + if (ret == 1) + *is_last = 1; + } } + } + +out_hole: + ret = 0; +out: + brelse(eb_bh); + return ret; +} + +static void ocfs2_relative_extent_offsets(struct super_block *sb, + u32 v_cluster, + struct ocfs2_extent_rec *rec, + u32 *p_cluster, u32 *num_clusters) + +{ + u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); + + *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); + *p_cluster = *p_cluster + coff; + + if (num_clusters) + *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; +} + +int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, + u32 *p_cluster, u32 *num_clusters, + unsigned int *extent_flags) +{ + int ret; + unsigned int uninitialized_var(hole_len), flags = 0; + struct buffer_head *di_bh = NULL; + struct ocfs2_extent_rec rec; - coff = v_cluster - le32_to_cpu(rec->e_cpos); + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = -ERANGE; + mlog_errno(ret); + goto out; + } - *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, - le64_to_cpu(rec->e_blkno)); - *p_cluster = *p_cluster + coff; + ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, + num_clusters, extent_flags); + if (ret == 0) + goto out; - if (num_clusters) - *num_clusters = ocfs2_rec_clusters(el, rec) - coff; + ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, + &di_bh, OCFS2_BH_CACHED, inode); + if (ret) { + mlog_errno(ret); + goto out; + } - flags = rec->e_flags; + ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, + &rec, NULL); + if (ret) { + mlog_errno(ret); + goto out; + } - ocfs2_extent_map_insert_rec(inode, rec); + if (rec.e_blkno == 0ULL) { + /* + * A hole was found. Return some canned values that + * callers can key on. If asked for, num_clusters will + * be populated with the size of the hole. + */ + *p_cluster = 0; + if (num_clusters) { + *num_clusters = hole_len; + } + } else { + ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, + p_cluster, num_clusters); + flags = rec.e_flags; + + ocfs2_extent_map_insert_rec(inode, &rec); } if (extent_flags) @@ -478,7 +608,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, out: brelse(di_bh); - brelse(eb_bh); return ret; } @@ -521,3 +650,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, out: return ret; } + +static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, + struct fiemap_extent_info *fieinfo, + u64 map_start) +{ + int ret; + unsigned int id_count; + struct ocfs2_dinode *di; + u64 phys; + u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; + struct ocfs2_inode_info *oi = OCFS2_I(inode); + + di = (struct ocfs2_dinode *)di_bh->b_data; + id_count = le16_to_cpu(di->id2.i_data.id_count); + + if (map_start < id_count) { + phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; + phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); + + ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, + flags); + if (ret < 0) + return ret; + } + + return 0; +} + +#define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) + +int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 map_start, u64 map_len) +{ + int ret, is_last; + u32 mapping_end, cpos; + unsigned int hole_size; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + u64 len_bytes, phys_bytes, virt_bytes; + struct buffer_head *di_bh = NULL; + struct ocfs2_extent_rec rec; + + ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); + if (ret) + return ret; + + ret = ocfs2_inode_lock(inode, &di_bh, 0); + if (ret) { + mlog_errno(ret); + goto out; + } + + down_read(&OCFS2_I(inode)->ip_alloc_sem); + + /* + * Handle inline-data separately. + */ + if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); + goto out_unlock; + } + + cpos = map_start >> osb->s_clustersize_bits; + mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, + map_start + map_len); + mapping_end -= cpos; + is_last = 0; + while (cpos < mapping_end && !is_last) { + u32 fe_flags; + + ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, + &hole_size, &rec, &is_last); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (rec.e_blkno == 0ULL) { + cpos += hole_size; + continue; + } + + fe_flags = 0; + if (rec.e_flags & OCFS2_EXT_UNWRITTEN) + fe_flags |= FIEMAP_EXTENT_UNWRITTEN; + if (is_last) + fe_flags |= FIEMAP_EXTENT_LAST; + len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; + phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; + virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; + + ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, + len_bytes, fe_flags); + if (ret) + break; + + cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); + } + + if (ret > 0) + ret = 0; + +out_unlock: + brelse(di_bh); + + up_read(&OCFS2_I(inode)->ip_alloc_sem); + + ocfs2_inode_unlock(inode, 0); +out: + + return ret; +} diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index de91e3e41a2..1b97490e1ea 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h @@ -50,4 +50,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, u64 *ret_count, unsigned int *extent_flags); +int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 map_start, u64 map_len); + #endif /* _EXTENT_MAP_H */ diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ec2ed15c3da..ed38796052d 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2228,6 +2228,7 @@ const struct inode_operations ocfs2_file_iops = { .getattr = ocfs2_getattr, .permission = ocfs2_permission, .fallocate = ocfs2_fallocate, + .fiemap = ocfs2_fiemap, }; const struct inode_operations ocfs2_special_file_iops = { diff --git a/include/linux/ata.h b/include/linux/ata.h index be00973d1a8..a53318b8cbd 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -30,6 +30,7 @@ #define __LINUX_ATA_H__ #include <linux/types.h> +#include <asm/byteorder.h> /* defines only for the constants which don't work well as enums */ #define ATA_DMA_BOUNDARY 0xffffUL @@ -558,6 +559,15 @@ static inline int ata_id_has_flush(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 12); } +static inline int ata_id_flush_enabled(const u16 *id) +{ + if (ata_id_has_flush(id) == 0) + return 0; + if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) + return 0; + return id[ATA_ID_CFS_ENABLE_2] & (1 << 12); +} + static inline int ata_id_has_flush_ext(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) @@ -565,6 +575,19 @@ static inline int ata_id_has_flush_ext(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 13); } +static inline int ata_id_flush_ext_enabled(const u16 *id) +{ + if (ata_id_has_flush_ext(id) == 0) + return 0; + if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) + return 0; + /* + * some Maxtor disks have bit 13 defined incorrectly + * so check bit 10 too + */ + return (id[ATA_ID_CFS_ENABLE_2] & 0x2400) == 0x2400; +} + static inline int ata_id_has_lba48(const u16 *id) { if ((id[ATA_ID_COMMAND_SET_2] & 0xC000) != 0x4000) @@ -574,6 +597,15 @@ static inline int ata_id_has_lba48(const u16 *id) return id[ATA_ID_COMMAND_SET_2] & (1 << 10); } +static inline int ata_id_lba48_enabled(const u16 *id) +{ + if (ata_id_has_lba48(id) == 0) + return 0; + if ((id[ATA_ID_CSF_DEFAULT] & 0xC000) != 0x4000) + return 0; + return id[ATA_ID_CFS_ENABLE_2] & (1 << 10); +} + static inline int ata_id_hpa_enabled(const u16 *id) { /* Yes children, word 83 valid bits cover word 82 data */ @@ -645,7 +677,15 @@ static inline unsigned int ata_id_major_version(const u16 *id) static inline int ata_id_is_sata(const u16 *id) { - return ata_id_major_version(id) >= 5 && id[ATA_ID_HW_CONFIG] == 0; + /* + * See if word 93 is 0 AND drive is at least ATA-5 compatible + * verifying that word 80 by casting it to a signed type -- + * this trick allows us to filter out the reserved values of + * 0x0000 and 0xffff along with the earlier ATA revisions... + */ + if (id[ATA_ID_HW_CONFIG] == 0 && (short)id[ATA_ID_MAJOR_VER] >= 0x0020) + return 1; + return 0; } static inline int ata_id_has_tpm(const u16 *id) @@ -742,6 +782,76 @@ static inline int atapi_id_dmadir(const u16 *dev_id) return ata_id_major_version(dev_id) >= 7 && (dev_id[62] & 0x8000); } +/* + * ata_id_is_lba_capacity_ok() performs a sanity check on + * the claimed LBA capacity value for the device. + * + * Returns 1 if LBA capacity looks sensible, 0 otherwise. + * + * It is called only once for each device. + */ +static inline int ata_id_is_lba_capacity_ok(u16 *id) +{ + unsigned long lba_sects, chs_sects, head, tail; + + /* No non-LBA info .. so valid! */ + if (id[ATA_ID_CYLS] == 0) + return 1; + + lba_sects = ata_id_u32(id, ATA_ID_LBA_CAPACITY); + + /* + * The ATA spec tells large drives to return + * C/H/S = 16383/16/63 independent of their size. + * Some drives can be jumpered to use 15 heads instead of 16. + * Some drives can be jumpered to use 4092 cyls instead of 16383. + */ + if ((id[ATA_ID_CYLS] == 16383 || + (id[ATA_ID_CYLS] == 4092 && id[ATA_ID_CUR_CYLS] == 16383)) && + id[ATA_ID_SECTORS] == 63 && + (id[ATA_ID_HEADS] == 15 || id[ATA_ID_HEADS] == 16) && + (lba_sects >= 16383 * 63 * id[ATA_ID_HEADS])) + return 1; + + chs_sects = id[ATA_ID_CYLS] * id[ATA_ID_HEADS] * id[ATA_ID_SECTORS]; + + /* perform a rough sanity check on lba_sects: within 10% is OK */ + if (lba_sects - chs_sects < chs_sects/10) + return 1; + + /* some drives have the word order reversed */ + head = (lba_sects >> 16) & 0xffff; + tail = lba_sects & 0xffff; + lba_sects = head | (tail << 16); + + if (lba_sects - chs_sects < chs_sects/10) { + *(__le32 *)&id[ATA_ID_LBA_CAPACITY] = __cpu_to_le32(lba_sects); + return 1; /* LBA capacity is (now) good */ + } + + return 0; /* LBA capacity value may be bad */ +} + +static inline void ata_id_to_hd_driveid(u16 *id) +{ +#ifdef __BIG_ENDIAN + /* accessed in struct hd_driveid as 8-bit values */ + id[ATA_ID_MAX_MULTSECT] = __cpu_to_le16(id[ATA_ID_MAX_MULTSECT]); + id[ATA_ID_CAPABILITY] = __cpu_to_le16(id[ATA_ID_CAPABILITY]); + id[ATA_ID_OLD_PIO_MODES] = __cpu_to_le16(id[ATA_ID_OLD_PIO_MODES]); + id[ATA_ID_OLD_DMA_MODES] = __cpu_to_le16(id[ATA_ID_OLD_DMA_MODES]); + id[ATA_ID_MULTSECT] = __cpu_to_le16(id[ATA_ID_MULTSECT]); + + /* as 32-bit values */ + *(u32 *)&id[ATA_ID_LBA_CAPACITY] = ata_id_u32(id, ATA_ID_LBA_CAPACITY); + *(u32 *)&id[ATA_ID_SPG] = ata_id_u32(id, ATA_ID_SPG); + + /* as 64-bit value */ + *(u64 *)&id[ATA_ID_LBA_CAPACITY_2] = + ata_id_u64(id, ATA_ID_LBA_CAPACITY_2); +#endif +} + static inline int is_multi_taskfile(struct ata_taskfile *tf) { return (tf->command == ATA_CMD_READ_MULTI) || diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 80171ee89a2..8120fa1bc23 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -837,6 +837,8 @@ extern void ext3_truncate (struct inode *); extern void ext3_set_inode_flags(struct inode *); extern void ext3_get_inode_flags(struct ext3_inode_info *); extern void ext3_set_aops(struct inode *inode); +extern int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); /* ioctl.c */ extern int ext3_ioctl (struct inode *, struct file *, unsigned int, diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h new file mode 100644 index 00000000000..671decbd2ae --- /dev/null +++ b/include/linux/fiemap.h @@ -0,0 +1,64 @@ +/* + * FS_IOC_FIEMAP ioctl infrastructure. + * + * Some portions copyright (C) 2007 Cluster File Systems, Inc + * + * Authors: Mark Fasheh <mfasheh@suse.com> + * Kalpak Shah <kalpak.shah@sun.com> + * Andreas Dilger <adilger@sun.com> + */ + +#ifndef _LINUX_FIEMAP_H +#define _LINUX_FIEMAP_H + +struct fiemap_extent { + __u64 fe_logical; /* logical offset in bytes for the start of + * the extent from the beginning of the file */ + __u64 fe_physical; /* physical offset in bytes for the start + * of the extent from the beginning of the disk */ + __u64 fe_length; /* length in bytes for this extent */ + __u64 fe_reserved64[2]; + __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */ + __u32 fe_reserved[3]; +}; + +struct fiemap { + __u64 fm_start; /* logical offset (inclusive) at + * which to start mapping (in) */ + __u64 fm_length; /* logical length of mapping which + * userspace wants (in) */ + __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */ + __u32 fm_mapped_extents;/* number of extents that were mapped (out) */ + __u32 fm_extent_count; /* size of fm_extents array (in) */ + __u32 fm_reserved; + struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */ +}; + +#define FIEMAP_MAX_OFFSET (~0ULL) + +#define FIEMAP_FLAG_SYNC 0x00000001 /* sync file data before map */ +#define FIEMAP_FLAG_XATTR 0x00000002 /* map extended attribute tree */ + +#define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) + +#define FIEMAP_EXTENT_LAST 0x00000001 /* Last extent in file. */ +#define FIEMAP_EXTENT_UNKNOWN 0x00000002 /* Data location unknown. */ +#define FIEMAP_EXTENT_DELALLOC 0x00000004 /* Location still pending. + * Sets EXTENT_UNKNOWN. */ +#define FIEMAP_EXTENT_ENCODED 0x00000008 /* Data can not be read + * while fs is unmounted */ +#define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 /* Data is encrypted by fs. + * Sets EXTENT_NO_BYPASS. */ +#define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 /* Extent offsets may not be + * block aligned. */ +#define FIEMAP_EXTENT_DATA_INLINE 0x00000200 /* Data mixed with metadata. + * Sets EXTENT_NOT_ALIGNED.*/ +#define FIEMAP_EXTENT_DATA_TAIL 0x00000400 /* Multiple files in block. + * Sets EXTENT_NOT_ALIGNED.*/ +#define FIEMAP_EXTENT_UNWRITTEN 0x00000800 /* Space allocated, but + * no data (i.e. zero). */ +#define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively + * support extents. Result + * merged for efficiency. */ + +#endif /* _LINUX_FIEMAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 32477e8872d..44e3cb2f196 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -234,6 +234,7 @@ extern int dir_notify_enable; #define FS_IOC_SETFLAGS _IOW('f', 2, long) #define FS_IOC_GETVERSION _IOR('v', 1, long) #define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) #define FS_IOC32_GETFLAGS _IOR('f', 1, int) #define FS_IOC32_SETFLAGS _IOW('f', 2, int) #define FS_IOC32_GETVERSION _IOR('v', 1, int) @@ -294,6 +295,7 @@ extern int dir_notify_enable; #include <linux/mutex.h> #include <linux/capability.h> #include <linux/semaphore.h> +#include <linux/fiemap.h> #include <asm/atomic.h> #include <asm/byteorder.h> @@ -1182,6 +1184,20 @@ extern void dentry_unhash(struct dentry *dentry); extern int file_permission(struct file *, int); /* + * VFS FS_IOC_FIEMAP helper definitions. + */ +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent + * array */ +}; +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags); +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); + +/* * File types * * NOTE! These match bits 12..15 of stat.st_mode @@ -1290,6 +1306,8 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len); + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); }; struct seq_file; @@ -1987,6 +2005,9 @@ extern int vfs_fstat(unsigned int, struct kstat *); extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg); +extern int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len, get_block_t *get_block); extern void get_filesystem(struct file_system_type *fs); extern void put_filesystem(struct file_system_type *fs); diff --git a/include/linux/ide.h b/include/linux/ide.h index 6514db8fd2e..a9d82d6e6bd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -8,7 +8,7 @@ #include <linux/init.h> #include <linux/ioport.h> -#include <linux/hdreg.h> +#include <linux/ata.h> #include <linux/blkdev.h> #include <linux/proc_fs.h> #include <linux/interrupt.h> @@ -17,6 +17,7 @@ #include <linux/device.h> #include <linux/pci.h> #include <linux/completion.h> +#include <linux/pm.h> #ifdef CONFIG_BLK_DEV_IDEACPI #include <acpi/acpi.h> #endif @@ -87,12 +88,13 @@ struct ide_io_ports { }; #define OK_STAT(stat,good,bad) (((stat)&((good)|(bad)))==(good)) -#define BAD_R_STAT (BUSY_STAT | ERR_STAT) -#define BAD_W_STAT (BAD_R_STAT | WRERR_STAT) -#define BAD_STAT (BAD_R_STAT | DRQ_STAT) -#define DRIVE_READY (READY_STAT | SEEK_STAT) -#define BAD_CRC (ABRT_ERR | ICRC_ERR) +#define BAD_R_STAT (ATA_BUSY | ATA_ERR) +#define BAD_W_STAT (BAD_R_STAT | ATA_DF) +#define BAD_STAT (BAD_R_STAT | ATA_DRQ) +#define DRIVE_READY (ATA_DRDY | ATA_DSC) + +#define BAD_CRC (ATA_ABORTED | ATA_ICRC) #define SATA_NR_PORTS (3) /* 16 possible ?? */ @@ -125,24 +127,41 @@ struct ide_io_ports { #define PARTN_BITS 6 /* number of minor dev bits for partitions */ #define MAX_DRIVES 2 /* per interface; 2 assumed by lots of code */ #define SECTOR_SIZE 512 -#define SECTOR_WORDS (SECTOR_SIZE / 4) /* number of 32bit words per sector */ + #define IDE_LARGE_SEEK(b1,b2,t) (((b1) > (b2) + (t)) || ((b2) > (b1) + (t))) /* * Timeouts for various operations: */ -#define WAIT_DRQ (HZ/10) /* 100msec - spec allows up to 20ms */ -#define WAIT_READY (5*HZ) /* 5sec - some laptops are very slow */ -#define WAIT_PIDENTIFY (10*HZ) /* 10sec - should be less than 3ms (?), if all ATAPI CD is closed at boot */ -#define WAIT_WORSTCASE (30*HZ) /* 30sec - worst case when spinning up */ -#define WAIT_CMD (10*HZ) /* 10sec - maximum wait for an IRQ to happen */ -#define WAIT_MIN_SLEEP (2*HZ/100) /* 20msec - minimum sleep time */ +enum { + /* spec allows up to 20ms */ + WAIT_DRQ = HZ / 10, /* 100ms */ + /* some laptops are very slow */ + WAIT_READY = 5 * HZ, /* 5s */ + /* should be less than 3ms (?), if all ATAPI CD is closed at boot */ + WAIT_PIDENTIFY = 10 * HZ, /* 10s */ + /* worst case when spinning up */ + WAIT_WORSTCASE = 30 * HZ, /* 30s */ + /* maximum wait for an IRQ to happen */ + WAIT_CMD = 10 * HZ, /* 10s */ + /* Some drives require a longer IRQ timeout. */ + WAIT_FLOPPY_CMD = 50 * HZ, /* 50s */ + /* + * Some drives (for example, Seagate STT3401A Travan) require a very + * long timeout, because they don't return an interrupt or clear their + * BSY bit until after the command completes (even retension commands). + */ + WAIT_TAPE_CMD = 900 * HZ, /* 900s */ + /* minimum sleep time */ + WAIT_MIN_SLEEP = HZ / 50, /* 20ms */ +}; /* * Op codes for special requests to be handled by ide_special_rq(). * Values should be in the range of 0x20 to 0x3f. */ #define REQ_DRIVE_RESET 0x20 +#define REQ_DEVSET_EXEC 0x21 /* * Check for an interrupt and acknowledge the interrupt status @@ -303,8 +322,8 @@ typedef enum { ide_started, /* a drive operation was started, handler was set */ } ide_startstop_t; +struct ide_devset; struct ide_driver_s; -struct ide_settings_s; #ifdef CONFIG_BLK_DEV_IDEACPI struct ide_acpi_drive_link; @@ -315,10 +334,10 @@ struct ide_acpi_hwif_link; enum { IDE_AFLAG_DRQ_INTERRUPT = (1 << 0), IDE_AFLAG_MEDIA_CHANGED = (1 << 1), - - /* ide-cd */ /* Drive cannot lock the door. */ IDE_AFLAG_NO_DOORLOCK = (1 << 2), + + /* ide-cd */ /* Drive cannot eject the disc. */ IDE_AFLAG_NO_EJECT = (1 << 3), /* Drive is a pre ATAPI 1.2 drive. */ @@ -354,21 +373,25 @@ enum { IDE_AFLAG_CLIK_DRIVE = (1 << 19), /* Requires BH algorithm for packets */ IDE_AFLAG_ZIP_DRIVE = (1 << 20), + /* Write protect */ + IDE_AFLAG_WP = (1 << 21), + /* Supports format progress report */ + IDE_AFLAG_SRFP = (1 << 22), /* ide-tape */ - IDE_AFLAG_IGNORE_DSC = (1 << 21), + IDE_AFLAG_IGNORE_DSC = (1 << 23), /* 0 When the tape position is unknown */ - IDE_AFLAG_ADDRESS_VALID = (1 << 22), + IDE_AFLAG_ADDRESS_VALID = (1 << 24), /* Device already opened */ - IDE_AFLAG_BUSY = (1 << 23), + IDE_AFLAG_BUSY = (1 << 25), /* Attempt to auto-detect the current user block size */ - IDE_AFLAG_DETECT_BS = (1 << 24), + IDE_AFLAG_DETECT_BS = (1 << 26), /* Currently on a filemark */ - IDE_AFLAG_FILEMARK = (1 << 25), + IDE_AFLAG_FILEMARK = (1 << 27), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), + IDE_AFLAG_MEDIUM_PRESENT = (1 << 28), - IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), + IDE_AFLAG_NO_AUTOCLOSE = (1 << 29), }; struct ide_drive_s { @@ -380,10 +403,10 @@ struct ide_drive_s { struct request *rq; /* current request */ struct ide_drive_s *next; /* circular list of hwgroup drives */ void *driver_data; /* extra driver data */ - struct hd_driveid *id; /* drive model identification info */ + u16 *id; /* identification info */ #ifdef CONFIG_IDE_PROC_FS struct proc_dir_entry *proc; /* /proc/ide/ directory entry */ - struct ide_settings_s *settings;/* /proc/ide/ drive settings */ + const struct ide_proc_devset *settings; /* /proc/ide/ drive settings */ #endif struct hwif_s *hwif; /* actually (ide_hwif_t *) */ @@ -395,16 +418,16 @@ struct ide_drive_s { special_t special; /* special action flags */ select_t select; /* basic drive/head select reg value */ - u8 keep_settings; /* restore settings after drive reset */ - u8 using_dma; /* disk is using dma for read/write */ u8 retry_pio; /* retrying dma capable host in pio */ u8 state; /* retry state */ u8 waiting_for_dma; /* dma currently in progress */ - u8 unmask; /* okay to unmask other irqs */ - u8 noflush; /* don't attempt flushes */ - u8 dsc_overlap; /* DSC overlap */ - u8 nice1; /* give potential excess bandwidth */ + unsigned keep_settings : 1; /* restore settings after drive reset */ + unsigned using_dma : 1; /* disk is using dma for read/write */ + unsigned unmask : 1; /* okay to unmask other irqs */ + unsigned noflush : 1; /* don't attempt flushes */ + unsigned dsc_overlap : 1; /* DSC overlap */ + unsigned nice1 : 1; /* give potential excess bandwidth */ unsigned present : 1; /* drive is physically present */ unsigned dead : 1; /* device ejected hint */ unsigned id_read : 1; /* 1=id read from disk 0 = synthetic */ @@ -414,23 +437,22 @@ struct ide_drive_s { unsigned forced_geom : 1; /* 1 if hdx=c,h,s was given at boot */ unsigned no_unmask : 1; /* disallow setting unmask bit */ unsigned no_io_32bit : 1; /* disallow enabling 32bit I/O */ - unsigned atapi_overlap : 1; /* ATAPI overlap (not supported) */ unsigned doorlocking : 1; /* for removable only: door lock/unlock works */ unsigned nodma : 1; /* disallow DMA */ - unsigned remap_0_to_1 : 1; /* 0=noremap, 1=remap 0->1 (for EZDrive) */ unsigned blocked : 1; /* 1=powermanagment told us not to do anything, so sleep nicely */ unsigned scsi : 1; /* 0=default, 1=ide-scsi emulation */ unsigned sleeping : 1; /* 1=sleeping & sleep field valid */ unsigned post_reset : 1; unsigned udma33_warned : 1; + unsigned addressing : 2; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ + unsigned wcache : 1; /* status of write cache */ + unsigned nowerr : 1; /* used for ignoring ATA_DF */ - u8 addressing; /* 0=28-bit, 1=48-bit, 2=48-bit doing 28-bit */ u8 quirk_list; /* considered quirky, set for a specific host */ u8 init_speed; /* transfer rate set at boot */ u8 current_speed; /* current transfer rate set */ u8 desired_speed; /* desired transfer rate set */ u8 dn; /* now wide spread use */ - u8 wcache; /* status of write cache */ u8 acoustic; /* acoustic management */ u8 media; /* disk, cdrom, tape, floppy, ... */ u8 ready_stat; /* min status value for drive ready */ @@ -438,9 +460,7 @@ struct ide_drive_s { u8 mult_req; /* requested multiple sector setting */ u8 tune_req; /* requested drive tuning setting */ u8 io_32bit; /* 0=16-bit, 1=32-bit, 2/3=32bit+sync */ - u8 bad_wstat; /* used for ignoring WRERR_STAT */ - u8 nowerr; /* used for ignoring WRERR_STAT */ - u8 sect0; /* offset of first sector for DM6:DDO */ + u8 bad_wstat; /* used for ignoring ATA_DF */ u8 head; /* "real" number of heads */ u8 sect; /* "real" sectors per track */ u8 bios_head; /* BIOS/fdisk/LILO number of heads */ @@ -474,10 +494,6 @@ typedef struct ide_drive_s ide_drive_t; #define to_ide_device(dev)container_of(dev, ide_drive_t, gendev) -#define IDE_CHIPSET_PCI_MASK \ - ((1<<ide_pci)|(1<<ide_cmd646)|(1<<ide_ali14xx)) -#define IDE_CHIPSET_IS_PCI(c) ((IDE_CHIPSET_PCI_MASK >> (c)) & 1) - struct ide_task_s; struct ide_port_info; @@ -567,7 +583,6 @@ typedef struct hwif_s { u8 major; /* our major number */ u8 index; /* 0 for ide0; 1 for ide1; ... */ u8 channel; /* for dual-port chips: 0=primary, 1=secondary */ - u8 bus_state; /* power state of the IDE bus */ u32 host_flags; @@ -645,6 +660,7 @@ struct ide_host { ide_hwif_t *ports[MAX_HWIFS]; unsigned int n_ports; struct device *dev[2]; + unsigned int (*init_chipset)(struct pci_dev *); unsigned long host_flags; void *host_priv; }; @@ -692,9 +708,61 @@ typedef struct ide_driver_s ide_driver_t; extern struct mutex ide_setting_mtx; -int set_io_32bit(ide_drive_t *, int); -int set_pio_mode(ide_drive_t *, int); -int set_using_dma(ide_drive_t *, int); +/* + * configurable drive settings + */ + +#define DS_SYNC (1 << 0) + +struct ide_devset { + int (*get)(ide_drive_t *); + int (*set)(ide_drive_t *, int); + unsigned int flags; +}; + +#define __DEVSET(_flags, _get, _set) { \ + .flags = _flags, \ + .get = _get, \ + .set = _set, \ +} + +#define ide_devset_get(name, field) \ +static int get_##name(ide_drive_t *drive) \ +{ \ + return drive->field; \ +} + +#define ide_devset_set(name, field) \ +static int set_##name(ide_drive_t *drive, int arg) \ +{ \ + drive->field = arg; \ + return 0; \ +} + +#define __IDE_DEVSET(_name, _flags, _get, _set) \ +const struct ide_devset ide_devset_##_name = \ + __DEVSET(_flags, _get, _set) + +#define IDE_DEVSET(_name, _flags, _get, _set) \ +static __IDE_DEVSET(_name, _flags, _get, _set) + +#define ide_devset_rw(_name, _func) \ +IDE_DEVSET(_name, 0, get_##_func, set_##_func) + +#define ide_devset_w(_name, _func) \ +IDE_DEVSET(_name, 0, NULL, set_##_func) + +#define ide_devset_rw_sync(_name, _func) \ +IDE_DEVSET(_name, DS_SYNC, get_##_func, set_##_func) + +#define ide_decl_devset(_name) \ +extern const struct ide_devset ide_devset_##_name + +ide_decl_devset(io_32bit); +ide_decl_devset(keepsettings); +ide_decl_devset(pio_mode); +ide_decl_devset(unmaskirq); +ide_decl_devset(using_dma); /* ATAPI packet command flags */ enum { @@ -710,6 +778,12 @@ enum { PC_FLAG_TIMEDOUT = (1 << 7), }; +/* + * With each packet command, we allocate a buffer of IDE_PC_BUFFER_SIZE bytes. + * This is used for several packet commands (not for READ/WRITE commands). + */ +#define IDE_PC_BUFFER_SIZE 256 + struct ide_atapi_pc { /* actual packet bytes */ u8 c[12]; @@ -739,7 +813,7 @@ struct ide_atapi_pc { * those are more or less driver-specific and some of them are subject * to change/removal later. */ - u8 pc_buf[256]; + u8 pc_buf[IDE_PC_BUFFER_SIZE]; /* idetape only */ struct idetape_bh *bh; @@ -757,37 +831,34 @@ struct ide_atapi_pc { #ifdef CONFIG_IDE_PROC_FS /* - * configurable drive settings + * /proc/ide interface */ -#define TYPE_INT 0 -#define TYPE_BYTE 1 -#define TYPE_SHORT 2 +#define ide_devset_rw_field(_name, _field) \ +ide_devset_get(_name, _field); \ +ide_devset_set(_name, _field); \ +IDE_DEVSET(_name, DS_SYNC, get_##_name, set_##_name) + +struct ide_proc_devset { + const char *name; + const struct ide_devset *setting; + int min, max; + int (*mulf)(ide_drive_t *); + int (*divf)(ide_drive_t *); +}; -#define SETTING_READ (1 << 0) -#define SETTING_WRITE (1 << 1) -#define SETTING_RW (SETTING_READ | SETTING_WRITE) +#define __IDE_PROC_DEVSET(_name, _min, _max, _mulf, _divf) { \ + .name = __stringify(_name), \ + .setting = &ide_devset_##_name, \ + .min = _min, \ + .max = _max, \ + .mulf = _mulf, \ + .divf = _divf, \ +} -typedef int (ide_procset_t)(ide_drive_t *, int); -typedef struct ide_settings_s { - char *name; - int rw; - int data_type; - int min; - int max; - int mul_factor; - int div_factor; - void *data; - ide_procset_t *set; - int auto_remove; - struct ide_settings_s *next; -} ide_settings_t; - -int ide_add_setting(ide_drive_t *, const char *, int, int, int, int, int, int, void *, ide_procset_t *set); +#define IDE_PROC_DEVSET(_name, _min, _max) \ +__IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL) -/* - * /proc/ide interface - */ typedef struct { const char *name; mode_t mode; @@ -804,8 +875,6 @@ void ide_proc_unregister_port(ide_hwif_t *); void ide_proc_register_driver(ide_drive_t *, ide_driver_t *); void ide_proc_unregister_driver(ide_drive_t *, ide_driver_t *); -void ide_add_generic_settings(ide_drive_t *); - read_proc_t proc_ide_read_capacity; read_proc_t proc_ide_read_geometry; @@ -833,7 +902,6 @@ static inline void ide_proc_unregister_device(ide_drive_t *drive) { ; } static inline void ide_proc_unregister_port(ide_hwif_t *hwif) { ; } static inline void ide_proc_register_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } static inline void ide_proc_unregister_driver(ide_drive_t *drive, ide_driver_t *driver) { ; } -static inline void ide_add_generic_settings(ide_drive_t *drive) { ; } #define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif @@ -879,7 +947,6 @@ enum { struct ide_driver_s { const char *version; u8 media; - unsigned supports_dsc_overlap : 1; ide_startstop_t (*do_request)(ide_drive_t *, struct request *, sector_t); int (*end_request)(ide_drive_t *, int, int); ide_startstop_t (*error)(ide_drive_t *, struct request *rq, u8, u8); @@ -889,7 +956,8 @@ struct ide_driver_s { void (*resume)(ide_drive_t *); void (*shutdown)(ide_drive_t *); #ifdef CONFIG_IDE_PROC_FS - ide_proc_entry_t *proc; + ide_proc_entry_t *proc; + const struct ide_proc_devset *settings; #endif }; @@ -898,7 +966,17 @@ struct ide_driver_s { int ide_device_get(ide_drive_t *); void ide_device_put(ide_drive_t *); -int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, unsigned, unsigned long); +struct ide_ioctl_devset { + unsigned int get_ioctl; + unsigned int set_ioctl; + const struct ide_devset *setting; +}; + +int ide_setting_ioctl(ide_drive_t *, struct block_device *, unsigned int, + unsigned long, const struct ide_ioctl_devset *); + +int generic_ide_ioctl(ide_drive_t *, struct file *, struct block_device *, + unsigned, unsigned long); extern int ide_vlb_clk; extern int ide_pci_clk; @@ -920,14 +998,19 @@ ide_startstop_t __ide_error(ide_drive_t *, struct request *, u8, u8); ide_startstop_t ide_error (ide_drive_t *drive, const char *msg, byte stat); -extern void ide_fix_driveid(struct hd_driveid *); +void ide_fix_driveid(u16 *); extern void ide_fixstring(u8 *, const int, const int); +int ide_busy_sleep(ide_hwif_t *, unsigned long, int); + int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); extern ide_startstop_t ide_do_reset (ide_drive_t *); +extern int ide_devset_execute(ide_drive_t *drive, + const struct ide_devset *setting, int arg); + extern void ide_do_drive_cmd(ide_drive_t *, struct request *); extern void ide_end_drive_cmd(ide_drive_t *, u8, u8); @@ -1051,6 +1134,8 @@ void ide_tf_read(ide_drive_t *, ide_task_t *); void ide_input_data(ide_drive_t *, struct request *, void *, unsigned int); void ide_output_data(ide_drive_t *, struct request *, void *, unsigned int); +int ide_io_buffers(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int); + extern void SELECT_DRIVE(ide_drive_t *); void SELECT_MASK(ide_drive_t *, int); @@ -1061,11 +1146,36 @@ extern int drive_is_ready(ide_drive_t *); void ide_pktcmd_tf_load(ide_drive_t *, u32, u16, u8); +int ide_check_atapi_device(ide_drive_t *, const char *); + +void ide_init_pc(struct ide_atapi_pc *); + +/* + * Special requests for ide-tape block device strategy routine. + * + * In order to service a character device command, we add special requests to + * the tail of our block device request queue and wait for their completion. + */ +enum { + REQ_IDETAPE_PC1 = (1 << 0), /* packet command (first stage) */ + REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */ + REQ_IDETAPE_READ = (1 << 2), + REQ_IDETAPE_WRITE = (1 << 3), +}; + +void ide_queue_pc_head(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *, + struct request *); +int ide_queue_pc_tail(ide_drive_t *, struct gendisk *, struct ide_atapi_pc *); + +int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *); +int ide_do_start_stop(ide_drive_t *, struct gendisk *, int); +int ide_set_media_lock(ide_drive_t *, struct gendisk *, int); + ide_startstop_t ide_pc_intr(ide_drive_t *drive, struct ide_atapi_pc *pc, ide_handler_t *handler, unsigned int timeout, ide_expiry_t *expiry, void (*update_buffers)(ide_drive_t *, struct ide_atapi_pc *), void (*retry_pc)(ide_drive_t *), void (*dsc_handle)(ide_drive_t *), - void (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, + int (*io_buffers)(ide_drive_t *, struct ide_atapi_pc *, unsigned int, int)); ide_startstop_t ide_transfer_pc(ide_drive_t *, struct ide_atapi_pc *, ide_handler_t *, unsigned int, ide_expiry_t *); @@ -1080,8 +1190,6 @@ int ide_raw_taskfile(ide_drive_t *, ide_task_t *, u8 *, u16); int ide_no_data_taskfile(ide_drive_t *, ide_task_t *); int ide_taskfile_ioctl(ide_drive_t *, unsigned int, unsigned long); -int ide_cmd_ioctl(ide_drive_t *, unsigned int, unsigned long); -int ide_task_ioctl(ide_drive_t *, unsigned int, unsigned long); extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); @@ -1092,7 +1200,6 @@ extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout); extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); -extern int ide_spin_wait_hwgroup(ide_drive_t *); extern void ide_timer_expiry(unsigned long); extern irqreturn_t ide_intr(int irq, void *dev_id); extern void do_ide_request(struct request_queue *); @@ -1229,6 +1336,14 @@ int ide_pci_init_two(struct pci_dev *, struct pci_dev *, const struct ide_port_info *, void *); void ide_pci_remove(struct pci_dev *); +#ifdef CONFIG_PM +int ide_pci_suspend(struct pci_dev *, pm_message_t); +int ide_pci_resume(struct pci_dev *); +#else +#define ide_pci_suspend NULL +#define ide_pci_resume NULL +#endif + void ide_map_sg(ide_drive_t *, struct request *); void ide_init_sg_cmd(ide_drive_t *, struct request *); @@ -1240,7 +1355,7 @@ struct drive_list_entry { const char *id_firmware; }; -int ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *); +int ide_in_drive_list(u16 *, const struct drive_list_entry *); #ifdef CONFIG_BLK_DEV_IDEDMA int __ide_dma_bad_drive(ide_drive_t *); @@ -1347,24 +1462,6 @@ const char *ide_xfer_verbose(u8 mode); extern void ide_toggle_bounce(ide_drive_t *drive, int on); extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate); -static inline int ide_dev_has_iordy(struct hd_driveid *id) -{ - return ((id->field_valid & 2) && (id->capability & 8)) ? 1 : 0; -} - -static inline int ide_dev_is_sata(struct hd_driveid *id) -{ - /* - * See if word 93 is 0 AND drive is at least ATA-5 compatible - * verifying that word 80 by casting it to a signed type -- - * this trick allows us to filter out the reserved values of - * 0x0000 and 0xffff along with the earlier ATA revisions... - */ - if (id->hw_config == 0 && (short)id->major_rev_num >= 0x0020) - return 1; - return 0; -} - u64 ide_get_lba_addr(struct ide_taskfile *, int); u8 ide_dump_status(ide_drive_t *, const char *, u8); @@ -1436,13 +1533,6 @@ extern struct mutex ide_cfg_mtx; extern struct bus_type ide_bus_type; extern struct class *ide_port_class; -/* check if CACHE FLUSH (EXT) command is supported (bits defined in ATA-6) */ -#define ide_id_has_flush_cache(id) ((id)->cfs_enable_2 & 0x3000) - -/* some Maxtor disks have bit 13 defined incorrectly so check bit 10 too */ -#define ide_id_has_flush_cache_ext(id) \ - (((id)->cfs_enable_2 & 0x2400) == 0x2400) - static inline void ide_dump_identify(u8 *id) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 2, id, 512, 0); @@ -1453,10 +1543,10 @@ static inline int hwif_to_node(ide_hwif_t *hwif) return hwif->dev ? dev_to_node(hwif->dev) : -1; } -static inline ide_drive_t *ide_get_paired_drive(ide_drive_t *drive) +static inline ide_drive_t *ide_get_pair_dev(ide_drive_t *drive) { - ide_hwif_t *hwif = HWIF(drive); + ide_drive_t *peer = &drive->hwif->drives[(drive->dn ^ 1) & 1]; - return &hwif->drives[(drive->dn ^ 1) & 1]; + return peer->present ? peer : NULL; } #endif /* _IDE_H */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3dd20900709..66c3499478b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -850,7 +850,8 @@ struct journal_s */ struct block_device *j_dev; int j_blocksize; - unsigned long long j_blk_offset; + unsigned long long j_blk_offset; + char j_devname[BDEVNAME_SIZE+24]; /* * Device which holds the client fs. For internal journal this will be diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 20838883535..9007ccdfc11 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); +s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc, 0); + s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } -static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) -{ - return __percpu_counter_sum(fbc, 1); -} - - static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc, 0); + return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 4a8ba4bf5f6..a8663890a88 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) +s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; @@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; - if (set) - *pcount = 0; + *pcount = 0; } - if (set) - fbc->count = ret; + fbc->count = ret; spin_unlock(&fbc->lock); return ret; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 05ebce2881e..85c487b8572 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1345,7 +1345,7 @@ out: static void ipgre_netlink_parms(struct nlattr *data[], struct ip_tunnel_parm *parms) { - memset(parms, 0, sizeof(parms)); + memset(parms, 0, sizeof(*parms)); parms->iph.protocol = IPPROTO_GRE; diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index de6004de80b..05048e40326 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -2,8 +2,8 @@ # IP Virtual Server configuration # menuconfig IP_VS - tristate "IP virtual server support (EXPERIMENTAL)" - depends on NETFILTER + tristate "IP virtual server support" + depends on NET && INET && NETFILTER ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This |