diff options
Diffstat (limited to 'drivers/mtd')
47 files changed, 13135 insertions, 241 deletions
diff --git a/drivers/mtd/Kconfig b/drivers/mtd/Kconfig index 26f75c29944..c1b47db29bd 100644 --- a/drivers/mtd/Kconfig +++ b/drivers/mtd/Kconfig @@ -1,8 +1,6 @@ # $Id: Kconfig,v 1.11 2005/11/07 11:14:19 gleixner Exp $ -menu "Memory Technology Devices (MTD)" - -config MTD +menuconfig MTD tristate "Memory Technology Device (MTD) support" help Memory Technology Devices are flash, RAM and similar chips, often @@ -13,9 +11,10 @@ config MTD them. It will also allow you to select individual drivers for particular hardware and users of MTD devices. If unsure, say N. +if MTD + config MTD_DEBUG bool "Debugging" - depends on MTD help This turns on low-level debugging for the entire MTD sub-system. Normally, you should say 'N'. @@ -29,7 +28,6 @@ config MTD_DEBUG_VERBOSE config MTD_CONCAT tristate "MTD concatenating support" - depends on MTD help Support for concatenating several MTD devices into a single (virtual) one. This allows you to have -for example- a JFFS(2) @@ -38,7 +36,6 @@ config MTD_CONCAT config MTD_PARTITIONS bool "MTD partitioning support" - depends on MTD help If you have a device which needs to divide its flash chip(s) up into multiple 'partitions', each of which appears to the user as @@ -153,11 +150,9 @@ config MTD_AFS_PARTS 'armflash' map driver (CONFIG_MTD_ARMFLASH) does this, for example. comment "User Modules And Translation Layers" - depends on MTD config MTD_CHAR tristate "Direct char device access to MTD devices" - depends on MTD help This provides a character device for each MTD device present in the system, allowing the user to read and write directly to the @@ -166,12 +161,12 @@ config MTD_CHAR config MTD_BLKDEVS tristate "Common interface to block layer for MTD 'translation layers'" - depends on MTD && BLOCK + depends on BLOCK default n config MTD_BLOCK tristate "Caching block device access to MTD devices" - depends on MTD && BLOCK + depends on BLOCK select MTD_BLKDEVS ---help--- Although most flash chips have an erase size too large to be useful @@ -194,7 +189,7 @@ config MTD_BLOCK config MTD_BLOCK_RO tristate "Readonly block device access to MTD devices" - depends on MTD_BLOCK!=y && MTD && BLOCK + depends on MTD_BLOCK!=y && BLOCK select MTD_BLKDEVS help This allows you to mount read-only file systems (such as cramfs) @@ -206,7 +201,7 @@ config MTD_BLOCK_RO config FTL tristate "FTL (Flash Translation Layer) support" - depends on MTD && BLOCK + depends on BLOCK select MTD_BLKDEVS ---help--- This provides support for the original Flash Translation Layer which @@ -223,7 +218,7 @@ config FTL config NFTL tristate "NFTL (NAND Flash Translation Layer) support" - depends on MTD && BLOCK + depends on BLOCK select MTD_BLKDEVS ---help--- This provides support for the NAND Flash Translation Layer which is @@ -247,7 +242,7 @@ config NFTL_RW config INFTL tristate "INFTL (Inverse NAND Flash Translation Layer) support" - depends on MTD && BLOCK + depends on BLOCK select MTD_BLKDEVS ---help--- This provides support for the Inverse NAND Flash Translation @@ -265,7 +260,7 @@ config INFTL config RFD_FTL tristate "Resident Flash Disk (Flash Translation Layer) support" - depends on MTD && BLOCK + depends on BLOCK select MTD_BLKDEVS ---help--- This provides support for the flash translation layer known @@ -276,7 +271,7 @@ config RFD_FTL config SSFDC tristate "NAND SSFDC (SmartMedia) read only translation layer" - depends on MTD && BLOCK + depends on BLOCK select MTD_BLKDEVS help This enables read only access to SmartMedia formatted NAND @@ -292,5 +287,6 @@ source "drivers/mtd/nand/Kconfig" source "drivers/mtd/onenand/Kconfig" -endmenu +source "drivers/mtd/ubi/Kconfig" +endif # MTD diff --git a/drivers/mtd/Makefile b/drivers/mtd/Makefile index c130e6261ad..92055405cb3 100644 --- a/drivers/mtd/Makefile +++ b/drivers/mtd/Makefile @@ -28,3 +28,5 @@ nftl-objs := nftlcore.o nftlmount.o inftl-objs := inftlcore.o inftlmount.o obj-y += chips/ maps/ devices/ nand/ onenand/ + +obj-$(CONFIG_MTD_UBI) += ubi/ diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig index 72e6d73beb4..d28e0fc85e1 100644 --- a/drivers/mtd/chips/Kconfig +++ b/drivers/mtd/chips/Kconfig @@ -6,7 +6,6 @@ menu "RAM/ROM/Flash chip drivers" config MTD_CFI tristate "Detect flash chips by Common Flash Interface (CFI) probe" - depends on MTD select MTD_GEN_PROBE help The Common Flash Interface specification was developed by Intel, @@ -18,7 +17,6 @@ config MTD_CFI config MTD_JEDECPROBE tristate "Detect non-CFI AMD/JEDEC-compatible flash chips" - depends on MTD select MTD_GEN_PROBE help This option enables JEDEC-style probing of flash chips which are not @@ -213,21 +211,18 @@ config MTD_CFI_UTIL config MTD_RAM tristate "Support for RAM chips in bus mapping" - depends on MTD help This option enables basic support for RAM chips accessed through a bus mapping driver. config MTD_ROM tristate "Support for ROM chips in bus mapping" - depends on MTD help This option enables basic support for ROM chips accessed through a bus mapping driver. config MTD_ABSENT tristate "Support for absent chips in bus mapping" - depends on MTD help This option enables support for a dummy probing driver used to allocated placeholder MTD devices on systems that have socketed @@ -237,7 +232,6 @@ config MTD_ABSENT with this driver will return -ENODEV upon access. config MTD_OBSOLETE_CHIPS - depends on MTD bool "Older (theoretically obsoleted now) drivers for non-CFI chips" help This option does not enable any code directly, but will allow you to @@ -250,7 +244,7 @@ config MTD_OBSOLETE_CHIPS config MTD_AMDSTD tristate "AMD compatible flash chip support (non-CFI)" - depends on MTD && MTD_OBSOLETE_CHIPS && BROKEN + depends on MTD_OBSOLETE_CHIPS && BROKEN help This option enables support for flash chips using AMD-compatible commands, including some which are not CFI-compatible and hence @@ -260,7 +254,7 @@ config MTD_AMDSTD config MTD_SHARP tristate "pre-CFI Sharp chip support" - depends on MTD && MTD_OBSOLETE_CHIPS + depends on MTD_OBSOLETE_CHIPS help This option enables support for flash chips using Sharp-compatible commands, including some which are not CFI-compatible and hence @@ -268,7 +262,7 @@ config MTD_SHARP config MTD_JEDEC tristate "JEDEC device support" - depends on MTD && MTD_OBSOLETE_CHIPS && BROKEN + depends on MTD_OBSOLETE_CHIPS && BROKEN help Enable older JEDEC flash interface devices for self programming flash. It is commonly used in older AMD chips. It is diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c index f334959a335..2f19fa78d24 100644 --- a/drivers/mtd/chips/cfi_cmdset_0001.c +++ b/drivers/mtd/chips/cfi_cmdset_0001.c @@ -15,6 +15,8 @@ * - optimized write buffer method * 02/05/2002 Christopher Hoover <ch@hpl.hp.com>/<ch@murgatroid.com> * - reworked lock/unlock/erase support for var size flash + * 21/03/2007 Rodolfo Giometti <giometti@linux.it> + * - auto unlock sectors on resume for auto locking flash on power up */ #include <linux/module.h> @@ -30,6 +32,7 @@ #include <linux/delay.h> #include <linux/interrupt.h> #include <linux/reboot.h> +#include <linux/bitmap.h> #include <linux/mtd/xip.h> #include <linux/mtd/map.h> #include <linux/mtd/mtd.h> @@ -220,6 +223,15 @@ static void fixup_use_write_buffers(struct mtd_info *mtd, void *param) } } +/* + * Some chips power-up with all sectors locked by default. + */ +static void fixup_use_powerup_lock(struct mtd_info *mtd, void *param) +{ + printk(KERN_INFO "Using auto-unlock on power-up/resume\n" ); + mtd->flags |= MTD_STUPID_LOCK; +} + static struct cfi_fixup cfi_fixup_table[] = { #ifdef CMDSET0001_DISABLE_ERASE_SUSPEND_ON_WRITE { CFI_MFR_ANY, CFI_ID_ANY, fixup_intel_strataflash, NULL }, @@ -232,6 +244,7 @@ static struct cfi_fixup cfi_fixup_table[] = { #endif { CFI_MFR_ST, 0x00ba, /* M28W320CT */ fixup_st_m28w320ct, NULL }, { CFI_MFR_ST, 0x00bb, /* M28W320CB */ fixup_st_m28w320cb, NULL }, + { MANUFACTURER_INTEL, 0x891c, fixup_use_powerup_lock, NULL, }, { 0, 0, NULL, NULL } }; @@ -460,6 +473,7 @@ static struct mtd_info *cfi_intelext_setup(struct mtd_info *mtd) mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].offset = (j*devsize)+offset; mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].erasesize = ersize; mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].numblocks = ernum; + mtd->eraseregions[(j*cfi->cfiq->NumEraseRegions)+i].lockmap = kmalloc(ernum / 8 + 1, GFP_KERNEL); } offset += (ersize * ernum); } @@ -1825,8 +1839,7 @@ static void cfi_intelext_sync (struct mtd_info *mtd) } } -#ifdef DEBUG_LOCK_BITS -static int __xipram do_printlockstatus_oneblock(struct map_info *map, +static int __xipram do_getlockstatus_oneblock(struct map_info *map, struct flchip *chip, unsigned long adr, int len, void *thunk) @@ -1840,8 +1853,17 @@ static int __xipram do_printlockstatus_oneblock(struct map_info *map, chip->state = FL_JEDEC_QUERY; status = cfi_read_query(map, adr+(2*ofs_factor)); xip_enable(map, chip, 0); + return status; +} + +#ifdef DEBUG_LOCK_BITS +static int __xipram do_printlockstatus_oneblock(struct map_info *map, + struct flchip *chip, + unsigned long adr, + int len, void *thunk) +{ printk(KERN_DEBUG "block status register for 0x%08lx is %x\n", - adr, status); + adr, do_getlockstatus_oneblock(map, chip, adr, len, thunk)); return 0; } #endif @@ -2216,14 +2238,45 @@ static int cfi_intelext_get_user_prot_info(struct mtd_info *mtd, #endif +static void cfi_intelext_save_locks(struct mtd_info *mtd) +{ + struct mtd_erase_region_info *region; + int block, status, i; + unsigned long adr; + size_t len; + + for (i = 0; i < mtd->numeraseregions; i++) { + region = &mtd->eraseregions[i]; + if (!region->lockmap) + continue; + + for (block = 0; block < region->numblocks; block++){ + len = region->erasesize; + adr = region->offset + block * len; + + status = cfi_varsize_frob(mtd, + do_getlockstatus_oneblock, adr, len, 0); + if (status) + set_bit(block, region->lockmap); + else + clear_bit(block, region->lockmap); + } + } +} + static int cfi_intelext_suspend(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; + struct cfi_pri_intelext *extp = cfi->cmdset_priv; int i; struct flchip *chip; int ret = 0; + if ((mtd->flags & MTD_STUPID_LOCK) + && extp && (extp->FeatureSupport & (1 << 5))) + cfi_intelext_save_locks(mtd); + for (i=0; !ret && i<cfi->numchips; i++) { chip = &cfi->chips[i]; @@ -2285,10 +2338,33 @@ static int cfi_intelext_suspend(struct mtd_info *mtd) return ret; } +static void cfi_intelext_restore_locks(struct mtd_info *mtd) +{ + struct mtd_erase_region_info *region; + int block, i; + unsigned long adr; + size_t len; + + for (i = 0; i < mtd->numeraseregions; i++) { + region = &mtd->eraseregions[i]; + if (!region->lockmap) + continue; + + for (block = 0; block < region->numblocks; block++) { + len = region->erasesize; + adr = region->offset + block * len; + + if (!test_bit(block, region->lockmap)) + cfi_intelext_unlock(mtd, adr, len); + } + } +} + static void cfi_intelext_resume(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; + struct cfi_pri_intelext *extp = cfi->cmdset_priv; int i; struct flchip *chip; @@ -2307,6 +2383,10 @@ static void cfi_intelext_resume(struct mtd_info *mtd) spin_unlock(chip->mutex); } + + if ((mtd->flags & MTD_STUPID_LOCK) + && extp && (extp->FeatureSupport & (1 << 5))) + cfi_intelext_restore_locks(mtd); } static int cfi_intelext_reset(struct mtd_info *mtd) @@ -2347,12 +2427,19 @@ static void cfi_intelext_destroy(struct mtd_info *mtd) { struct map_info *map = mtd->priv; struct cfi_private *cfi = map->fldrv_priv; + struct mtd_erase_region_info *region; + int i; cfi_intelext_reset(mtd); unregister_reboot_notifier(&mtd->reboot_notifier); kfree(cfi->cmdset_priv); kfree(cfi->cfiq); kfree(cfi->chips[0].priv); kfree(cfi); + for (i = 0; i < mtd->numeraseregions; i++) { + region = &mtd->eraseregions[i]; + if (region->lockmap) + kfree(region->lockmap); + } kfree(mtd->eraseregions); } diff --git a/drivers/mtd/chips/fwh_lock.h b/drivers/mtd/chips/fwh_lock.h index 77303ce5dcf..ab44f2b996f 100644 --- a/drivers/mtd/chips/fwh_lock.h +++ b/drivers/mtd/chips/fwh_lock.h @@ -65,11 +65,12 @@ static int fwh_xxlock_oneblock(struct map_info *map, struct flchip *chip, return ret; } + chip->oldstate = chip->state; chip->state = xxlt->state; map_write(map, CMD(xxlt->val), adr); /* Done and happy. */ - chip->state = FL_READY; + chip->state = chip->oldstate; put_chip(map, chip, adr); spin_unlock(chip->mutex); return 0; diff --git a/drivers/mtd/devices/Kconfig b/drivers/mtd/devices/Kconfig index 440f6851da6..690c94236d7 100644 --- a/drivers/mtd/devices/Kconfig +++ b/drivers/mtd/devices/Kconfig @@ -6,7 +6,7 @@ menu "Self-contained MTD device drivers" config MTD_PMC551 tristate "Ramix PMC551 PCI Mezzanine RAM card support" - depends on MTD && PCI + depends on PCI ---help--- This provides a MTD device driver for the Ramix PMC551 RAM PCI card from Ramix Inc. <http://www.ramix.com/products/memory/pmc551.html>. @@ -40,7 +40,7 @@ config MTD_PMC551_DEBUG config MTD_MS02NV tristate "DEC MS02-NV NVRAM module support" - depends on MTD && MACH_DECSTATION + depends on MACH_DECSTATION help This is an MTD driver for the DEC's MS02-NV (54-20948-01) battery backed-up NVRAM module. The module was originally meant as an NFS @@ -54,15 +54,23 @@ config MTD_MS02NV config MTD_DATAFLASH tristate "Support for AT45xxx DataFlash" - depends on MTD && SPI_MASTER && EXPERIMENTAL + depends on SPI_MASTER && EXPERIMENTAL help This enables access to AT45xxx DataFlash chips, using SPI. Sometimes DataFlash chips are packaged inside MMC-format cards; at this writing, the MMC stack won't handle those. +config MTD_DATAFLASH26 + tristate "AT91RM9200 DataFlash AT26xxx" + depends on MTD && ARCH_AT91RM9200 && AT91_SPI + help + This enables access to the DataFlash chip (AT26xxx) on an + AT91RM9200-based board. + If you have such a board and such a DataFlash, say 'Y'. + config MTD_M25P80 tristate "Support for M25 SPI Flash" - depends on MTD && SPI_MASTER && EXPERIMENTAL + depends on SPI_MASTER && EXPERIMENTAL help This enables access to ST M25P80 and similar SPI flash chips, used for program and data storage. Set up your spi devices @@ -70,7 +78,6 @@ config MTD_M25P80 config MTD_SLRAM tristate "Uncached system RAM" - depends on MTD help If your CPU cannot cache all of the physical memory in your machine, you can still use it for storage or swap by using this driver to @@ -78,7 +85,6 @@ config MTD_SLRAM config MTD_PHRAM tristate "Physical system RAM" - depends on MTD help This is a re-implementation of the slram driver above. @@ -88,7 +94,7 @@ config MTD_PHRAM config MTD_LART tristate "28F160xx flash driver for LART" - depends on SA1100_LART && MTD + depends on SA1100_LART help This enables the flash driver for LART. Please note that you do not need any mapping/chip driver for LART. This one does it all @@ -96,7 +102,6 @@ config MTD_LART config MTD_MTDRAM tristate "Test driver using RAM" - depends on MTD help This enables a test MTD device driver which uses vmalloc() to provide storage. You probably want to say 'N' unless you're @@ -136,7 +141,7 @@ config MTDRAM_ABS_POS config MTD_BLOCK2MTD tristate "MTD using block device" - depends on MTD && BLOCK + depends on BLOCK help This driver allows a block device to appear as an MTD. It would generally be used in the following cases: @@ -150,7 +155,6 @@ comment "Disk-On-Chip Device Drivers" config MTD_DOC2000 tristate "M-Systems Disk-On-Chip 2000 and Millennium (DEPRECATED)" - depends on MTD select MTD_DOCPROBE select MTD_NAND_IDS ---help--- @@ -173,7 +177,6 @@ config MTD_DOC2000 config MTD_DOC2001 tristate "M-Systems Disk-On-Chip Millennium-only alternative driver (DEPRECATED)" - depends on MTD select MTD_DOCPROBE select MTD_NAND_IDS ---help--- @@ -195,7 +198,6 @@ config MTD_DOC2001 config MTD_DOC2001PLUS tristate "M-Systems Disk-On-Chip Millennium Plus" - depends on MTD select MTD_DOCPROBE select MTD_NAND_IDS ---help--- diff --git a/drivers/mtd/devices/Makefile b/drivers/mtd/devices/Makefile index 0f788d5c4bf..8ab568b3f53 100644 --- a/drivers/mtd/devices/Makefile +++ b/drivers/mtd/devices/Makefile @@ -16,4 +16,5 @@ obj-$(CONFIG_MTD_MTDRAM) += mtdram.o obj-$(CONFIG_MTD_LART) += lart.o obj-$(CONFIG_MTD_BLOCK2MTD) += block2mtd.o obj-$(CONFIG_MTD_DATAFLASH) += mtd_dataflash.o +obj-$(CONFIG_MTD_DATAFLASH26) += at91_dataflash26.o obj-$(CONFIG_MTD_M25P80) += m25p80.o diff --git a/drivers/mtd/devices/at91_dataflash26.c b/drivers/mtd/devices/at91_dataflash26.c new file mode 100644 index 00000000000..64ce37f986f --- /dev/null +++ b/drivers/mtd/devices/at91_dataflash26.c @@ -0,0 +1,485 @@ +/* + * Atmel DataFlash driver for Atmel AT91RM9200 (Thunder) + * This is a largely modified version of at91_dataflash.c that + * supports AT26xxx dataflash chips. The original driver supports + * AT45xxx chips. + * + * Note: This driver was only tested with an AT26F004. It should be + * easy to make it work with other AT26xxx dataflash devices, though. + * + * Copyright (C) 2007 Hans J. Koch <hjk@linutronix.de> + * original Copyright (C) SAN People (Pty) Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. +*/ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/mtd/mtd.h> + +#include <asm/arch/at91_spi.h> + +#define DATAFLASH_MAX_DEVICES 4 /* max number of dataflash devices */ + +#define MANUFACTURER_ID_ATMEL 0x1F + +/* command codes */ + +#define AT26_OP_READ_STATUS 0x05 +#define AT26_OP_READ_DEV_ID 0x9F +#define AT26_OP_ERASE_PAGE_4K 0x20 +#define AT26_OP_READ_ARRAY_FAST 0x0B +#define AT26_OP_SEQUENTIAL_WRITE 0xAF +#define AT26_OP_WRITE_ENABLE 0x06 +#define AT26_OP_WRITE_DISABLE 0x04 +#define AT26_OP_SECTOR_PROTECT 0x36 +#define AT26_OP_SECTOR_UNPROTECT 0x39 + +/* status register bits */ + +#define AT26_STATUS_BUSY 0x01 +#define AT26_STATUS_WRITE_ENABLE 0x02 + +struct dataflash_local +{ + int spi; /* SPI chip-select number */ + unsigned int page_size; /* number of bytes per page */ +}; + + +/* Detected DataFlash devices */ +static struct mtd_info* mtd_devices[DATAFLASH_MAX_DEVICES]; +static int nr_devices = 0; + +/* Allocate a single SPI transfer descriptor. We're assuming that if multiple + SPI transfers occur at the same time, spi_access_bus() will serialize them. + If this is not valid, then either (i) each dataflash 'priv' structure + needs it's own transfer descriptor, (ii) we lock this one, or (iii) use + another mechanism. */ +static struct spi_transfer_list* spi_transfer_desc; + +/* + * Perform a SPI transfer to access the DataFlash device. + */ +static int do_spi_transfer(int nr, char* tx, int tx_len, char* rx, int rx_len, + char* txnext, int txnext_len, char* rxnext, int rxnext_len) +{ + struct spi_transfer_list* list = spi_transfer_desc; + + list->tx[0] = tx; list->txlen[0] = tx_len; + list->rx[0] = rx; list->rxlen[0] = rx_len; + + list->tx[1] = txnext; list->txlen[1] = txnext_len; + list->rx[1] = rxnext; list->rxlen[1] = rxnext_len; + + list->nr_transfers = nr; + /* Note: spi_transfer() always returns 0, there are no error checks */ + return spi_transfer(list); +} + +/* + * Return the status of the DataFlash device. + */ +static unsigned char at91_dataflash26_status(void) +{ + unsigned char command[2]; + + command[0] = AT26_OP_READ_STATUS; + command[1] = 0; + + do_spi_transfer(1, command, 2, command, 2, NULL, 0, NULL, 0); + + return command[1]; +} + +/* + * Poll the DataFlash device until it is READY. + */ +static unsigned char at91_dataflash26_waitready(void) +{ + unsigned char status; + + while (1) { + status = at91_dataflash26_status(); + if (!(status & AT26_STATUS_BUSY)) + return status; + } +} + +/* + * Enable/disable write access + */ + static void at91_dataflash26_write_enable(int enable) +{ + unsigned char cmd[2]; + + DEBUG(MTD_DEBUG_LEVEL3, "write_enable: enable=%i\n", enable); + + if (enable) + cmd[0] = AT26_OP_WRITE_ENABLE; + else + cmd[0] = AT26_OP_WRITE_DISABLE; + cmd[1] = 0; + + do_spi_transfer(1, cmd, 2, cmd, 2, NULL, 0, NULL, 0); +} + +/* + * Protect/unprotect sector + */ + static void at91_dataflash26_sector_protect(loff_t addr, int protect) +{ + unsigned char cmd[4]; + + DEBUG(MTD_DEBUG_LEVEL3, "sector_protect: addr=0x%06x prot=%d\n", + addr, protect); + + if (protect) + cmd[0] = AT26_OP_SECTOR_PROTECT; + else + cmd[0] = AT26_OP_SECTOR_UNPROTECT; + cmd[1] = (addr & 0x00FF0000) >> 16; + cmd[2] = (addr & 0x0000FF00) >> 8; + cmd[3] = (addr & 0x000000FF); + + do_spi_transfer(1, cmd, 4, cmd, 4, NULL, 0, NULL, 0); +} + +/* + * Erase blocks of flash. + */ +static int at91_dataflash26_erase(struct mtd_info *mtd, + struct erase_info *instr) +{ + struct dataflash_local *priv = (struct dataflash_local *) mtd->priv; + unsigned char cmd[4]; + + DEBUG(MTD_DEBUG_LEVEL1, "dataflash_erase: addr=0x%06x len=%i\n", + instr->addr, instr->len); + + /* Sanity checks */ + if (priv->page_size != 4096) + return -EINVAL; /* Can't handle other sizes at the moment */ + + if ( ((instr->len % mtd->erasesize) != 0) + || ((instr->len % priv->page_size) != 0) + || ((instr->addr % priv->page_size) != 0) + || ((instr->addr + instr->len) > mtd->size)) + return -EINVAL; + + spi_access_bus(priv->spi); + + while (instr->len > 0) { + at91_dataflash26_write_enable(1); + at91_dataflash26_sector_protect(instr->addr, 0); + at91_dataflash26_write_enable(1); + cmd[0] = AT26_OP_ERASE_PAGE_4K; + cmd[1] = (instr->addr & 0x00FF0000) >> 16; + cmd[2] = (instr->addr & 0x0000FF00) >> 8; + cmd[3] = (instr->addr & 0x000000FF); + + DEBUG(MTD_DEBUG_LEVEL3, "ERASE: (0x%02x) 0x%02x 0x%02x" + "0x%02x\n", + cmd[0], cmd[1], cmd[2], cmd[3]); + + do_spi_transfer(1, cmd, 4, cmd, 4, NULL, 0, NULL, 0); + at91_dataflash26_waitready(); + + instr->addr += priv->page_size; /* next page */ + instr->len -= priv->page_size; + } + + at91_dataflash26_write_enable(0); + spi_release_bus(priv->spi); + + /* Inform MTD subsystem that erase is complete */ + instr->state = MTD_ERASE_DONE; + if (instr->callback) + instr->callback(instr); + + return 0; +} + +/* + * Read from the DataFlash device. + * from : Start offset in flash device + * len : Number of bytes to read + * retlen : Number of bytes actually read + * buf : Buffer that will receive data + */ +static int at91_dataflash26_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, u_char *buf) +{ + struct dataflash_local *priv = (struct dataflash_local *) mtd->priv; + unsigned char cmd[5]; + + DEBUG(MTD_DEBUG_LEVEL1, "dataflash_read: %lli .. %lli\n", + from, from+len); + + *retlen = 0; + + /* Sanity checks */ + if (!len) + return 0; + if (from + len > mtd->size) + return -EINVAL; + + cmd[0] = AT26_OP_READ_ARRAY_FAST; + cmd[1] = (from & 0x00FF0000) >> 16; + cmd[2] = (from & 0x0000FF00) >> 8; + cmd[3] = (from & 0x000000FF); + /* cmd[4] is a "Don't care" byte */ + + DEBUG(MTD_DEBUG_LEVEL3, "READ: (0x%02x) 0x%02x 0x%02x 0x%02x\n", + cmd[0], cmd[1], cmd[2], cmd[3]); + + spi_access_bus(priv->spi); + do_spi_transfer(2, cmd, 5, cmd, 5, buf, len, buf, len); + spi_release_bus(priv->spi); + + *retlen = len; + return 0; +} + +/* + * Write to the DataFlash device. + * to : Start offset in flash device + * len : Number of bytes to write + * retlen : Number of bytes actually written + * buf : Buffer containing the data + */ +static int at91_dataflash26_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + struct dataflash_local *priv = (struct dataflash_local *) mtd->priv; + unsigned int addr, buf_index = 0; + int ret = -EIO, sector, last_sector; + unsigned char status, cmd[5]; + + DEBUG(MTD_DEBUG_LEVEL1, "dataflash_write: %lli .. %lli\n", to, to+len); + + *retlen = 0; + + /* Sanity checks */ + if (!len) + return 0; + if (to + len > mtd->size) + return -EINVAL; + + spi_access_bus(priv->spi); + + addr = to; + last_sector = -1; + + while (buf_index < len) { + sector = addr / priv->page_size; + /* Write first byte if a new sector begins */ + if (sector != last_sector) { + at91_dataflash26_write_enable(1); + at91_dataflash26_sector_protect(addr, 0); + at91_dataflash26_write_enable(1); + + /* Program first byte of a new sector */ + cmd[0] = AT26_OP_SEQUENTIAL_WRITE; + cmd[1] = (addr & 0x00FF0000) >> 16; + cmd[2] = (addr & 0x0000FF00) >> 8; + cmd[3] = (addr & 0x000000FF); + cmd[4] = buf[buf_index++]; + do_spi_transfer(1, cmd, 5, cmd, 5, NULL, 0, NULL, 0); + status = at91_dataflash26_waitready(); + addr++; + /* On write errors, the chip resets the write enable + flag. This also happens after the last byte of a + sector is successfully programmed. */ + if ( ( !(status & AT26_STATUS_WRITE_ENABLE)) + && ((addr % priv->page_size) != 0) ) { + DEBUG(MTD_DEBUG_LEVEL1, + "write error1: addr=0x%06x, " + "status=0x%02x\n", addr, status); + goto write_err; + } + (*retlen)++; + last_sector = sector; + } + + /* Write subsequent bytes in the same sector */ + cmd[0] = AT26_OP_SEQUENTIAL_WRITE; + cmd[1] = buf[buf_index++]; + do_spi_transfer(1, cmd, 2, cmd, 2, NULL, 0, NULL, 0); + status = at91_dataflash26_waitready(); + addr++; + + if ( ( !(status & AT26_STATUS_WRITE_ENABLE)) + && ((addr % priv->page_size) != 0) ) { + DEBUG(MTD_DEBUG_LEVEL1, "write error2: addr=0x%06x, " + "status=0x%02x\n", addr, status); + goto write_err; + } + + (*retlen)++; + } + + ret = 0; + at91_dataflash26_write_enable(0); +write_err: + spi_release_bus(priv->spi); + return ret; +} + +/* + * Initialize and register DataFlash device with MTD subsystem. + */ +static int __init add_dataflash(int channel, char *name, int nr_pages, + int pagesize) +{ + struct mtd_info *device; + struct dataflash_local *priv; + + if (nr_devices >= DATAFLASH_MAX_DEVICES) { + printk(KERN_ERR "at91_dataflash26: Too many devices " + "detected\n"); + return 0; + } + + device = kzalloc(sizeof(struct mtd_info) + strlen(name) + 8, + GFP_KERNEL); + if (!device) + return -ENOMEM; + + device->name = (char *)&device[1]; + sprintf(device->name, "%s.spi%d", name, channel); + device->size = nr_pages * pagesize; + device->erasesize = pagesize; + device->owner = THIS_MODULE; + device->type = MTD_DATAFLASH; + device->flags = MTD_CAP_NORFLASH; + device->erase = at91_dataflash26_erase; + device->read = at91_dataflash26_read; + device->write = at91_dataflash26_write; + + priv = (struct dataflash_local *)kzalloc(sizeof(struct dataflash_local), + GFP_KERNEL); + if (!priv) { + kfree(device); + return -ENOMEM; + } + + priv->spi = channel; + priv->page_size = pagesize; + device->priv = priv; + + mtd_devices[nr_devices] = device; + nr_devices++; + printk(KERN_INFO "at91_dataflash26: %s detected [spi%i] (%i bytes)\n", + name, channel, device->size); + + return add_mtd_device(device); +} + +/* + * Detect and initialize DataFlash device connected to specified SPI channel. + * + */ + +struct dataflash26_types { + unsigned char id0; + unsigned char id1; + char *name; + int pagesize; + int nr_pages; +}; + +struct dataflash26_types df26_types[] = { + { + .id0 = 0x04, + .id1 = 0x00, + .name = "AT26F004", + .pagesize = 4096, + .nr_pages = 128, + }, + { + .id0 = 0x45, + .id1 = 0x01, + .name = "AT26DF081A", /* Not tested ! */ + .pagesize = 4096, + .nr_pages = 256, + }, +}; + +static int __init at91_dataflash26_detect(int channel) +{ + unsigned char status, cmd[5]; + int i; + + spi_access_bus(channel); + status = at91_dataflash26_status(); + + if (status == 0 || status == 0xff) { + printk(KERN_ERR "at91_dataflash26_detect: status error %d\n", + status); + spi_release_bus(channel); + return -ENODEV; + } + + cmd[0] = AT26_OP_READ_DEV_ID; + do_spi_transfer(1, cmd, 5, cmd, 5, NULL, 0, NULL, 0); + spi_release_bus(channel); + + if (cmd[1] != MANUFACTURER_ID_ATMEL) + return -ENODEV; + + for (i = 0; i < ARRAY_SIZE(df26_types); i++) { + if ( cmd[2] == df26_types[i].id0 + && cmd[3] == df26_types[i].id1) + return add_dataflash(channel, + df26_types[i].name, + df26_types[i].nr_pages, + df26_types[i].pagesize); + } + + printk(KERN_ERR "at91_dataflash26_detect: Unsupported device " + "(0x%02x/0x%02x)\n", cmd[2], cmd[3]); + return -ENODEV; +} + +static int __init at91_dataflash26_init(void) +{ + spi_transfer_desc = kmalloc(sizeof(struct spi_transfer_list), + GFP_KERNEL); + if (!spi_transfer_desc) + return -ENOMEM; + + /* DataFlash (SPI chip select 0) */ + at91_dataflash26_detect(0); + +#ifdef CONFIG_MTD_AT91_DATAFLASH_CARD + /* DataFlash card (SPI chip select 3) */ + at91_dataflash26_detect(3); +#endif + return 0; +} + +static void __exit at91_dataflash26_exit(void) +{ + int i; + + for (i = 0; i < DATAFLASH_MAX_DEVICES; i++) { + if (mtd_devices[i]) { + del_mtd_device(mtd_devices[i]); + kfree(mtd_devices[i]->priv); + kfree(mtd_devices[i]); + } + } + nr_devices = 0; + kfree(spi_transfer_desc); +} + +module_init(at91_dataflash26_init); +module_exit(at91_dataflash26_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Hans J. Koch"); +MODULE_DESCRIPTION("DataFlash AT26xxx driver for Atmel AT91RM9200"); diff --git a/drivers/mtd/devices/block2mtd.c b/drivers/mtd/devices/block2mtd.c index f9f2ce7806b..ce47544dc12 100644 --- a/drivers/mtd/devices/block2mtd.c +++ b/drivers/mtd/devices/block2mtd.c @@ -40,56 +40,9 @@ struct block2mtd_dev { static LIST_HEAD(blkmtd_device_list); -#define PAGE_READAHEAD 64 -static void cache_readahead(struct address_space *mapping, int index) +static struct page* page_read(struct address_space *mapping, int index) { filler_t *filler = (filler_t*)mapping->a_ops->readpage; - int i, pagei; - unsigned ret = 0; - unsigned long end_index; - struct page *page; - LIST_HEAD(page_pool); - struct inode *inode = mapping->host; - loff_t isize = i_size_read(inode); - - if (!isize) { - INFO("iSize=0 in cache_readahead\n"); - return; - } - - end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); - - read_lock_irq(&mapping->tree_lock); - for (i = 0; i < PAGE_READAHEAD; i++) { - pagei = index + i; - if (pagei > end_index) { - INFO("Overrun end of disk in cache readahead\n"); - break; - } - page = radix_tree_lookup(&mapping->page_tree, pagei); - if (page && (!i)) - break; - if (page) - continue; - read_unlock_irq(&mapping->tree_lock); - page = page_cache_alloc_cold(mapping); - read_lock_irq(&mapping->tree_lock); - if (!page) - break; - page->index = pagei; - list_add(&page->lru, &page_pool); - ret++; - } - read_unlock_irq(&mapping->tree_lock); - if (ret) - read_cache_pages(mapping, &page_pool, filler, NULL); -} - - -static struct page* page_readahead(struct address_space *mapping, int index) -{ - filler_t *filler = (filler_t*)mapping->a_ops->readpage; - cache_readahead(mapping, index); return read_cache_page(mapping, index, filler, NULL); } @@ -105,14 +58,14 @@ static int _block2mtd_erase(struct block2mtd_dev *dev, loff_t to, size_t len) u_long *max; while (pages) { - page = page_readahead(mapping, index); + page = page_read(mapping, index); if (!page) return -ENOMEM; if (IS_ERR(page)) return PTR_ERR(page); - max = (u_long*)page_address(page) + PAGE_SIZE; - for (p=(u_long*)page_address(page); p<max; p++) + max = page_address(page) + PAGE_SIZE; + for (p=page_address(page); p<max; p++) if (*p != -1UL) { lock_page(page); memset(page_address(page), 0xff, PAGE_SIZE); @@ -174,8 +127,7 @@ static int block2mtd_read(struct mtd_info *mtd, loff_t from, size_t len, cpylen = len; // this page len = len - cpylen; - // Get page - page = page_readahead(dev->blkdev->bd_inode->i_mapping, index); + page = page_read(dev->blkdev->bd_inode->i_mapping, index); if (!page) return -ENOMEM; if (IS_ERR(page)) @@ -213,8 +165,7 @@ static int _block2mtd_write(struct block2mtd_dev *dev, const u_char *buf, cpylen = len; // this page len = len - cpylen; - // Get page - page = page_readahead(mapping, index); + page = page_read(mapping, index); if (!page) return -ENOMEM; if (IS_ERR(page)) @@ -308,9 +259,9 @@ static struct block2mtd_dev *add_device(char *devname, int erase_size) /* We might not have rootfs mounted at this point. Try to resolve the device name by other means. */ - dev_t dev = name_to_dev_t(devname); - if (dev != 0) { - bdev = open_by_devnum(dev, FMODE_WRITE | FMODE_READ); + dev_t devt = name_to_dev_t(devname); + if (devt) { + bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ); } } #endif diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index bbf0553bdb2..d990d8141ef 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -6,7 +6,6 @@ menu "Mapping drivers for chip access" config MTD_COMPLEX_MAPPINGS bool "Support non-linear mappings of flash chips" - depends on MTD help This causes the chip drivers to allow for complicated paged mappings of flash chips. @@ -69,6 +68,39 @@ config MTD_PHYSMAP_OF physically into the CPU's memory. The mapping description here is taken from OF device tree. +config MTD_PMC_MSP_EVM + tristate "CFI Flash device mapped on PMC-Sierra MSP" + depends on PMC_MSP && MTD_CFI + select MTD_PARTITIONS + help + This provides a 'mapping' driver which support the way + in which user-programmable flash chips are connected on the + PMC-Sierra MSP eval/demo boards + +choice + prompt "Maximum mappable memory avialable for flash IO" + depends on MTD_PMC_MSP_EVM + default MSP_FLASH_MAP_LIMIT_32M + +config MSP_FLASH_MAP_LIMIT_32M + bool "32M" + +endchoice + +config MSP_FLASH_MAP_LIMIT + hex + default "0x02000000" + depends on MSP_FLASH_MAP_LIMIT_32M + +config MTD_PMC_MSP_RAMROOT + tristate "Embedded RAM block device for root on PMC-Sierra MSP" + depends on PMC_MSP_EMBEDDED_ROOTFS && \ + (MTD_BLOCK || MTD_BLOCK_RO) && \ + MTD_RAM + help + This provides support for the embedded root file system + on PMC MSP devices. This memory is mapped as a MTD block device. + config MTD_SUN_UFLASH tristate "Sun Microsystems userflash support" depends on SPARC && MTD_CFI @@ -240,13 +272,13 @@ config MTD_NETtel config MTD_ALCHEMY tristate "AMD Alchemy Pb1xxx/Db1xxx/RDK MTD support" - depends on SOC_AU1X00 + depends on SOC_AU1X00 && MTD_PARTITIONS && MTD_CFI help Flash memory access on AMD Alchemy Pb/Db/RDK Reference Boards config MTD_MTX1 tristate "4G Systems MTX-1 Flash device" - depends on MIPS && MIPS_MTX1 + depends on MIPS_MTX1 && MTD_CFI help Flash memory access on 4G Systems MTX-1 Board. If you have one of these boards and would like to use the flash chips on it, say 'Y'. @@ -384,7 +416,7 @@ config MTD_TQM834x config MTD_OCELOT tristate "Momenco Ocelot boot flash device" - depends on MIPS && MOMENCO_OCELOT + depends on MOMENCO_OCELOT help This enables access routines for the boot flash device and for the NVRAM on the Momenco Ocelot board. If you have one of these boards @@ -517,7 +549,7 @@ config MTD_OMAP_NOR # This needs CFI or JEDEC, depending on the cards found. config MTD_PCI tristate "PCI MTD driver" - depends on MTD && PCI && MTD_COMPLEX_MAPPINGS + depends on PCI && MTD_COMPLEX_MAPPINGS help Mapping for accessing flash devices on add-in cards like the Intel XScale IQ80310 card, and the Intel EBSA285 card in blank ROM programming mode @@ -527,7 +559,7 @@ config MTD_PCI config MTD_PCMCIA tristate "PCMCIA MTD driver" - depends on MTD && PCMCIA && MTD_COMPLEX_MAPPINGS && BROKEN + depends on PCMCIA && MTD_COMPLEX_MAPPINGS && BROKEN help Map driver for accessing PCMCIA linear flash memory cards. These cards are usually around 4-16MiB in size. This does not include @@ -591,13 +623,12 @@ config MTD_BAST_MAXSIZE config MTD_SHARP_SL bool "ROM mapped on Sharp SL Series" - depends on MTD && ARCH_PXA + depends on ARCH_PXA help This enables access to the flash chip on the Sharp SL Series of PDAs. config MTD_PLATRAM tristate "Map driver for platform device RAM (mtd-ram)" - depends on MTD select MTD_RAM help Map driver for RAM areas described via the platform device diff --git a/drivers/mtd/maps/Makefile b/drivers/mtd/maps/Makefile index 071d0bf922b..de036c5e613 100644 --- a/drivers/mtd/maps/Makefile +++ b/drivers/mtd/maps/Makefile @@ -27,6 +27,8 @@ obj-$(CONFIG_MTD_CEIVA) += ceiva.o obj-$(CONFIG_MTD_OCTAGON) += octagon-5066.o obj-$(CONFIG_MTD_PHYSMAP) += physmap.o obj-$(CONFIG_MTD_PHYSMAP_OF) += physmap_of.o +obj-$(CONFIG_MTD_PMC_MSP_EVM) += pmcmsp-flash.o +obj-$(CONFIG_MTD_PMC_MSP_RAMROOT)+= pmcmsp-ramroot.o obj-$(CONFIG_MTD_PNC2000) += pnc2000.o obj-$(CONFIG_MTD_PCMCIA) += pcmciamtd.o obj-$(CONFIG_MTD_RPXLITE) += rpxlite.o diff --git a/drivers/mtd/maps/alchemy-flash.c b/drivers/mtd/maps/alchemy-flash.c index 7fc8097e41d..84fbe0e8c47 100644 --- a/drivers/mtd/maps/alchemy-flash.c +++ b/drivers/mtd/maps/alchemy-flash.c @@ -1,10 +1,7 @@ /* * Flash memory access on AMD Alchemy evaluation boards * - * $Id: alchemy-flash.c,v 1.2 2005/11/07 11:14:26 gleixner Exp $ - * * (C) 2003, 2004 Pete Popov <ppopov@embeddedalley.com> - * */ #include <linux/init.h> @@ -18,12 +15,6 @@ #include <asm/io.h> -#ifdef DEBUG_RW -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - #ifdef CONFIG_MIPS_PB1000 #define BOARD_MAP_NAME "Pb1000 Flash" #define BOARD_FLASH_SIZE 0x00800000 /* 8MB */ diff --git a/drivers/mtd/maps/ck804xrom.c b/drivers/mtd/maps/ck804xrom.c index 3d4a4d8ac78..688ef495888 100644 --- a/drivers/mtd/maps/ck804xrom.c +++ b/drivers/mtd/maps/ck804xrom.c @@ -338,7 +338,7 @@ static int __init init_ck804xrom(void) } return -ENXIO; #if 0 - return pci_module_init(&ck804xrom_driver); + return pci_register_driver(&ck804xrom_driver); #endif } diff --git a/drivers/mtd/maps/plat-ram.c b/drivers/mtd/maps/plat-ram.c index 2b6504ecbbd..894c0b27128 100644 --- a/drivers/mtd/maps/plat-ram.c +++ b/drivers/mtd/maps/plat-ram.c @@ -169,7 +169,8 @@ static int platram_probe(struct platform_device *pdev) goto exit_free; } - dev_dbg(&pdev->dev, "got platform resource %p (0x%lx)\n", res, res->start); + dev_dbg(&pdev->dev, "got platform resource %p (0x%llx)\n", res, + (unsigned long long)res->start); /* setup map parameters */ diff --git a/drivers/mtd/maps/pmcmsp-flash.c b/drivers/mtd/maps/pmcmsp-flash.c new file mode 100644 index 00000000000..7e0377ec1c4 --- /dev/null +++ b/drivers/mtd/maps/pmcmsp-flash.c @@ -0,0 +1,184 @@ +/* + * Mapping of a custom board with both AMD CFI and JEDEC flash in partitions. + * Config with both CFI and JEDEC device support. + * + * Basically physmap.c with the addition of partitions and + * an array of mapping info to accomodate more than one flash type per board. + * + * Copyright 2005-2007 PMC-Sierra, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/map.h> +#include <linux/mtd/partitions.h> + +#include <asm/io.h> + +#include <msp_prom.h> +#include <msp_regs.h> + + +static struct mtd_info **msp_flash; +static struct mtd_partition **msp_parts; +static struct map_info *msp_maps; +static int fcnt; + +#define DEBUG_MARKER printk(KERN_NOTICE "%s[%d]\n",__FUNCTION__,__LINE__) + +int __init init_msp_flash(void) +{ + int i, j; + int offset, coff; + char *env; + int pcnt; + char flash_name[] = "flash0"; + char part_name[] = "flash0_0"; + unsigned addr, size; + + /* If ELB is disabled by "ful-mux" mode, we can't get at flash */ + if ((*DEV_ID_REG & DEV_ID_SINGLE_PC) && + (*ELB_1PC_EN_REG & SINGLE_PCCARD)) { + printk(KERN_NOTICE "Single PC Card mode: no flash access\n"); + return -ENXIO; + } + + /* examine the prom environment for flash devices */ + for (fcnt = 0; (env = prom_getenv(flash_name)); fcnt++) + flash_name[5] = '0' + fcnt + 1; + + if (fcnt < 1) + return -ENXIO; + + printk(KERN_NOTICE "Found %d PMC flash devices\n", fcnt); + msp_flash = (struct mtd_info **)kmalloc( + fcnt * sizeof(struct map_info *), GFP_KERNEL); + msp_parts = (struct mtd_partition **)kmalloc( + fcnt * sizeof(struct mtd_partition *), GFP_KERNEL); + msp_maps = (struct map_info *)kmalloc( + fcnt * sizeof(struct mtd_info), GFP_KERNEL); + memset(msp_maps, 0, fcnt * sizeof(struct mtd_info)); + + /* loop over the flash devices, initializing each */ + for (i = 0; i < fcnt; i++) { + /* examine the prom environment for flash partititions */ + part_name[5] = '0' + i; + part_name[7] = '0'; + for (pcnt = 0; (env = prom_getenv(part_name)); pcnt++) + part_name[7] = '0' + pcnt + 1; + + if (pcnt == 0) { + printk(KERN_NOTICE "Skipping flash device %d " + "(no partitions defined)\n", i); + continue; + } + + msp_parts[i] = (struct mtd_partition *)kmalloc( + pcnt * sizeof(struct mtd_partition), GFP_KERNEL); + memset(msp_parts[i], 0, pcnt * sizeof(struct mtd_partition)); + + /* now initialize the devices proper */ + flash_name[5] = '0' + i; + env = prom_getenv(flash_name); + + if (sscanf(env, "%x:%x", &addr, &size) < 2) + return -ENXIO; + addr = CPHYSADDR(addr); + + printk(KERN_NOTICE + "MSP flash device \"%s\": 0x%08x at 0x%08x\n", + flash_name, size, addr); + /* This must matchs the actual size of the flash chip */ + msp_maps[i].size = size; + msp_maps[i].phys = addr; + + /* + * Platforms have a specific limit of the size of memory + * which may be mapped for flash: + */ + if (size > CONFIG_MSP_FLASH_MAP_LIMIT) + size = CONFIG_MSP_FLASH_MAP_LIMIT; + msp_maps[i].virt = ioremap(addr, size); + msp_maps[i].bankwidth = 1; + msp_maps[i].name = strncpy(kmalloc(7, GFP_KERNEL), + flash_name, 7); + + if (msp_maps[i].virt == NULL) + return -ENXIO; + + for (j = 0; j < pcnt; j++) { + part_name[5] = '0' + i; + part_name[7] = '0' + j; + + env = prom_getenv(part_name); + + if (sscanf(env, "%x:%x:%n", &offset, &size, &coff) < 2) + return -ENXIO; + + msp_parts[i][j].size = size; + msp_parts[i][j].offset = offset; + msp_parts[i][j].name = env + coff; + } + + /* now probe and add the device */ + simple_map_init(&msp_maps[i]); + msp_flash[i] = do_map_probe("cfi_probe", &msp_maps[i]); + if (msp_flash[i]) { + msp_flash[i]->owner = THIS_MODULE; + add_mtd_partitions(msp_flash[i], msp_parts[i], pcnt); + } else { + printk(KERN_ERR "map probe failed for flash\n"); + return -ENXIO; + } + } + + return 0; +} + +static void __exit cleanup_msp_flash(void) +{ + int i; + + for (i = 0; i < sizeof(msp_flash) / sizeof(struct mtd_info **); i++) { + del_mtd_partitions(msp_flash[i]); + map_destroy(msp_flash[i]); + iounmap((void *)msp_maps[i].virt); + + /* free the memory */ + kfree(msp_maps[i].name); + kfree(msp_parts[i]); + } + + kfree(msp_flash); + kfree(msp_parts); + kfree(msp_maps); +} + +MODULE_AUTHOR("PMC-Sierra, Inc"); +MODULE_DESCRIPTION("MTD map driver for PMC-Sierra MSP boards"); +MODULE_LICENSE("GPL"); + +module_init(init_msp_flash); +module_exit(cleanup_msp_flash); diff --git a/drivers/mtd/maps/pmcmsp-ramroot.c b/drivers/mtd/maps/pmcmsp-ramroot.c new file mode 100644 index 00000000000..18049bceba8 --- /dev/null +++ b/drivers/mtd/maps/pmcmsp-ramroot.c @@ -0,0 +1,105 @@ +/* + * Mapping of the rootfs in a physical region of memory + * + * Copyright (C) 2005-2007 PMC-Sierra Inc. + * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN + * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/fs.h> +#include <linux/root_dev.h> +#include <linux/mtd/mtd.h> +#include <linux/mtd/map.h> + +#include <asm/io.h> + +#include <msp_prom.h> + +static struct mtd_info *rr_mtd; + +struct map_info rr_map = { + .name = "ramroot", + .bankwidth = 4, +}; + +static int __init init_rrmap(void) +{ + void *ramroot_start; + unsigned long ramroot_size; + + /* Check for supported rootfs types */ + if (get_ramroot(&ramroot_start, &ramroot_size)) { + rr_map.phys = CPHYSADDR(ramroot_start); + rr_map.size = ramroot_size; + + printk(KERN_NOTICE + "PMC embedded root device: 0x%08lx @ 0x%08lx\n", + rr_map.size, (unsigned long)rr_map.phys); + } else { + printk(KERN_ERR + "init_rrmap: no supported embedded rootfs detected!\n"); + return -ENXIO; + } + + /* Map rootfs to I/O space for block device driver */ + rr_map.virt = ioremap(rr_map.phys, rr_map.size); + if (!rr_map.virt) { + printk(KERN_ERR "Failed to ioremap\n"); + return -EIO; + } + + simple_map_init(&rr_map); + + rr_mtd = do_map_probe("map_ram", &rr_map); + if (rr_mtd) { + rr_mtd->owner = THIS_MODULE; + + add_mtd_device(rr_mtd); + ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, rr_mtd->index); + + return 0; + } + + iounmap(rr_map.virt); + return -ENXIO; +} + +static void __exit cleanup_rrmap(void) +{ + del_mtd_device(rr_mtd); + map_destroy(rr_mtd); + + iounmap(rr_map.virt); + rr_map.virt = NULL; +} + +MODULE_AUTHOR("PMC-Sierra, Inc"); +MODULE_DESCRIPTION("MTD map driver for embedded PMC-Sierra MSP filesystem"); +MODULE_LICENSE("GPL"); + +module_init(init_rrmap); +module_exit(cleanup_rrmap); diff --git a/drivers/mtd/maps/sun_uflash.c b/drivers/mtd/maps/sun_uflash.c index 4db2055cee3..001af7f7ddd 100644 --- a/drivers/mtd/maps/sun_uflash.c +++ b/drivers/mtd/maps/sun_uflash.c @@ -39,7 +39,7 @@ MODULE_VERSION("2.0"); static LIST_HEAD(device_list); struct uflash_dev { - char *name; /* device name */ + const char *name; /* device name */ struct map_info map; /* mtd map info */ struct mtd_info *mtd; /* mtd info */ }; @@ -80,7 +80,7 @@ int uflash_devinit(struct linux_ebus_device *edev, struct device_node *dp) up->name = of_get_property(dp, "model", NULL); if (up->name && 0 < strlen(up->name)) - up->map.name = up->name; + up->map.name = (char *)up->name; up->map.phys = res->start; diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index b879a66daa9..524b83b5ebf 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -20,6 +20,7 @@ #include <linux/hdreg.h> #include <linux/init.h> #include <linux/mutex.h> +#include <linux/kthread.h> #include <asm/uaccess.h> static LIST_HEAD(blktrans_majors); @@ -28,9 +29,7 @@ extern struct mutex mtd_table_mutex; extern struct mtd_info *mtd_table[]; struct mtd_blkcore_priv { - struct completion thread_dead; - int exiting; - wait_queue_head_t thread_wq; + struct task_struct *thread; struct request_queue *rq; spinlock_t queue_lock; }; @@ -83,38 +82,19 @@ static int mtd_blktrans_thread(void *arg) /* we might get involved when memory gets low, so use PF_MEMALLOC */ current->flags |= PF_MEMALLOC | PF_NOFREEZE; - daemonize("%sd", tr->name); - - /* daemonize() doesn't do this for us since some kernel threads - actually want to deal with signals. We can't just call - exit_sighand() since that'll cause an oops when we finally - do exit. */ - spin_lock_irq(¤t->sighand->siglock); - sigfillset(¤t->blocked); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - spin_lock_irq(rq->queue_lock); - - while (!tr->blkcore_priv->exiting) { + while (!kthread_should_stop()) { struct request *req; struct mtd_blktrans_dev *dev; int res = 0; - DECLARE_WAITQUEUE(wait, current); req = elv_next_request(rq); if (!req) { - add_wait_queue(&tr->blkcore_priv->thread_wq, &wait); set_current_state(TASK_INTERRUPTIBLE); - spin_unlock_irq(rq->queue_lock); - schedule(); - remove_wait_queue(&tr->blkcore_priv->thread_wq, &wait); - spin_lock_irq(rq->queue_lock); - continue; } @@ -133,13 +113,13 @@ static int mtd_blktrans_thread(void *arg) } spin_unlock_irq(rq->queue_lock); - complete_and_exit(&tr->blkcore_priv->thread_dead, 0); + return 0; } static void mtd_blktrans_request(struct request_queue *rq) { struct mtd_blktrans_ops *tr = rq->queuedata; - wake_up(&tr->blkcore_priv->thread_wq); + wake_up_process(tr->blkcore_priv->thread); } @@ -388,8 +368,6 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) return ret; } spin_lock_init(&tr->blkcore_priv->queue_lock); - init_completion(&tr->blkcore_priv->thread_dead); - init_waitqueue_head(&tr->blkcore_priv->thread_wq); tr->blkcore_priv->rq = blk_init_queue(mtd_blktrans_request, &tr->blkcore_priv->queue_lock); if (!tr->blkcore_priv->rq) { @@ -403,13 +381,14 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); tr->blkshift = ffs(tr->blksize) - 1; - ret = kernel_thread(mtd_blktrans_thread, tr, CLONE_KERNEL); - if (ret < 0) { + tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr, + "%sd", tr->name); + if (IS_ERR(tr->blkcore_priv->thread)) { blk_cleanup_queue(tr->blkcore_priv->rq); unregister_blkdev(tr->major, tr->name); kfree(tr->blkcore_priv); mutex_unlock(&mtd_table_mutex); - return ret; + return PTR_ERR(tr->blkcore_priv->thread); } INIT_LIST_HEAD(&tr->devs); @@ -432,9 +411,7 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr) mutex_lock(&mtd_table_mutex); /* Clean up the kernel thread */ - tr->blkcore_priv->exiting = 1; - wake_up(&tr->blkcore_priv->thread_wq); - wait_for_completion(&tr->blkcore_priv->thread_dead); + kthread_stop(tr->blkcore_priv->thread); /* Remove it from the list of active majors */ list_del(&tr->list); diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 1592eac64e5..8c86b802f21 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -553,7 +553,7 @@ static int mtd_ioctl(struct inode *inode, struct file *file, ops.datbuf = NULL; ops.mode = MTD_OOB_PLACE; - if (ops.ooboffs && ops.len > (mtd->oobsize - ops.ooboffs)) + if (ops.ooboffs && ops.ooblen > (mtd->oobsize - ops.ooboffs)) return -EINVAL; ops.oobbuf = kmalloc(buf.length, GFP_KERNEL); diff --git a/drivers/mtd/nand/Kconfig b/drivers/mtd/nand/Kconfig index 2d12dcdd740..d05873b8c15 100644 --- a/drivers/mtd/nand/Kconfig +++ b/drivers/mtd/nand/Kconfig @@ -1,10 +1,7 @@ # drivers/mtd/nand/Kconfig # $Id: Kconfig,v 1.35 2005/11/07 11:14:30 gleixner Exp $ -menu "NAND Flash Device Drivers" - depends on MTD!=n - -config MTD_NAND +menuconfig MTD_NAND tristate "NAND Device Support" depends on MTD select MTD_NAND_IDS @@ -13,9 +10,10 @@ config MTD_NAND devices. For further information see <http://www.linux-mtd.infradead.org/doc/nand.html>. +if MTD_NAND + config MTD_NAND_VERIFY_WRITE bool "Verify NAND page writes" - depends on MTD_NAND help This adds an extra check when data is written to the flash. The NAND flash device internally checks only bits transitioning @@ -25,53 +23,61 @@ config MTD_NAND_VERIFY_WRITE config MTD_NAND_ECC_SMC bool "NAND ECC Smart Media byte order" - depends on MTD_NAND default n help Software ECC according to the Smart Media Specification. The original Linux implementation had byte 0 and 1 swapped. +config MTD_NAND_MUSEUM_IDS + bool "Enable chip ids for obsolete ancient NAND devices" + depends on MTD_NAND + default n + help + Enable this option only when your board has first generation + NAND chips (page size 256 byte, erase size 4-8KiB). The IDs + of these chips were reused by later, larger chips. + config MTD_NAND_AUTCPU12 tristate "SmartMediaCard on autronix autcpu12 board" - depends on MTD_NAND && ARCH_AUTCPU12 + depends on ARCH_AUTCPU12 help This enables the driver for the autronix autcpu12 board to access the SmartMediaCard. config MTD_NAND_EDB7312 tristate "Support for Cirrus Logic EBD7312 evaluation board" - depends on MTD_NAND && ARCH_EDB7312 + depends on ARCH_EDB7312 help This enables the driver for the Cirrus Logic EBD7312 evaluation board to access the onboard NAND Flash. config MTD_NAND_H1900 tristate "iPAQ H1900 flash" - depends on MTD_NAND && ARCH_PXA && MTD_PARTITIONS + depends on ARCH_PXA && MTD_PARTITIONS help This enables the driver for the iPAQ h1900 flash. config MTD_NAND_SPIA tristate "NAND Flash device on SPIA board" - depends on ARCH_P720T && MTD_NAND + depends on ARCH_P720T help If you had to ask, you don't have one. Say 'N'. config MTD_NAND_AMS_DELTA tristate "NAND Flash device on Amstrad E3" - depends on MACH_AMS_DELTA && MTD_NAND + depends on MACH_AMS_DELTA help Support for NAND flash on Amstrad E3 (Delta). config MTD_NAND_TOTO tristate "NAND Flash device on TOTO board" - depends on ARCH_OMAP && MTD_NAND && BROKEN + depends on ARCH_OMAP && BROKEN help Support for NAND flash on Texas Instruments Toto platform. config MTD_NAND_TS7250 tristate "NAND Flash device on TS-7250 board" - depends on MACH_TS72XX && MTD_NAND + depends on MACH_TS72XX help Support for NAND flash on Technologic Systems TS-7250 platform. @@ -80,14 +86,14 @@ config MTD_NAND_IDS config MTD_NAND_AU1550 tristate "Au1550/1200 NAND support" - depends on (SOC_AU1200 || SOC_AU1550) && MTD_NAND + depends on SOC_AU1200 || SOC_AU1550 help This enables the driver for the NAND flash controller on the AMD/Alchemy 1550 SOC. config MTD_NAND_RTC_FROM4 tristate "Renesas Flash ROM 4-slot interface board (FROM_BOARD4)" - depends on MTD_NAND && SH_SOLUTION_ENGINE + depends on SH_SOLUTION_ENGINE select REED_SOLOMON select REED_SOLOMON_DEC8 select BITREVERSE @@ -97,13 +103,13 @@ config MTD_NAND_RTC_FROM4 config MTD_NAND_PPCHAMELEONEVB tristate "NAND Flash device on PPChameleonEVB board" - depends on PPCHAMELEONEVB && MTD_NAND && BROKEN + depends on PPCHAMELEONEVB && BROKEN help This enables the NAND flash driver on the PPChameleon EVB Board. config MTD_NAND_S3C2410 tristate "NAND Flash support for S3C2410/S3C2440 SoC" - depends on ARCH_S3C2410 && MTD_NAND + depends on ARCH_S3C2410 help This enables the NAND flash controller on the S3C2410 and S3C2440 SoCs @@ -128,7 +134,7 @@ config MTD_NAND_S3C2410_HWECC config MTD_NAND_NDFC tristate "NDFC NanD Flash Controller" - depends on MTD_NAND && 44x + depends on 44x select MTD_NAND_ECC_SMC help NDFC Nand Flash Controllers are integrated in EP44x SoCs @@ -145,7 +151,7 @@ config MTD_NAND_S3C2410_CLKSTOP config MTD_NAND_DISKONCHIP tristate "DiskOnChip 2000, Millennium and Millennium Plus (NAND reimplementation) (EXPERIMENTAL)" - depends on MTD_NAND && EXPERIMENTAL + depends on EXPERIMENTAL select REED_SOLOMON select REED_SOLOMON_DEC16 help @@ -215,11 +221,11 @@ config MTD_NAND_DISKONCHIP_BBTWRITE config MTD_NAND_SHARPSL tristate "Support for NAND Flash on Sharp SL Series (C7xx + others)" - depends on MTD_NAND && ARCH_PXA + depends on ARCH_PXA config MTD_NAND_BASLER_EXCITE tristate "Support for NAND Flash on Basler eXcite" - depends on MTD_NAND && BASLER_EXCITE + depends on BASLER_EXCITE help This enables the driver for the NAND flash device found on the Basler eXcite Smart Camera. If built as a module, the driver @@ -227,14 +233,14 @@ config MTD_NAND_BASLER_EXCITE config MTD_NAND_CAFE tristate "NAND support for OLPC CAFÉ chip" - depends on MTD_NAND && PCI + depends on PCI help Use NAND flash attached to the CAFÉ chip designed for the $100 laptop. config MTD_NAND_CS553X tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)" - depends on MTD_NAND && X86_32 && (X86_PC || X86_GENERICARCH) + depends on X86_32 && (X86_PC || X86_GENERICARCH) help The CS553x companion chips for the AMD Geode processor include NAND flash controllers with built-in hardware ECC @@ -247,16 +253,21 @@ config MTD_NAND_CS553X config MTD_NAND_AT91 bool "Support for NAND Flash / SmartMedia on AT91" - depends on MTD_NAND && ARCH_AT91 + depends on ARCH_AT91 help Enables support for NAND Flash / Smart Media Card interface on Atmel AT91 processors. +config MTD_NAND_CM_X270 + tristate "Support for NAND Flash on CM-X270 modules" + depends on MTD_NAND && MACH_ARMCORE + + config MTD_NAND_NANDSIM tristate "Support for NAND Flash Simulator" - depends on MTD_NAND && MTD_PARTITIONS + depends on MTD_PARTITIONS help The simulator may simulate various NAND flash chips for the MTD nand layer. -endmenu +endif # MTD_NAND diff --git a/drivers/mtd/nand/Makefile b/drivers/mtd/nand/Makefile index 80f1dfc7794..6872031a3fb 100644 --- a/drivers/mtd/nand/Makefile +++ b/drivers/mtd/nand/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_MTD_NAND_NANDSIM) += nandsim.o obj-$(CONFIG_MTD_NAND_CS553X) += cs553x_nand.o obj-$(CONFIG_MTD_NAND_NDFC) += ndfc.o obj-$(CONFIG_MTD_NAND_AT91) += at91_nand.o +obj-$(CONFIG_MTD_NAND_CM_X270) += cmx270_nand.o obj-$(CONFIG_MTD_NAND_BASLER_EXCITE) += excite_nandflash.o nand-objs := nand_base.o nand_bbt.o diff --git a/drivers/mtd/nand/cafe.c b/drivers/mtd/nand/cafe.c index fd6bb3ed40d..c328a751451 100644 --- a/drivers/mtd/nand/cafe.c +++ b/drivers/mtd/nand/cafe.c @@ -530,7 +530,6 @@ static int __devinit cafe_nand_probe(struct pci_dev *pdev, { struct mtd_info *mtd; struct cafe_priv *cafe; - uint32_t timing1, timing2, timing3; uint32_t ctrl; int err = 0; @@ -587,21 +586,19 @@ static int __devinit cafe_nand_probe(struct pci_dev *pdev, } if (numtimings == 3) { - timing1 = timing[0]; - timing2 = timing[1]; - timing3 = timing[2]; cafe_dev_dbg(&cafe->pdev->dev, "Using provided timings (%08x %08x %08x)\n", - timing1, timing2, timing3); + timing[0], timing[1], timing[2]); } else { - timing1 = cafe_readl(cafe, NAND_TIMING1); - timing2 = cafe_readl(cafe, NAND_TIMING2); - timing3 = cafe_readl(cafe, NAND_TIMING3); + timing[0] = cafe_readl(cafe, NAND_TIMING1); + timing[1] = cafe_readl(cafe, NAND_TIMING2); + timing[2] = cafe_readl(cafe, NAND_TIMING3); - if (timing1 | timing2 | timing3) { - cafe_dev_dbg(&cafe->pdev->dev, "Timing registers already set (%08x %08x %08x)\n", timing1, timing2, timing3); + if (timing[0] | timing[1] | timing[2]) { + cafe_dev_dbg(&cafe->pdev->dev, "Timing registers already set (%08x %08x %08x)\n", + timing[0], timing[1], timing[2]); } else { dev_warn(&cafe->pdev->dev, "Timing registers unset; using most conservative defaults\n"); - timing1 = timing2 = timing3 = 0xffffffff; + timing[0] = timing[1] = timing[2] = 0xffffffff; } } @@ -609,9 +606,9 @@ static int __devinit cafe_nand_probe(struct pci_dev *pdev, cafe_writel(cafe, 1, NAND_RESET); cafe_writel(cafe, 0, NAND_RESET); - cafe_writel(cafe, timing1, NAND_TIMING1); - cafe_writel(cafe, timing2, NAND_TIMING2); - cafe_writel(cafe, timing3, NAND_TIMING3); + cafe_writel(cafe, timing[0], NAND_TIMING1); + cafe_writel(cafe, timing[1], NAND_TIMING2); + cafe_writel(cafe, timing[2], NAND_TIMING3); cafe_writel(cafe, 0xffffffff, NAND_IRQ_MASK); err = request_irq(pdev->irq, &cafe_nand_interrupt, IRQF_SHARED, diff --git a/drivers/mtd/nand/cmx270_nand.c b/drivers/mtd/nand/cmx270_nand.c new file mode 100644 index 00000000000..cb663ef245d --- /dev/null +++ b/drivers/mtd/nand/cmx270_nand.c @@ -0,0 +1,267 @@ +/* + * linux/drivers/mtd/nand/cmx270-nand.c + * + * Copyright (C) 2006 Compulab, Ltd. + * Mike Rapoport <mike@compulab.co.il> + * + * Derived from drivers/mtd/nand/h1910.c + * Copyright (C) 2002 Marius Gröger (mag@sysgo.de) + * Copyright (c) 2001 Thomas Gleixner (gleixner@autronix.de) + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Overview: + * This is a device driver for the NAND flash device found on the + * CM-X270 board. + */ + +#include <linux/mtd/nand.h> +#include <linux/mtd/partitions.h> + +#include <asm/io.h> +#include <asm/irq.h> + +#include <asm/arch/hardware.h> +#include <asm/arch/pxa-regs.h> + +#define GPIO_NAND_CS (11) +#define GPIO_NAND_RB (89) + +/* This macro needed to ensure in-order operation of GPIO and local + * bus. Without both asm command and dummy uncached read there're + * states when NAND access is broken. I've looked for such macro(s) in + * include/asm-arm but found nothing approptiate. + * dmac_clean_range is close, but is makes cache invalidation + * unnecessary here and it cannot be used in module + */ +#define DRAIN_WB() \ + do { \ + unsigned char dummy; \ + asm volatile ("mcr p15, 0, r0, c7, c10, 4":::"r0"); \ + dummy=*((unsigned char*)UNCACHED_ADDR); \ + } while(0) + +/* MTD structure for CM-X270 board */ +static struct mtd_info *cmx270_nand_mtd; + +/* remaped IO address of the device */ +static void __iomem *cmx270_nand_io; + +/* + * Define static partitions for flash device + */ +static struct mtd_partition partition_info[] = { + [0] = { + .name = "cmx270-0", + .offset = 0, + .size = MTDPART_SIZ_FULL + } +}; +#define NUM_PARTITIONS (ARRAY_SIZE(partition_info)) + +const char *part_probes[] = { "cmdlinepart", NULL }; + +static u_char cmx270_read_byte(struct mtd_info *mtd) +{ + struct nand_chip *this = mtd->priv; + + return (readl(this->IO_ADDR_R) >> 16); +} + +static void cmx270_write_buf(struct mtd_info *mtd, const u_char *buf, int len) +{ + int i; + struct nand_chip *this = mtd->priv; + + for (i=0; i<len; i++) + writel((*buf++ << 16), this->IO_ADDR_W); +} + +static void cmx270_read_buf(struct mtd_info *mtd, u_char *buf, int len) +{ + int i; + struct nand_chip *this = mtd->priv; + + for (i=0; i<len; i++) + *buf++ = readl(this->IO_ADDR_R) >> 16; +} + +static int cmx270_verify_buf(struct mtd_info *mtd, const u_char *buf, int len) +{ + int i; + struct nand_chip *this = mtd->priv; + + for (i=0; i<len; i++) + if (buf[i] != (u_char)(readl(this->IO_ADDR_R) >> 16)) + return -EFAULT; + + return 0; +} + +static inline void nand_cs_on(void) +{ + GPCR(GPIO_NAND_CS) = GPIO_bit(GPIO_NAND_CS); +} + +static void nand_cs_off(void) +{ + DRAIN_WB(); + + GPSR(GPIO_NAND_CS) = GPIO_bit(GPIO_NAND_CS); +} + +/* + * hardware specific access to control-lines + */ +static void cmx270_hwcontrol(struct mtd_info *mtd, int dat, + unsigned int ctrl) +{ + struct nand_chip* this = mtd->priv; + unsigned int nandaddr = (unsigned int)this->IO_ADDR_W; + + DRAIN_WB(); + + if (ctrl & NAND_CTRL_CHANGE) { + if ( ctrl & NAND_ALE ) + nandaddr |= (1 << 3); + else + nandaddr &= ~(1 << 3); + if ( ctrl & NAND_CLE ) + nandaddr |= (1 << 2); + else + nandaddr &= ~(1 << 2); + if ( ctrl & NAND_NCE ) + nand_cs_on(); + else + nand_cs_off(); + } + + DRAIN_WB(); + this->IO_ADDR_W = (void __iomem*)nandaddr; + if (dat != NAND_CMD_NONE) + writel((dat << 16), this->IO_ADDR_W); + + DRAIN_WB(); +} + +/* + * read device ready pin + */ +static int cmx270_device_ready(struct mtd_info *mtd) +{ + DRAIN_WB(); + + return (GPLR(GPIO_NAND_RB) & GPIO_bit(GPIO_NAND_RB)); +} + +/* + * Main initialization routine + */ +static int cmx270_init(void) +{ + struct nand_chip *this; + const char *part_type; + struct mtd_partition *mtd_parts; + int mtd_parts_nb = 0; + int ret; + + /* Allocate memory for MTD device structure and private data */ + cmx270_nand_mtd = kzalloc(sizeof(struct mtd_info) + + sizeof(struct nand_chip), + GFP_KERNEL); + if (!cmx270_nand_mtd) { + printk("Unable to allocate CM-X270 NAND MTD device structure.\n"); + return -ENOMEM; + } + + cmx270_nand_io = ioremap(PXA_CS1_PHYS, 12); + if (!cmx270_nand_io) { + printk("Unable to ioremap NAND device\n"); + ret = -EINVAL; + goto err1; + } + + /* Get pointer to private data */ + this = (struct nand_chip *)(&cmx270_nand_mtd[1]); + + /* Link the private data with the MTD structure */ + cmx270_nand_mtd->owner = THIS_MODULE; + cmx270_nand_mtd->priv = this; + + /* insert callbacks */ + this->IO_ADDR_R = cmx270_nand_io; + this->IO_ADDR_W = cmx270_nand_io; + this->cmd_ctrl = cmx270_hwcontrol; + this->dev_ready = cmx270_device_ready; + + /* 15 us command delay time */ + this->chip_delay = 20; + this->ecc.mode = NAND_ECC_SOFT; + + /* read/write functions */ + this->read_byte = cmx270_read_byte; + this->read_buf = cmx270_read_buf; + this->write_buf = cmx270_write_buf; + this->verify_buf = cmx270_verify_buf; + + /* Scan to find existence of the device */ + if (nand_scan (cmx270_nand_mtd, 1)) { + printk(KERN_NOTICE "No NAND device\n"); + ret = -ENXIO; + goto err2; + } + +#ifdef CONFIG_MTD_CMDLINE_PARTS + mtd_parts_nb = parse_mtd_partitions(cmx270_nand_mtd, part_probes, + &mtd_parts, 0); + if (mtd_parts_nb > 0) + part_type = "command line"; + else + mtd_parts_nb = 0; +#endif + if (!mtd_parts_nb) { + mtd_parts = partition_info; + mtd_parts_nb = NUM_PARTITIONS; + part_type = "static"; + } + + /* Register the partitions */ + printk(KERN_NOTICE "Using %s partition definition\n", part_type); + ret = add_mtd_partitions(cmx270_nand_mtd, mtd_parts, mtd_parts_nb); + if (ret) + goto err2; + + /* Return happy */ + return 0; + +err2: + iounmap(cmx270_nand_io); +err1: + kfree(cmx270_nand_mtd); + + return ret; + +} +module_init(cmx270_init); + +/* + * Clean up routine + */ +static void cmx270_cleanup(void) +{ + /* Release resources, unregister device */ + nand_release(cmx270_nand_mtd); + + iounmap(cmx270_nand_io); + + /* Free the MTD device structure */ + kfree (cmx270_nand_mtd); +} +module_exit(cmx270_cleanup); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Mike Rapoport <mike@compulab.co.il>"); +MODULE_DESCRIPTION("NAND flash driver for Compulab CM-X270 Module"); diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 6af37b8cff6..04de315e493 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -312,7 +312,7 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip) /* Select the NAND device */ chip->select_chip(mtd, chipnr); } else - page = (int)ofs; + page = (int)(ofs >> chip->page_shift); if (chip->options & NAND_BUSWIDTH_16) { chip->cmdfunc(mtd, NAND_CMD_READOOB, chip->badblockpos & 0xFE, @@ -350,7 +350,7 @@ static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs) int block, ret; /* Get block number */ - block = ((int)ofs) >> chip->bbt_erase_shift; + block = (int)(ofs >> chip->bbt_erase_shift); if (chip->bbt) chip->bbt[block >> 2] |= 0x01 << ((block & 0x03) << 1); @@ -771,7 +771,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *ecc_code = chip->buffers->ecccode; int *eccpos = chip->ecc.layout->eccpos; - nand_read_page_raw(mtd, chip, buf); + chip->ecc.read_page_raw(mtd, chip, buf); for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) chip->ecc.calculate(mtd, p, &ecc_calc[i]); @@ -1426,7 +1426,7 @@ static void nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip, for (i = 0; i < chip->ecc.total; i++) chip->oob_poi[eccpos[i]] = ecc_calc[i]; - nand_write_page_raw(mtd, chip, buf); + chip->ecc.write_page_raw(mtd, chip, buf); } /** diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c index 2e2cdf2fc91..2fc674a190c 100644 --- a/drivers/mtd/nand/nand_ids.c +++ b/drivers/mtd/nand/nand_ids.c @@ -24,6 +24,8 @@ * 512 512 Byte page size */ struct nand_flash_dev nand_flash_ids[] = { + +#ifdef CONFIG_MTD_NAND_MUSEUM_IDS {"NAND 1MiB 5V 8-bit", 0x6e, 256, 1, 0x1000, 0}, {"NAND 2MiB 5V 8-bit", 0x64, 256, 2, 0x1000, 0}, {"NAND 4MiB 5V 8-bit", 0x6b, 512, 4, 0x2000, 0}, @@ -39,6 +41,7 @@ struct nand_flash_dev nand_flash_ids[] = { {"NAND 8MiB 3,3V 8-bit", 0xe6, 512, 8, 0x2000, 0}, {"NAND 8MiB 1,8V 16-bit", 0x49, 512, 8, 0x2000, NAND_BUSWIDTH_16}, {"NAND 8MiB 3,3V 16-bit", 0x59, 512, 8, 0x2000, NAND_BUSWIDTH_16}, +#endif {"NAND 16MiB 1,8V 8-bit", 0x33, 512, 16, 0x4000, 0}, {"NAND 16MiB 3,3V 8-bit", 0x73, 512, 16, 0x4000, 0}, @@ -137,6 +140,7 @@ struct nand_manufacturers nand_manuf_ids[] = { {NAND_MFR_RENESAS, "Renesas"}, {NAND_MFR_STMICRO, "ST Micro"}, {NAND_MFR_HYNIX, "Hynix"}, + {NAND_MFR_MICRON, "Micron"}, {0x0, "Unknown"} }; diff --git a/drivers/mtd/nand/nandsim.c b/drivers/mtd/nand/nandsim.c index c3bca9590ad..205df0f771f 100644 --- a/drivers/mtd/nand/nandsim.c +++ b/drivers/mtd/nand/nandsim.c @@ -37,6 +37,8 @@ #include <linux/mtd/nand.h> #include <linux/mtd/partitions.h> #include <linux/delay.h> +#include <linux/list.h> +#include <linux/random.h> /* Default simulator parameters values */ #if !defined(CONFIG_NANDSIM_FIRST_ID_BYTE) || \ @@ -90,6 +92,15 @@ static uint bus_width = CONFIG_NANDSIM_BUS_WIDTH; static uint do_delays = CONFIG_NANDSIM_DO_DELAYS; static uint log = CONFIG_NANDSIM_LOG; static uint dbg = CONFIG_NANDSIM_DBG; +static unsigned long parts[MAX_MTD_DEVICES]; +static unsigned int parts_num; +static char *badblocks = NULL; +static char *weakblocks = NULL; +static char *weakpages = NULL; +static unsigned int bitflips = 0; +static char *gravepages = NULL; +static unsigned int rptwear = 0; +static unsigned int overridesize = 0; module_param(first_id_byte, uint, 0400); module_param(second_id_byte, uint, 0400); @@ -104,8 +115,16 @@ module_param(bus_width, uint, 0400); module_param(do_delays, uint, 0400); module_param(log, uint, 0400); module_param(dbg, uint, 0400); - -MODULE_PARM_DESC(first_id_byte, "The fist byte returned by NAND Flash 'read ID' command (manufaturer ID)"); +module_param_array(parts, ulong, &parts_num, 0400); +module_param(badblocks, charp, 0400); +module_param(weakblocks, charp, 0400); +module_param(weakpages, charp, 0400); +module_param(bitflips, uint, 0400); +module_param(gravepages, charp, 0400); +module_param(rptwear, uint, 0400); +module_param(overridesize, uint, 0400); + +MODULE_PARM_DESC(first_id_byte, "The first byte returned by NAND Flash 'read ID' command (manufacturer ID)"); MODULE_PARM_DESC(second_id_byte, "The second byte returned by NAND Flash 'read ID' command (chip ID)"); MODULE_PARM_DESC(third_id_byte, "The third byte returned by NAND Flash 'read ID' command"); MODULE_PARM_DESC(fourth_id_byte, "The fourth byte returned by NAND Flash 'read ID' command"); @@ -118,6 +137,23 @@ MODULE_PARM_DESC(bus_width, "Chip's bus width (8- or 16-bit)"); MODULE_PARM_DESC(do_delays, "Simulate NAND delays using busy-waits if not zero"); MODULE_PARM_DESC(log, "Perform logging if not zero"); MODULE_PARM_DESC(dbg, "Output debug information if not zero"); +MODULE_PARM_DESC(parts, "Partition sizes (in erase blocks) separated by commas"); +/* Page and erase block positions for the following parameters are independent of any partitions */ +MODULE_PARM_DESC(badblocks, "Erase blocks that are initially marked bad, separated by commas"); +MODULE_PARM_DESC(weakblocks, "Weak erase blocks [: remaining erase cycles (defaults to 3)]" + " separated by commas e.g. 113:2 means eb 113" + " can be erased only twice before failing"); +MODULE_PARM_DESC(weakpages, "Weak pages [: maximum writes (defaults to 3)]" + " separated by commas e.g. 1401:2 means page 1401" + " can be written only twice before failing"); +MODULE_PARM_DESC(bitflips, "Maximum number of random bit flips per page (zero by default)"); +MODULE_PARM_DESC(gravepages, "Pages that lose data [: maximum reads (defaults to 3)]" + " separated by commas e.g. 1401:2 means page 1401" + " can be read only twice before failing"); +MODULE_PARM_DESC(rptwear, "Number of erases inbetween reporting wear, if not zero"); +MODULE_PARM_DESC(overridesize, "Specifies the NAND Flash size overriding the ID bytes. " + "The size is specified in erase blocks and as the exponent of a power of two" + " e.g. 5 means a size of 32 erase blocks"); /* The largest possible page size */ #define NS_LARGEST_PAGE_SIZE 2048 @@ -131,9 +167,11 @@ MODULE_PARM_DESC(dbg, "Output debug information if not zero"); #define NS_DBG(args...) \ do { if (dbg) printk(KERN_DEBUG NS_OUTPUT_PREFIX " debug: " args); } while(0) #define NS_WARN(args...) \ - do { printk(KERN_WARNING NS_OUTPUT_PREFIX " warnig: " args); } while(0) + do { printk(KERN_WARNING NS_OUTPUT_PREFIX " warning: " args); } while(0) #define NS_ERR(args...) \ - do { printk(KERN_ERR NS_OUTPUT_PREFIX " errorr: " args); } while(0) + do { printk(KERN_ERR NS_OUTPUT_PREFIX " error: " args); } while(0) +#define NS_INFO(args...) \ + do { printk(KERN_INFO NS_OUTPUT_PREFIX " " args); } while(0) /* Busy-wait delay macros (microseconds, milliseconds) */ #define NS_UDELAY(us) \ @@ -238,7 +276,8 @@ union ns_mem { * The structure which describes all the internal simulator data. */ struct nandsim { - struct mtd_partition part; + struct mtd_partition partitions[MAX_MTD_DEVICES]; + unsigned int nbparts; uint busw; /* flash chip bus width (8 or 16) */ u_char ids[4]; /* chip's ID bytes */ @@ -338,6 +377,38 @@ static struct nandsim_operations { STATE_DATAOUT, STATE_READY}} }; +struct weak_block { + struct list_head list; + unsigned int erase_block_no; + unsigned int max_erases; + unsigned int erases_done; +}; + +static LIST_HEAD(weak_blocks); + +struct weak_page { + struct list_head list; + unsigned int page_no; + unsigned int max_writes; + unsigned int writes_done; +}; + +static LIST_HEAD(weak_pages); + +struct grave_page { + struct list_head list; + unsigned int page_no; + unsigned int max_reads; + unsigned int reads_done; +}; + +static LIST_HEAD(grave_pages); + +static unsigned long *erase_block_wear = NULL; +static unsigned int wear_eb_count = 0; +static unsigned long total_wear = 0; +static unsigned int rptwear_cnt = 0; + /* MTD structure for NAND controller */ static struct mtd_info *nsmtd; @@ -381,6 +452,13 @@ static void free_device(struct nandsim *ns) } } +static char *get_partition_name(int i) +{ + char buf[64]; + sprintf(buf, "NAND simulator partition %d", i); + return kstrdup(buf, GFP_KERNEL); +} + /* * Initialize the nandsim structure. * @@ -390,7 +468,9 @@ static int init_nandsim(struct mtd_info *mtd) { struct nand_chip *chip = (struct nand_chip *)mtd->priv; struct nandsim *ns = (struct nandsim *)(chip->priv); - int i; + int i, ret = 0; + u_int32_t remains; + u_int32_t next_offset; if (NS_IS_INITIALIZED(ns)) { NS_ERR("init_nandsim: nandsim is already initialized\n"); @@ -448,6 +528,40 @@ static int init_nandsim(struct mtd_info *mtd) } } + /* Fill the partition_info structure */ + if (parts_num > ARRAY_SIZE(ns->partitions)) { + NS_ERR("too many partitions.\n"); + ret = -EINVAL; + goto error; + } + remains = ns->geom.totsz; + next_offset = 0; + for (i = 0; i < parts_num; ++i) { + unsigned long part = parts[i]; + if (!part || part > remains / ns->geom.secsz) { + NS_ERR("bad partition size.\n"); + ret = -EINVAL; + goto error; + } + ns->partitions[i].name = get_partition_name(i); + ns->partitions[i].offset = next_offset; + ns->partitions[i].size = part * ns->geom.secsz; + next_offset += ns->partitions[i].size; + remains -= ns->partitions[i].size; + } + ns->nbparts = parts_num; + if (remains) { + if (parts_num + 1 > ARRAY_SIZE(ns->partitions)) { + NS_ERR("too many partitions.\n"); + ret = -EINVAL; + goto error; + } + ns->partitions[i].name = get_partition_name(i); + ns->partitions[i].offset = next_offset; + ns->partitions[i].size = remains; + ns->nbparts += 1; + } + /* Detect how many ID bytes the NAND chip outputs */ for (i = 0; nand_flash_ids[i].name != NULL; i++) { if (second_id_byte != nand_flash_ids[i].id) @@ -474,7 +588,7 @@ static int init_nandsim(struct mtd_info *mtd) printk("sector address bytes: %u\n", ns->geom.secaddrbytes); printk("options: %#x\n", ns->options); - if (alloc_device(ns) != 0) + if ((ret = alloc_device(ns)) != 0) goto error; /* Allocate / initialize the internal buffer */ @@ -482,21 +596,17 @@ static int init_nandsim(struct mtd_info *mtd) if (!ns->buf.byte) { NS_ERR("init_nandsim: unable to allocate %u bytes for the internal buffer\n", ns->geom.pgszoob); + ret = -ENOMEM; goto error; } memset(ns->buf.byte, 0xFF, ns->geom.pgszoob); - /* Fill the partition_info structure */ - ns->part.name = "NAND simulator partition"; - ns->part.offset = 0; - ns->part.size = ns->geom.totsz; - return 0; error: free_device(ns); - return -ENOMEM; + return ret; } /* @@ -510,6 +620,287 @@ static void free_nandsim(struct nandsim *ns) return; } +static int parse_badblocks(struct nandsim *ns, struct mtd_info *mtd) +{ + char *w; + int zero_ok; + unsigned int erase_block_no; + loff_t offset; + + if (!badblocks) + return 0; + w = badblocks; + do { + zero_ok = (*w == '0' ? 1 : 0); + erase_block_no = simple_strtoul(w, &w, 0); + if (!zero_ok && !erase_block_no) { + NS_ERR("invalid badblocks.\n"); + return -EINVAL; + } + offset = erase_block_no * ns->geom.secsz; + if (mtd->block_markbad(mtd, offset)) { + NS_ERR("invalid badblocks.\n"); + return -EINVAL; + } + if (*w == ',') + w += 1; + } while (*w); + return 0; +} + +static int parse_weakblocks(void) +{ + char *w; + int zero_ok; + unsigned int erase_block_no; + unsigned int max_erases; + struct weak_block *wb; + + if (!weakblocks) + return 0; + w = weakblocks; + do { + zero_ok = (*w == '0' ? 1 : 0); + erase_block_no = simple_strtoul(w, &w, 0); + if (!zero_ok && !erase_block_no) { + NS_ERR("invalid weakblocks.\n"); + return -EINVAL; + } + max_erases = 3; + if (*w == ':') { + w += 1; + max_erases = simple_strtoul(w, &w, 0); + } + if (*w == ',') + w += 1; + wb = kzalloc(sizeof(*wb), GFP_KERNEL); + if (!wb) { + NS_ERR("unable to allocate memory.\n"); + return -ENOMEM; + } + wb->erase_block_no = erase_block_no; + wb->max_erases = max_erases; + list_add(&wb->list, &weak_blocks); + } while (*w); + return 0; +} + +static int erase_error(unsigned int erase_block_no) +{ + struct weak_block *wb; + + list_for_each_entry(wb, &weak_blocks, list) + if (wb->erase_block_no == erase_block_no) { + if (wb->erases_done >= wb->max_erases) + return 1; + wb->erases_done += 1; + return 0; + } + return 0; +} + +static int parse_weakpages(void) +{ + char *w; + int zero_ok; + unsigned int page_no; + unsigned int max_writes; + struct weak_page *wp; + + if (!weakpages) + return 0; + w = weakpages; + do { + zero_ok = (*w == '0' ? 1 : 0); + page_no = simple_strtoul(w, &w, 0); + if (!zero_ok && !page_no) { + NS_ERR("invalid weakpagess.\n"); + return -EINVAL; + } + max_writes = 3; + if (*w == ':') { + w += 1; + max_writes = simple_strtoul(w, &w, 0); + } + if (*w == ',') + w += 1; + wp = kzalloc(sizeof(*wp), GFP_KERNEL); + if (!wp) { + NS_ERR("unable to allocate memory.\n"); + return -ENOMEM; + } + wp->page_no = page_no; + wp->max_writes = max_writes; + list_add(&wp->list, &weak_pages); + } while (*w); + return 0; +} + +static int write_error(unsigned int page_no) +{ + struct weak_page *wp; + + list_for_each_entry(wp, &weak_pages, list) + if (wp->page_no == page_no) { + if (wp->writes_done >= wp->max_writes) + return 1; + wp->writes_done += 1; + return 0; + } + return 0; +} + +static int parse_gravepages(void) +{ + char *g; + int zero_ok; + unsigned int page_no; + unsigned int max_reads; + struct grave_page *gp; + + if (!gravepages) + return 0; + g = gravepages; + do { + zero_ok = (*g == '0' ? 1 : 0); + page_no = simple_strtoul(g, &g, 0); + if (!zero_ok && !page_no) { + NS_ERR("invalid gravepagess.\n"); + return -EINVAL; + } + max_reads = 3; + if (*g == ':') { + g += 1; + max_reads = simple_strtoul(g, &g, 0); + } + if (*g == ',') + g += 1; + gp = kzalloc(sizeof(*gp), GFP_KERNEL); + if (!gp) { + NS_ERR("unable to allocate memory.\n"); + return -ENOMEM; + } + gp->page_no = page_no; + gp->max_reads = max_reads; + list_add(&gp->list, &grave_pages); + } while (*g); + return 0; +} + +static int read_error(unsigned int page_no) +{ + struct grave_page *gp; + + list_for_each_entry(gp, &grave_pages, list) + if (gp->page_no == page_no) { + if (gp->reads_done >= gp->max_reads) + return 1; + gp->reads_done += 1; + return 0; + } + return 0; +} + +static void free_lists(void) +{ + struct list_head *pos, *n; + list_for_each_safe(pos, n, &weak_blocks) { + list_del(pos); + kfree(list_entry(pos, struct weak_block, list)); + } + list_for_each_safe(pos, n, &weak_pages) { + list_del(pos); + kfree(list_entry(pos, struct weak_page, list)); + } + list_for_each_safe(pos, n, &grave_pages) { + list_del(pos); + kfree(list_entry(pos, struct grave_page, list)); + } + kfree(erase_block_wear); +} + +static int setup_wear_reporting(struct mtd_info *mtd) +{ + size_t mem; + + if (!rptwear) + return 0; + wear_eb_count = mtd->size / mtd->erasesize; + mem = wear_eb_count * sizeof(unsigned long); + if (mem / sizeof(unsigned long) != wear_eb_count) { + NS_ERR("Too many erase blocks for wear reporting\n"); + return -ENOMEM; + } + erase_block_wear = kzalloc(mem, GFP_KERNEL); + if (!erase_block_wear) { + NS_ERR("Too many erase blocks for wear reporting\n"); + return -ENOMEM; + } + return 0; +} + +static void update_wear(unsigned int erase_block_no) +{ + unsigned long wmin = -1, wmax = 0, avg; + unsigned long deciles[10], decile_max[10], tot = 0; + unsigned int i; + + if (!erase_block_wear) + return; + total_wear += 1; + if (total_wear == 0) + NS_ERR("Erase counter total overflow\n"); + erase_block_wear[erase_block_no] += 1; + if (erase_block_wear[erase_block_no] == 0) + NS_ERR("Erase counter overflow for erase block %u\n", erase_block_no); + rptwear_cnt += 1; + if (rptwear_cnt < rptwear) + return; + rptwear_cnt = 0; + /* Calc wear stats */ + for (i = 0; i < wear_eb_count; ++i) { + unsigned long wear = erase_block_wear[i]; + if (wear < wmin) + wmin = wear; + if (wear > wmax) + wmax = wear; + tot += wear; + } + for (i = 0; i < 9; ++i) { + deciles[i] = 0; + decile_max[i] = (wmax * (i + 1) + 5) / 10; + } + deciles[9] = 0; + decile_max[9] = wmax; + for (i = 0; i < wear_eb_count; ++i) { + int d; + unsigned long wear = erase_block_wear[i]; + for (d = 0; d < 10; ++d) + if (wear <= decile_max[d]) { + deciles[d] += 1; + break; + } + } + avg = tot / wear_eb_count; + /* Output wear report */ + NS_INFO("*** Wear Report ***\n"); + NS_INFO("Total numbers of erases: %lu\n", tot); + NS_INFO("Number of erase blocks: %u\n", wear_eb_count); + NS_INFO("Average number of erases: %lu\n", avg); + NS_INFO("Maximum number of erases: %lu\n", wmax); + NS_INFO("Minimum number of erases: %lu\n", wmin); + for (i = 0; i < 10; ++i) { + unsigned long from = (i ? decile_max[i - 1] + 1 : 0); + if (from > decile_max[i]) + continue; + NS_INFO("Number of ebs with erase counts from %lu to %lu : %lu\n", + from, + decile_max[i], + deciles[i]); + } + NS_INFO("*** End of Wear Report ***\n"); +} + /* * Returns the string representation of 'state' state. */ @@ -822,9 +1213,31 @@ static void read_page(struct nandsim *ns, int num) NS_DBG("read_page: page %d not allocated\n", ns->regs.row); memset(ns->buf.byte, 0xFF, num); } else { + unsigned int page_no = ns->regs.row; NS_DBG("read_page: page %d allocated, reading from %d\n", ns->regs.row, ns->regs.column + ns->regs.off); + if (read_error(page_no)) { + int i; + memset(ns->buf.byte, 0xFF, num); + for (i = 0; i < num; ++i) + ns->buf.byte[i] = random32(); + NS_WARN("simulating read error in page %u\n", page_no); + return; + } memcpy(ns->buf.byte, NS_PAGE_BYTE_OFF(ns), num); + if (bitflips && random32() < (1 << 22)) { + int flips = 1; + if (bitflips > 1) + flips = (random32() % (int) bitflips) + 1; + while (flips--) { + int pos = random32() % (num * 8); + ns->buf.byte[pos / 8] ^= (1 << (pos % 8)); + NS_WARN("read_page: flipping bit %d in page %d " + "reading from %d ecc: corrected=%u failed=%u\n", + pos, ns->regs.row, ns->regs.column + ns->regs.off, + nsmtd->ecc_stats.corrected, nsmtd->ecc_stats.failed); + } + } } } @@ -883,6 +1296,7 @@ static int do_state_action(struct nandsim *ns, uint32_t action) { int num; int busdiv = ns->busw == 8 ? 1 : 2; + unsigned int erase_block_no, page_no; action &= ACTION_MASK; @@ -942,14 +1356,24 @@ static int do_state_action(struct nandsim *ns, uint32_t action) 8 * (ns->geom.pgaddrbytes - ns->geom.secaddrbytes)) | ns->regs.column; ns->regs.column = 0; + erase_block_no = ns->regs.row >> (ns->geom.secshift - ns->geom.pgshift); + NS_DBG("do_state_action: erase sector at address %#x, off = %d\n", ns->regs.row, NS_RAW_OFFSET(ns)); - NS_LOG("erase sector %d\n", ns->regs.row >> (ns->geom.secshift - ns->geom.pgshift)); + NS_LOG("erase sector %u\n", erase_block_no); erase_sector(ns); NS_MDELAY(erase_delay); + if (erase_block_wear) + update_wear(erase_block_no); + + if (erase_error(erase_block_no)) { + NS_WARN("simulating erase failure in erase block %u\n", erase_block_no); + return -1; + } + break; case ACTION_PRGPAGE: @@ -972,6 +1396,8 @@ static int do_state_action(struct nandsim *ns, uint32_t action) if (prog_page(ns, num) == -1) return -1; + page_no = ns->regs.row; + NS_DBG("do_state_action: copy %d bytes from int buf to (%#x, %#x), raw off = %d\n", num, ns->regs.row, ns->regs.column, NS_RAW_OFFSET(ns) + ns->regs.off); NS_LOG("programm page %d\n", ns->regs.row); @@ -979,6 +1405,11 @@ static int do_state_action(struct nandsim *ns, uint32_t action) NS_UDELAY(programm_delay); NS_UDELAY(output_cycle * ns->geom.pgsz / 1000 / busdiv); + if (write_error(page_no)) { + NS_WARN("simulating write failure in page %u\n", page_no); + return -1; + } + break; case ACTION_ZEROOFF: @@ -1503,7 +1934,7 @@ static int __init ns_init_module(void) { struct nand_chip *chip; struct nandsim *nand; - int retval = -ENOMEM; + int retval = -ENOMEM, i; if (bus_width != 8 && bus_width != 16) { NS_ERR("wrong bus width (%d), use only 8 or 16\n", bus_width); @@ -1533,6 +1964,8 @@ static int __init ns_init_module(void) chip->verify_buf = ns_nand_verify_buf; chip->read_word = ns_nand_read_word; chip->ecc.mode = NAND_ECC_SOFT; + /* The NAND_SKIP_BBTSCAN option is necessary for 'overridesize' */ + /* and 'badblocks' parameters to work */ chip->options |= NAND_SKIP_BBTSCAN; /* @@ -1557,6 +1990,15 @@ static int __init ns_init_module(void) nsmtd->owner = THIS_MODULE; + if ((retval = parse_weakblocks()) != 0) + goto error; + + if ((retval = parse_weakpages()) != 0) + goto error; + + if ((retval = parse_gravepages()) != 0) + goto error; + if ((retval = nand_scan(nsmtd, 1)) != 0) { NS_ERR("can't register NAND Simulator\n"); if (retval > 0) @@ -1564,23 +2006,44 @@ static int __init ns_init_module(void) goto error; } - if ((retval = init_nandsim(nsmtd)) != 0) { - NS_ERR("scan_bbt: can't initialize the nandsim structure\n"); - goto error; + if (overridesize) { + u_int32_t new_size = nsmtd->erasesize << overridesize; + if (new_size >> overridesize != nsmtd->erasesize) { + NS_ERR("overridesize is too big\n"); + goto err_exit; + } + /* N.B. This relies on nand_scan not doing anything with the size before we change it */ + nsmtd->size = new_size; + chip->chipsize = new_size; + chip->chip_shift = ffs(new_size) - 1; } - if ((retval = nand_default_bbt(nsmtd)) != 0) { - free_nandsim(nand); - goto error; - } + if ((retval = setup_wear_reporting(nsmtd)) != 0) + goto err_exit; + + if ((retval = init_nandsim(nsmtd)) != 0) + goto err_exit; - /* Register NAND as one big partition */ - add_mtd_partitions(nsmtd, &nand->part, 1); + if ((retval = parse_badblocks(nand, nsmtd)) != 0) + goto err_exit; + + if ((retval = nand_default_bbt(nsmtd)) != 0) + goto err_exit; + + /* Register NAND partitions */ + if ((retval = add_mtd_partitions(nsmtd, &nand->partitions[0], nand->nbparts)) != 0) + goto err_exit; return 0; +err_exit: + free_nandsim(nand); + nand_release(nsmtd); + for (i = 0;i < ARRAY_SIZE(nand->partitions); ++i) + kfree(nand->partitions[i].name); error: kfree(nsmtd); + free_lists(); return retval; } @@ -1593,10 +2056,14 @@ module_init(ns_init_module); static void __exit ns_cleanup_module(void) { struct nandsim *ns = (struct nandsim *)(((struct nand_chip *)nsmtd->priv)->priv); + int i; free_nandsim(ns); /* Free nandsim private resources */ - nand_release(nsmtd); /* Unregisterd drived */ + nand_release(nsmtd); /* Unregister driver */ + for (i = 0;i < ARRAY_SIZE(ns->partitions); ++i) + kfree(ns->partitions[i].name); kfree(nsmtd); /* Free other structures */ + free_lists(); } module_exit(ns_cleanup_module); @@ -1604,4 +2071,3 @@ module_exit(ns_cleanup_module); MODULE_LICENSE ("GPL"); MODULE_AUTHOR ("Artem B. Bityuckiy"); MODULE_DESCRIPTION ("The NAND flash simulator"); - diff --git a/drivers/mtd/onenand/Kconfig b/drivers/mtd/onenand/Kconfig index 373bddce8f1..c257d397d08 100644 --- a/drivers/mtd/onenand/Kconfig +++ b/drivers/mtd/onenand/Kconfig @@ -2,20 +2,18 @@ # linux/drivers/mtd/onenand/Kconfig # -menu "OneNAND Flash Device Drivers" - depends on MTD != n - -config MTD_ONENAND +menuconfig MTD_ONENAND tristate "OneNAND Device Support" depends on MTD help This enables support for accessing all type of OneNAND flash devices. For further information see - <http://www.samsung.com/Products/Semiconductor/Flash/OneNAND_TM/index.htm>. + <http://www.samsung.com/Products/Semiconductor/OneNAND/index.htm> + +if MTD_ONENAND config MTD_ONENAND_VERIFY_WRITE bool "Verify OneNAND page writes" - depends on MTD_ONENAND help This adds an extra check when data is written to the flash. The OneNAND flash device internally checks only bits transitioning @@ -25,13 +23,12 @@ config MTD_ONENAND_VERIFY_WRITE config MTD_ONENAND_GENERIC tristate "OneNAND Flash device via platform device driver" - depends on MTD_ONENAND && ARM + depends on ARM help Support for OneNAND flash via platform device driver. config MTD_ONENAND_OTP bool "OneNAND OTP Support" - depends on MTD_ONENAND help One Block of the NAND Flash Array memory is reserved as a One-Time Programmable Block memory area. @@ -43,4 +40,4 @@ config MTD_ONENAND_OTP OTP block is fully-guaranteed to be a valid block. -endmenu +endif # MTD_ONENAND diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c index 9e14a26ca4e..000794c6caf 100644 --- a/drivers/mtd/onenand/onenand_base.c +++ b/drivers/mtd/onenand/onenand_base.c @@ -836,9 +836,11 @@ static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int col int readcol = column; int readend = column + thislen; int lastgap = 0; + unsigned int i; uint8_t *oob_buf = this->oob_buf; - for (free = this->ecclayout->oobfree; free->length; ++free) { + free = this->ecclayout->oobfree; + for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) { if (readcol >= lastgap) readcol += free->offset - lastgap; if (readend >= lastgap) @@ -846,7 +848,8 @@ static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int col lastgap = free->offset + free->length; } this->read_bufferram(mtd, ONENAND_SPARERAM, oob_buf, 0, mtd->oobsize); - for (free = this->ecclayout->oobfree; free->length; ++free) { + free = this->ecclayout->oobfree; + for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) { int free_end = free->offset + free->length; if (free->offset < readend && free_end > readcol) { int st = max_t(int,free->offset,readcol); @@ -854,7 +857,7 @@ static int onenand_transfer_auto_oob(struct mtd_info *mtd, uint8_t *buf, int col int n = ed - st; memcpy(buf, oob_buf + st, n); buf += n; - } else + } else if (column == 0) break; } return 0; @@ -1280,15 +1283,18 @@ static int onenand_fill_auto_oob(struct mtd_info *mtd, u_char *oob_buf, int writecol = column; int writeend = column + thislen; int lastgap = 0; + unsigned int i; - for (free = this->ecclayout->oobfree; free->length; ++free) { + free = this->ecclayout->oobfree; + for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) { if (writecol >= lastgap) writecol += free->offset - lastgap; if (writeend >= lastgap) writeend += free->offset - lastgap; lastgap = free->offset + free->length; } - for (free = this->ecclayout->oobfree; free->length; ++free) { + free = this->ecclayout->oobfree; + for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && free->length; i++, free++) { int free_end = free->offset + free->length; if (free->offset < writeend && free_end > writecol) { int st = max_t(int,free->offset,writecol); @@ -1296,7 +1302,7 @@ static int onenand_fill_auto_oob(struct mtd_info *mtd, u_char *oob_buf, int n = ed - st; memcpy(oob_buf + st, buf, n); buf += n; - } else + } else if (column == 0) break; } return 0; @@ -2386,7 +2392,8 @@ int onenand_scan(struct mtd_info *mtd, int maxchips) * the out of band area */ this->ecclayout->oobavail = 0; - for (i = 0; this->ecclayout->oobfree[i].length; i++) + for (i = 0; i < MTD_MAX_OOBFREE_ENTRIES && + this->ecclayout->oobfree[i].length; i++) this->ecclayout->oobavail += this->ecclayout->oobfree[i].length; mtd->oobavail = this->ecclayout->oobavail; diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig new file mode 100644 index 00000000000..b9daf159a4a --- /dev/null +++ b/drivers/mtd/ubi/Kconfig @@ -0,0 +1,58 @@ +# drivers/mtd/ubi/Kconfig + +menu "UBI - Unsorted block images" + depends on MTD + +config MTD_UBI + tristate "Enable UBI" + depends on MTD + select CRC32 + help + UBI is a software layer above MTD layer which admits of LVM-like + logical volumes on top of MTD devices, hides some complexities of + flash chips like wear and bad blocks and provides some other useful + capabilities. Please, consult the MTD web site for more details + (www.linux-mtd.infradead.org). + +config MTD_UBI_WL_THRESHOLD + int "UBI wear-leveling threshold" + default 4096 + range 2 65536 + depends on MTD_UBI + help + This parameter defines the maximum difference between the highest + erase counter value and the lowest erase counter value of eraseblocks + of UBI devices. When this threshold is exceeded, UBI starts performing + wear leveling by means of moving data from eraseblock with low erase + counter to eraseblocks with high erase counter. Leave the default + value if unsure. + +config MTD_UBI_BEB_RESERVE + int "Percentage of reserved eraseblocks for bad eraseblocks handling" + default 1 + range 0 25 + depends on MTD_UBI + help + If the MTD device admits of bad eraseblocks (e.g. NAND flash), UBI + reserves some amount of physical eraseblocks to handle new bad + eraseblocks. For example, if a flash physical eraseblock becomes bad, + UBI uses these reserved physical eraseblocks to relocate the bad one. + This option specifies how many physical eraseblocks will be reserved + for bad eraseblock handling (percents of total number of good flash + eraseblocks). If the underlying flash does not admit of bad + eraseblocks (e.g. NOR flash), this value is ignored and nothing is + reserved. Leave the default value if unsure. + +config MTD_UBI_GLUEBI + bool "Emulate MTD devices" + default n + depends on MTD_UBI + help + This option enables MTD devices emulation on top of UBI volumes: for + each UBI volumes an MTD device is created, and all I/O to this MTD + device is redirected to the UBI volume. This is handy to make + MTD-oriented software (like JFFS2) work on top of UBI. Do not enable + this if no legacy software will be used. + +source "drivers/mtd/ubi/Kconfig.debug" +endmenu diff --git a/drivers/mtd/ubi/Kconfig.debug b/drivers/mtd/ubi/Kconfig.debug new file mode 100644 index 00000000000..1e2ee22edef --- /dev/null +++ b/drivers/mtd/ubi/Kconfig.debug @@ -0,0 +1,104 @@ +comment "UBI debugging options" + depends on MTD_UBI + +config MTD_UBI_DEBUG + bool "UBI debugging" + depends on SYSFS + depends on MTD_UBI + select DEBUG_FS + select KALLSYMS_ALL + help + This option enables UBI debugging. + +config MTD_UBI_DEBUG_MSG + bool "UBI debugging messages" + depends on MTD_UBI_DEBUG + default n + help + This option enables UBI debugging messages. + +config MTD_UBI_DEBUG_PARANOID + bool "Extra self-checks" + default n + depends on MTD_UBI_DEBUG + help + This option enables extra checks in UBI code. Note this slows UBI down + significantly. + +config MTD_UBI_DEBUG_DISABLE_BGT + bool "Do not enable the UBI background thread" + depends on MTD_UBI_DEBUG + default n + help + This option switches the background thread off by default. The thread + may be also be enabled/disabled via UBI sysfs. + +config MTD_UBI_DEBUG_USERSPACE_IO + bool "Direct user-space write/erase support" + default n + depends on MTD_UBI_DEBUG + help + By default, users cannot directly write and erase individual + eraseblocks of dynamic volumes, and have to use update operation + instead. This option enables this capability - it is very useful for + debugging and testing. + +config MTD_UBI_DEBUG_EMULATE_BITFLIPS + bool "Emulate flash bit-flips" + depends on MTD_UBI_DEBUG + default n + help + This option emulates bit-flips with probability 1/50, which in turn + causes scrubbing. Useful for debugging and stressing UBI. + +config MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES + bool "Emulate flash write failures" + depends on MTD_UBI_DEBUG + default n + help + This option emulates write failures with probability 1/100. Useful for + debugging and testing how UBI handlines errors. + +config MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES + bool "Emulate flash erase failures" + depends on MTD_UBI_DEBUG + default n + help + This option emulates erase failures with probability 1/100. Useful for + debugging and testing how UBI handlines errors. + +menu "Additional UBI debugging messages" + depends on MTD_UBI_DEBUG + +config MTD_UBI_DEBUG_MSG_BLD + bool "Additional UBI initialization and build messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables detailed UBI initialization and device build + debugging messages. + +config MTD_UBI_DEBUG_MSG_EBA + bool "Eraseblock association unit messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables debugging messages from the UBI eraseblock + association unit. + +config MTD_UBI_DEBUG_MSG_WL + bool "Wear-leveling unit messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables debugging messages from the UBI wear-leveling + unit. + +config MTD_UBI_DEBUG_MSG_IO + bool "Input/output unit messages" + default n + depends on MTD_UBI_DEBUG + help + This option enables debugging messages from the UBI input/output unit. + +endmenu # UBI debugging messages diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile new file mode 100644 index 00000000000..dd834e04151 --- /dev/null +++ b/drivers/mtd/ubi/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_MTD_UBI) += ubi.o + +ubi-y += vtbl.o vmt.o upd.o build.o cdev.o kapi.o eba.o io.o wl.o scan.o +ubi-y += misc.o + +ubi-$(CONFIG_MTD_UBI_DEBUG) += debug.o +ubi-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c new file mode 100644 index 00000000000..555d594d181 --- /dev/null +++ b/drivers/mtd/ubi/build.c @@ -0,0 +1,848 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём), + * Frank Haverkamp + */ + +/* + * This file includes UBI initialization and building of UBI devices. At the + * moment UBI devices may only be added while UBI is initialized, but dynamic + * device add/remove functionality is planned. Also, at the moment we only + * attach UBI devices by scanning, which will become a bottleneck when flashes + * reach certain large size. Then one may improve UBI and add other methods. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/stringify.h> +#include <linux/stat.h> +#include "ubi.h" + +/* Maximum length of the 'mtd=' parameter */ +#define MTD_PARAM_LEN_MAX 64 + +/** + * struct mtd_dev_param - MTD device parameter description data structure. + * @name: MTD device name or number string + * @vid_hdr_offs: VID header offset + * @data_offs: data offset + */ +struct mtd_dev_param +{ + char name[MTD_PARAM_LEN_MAX]; + int vid_hdr_offs; + int data_offs; +}; + +/* Numbers of elements set in the @mtd_dev_param array */ +static int mtd_devs = 0; + +/* MTD devices specification parameters */ +static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; + +/* Number of UBI devices in system */ +int ubi_devices_cnt; + +/* All UBI devices in system */ +struct ubi_device *ubi_devices[UBI_MAX_DEVICES]; + +/* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */ +struct class *ubi_class; + +/* "Show" method for files in '/<sysfs>/class/ubi/' */ +static ssize_t ubi_version_show(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", UBI_VERSION); +} + +/* UBI version attribute ('/<sysfs>/class/ubi/version') */ +static struct class_attribute ubi_version = + __ATTR(version, S_IRUGO, ubi_version_show, NULL); + +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* UBI device attributes (correspond to files in '/<sysfs>/class/ubi/ubiX') */ +static struct device_attribute dev_eraseblock_size = + __ATTR(eraseblock_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_avail_eraseblocks = + __ATTR(avail_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_total_eraseblocks = + __ATTR(total_eraseblocks, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_volumes_count = + __ATTR(volumes_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_ec = + __ATTR(max_ec, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_reserved_for_bad = + __ATTR(reserved_for_bad, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bad_peb_count = + __ATTR(bad_peb_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_max_vol_count = + __ATTR(max_vol_count, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_min_io_size = + __ATTR(min_io_size, S_IRUGO, dev_attribute_show, NULL); +static struct device_attribute dev_bgt_enabled = + __ATTR(bgt_enabled, S_IRUGO, dev_attribute_show, NULL); + +/* "Show" method for files in '/<sysfs>/class/ubi/ubiX/' */ +static ssize_t dev_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct ubi_device *ubi; + + ubi = container_of(dev, struct ubi_device, dev); + if (attr == &dev_eraseblock_size) + return sprintf(buf, "%d\n", ubi->leb_size); + else if (attr == &dev_avail_eraseblocks) + return sprintf(buf, "%d\n", ubi->avail_pebs); + else if (attr == &dev_total_eraseblocks) + return sprintf(buf, "%d\n", ubi->good_peb_count); + else if (attr == &dev_volumes_count) + return sprintf(buf, "%d\n", ubi->vol_count); + else if (attr == &dev_max_ec) + return sprintf(buf, "%d\n", ubi->max_ec); + else if (attr == &dev_reserved_for_bad) + return sprintf(buf, "%d\n", ubi->beb_rsvd_pebs); + else if (attr == &dev_bad_peb_count) + return sprintf(buf, "%d\n", ubi->bad_peb_count); + else if (attr == &dev_max_vol_count) + return sprintf(buf, "%d\n", ubi->vtbl_slots); + else if (attr == &dev_min_io_size) + return sprintf(buf, "%d\n", ubi->min_io_size); + else if (attr == &dev_bgt_enabled) + return sprintf(buf, "%d\n", ubi->thread_enabled); + else + BUG(); + + return 0; +} + +/* Fake "release" method for UBI devices */ +static void dev_release(struct device *dev) { } + +/** + * ubi_sysfs_init - initialize sysfs for an UBI device. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int ubi_sysfs_init(struct ubi_device *ubi) +{ + int err; + + ubi->dev.release = dev_release; + ubi->dev.devt = MKDEV(ubi->major, 0); + ubi->dev.class = ubi_class; + sprintf(&ubi->dev.bus_id[0], UBI_NAME_STR"%d", ubi->ubi_num); + err = device_register(&ubi->dev); + if (err) + goto out; + + err = device_create_file(&ubi->dev, &dev_eraseblock_size); + if (err) + goto out_unregister; + err = device_create_file(&ubi->dev, &dev_avail_eraseblocks); + if (err) + goto out_eraseblock_size; + err = device_create_file(&ubi->dev, &dev_total_eraseblocks); + if (err) + goto out_avail_eraseblocks; + err = device_create_file(&ubi->dev, &dev_volumes_count); + if (err) + goto out_total_eraseblocks; + err = device_create_file(&ubi->dev, &dev_max_ec); + if (err) + goto out_volumes_count; + err = device_create_file(&ubi->dev, &dev_reserved_for_bad); + if (err) + goto out_volumes_max_ec; + err = device_create_file(&ubi->dev, &dev_bad_peb_count); + if (err) + goto out_reserved_for_bad; + err = device_create_file(&ubi->dev, &dev_max_vol_count); + if (err) + goto out_bad_peb_count; + err = device_create_file(&ubi->dev, &dev_min_io_size); + if (err) + goto out_max_vol_count; + err = device_create_file(&ubi->dev, &dev_bgt_enabled); + if (err) + goto out_min_io_size; + + return 0; + +out_min_io_size: + device_remove_file(&ubi->dev, &dev_min_io_size); +out_max_vol_count: + device_remove_file(&ubi->dev, &dev_max_vol_count); +out_bad_peb_count: + device_remove_file(&ubi->dev, &dev_bad_peb_count); +out_reserved_for_bad: + device_remove_file(&ubi->dev, &dev_reserved_for_bad); +out_volumes_max_ec: + device_remove_file(&ubi->dev, &dev_max_ec); +out_volumes_count: + device_remove_file(&ubi->dev, &dev_volumes_count); +out_total_eraseblocks: + device_remove_file(&ubi->dev, &dev_total_eraseblocks); +out_avail_eraseblocks: + device_remove_file(&ubi->dev, &dev_avail_eraseblocks); +out_eraseblock_size: + device_remove_file(&ubi->dev, &dev_eraseblock_size); +out_unregister: + device_unregister(&ubi->dev); +out: + ubi_err("failed to initialize sysfs for %s", ubi->ubi_name); + return err; +} + +/** + * ubi_sysfs_close - close sysfs for an UBI device. + * @ubi: UBI device description object + */ +static void ubi_sysfs_close(struct ubi_device *ubi) +{ + device_remove_file(&ubi->dev, &dev_bgt_enabled); + device_remove_file(&ubi->dev, &dev_min_io_size); + device_remove_file(&ubi->dev, &dev_max_vol_count); + device_remove_file(&ubi->dev, &dev_bad_peb_count); + device_remove_file(&ubi->dev, &dev_reserved_for_bad); + device_remove_file(&ubi->dev, &dev_max_ec); + device_remove_file(&ubi->dev, &dev_volumes_count); + device_remove_file(&ubi->dev, &dev_total_eraseblocks); + device_remove_file(&ubi->dev, &dev_avail_eraseblocks); + device_remove_file(&ubi->dev, &dev_eraseblock_size); + device_unregister(&ubi->dev); +} + +/** + * kill_volumes - destroy all volumes. + * @ubi: UBI device description object + */ +static void kill_volumes(struct ubi_device *ubi) +{ + int i; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) + ubi_free_volume(ubi, i); +} + +/** + * uif_init - initialize user interfaces for an UBI device. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int uif_init(struct ubi_device *ubi) +{ + int i, err; + dev_t dev; + + mutex_init(&ubi->vtbl_mutex); + spin_lock_init(&ubi->volumes_lock); + + sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); + + /* + * Major numbers for the UBI character devices are allocated + * dynamically. Major numbers of volume character devices are + * equivalent to ones of the corresponding UBI character device. Minor + * numbers of UBI character devices are 0, while minor numbers of + * volume character devices start from 1. Thus, we allocate one major + * number and ubi->vtbl_slots + 1 minor numbers. + */ + err = alloc_chrdev_region(&dev, 0, ubi->vtbl_slots + 1, ubi->ubi_name); + if (err) { + ubi_err("cannot register UBI character devices"); + return err; + } + + cdev_init(&ubi->cdev, &ubi_cdev_operations); + ubi->major = MAJOR(dev); + dbg_msg("%s major is %u", ubi->ubi_name, ubi->major); + ubi->cdev.owner = THIS_MODULE; + + dev = MKDEV(ubi->major, 0); + err = cdev_add(&ubi->cdev, dev, 1); + if (err) { + ubi_err("cannot add character device %s", ubi->ubi_name); + goto out_unreg; + } + + err = ubi_sysfs_init(ubi); + if (err) + goto out_cdev; + + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i]) { + err = ubi_add_volume(ubi, i); + if (err) + goto out_volumes; + } + + return 0; + +out_volumes: + kill_volumes(ubi); + ubi_sysfs_close(ubi); +out_cdev: + cdev_del(&ubi->cdev); +out_unreg: + unregister_chrdev_region(MKDEV(ubi->major, 0), + ubi->vtbl_slots + 1); + return err; +} + +/** + * uif_close - close user interfaces for an UBI device. + * @ubi: UBI device description object + */ +static void uif_close(struct ubi_device *ubi) +{ + kill_volumes(ubi); + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); + unregister_chrdev_region(MKDEV(ubi->major, 0), ubi->vtbl_slots + 1); +} + +/** + * attach_by_scanning - attach an MTD device using scanning method. + * @ubi: UBI device descriptor + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, currently this is the only method to attach UBI devices. Hopefully in + * the future we'll have more scalable attaching methods and avoid full media + * scanning. But even in this case scanning will be needed as a fall-back + * attaching method if there are some on-flash table corruptions. + */ +static int attach_by_scanning(struct ubi_device *ubi) +{ + int err; + struct ubi_scan_info *si; + + si = ubi_scan(ubi); + if (IS_ERR(si)) + return PTR_ERR(si); + + ubi->bad_peb_count = si->bad_peb_count; + ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; + ubi->max_ec = si->max_ec; + ubi->mean_ec = si->mean_ec; + + err = ubi_read_volume_table(ubi, si); + if (err) + goto out_si; + + err = ubi_wl_init_scan(ubi, si); + if (err) + goto out_vtbl; + + err = ubi_eba_init_scan(ubi, si); + if (err) + goto out_wl; + + ubi_scan_destroy_si(si); + return 0; + +out_wl: + ubi_wl_close(ubi); +out_vtbl: + kfree(ubi->vtbl); +out_si: + ubi_scan_destroy_si(si); + return err; +} + +/** + * io_init - initialize I/O unit for a given UBI device. + * @ubi: UBI device description object + * + * If @ubi->vid_hdr_offset or @ubi->leb_start is zero, default offsets are + * assumed: + * o EC header is always at offset zero - this cannot be changed; + * o VID header starts just after the EC header at the closest address + * aligned to @io->@hdrs_min_io_size; + * o data starts just after the VID header at the closest address aligned to + * @io->@min_io_size + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int io_init(struct ubi_device *ubi) +{ + if (ubi->mtd->numeraseregions != 0) { + /* + * Some flashes have several erase regions. Different regions + * may have different eraseblock size and other + * characteristics. It looks like mostly multi-region flashes + * have one "main" region and one or more small regions to + * store boot loader code or boot parameters or whatever. I + * guess we should just pick the largest region. But this is + * not implemented. + */ + ubi_err("multiple regions, not implemented"); + return -EINVAL; + } + + /* + * Note, in this implementation we support MTD devices with 0x7FFFFFFF + * physical eraseblocks maximum. + */ + + ubi->peb_size = ubi->mtd->erasesize; + ubi->peb_count = ubi->mtd->size / ubi->mtd->erasesize; + ubi->flash_size = ubi->mtd->size; + + if (ubi->mtd->block_isbad && ubi->mtd->block_markbad) + ubi->bad_allowed = 1; + + ubi->min_io_size = ubi->mtd->writesize; + ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; + + /* Make sure minimal I/O unit is power of 2 */ + if (ubi->min_io_size == 0 || + (ubi->min_io_size & (ubi->min_io_size - 1))) { + ubi_err("bad min. I/O unit"); + return -EINVAL; + } + + ubi_assert(ubi->hdrs_min_io_size > 0); + ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); + ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); + + /* Calculate default aligned sizes of EC and VID headers */ + ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); + ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); + + dbg_msg("min_io_size %d", ubi->min_io_size); + dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size); + dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize); + dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize); + + if (ubi->vid_hdr_offset == 0) + /* Default offset */ + ubi->vid_hdr_offset = ubi->vid_hdr_aloffset = + ubi->ec_hdr_alsize; + else { + ubi->vid_hdr_aloffset = ubi->vid_hdr_offset & + ~(ubi->hdrs_min_io_size - 1); + ubi->vid_hdr_shift = ubi->vid_hdr_offset - + ubi->vid_hdr_aloffset; + } + + /* Similar for the data offset */ + if (ubi->leb_start == 0) { + ubi->leb_start = ubi->vid_hdr_offset + ubi->vid_hdr_alsize; + ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); + } + + dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); + dbg_msg("vid_hdr_aloffset %d", ubi->vid_hdr_aloffset); + dbg_msg("vid_hdr_shift %d", ubi->vid_hdr_shift); + dbg_msg("leb_start %d", ubi->leb_start); + + /* The shift must be aligned to 32-bit boundary */ + if (ubi->vid_hdr_shift % 4) { + ubi_err("unaligned VID header shift %d", + ubi->vid_hdr_shift); + return -EINVAL; + } + + /* Check sanity */ + if (ubi->vid_hdr_offset < UBI_EC_HDR_SIZE || + ubi->leb_start < ubi->vid_hdr_offset + UBI_VID_HDR_SIZE || + ubi->leb_start > ubi->peb_size - UBI_VID_HDR_SIZE || + ubi->leb_start % ubi->min_io_size) { + ubi_err("bad VID header (%d) or data offsets (%d)", + ubi->vid_hdr_offset, ubi->leb_start); + return -EINVAL; + } + + /* + * It may happen that EC and VID headers are situated in one minimal + * I/O unit. In this case we can only accept this UBI image in + * read-only mode. + */ + if (ubi->vid_hdr_offset + UBI_VID_HDR_SIZE <= ubi->hdrs_min_io_size) { + ubi_warn("EC and VID headers are in the same minimal I/O unit, " + "switch to read-only mode"); + ubi->ro_mode = 1; + } + + ubi->leb_size = ubi->peb_size - ubi->leb_start; + + if (!(ubi->mtd->flags & MTD_WRITEABLE)) { + ubi_msg("MTD device %d is write-protected, attach in " + "read-only mode", ubi->mtd->index); + ubi->ro_mode = 1; + } + + dbg_msg("leb_size %d", ubi->leb_size); + dbg_msg("ro_mode %d", ubi->ro_mode); + + /* + * Note, ideally, we have to initialize ubi->bad_peb_count here. But + * unfortunately, MTD does not provide this information. We should loop + * over all physical eraseblocks and invoke mtd->block_is_bad() for + * each physical eraseblock. So, we skip ubi->bad_peb_count + * uninitialized and initialize it after scanning. + */ + + return 0; +} + +/** + * attach_mtd_dev - attach an MTD device. + * @mtd_dev: MTD device name or number string + * @vid_hdr_offset: VID header offset + * @data_offset: data offset + * + * This function attaches an MTD device to UBI. It first treats @mtd_dev as the + * MTD device name, and tries to open it by this name. If it is unable to open, + * it tries to convert @mtd_dev to an integer and open the MTD device by its + * number. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int attach_mtd_dev(const char *mtd_dev, int vid_hdr_offset, + int data_offset) +{ + struct ubi_device *ubi; + struct mtd_info *mtd; + int i, err; + + mtd = get_mtd_device_nm(mtd_dev); + if (IS_ERR(mtd)) { + int mtd_num; + char *endp; + + if (PTR_ERR(mtd) != -ENODEV) + return PTR_ERR(mtd); + + /* + * Probably this is not MTD device name but MTD device number - + * check this out. + */ + mtd_num = simple_strtoul(mtd_dev, &endp, 0); + if (*endp != '\0' || mtd_dev == endp) { + ubi_err("incorrect MTD device: \"%s\"", mtd_dev); + return -ENODEV; + } + + mtd = get_mtd_device(NULL, mtd_num); + if (IS_ERR(mtd)) + return PTR_ERR(mtd); + } + + /* Check if we already have the same MTD device attached */ + for (i = 0; i < ubi_devices_cnt; i++) + if (ubi_devices[i]->mtd->index == mtd->index) { + ubi_err("mtd%d is already attached to ubi%d", + mtd->index, i); + err = -EINVAL; + goto out_mtd; + } + + ubi = ubi_devices[ubi_devices_cnt] = kzalloc(sizeof(struct ubi_device), + GFP_KERNEL); + if (!ubi) { + err = -ENOMEM; + goto out_mtd; + } + + ubi->ubi_num = ubi_devices_cnt; + ubi->mtd = mtd; + + dbg_msg("attaching mtd%d to ubi%d: VID header offset %d data offset %d", + ubi->mtd->index, ubi_devices_cnt, vid_hdr_offset, data_offset); + + ubi->vid_hdr_offset = vid_hdr_offset; + ubi->leb_start = data_offset; + err = io_init(ubi); + if (err) + goto out_free; + + err = attach_by_scanning(ubi); + if (err) { + dbg_err("failed to attach by scanning, error %d", err); + goto out_free; + } + + err = uif_init(ubi); + if (err) + goto out_detach; + + ubi_devices_cnt += 1; + + ubi_msg("attached mtd%d to ubi%d", ubi->mtd->index, ubi_devices_cnt); + ubi_msg("MTD device name: \"%s\"", ubi->mtd->name); + ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); + ubi_msg("physical eraseblock size: %d bytes (%d KiB)", + ubi->peb_size, ubi->peb_size >> 10); + ubi_msg("logical eraseblock size: %d bytes", ubi->leb_size); + ubi_msg("number of good PEBs: %d", ubi->good_peb_count); + ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); + ubi_msg("smallest flash I/O unit: %d", ubi->min_io_size); + ubi_msg("VID header offset: %d (aligned %d)", + ubi->vid_hdr_offset, ubi->vid_hdr_aloffset); + ubi_msg("data offset: %d", ubi->leb_start); + ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots); + ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); + ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); + ubi_msg("number of user volumes: %d", + ubi->vol_count - UBI_INT_VOL_COUNT); + ubi_msg("available PEBs: %d", ubi->avail_pebs); + ubi_msg("total number of reserved PEBs: %d", ubi->rsvd_pebs); + ubi_msg("number of PEBs reserved for bad PEB handling: %d", + ubi->beb_rsvd_pebs); + ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); + + /* Enable the background thread */ + if (!DBG_DISABLE_BGT) { + ubi->thread_enabled = 1; + wake_up_process(ubi->bgt_thread); + } + + return 0; + +out_detach: + ubi_eba_close(ubi); + ubi_wl_close(ubi); + kfree(ubi->vtbl); +out_free: + kfree(ubi); +out_mtd: + put_mtd_device(mtd); + ubi_devices[ubi_devices_cnt] = NULL; + return err; +} + +/** + * detach_mtd_dev - detach an MTD device. + * @ubi: UBI device description object + */ +static void detach_mtd_dev(struct ubi_device *ubi) +{ + int ubi_num = ubi->ubi_num, mtd_num = ubi->mtd->index; + + dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); + uif_close(ubi); + ubi_eba_close(ubi); + ubi_wl_close(ubi); + kfree(ubi->vtbl); + put_mtd_device(ubi->mtd); + kfree(ubi_devices[ubi_num]); + ubi_devices[ubi_num] = NULL; + ubi_devices_cnt -= 1; + ubi_assert(ubi_devices_cnt >= 0); + ubi_msg("mtd%d is detached from ubi%d", mtd_num, ubi_num); +} + +static int __init ubi_init(void) +{ + int err, i, k; + + /* Ensure that EC and VID headers have correct size */ + BUILD_BUG_ON(sizeof(struct ubi_ec_hdr) != 64); + BUILD_BUG_ON(sizeof(struct ubi_vid_hdr) != 64); + + if (mtd_devs > UBI_MAX_DEVICES) { + printk("UBI error: too many MTD devices, maximum is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + ubi_class = class_create(THIS_MODULE, UBI_NAME_STR); + if (IS_ERR(ubi_class)) + return PTR_ERR(ubi_class); + + err = class_create_file(ubi_class, &ubi_version); + if (err) + goto out_class; + + /* Attach MTD devices */ + for (i = 0; i < mtd_devs; i++) { + struct mtd_dev_param *p = &mtd_dev_param[i]; + + cond_resched(); + + if (!p->name) { + dbg_err("empty name"); + err = -EINVAL; + goto out_detach; + } + + err = attach_mtd_dev(p->name, p->vid_hdr_offs, p->data_offs); + if (err) + goto out_detach; + } + + return 0; + +out_detach: + for (k = 0; k < i; k++) + detach_mtd_dev(ubi_devices[k]); + class_remove_file(ubi_class, &ubi_version); +out_class: + class_destroy(ubi_class); + return err; +} +module_init(ubi_init); + +static void __exit ubi_exit(void) +{ + int i, n = ubi_devices_cnt; + + for (i = 0; i < n; i++) + detach_mtd_dev(ubi_devices[i]); + class_remove_file(ubi_class, &ubi_version); + class_destroy(ubi_class); +} +module_exit(ubi_exit); + +/** + * bytes_str_to_int - convert a string representing number of bytes to an + * integer. + * @str: the string to convert + * + * This function returns positive resulting integer in case of success and a + * negative error code in case of failure. + */ +static int __init bytes_str_to_int(const char *str) +{ + char *endp; + unsigned long result; + + result = simple_strtoul(str, &endp, 0); + if (str == endp || result < 0) { + printk("UBI error: incorrect bytes count: \"%s\"\n", str); + return -EINVAL; + } + + switch (*endp) { + case 'G': + result *= 1024; + case 'M': + result *= 1024; + case 'K': + case 'k': + result *= 1024; + if (endp[1] == 'i' && (endp[2] == '\0' || + endp[2] == 'B' || endp[2] == 'b')) + endp += 2; + case '\0': + break; + default: + printk("UBI error: incorrect bytes count: \"%s\"\n", str); + return -EINVAL; + } + + return result; +} + +/** + * ubi_mtd_param_parse - parse the 'mtd=' UBI parameter. + * @val: the parameter value to parse + * @kp: not used + * + * This function returns zero in case of success and a negative error code in + * case of error. + */ +static int __init ubi_mtd_param_parse(const char *val, struct kernel_param *kp) +{ + int i, len; + struct mtd_dev_param *p; + char buf[MTD_PARAM_LEN_MAX]; + char *pbuf = &buf[0]; + char *tokens[3] = {NULL, NULL, NULL}; + + if (mtd_devs == UBI_MAX_DEVICES) { + printk("UBI error: too many parameters, max. is %d\n", + UBI_MAX_DEVICES); + return -EINVAL; + } + + len = strnlen(val, MTD_PARAM_LEN_MAX); + if (len == MTD_PARAM_LEN_MAX) { + printk("UBI error: parameter \"%s\" is too long, max. is %d\n", + val, MTD_PARAM_LEN_MAX); + return -EINVAL; + } + + if (len == 0) { + printk("UBI warning: empty 'mtd=' parameter - ignored\n"); + return 0; + } + + strcpy(buf, val); + + /* Get rid of the final newline */ + if (buf[len - 1] == '\n') + buf[len - 1] = 0; + + for (i = 0; i < 3; i++) + tokens[i] = strsep(&pbuf, ","); + + if (pbuf) { + printk("UBI error: too many arguments at \"%s\"\n", val); + return -EINVAL; + } + + if (tokens[0] == '\0') + return -EINVAL; + + p = &mtd_dev_param[mtd_devs]; + strcpy(&p->name[0], tokens[0]); + + if (tokens[1]) + p->vid_hdr_offs = bytes_str_to_int(tokens[1]); + if (tokens[2]) + p->data_offs = bytes_str_to_int(tokens[2]); + + if (p->vid_hdr_offs < 0) + return p->vid_hdr_offs; + if (p->data_offs < 0) + return p->data_offs; + + mtd_devs += 1; + return 0; +} + +module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); +MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: " + "mtd=<name|num>[,<vid_hdr_offs>,<data_offs>]. " + "Multiple \"mtd\" parameters may be specified.\n" + "MTD devices may be specified by their number or name. " + "Optional \"vid_hdr_offs\" and \"data_offs\" parameters " + "specify UBI VID header position and data starting " + "position to be used by UBI.\n" + "Example: mtd=content,1984,2048 mtd=4 - attach MTD device" + "with name content using VID header offset 1984 and data " + "start 2048, and MTD device number 4 using default " + "offsets"); + +MODULE_VERSION(__stringify(UBI_VERSION)); +MODULE_DESCRIPTION("UBI - Unsorted Block Images"); +MODULE_AUTHOR("Artem Bityutskiy"); +MODULE_LICENSE("GPL"); diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c new file mode 100644 index 00000000000..6612eb79bf1 --- /dev/null +++ b/drivers/mtd/ubi/cdev.c @@ -0,0 +1,722 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file includes implementation of UBI character device operations. + * + * There are two kinds of character devices in UBI: UBI character devices and + * UBI volume character devices. UBI character devices allow users to + * manipulate whole volumes: create, remove, and re-size them. Volume character + * devices provide volume I/O capabilities. + * + * Major and minor numbers are assigned dynamically to both UBI and volume + * character devices. + */ + +#include <linux/module.h> +#include <linux/stat.h> +#include <linux/ioctl.h> +#include <linux/capability.h> +#include <mtd/ubi-user.h> +#include <asm/uaccess.h> +#include <asm/div64.h> +#include "ubi.h" + +/* + * Maximum sequence numbers of UBI and volume character device IOCTLs (direct + * logical eraseblock erase is a debug-only feature). + */ +#define UBI_CDEV_IOC_MAX_SEQ 2 +#ifndef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO +#define VOL_CDEV_IOC_MAX_SEQ 1 +#else +#define VOL_CDEV_IOC_MAX_SEQ 2 +#endif + +/** + * major_to_device - get UBI device object by character device major number. + * @major: major number + * + * This function returns a pointer to the UBI device object. + */ +static struct ubi_device *major_to_device(int major) +{ + int i; + + for (i = 0; i < ubi_devices_cnt; i++) + if (ubi_devices[i] && ubi_devices[i]->major == major) + return ubi_devices[i]; + BUG(); +} + +/** + * get_exclusive - get exclusive access to an UBI volume. + * @desc: volume descriptor + * + * This function changes UBI volume open mode to "exclusive". Returns previous + * mode value (positive integer) in case of success and a negative error code + * in case of failure. + */ +static int get_exclusive(struct ubi_volume_desc *desc) +{ + int users, err; + struct ubi_volume *vol = desc->vol; + + spin_lock(&vol->ubi->volumes_lock); + users = vol->readers + vol->writers + vol->exclusive; + ubi_assert(users > 0); + if (users > 1) { + dbg_err("%d users for volume %d", users, vol->vol_id); + err = -EBUSY; + } else { + vol->readers = vol->writers = 0; + vol->exclusive = 1; + err = desc->mode; + desc->mode = UBI_EXCLUSIVE; + } + spin_unlock(&vol->ubi->volumes_lock); + + return err; +} + +/** + * revoke_exclusive - revoke exclusive mode. + * @desc: volume descriptor + * @mode: new mode to switch to + */ +static void revoke_exclusive(struct ubi_volume_desc *desc, int mode) +{ + struct ubi_volume *vol = desc->vol; + + spin_lock(&vol->ubi->volumes_lock); + ubi_assert(vol->readers == 0 && vol->writers == 0); + ubi_assert(vol->exclusive == 1 && desc->mode == UBI_EXCLUSIVE); + vol->exclusive = 0; + if (mode == UBI_READONLY) + vol->readers = 1; + else if (mode == UBI_READWRITE) + vol->writers = 1; + else + vol->exclusive = 1; + spin_unlock(&vol->ubi->volumes_lock); + + desc->mode = mode; +} + +static int vol_cdev_open(struct inode *inode, struct file *file) +{ + struct ubi_volume_desc *desc; + const struct ubi_device *ubi = major_to_device(imajor(inode)); + int vol_id = iminor(inode) - 1; + int mode; + + if (file->f_mode & FMODE_WRITE) + mode = UBI_READWRITE; + else + mode = UBI_READONLY; + + dbg_msg("open volume %d, mode %d", vol_id, mode); + + desc = ubi_open_volume(ubi->ubi_num, vol_id, mode); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + file->private_data = desc; + return 0; +} + +static int vol_cdev_release(struct inode *inode, struct file *file) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + + dbg_msg("release volume %d, mode %d", vol->vol_id, desc->mode); + + if (vol->updating) { + ubi_warn("update of volume %d not finished, volume is damaged", + vol->vol_id); + vol->updating = 0; + kfree(vol->upd_buf); + } + + ubi_close_volume(desc); + return 0; +} + +static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + loff_t new_offset; + + if (vol->updating) { + /* Update is in progress, seeking is prohibited */ + dbg_err("updating"); + return -EBUSY; + } + + switch (origin) { + case 0: /* SEEK_SET */ + new_offset = offset; + break; + case 1: /* SEEK_CUR */ + new_offset = file->f_pos + offset; + break; + case 2: /* SEEK_END */ + new_offset = vol->used_bytes + offset; + break; + default: + return -EINVAL; + } + + if (new_offset < 0 || new_offset > vol->used_bytes) { + dbg_err("bad seek %lld", new_offset); + return -EINVAL; + } + + dbg_msg("seek volume %d, offset %lld, origin %d, new offset %lld", + vol->vol_id, offset, origin, new_offset); + + file->f_pos = new_offset; + return new_offset; +} + +static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, + loff_t *offp) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err, lnum, off, len, vol_id = desc->vol->vol_id, tbuf_size; + size_t count_save = count; + void *tbuf; + uint64_t tmp; + + dbg_msg("read %zd bytes from offset %lld of volume %d", + count, *offp, vol_id); + + if (vol->updating) { + dbg_err("updating"); + return -EBUSY; + } + if (vol->upd_marker) { + dbg_err("damaged volume, update marker is set"); + return -EBADF; + } + if (*offp == vol->used_bytes || count == 0) + return 0; + + if (vol->corrupted) + dbg_msg("read from corrupted volume %d", vol_id); + + if (*offp + count > vol->used_bytes) + count_save = count = vol->used_bytes - *offp; + + tbuf_size = vol->usable_leb_size; + if (count < tbuf_size) + tbuf_size = ALIGN(count, ubi->min_io_size); + tbuf = kmalloc(tbuf_size, GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + len = count > tbuf_size ? tbuf_size : count; + + tmp = *offp; + off = do_div(tmp, vol->usable_leb_size); + lnum = tmp; + + do { + cond_resched(); + + if (off + len >= vol->usable_leb_size) + len = vol->usable_leb_size - off; + + err = ubi_eba_read_leb(ubi, vol_id, lnum, tbuf, off, len, 0); + if (err) + break; + + off += len; + if (off == vol->usable_leb_size) { + lnum += 1; + off -= vol->usable_leb_size; + } + + count -= len; + *offp += len; + + err = copy_to_user(buf, tbuf, len); + if (err) { + err = -EFAULT; + break; + } + + buf += len; + len = count > tbuf_size ? tbuf_size : count; + } while (count); + + kfree(tbuf); + return err ? err : count_save - count; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO + +/* + * This function allows to directly write to dynamic UBI volumes, without + * issuing the volume update operation. Available only as a debugging feature. + * Very useful for testing UBI. + */ +static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +{ + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int lnum, off, len, tbuf_size, vol_id = vol->vol_id, err = 0; + size_t count_save = count; + char *tbuf; + uint64_t tmp; + + dbg_msg("requested: write %zd bytes to offset %lld of volume %u", + count, *offp, desc->vol->vol_id); + + if (vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + tmp = *offp; + off = do_div(tmp, vol->usable_leb_size); + lnum = tmp; + + if (off % ubi->min_io_size) { + dbg_err("unaligned position"); + return -EINVAL; + } + + if (*offp + count > vol->used_bytes) + count_save = count = vol->used_bytes - *offp; + + /* We can write only in fractions of the minimum I/O unit */ + if (count % ubi->min_io_size) { + dbg_err("unaligned write length"); + return -EINVAL; + } + + tbuf_size = vol->usable_leb_size; + if (count < tbuf_size) + tbuf_size = ALIGN(count, ubi->min_io_size); + tbuf = kmalloc(tbuf_size, GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + len = count > tbuf_size ? tbuf_size : count; + + while (count) { + cond_resched(); + + if (off + len >= vol->usable_leb_size) + len = vol->usable_leb_size - off; + + err = copy_from_user(tbuf, buf, len); + if (err) { + err = -EFAULT; + break; + } + + err = ubi_eba_write_leb(ubi, vol_id, lnum, tbuf, off, len, + UBI_UNKNOWN); + if (err) + break; + + off += len; + if (off == vol->usable_leb_size) { + lnum += 1; + off -= vol->usable_leb_size; + } + + count -= len; + *offp += len; + buf += len; + len = count > tbuf_size ? tbuf_size : count; + } + + kfree(tbuf); + return err ? err : count_save - count; +} + +#else +#define vol_cdev_direct_write(file, buf, count, offp) -EPERM +#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */ + +static ssize_t vol_cdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +{ + int err = 0; + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + + if (!vol->updating) + return vol_cdev_direct_write(file, buf, count, offp); + + err = ubi_more_update_data(ubi, vol->vol_id, buf, count); + if (err < 0) { + ubi_err("cannot write %zd bytes of update data", count); + return err; + } + + if (err) { + /* + * Update is finished, @err contains number of actually written + * bytes now. + */ + count = err; + + err = ubi_check_volume(ubi, vol->vol_id); + if (err < 0) + return err; + + if (err) { + ubi_warn("volume %d on UBI device %d is corrupted", + vol->vol_id, ubi->ubi_num); + vol->corrupted = 1; + } + vol->checked = 1; + revoke_exclusive(desc, UBI_READWRITE); + } + + *offp += count; + return count; +} + +static int vol_cdev_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + void __user *argp = (void __user *)arg; + + if (_IOC_NR(cmd) > VOL_CDEV_IOC_MAX_SEQ || + _IOC_TYPE(cmd) != UBI_VOL_IOC_MAGIC) + return -ENOTTY; + + if (_IOC_DIR(cmd) && _IOC_READ) + err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd)); + else if (_IOC_DIR(cmd) && _IOC_WRITE) + err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd)); + if (err) + return -EFAULT; + + switch (cmd) { + + /* Volume update command */ + case UBI_IOCVOLUP: + { + int64_t bytes, rsvd_bytes; + + if (!capable(CAP_SYS_RESOURCE)) { + err = -EPERM; + break; + } + + err = copy_from_user(&bytes, argp, sizeof(int64_t)); + if (err) { + err = -EFAULT; + break; + } + + if (desc->mode == UBI_READONLY) { + err = -EROFS; + break; + } + + rsvd_bytes = vol->reserved_pebs * (ubi->leb_size-vol->data_pad); + if (bytes < 0 || bytes > rsvd_bytes) { + err = -EINVAL; + break; + } + + err = get_exclusive(desc); + if (err < 0) + break; + + err = ubi_start_update(ubi, vol->vol_id, bytes); + if (bytes == 0) + revoke_exclusive(desc, UBI_READWRITE); + + file->f_pos = 0; + break; + } + +#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO + /* Logical eraseblock erasure command */ + case UBI_IOCEBER: + { + int32_t lnum; + + err = __get_user(lnum, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + + if (desc->mode == UBI_READONLY) { + err = -EROFS; + break; + } + + if (lnum < 0 || lnum >= vol->reserved_pebs) { + err = -EINVAL; + break; + } + + if (vol->vol_type != UBI_DYNAMIC_VOLUME) { + err = -EROFS; + break; + } + + dbg_msg("erase LEB %d:%d", vol->vol_id, lnum); + err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum); + if (err) + break; + + err = ubi_wl_flush(ubi); + break; + } +#endif + + default: + err = -ENOTTY; + break; + } + + return err; +} + +/** + * verify_mkvol_req - verify volume creation request. + * @ubi: UBI device description object + * @req: the request to check + * + * This function zero if the request is correct, and %-EINVAL if not. + */ +static int verify_mkvol_req(const struct ubi_device *ubi, + const struct ubi_mkvol_req *req) +{ + int n, err = -EINVAL; + + if (req->bytes < 0 || req->alignment < 0 || req->vol_type < 0 || + req->name_len < 0) + goto bad; + + if ((req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) && + req->vol_id != UBI_VOL_NUM_AUTO) + goto bad; + + if (req->alignment == 0) + goto bad; + + if (req->bytes == 0) + goto bad; + + if (req->vol_type != UBI_DYNAMIC_VOLUME && + req->vol_type != UBI_STATIC_VOLUME) + goto bad; + + if (req->alignment > ubi->leb_size) + goto bad; + + n = req->alignment % ubi->min_io_size; + if (req->alignment != 1 && n) + goto bad; + + if (req->name_len > UBI_VOL_NAME_MAX) { + err = -ENAMETOOLONG; + goto bad; + } + + return 0; + +bad: + dbg_err("bad volume creation request"); + ubi_dbg_dump_mkvol_req(req); + return err; +} + +/** + * verify_rsvol_req - verify volume re-size request. + * @ubi: UBI device description object + * @req: the request to check + * + * This function returns zero if the request is correct, and %-EINVAL if not. + */ +static int verify_rsvol_req(const struct ubi_device *ubi, + const struct ubi_rsvol_req *req) +{ + if (req->bytes <= 0) + return -EINVAL; + + if (req->vol_id < 0 || req->vol_id >= ubi->vtbl_slots) + return -EINVAL; + + return 0; +} + +static int ubi_cdev_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + int err = 0; + struct ubi_device *ubi; + struct ubi_volume_desc *desc; + void __user *argp = (void __user *)arg; + + if (_IOC_NR(cmd) > UBI_CDEV_IOC_MAX_SEQ || + _IOC_TYPE(cmd) != UBI_IOC_MAGIC) + return -ENOTTY; + + if (_IOC_DIR(cmd) && _IOC_READ) + err = !access_ok(VERIFY_WRITE, argp, _IOC_SIZE(cmd)); + else if (_IOC_DIR(cmd) && _IOC_WRITE) + err = !access_ok(VERIFY_READ, argp, _IOC_SIZE(cmd)); + if (err) + return -EFAULT; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + ubi = major_to_device(imajor(inode)); + if (IS_ERR(ubi)) + return PTR_ERR(ubi); + + switch (cmd) { + /* Create volume command */ + case UBI_IOCMKVOL: + { + struct ubi_mkvol_req req; + + dbg_msg("create volume"); + err = __copy_from_user(&req, argp, + sizeof(struct ubi_mkvol_req)); + if (err) { + err = -EFAULT; + break; + } + + err = verify_mkvol_req(ubi, &req); + if (err) + break; + + req.name[req.name_len] = '\0'; + + err = ubi_create_volume(ubi, &req); + if (err) + break; + + err = __put_user(req.vol_id, (__user int32_t *)argp); + if (err) + err = -EFAULT; + + break; + } + + /* Remove volume command */ + case UBI_IOCRMVOL: + { + int vol_id; + + dbg_msg("remove volume"); + err = __get_user(vol_id, (__user int32_t *)argp); + if (err) { + err = -EFAULT; + break; + } + + desc = ubi_open_volume(ubi->ubi_num, vol_id, UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + break; + } + + err = ubi_remove_volume(desc); + if (err) + ubi_close_volume(desc); + + break; + } + + /* Re-size volume command */ + case UBI_IOCRSVOL: + { + int pebs; + uint64_t tmp; + struct ubi_rsvol_req req; + + dbg_msg("re-size volume"); + err = __copy_from_user(&req, argp, + sizeof(struct ubi_rsvol_req)); + if (err) { + err = -EFAULT; + break; + } + + err = verify_rsvol_req(ubi, &req); + if (err) + break; + + desc = ubi_open_volume(ubi->ubi_num, req.vol_id, UBI_EXCLUSIVE); + if (IS_ERR(desc)) { + err = PTR_ERR(desc); + break; + } + + tmp = req.bytes; + pebs = !!do_div(tmp, desc->vol->usable_leb_size); + pebs += tmp; + + err = ubi_resize_volume(desc, pebs); + ubi_close_volume(desc); + break; + } + + default: + err = -ENOTTY; + break; + } + + return err; +} + +/* UBI character device operations */ +struct file_operations ubi_cdev_operations = { + .owner = THIS_MODULE, + .ioctl = ubi_cdev_ioctl, + .llseek = no_llseek +}; + +/* UBI volume character device operations */ +struct file_operations ubi_vol_cdev_operations = { + .owner = THIS_MODULE, + .open = vol_cdev_open, + .release = vol_cdev_release, + .llseek = vol_cdev_llseek, + .read = vol_cdev_read, + .write = vol_cdev_write, + .ioctl = vol_cdev_ioctl +}; diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c new file mode 100644 index 00000000000..86364221faf --- /dev/null +++ b/drivers/mtd/ubi/debug.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * Here we keep all the UBI debugging stuff which should normally be disabled + * and compiled-out, but it is extremely helpful when hunting bugs or doing big + * changes. + */ + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG + +#include "ubi.h" + +/** + * ubi_dbg_dump_ec_hdr - dump an erase counter header. + * @ec_hdr: the erase counter header to dump + */ +void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) +{ + dbg_msg("erase counter header dump:"); + dbg_msg("magic %#08x", ubi32_to_cpu(ec_hdr->magic)); + dbg_msg("version %d", (int)ec_hdr->version); + dbg_msg("ec %llu", (long long)ubi64_to_cpu(ec_hdr->ec)); + dbg_msg("vid_hdr_offset %d", ubi32_to_cpu(ec_hdr->vid_hdr_offset)); + dbg_msg("data_offset %d", ubi32_to_cpu(ec_hdr->data_offset)); + dbg_msg("hdr_crc %#08x", ubi32_to_cpu(ec_hdr->hdr_crc)); + dbg_msg("erase counter header hexdump:"); + ubi_dbg_hexdump(ec_hdr, UBI_EC_HDR_SIZE); +} + +/** + * ubi_dbg_dump_vid_hdr - dump a volume identifier header. + * @vid_hdr: the volume identifier header to dump + */ +void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) +{ + dbg_msg("volume identifier header dump:"); + dbg_msg("magic %08x", ubi32_to_cpu(vid_hdr->magic)); + dbg_msg("version %d", (int)vid_hdr->version); + dbg_msg("vol_type %d", (int)vid_hdr->vol_type); + dbg_msg("copy_flag %d", (int)vid_hdr->copy_flag); + dbg_msg("compat %d", (int)vid_hdr->compat); + dbg_msg("vol_id %d", ubi32_to_cpu(vid_hdr->vol_id)); + dbg_msg("lnum %d", ubi32_to_cpu(vid_hdr->lnum)); + dbg_msg("leb_ver %u", ubi32_to_cpu(vid_hdr->leb_ver)); + dbg_msg("data_size %d", ubi32_to_cpu(vid_hdr->data_size)); + dbg_msg("used_ebs %d", ubi32_to_cpu(vid_hdr->used_ebs)); + dbg_msg("data_pad %d", ubi32_to_cpu(vid_hdr->data_pad)); + dbg_msg("sqnum %llu", + (unsigned long long)ubi64_to_cpu(vid_hdr->sqnum)); + dbg_msg("hdr_crc %08x", ubi32_to_cpu(vid_hdr->hdr_crc)); + dbg_msg("volume identifier header hexdump:"); +} + +/** + * ubi_dbg_dump_vol_info- dump volume information. + * @vol: UBI volume description object + */ +void ubi_dbg_dump_vol_info(const struct ubi_volume *vol) +{ + dbg_msg("volume information dump:"); + dbg_msg("vol_id %d", vol->vol_id); + dbg_msg("reserved_pebs %d", vol->reserved_pebs); + dbg_msg("alignment %d", vol->alignment); + dbg_msg("data_pad %d", vol->data_pad); + dbg_msg("vol_type %d", vol->vol_type); + dbg_msg("name_len %d", vol->name_len); + dbg_msg("usable_leb_size %d", vol->usable_leb_size); + dbg_msg("used_ebs %d", vol->used_ebs); + dbg_msg("used_bytes %lld", vol->used_bytes); + dbg_msg("last_eb_bytes %d", vol->last_eb_bytes); + dbg_msg("corrupted %d", vol->corrupted); + dbg_msg("upd_marker %d", vol->upd_marker); + + if (vol->name_len <= UBI_VOL_NAME_MAX && + strnlen(vol->name, vol->name_len + 1) == vol->name_len) { + dbg_msg("name %s", vol->name); + } else { + dbg_msg("the 1st 5 characters of the name: %c%c%c%c%c", + vol->name[0], vol->name[1], vol->name[2], + vol->name[3], vol->name[4]); + } +} + +/** + * ubi_dbg_dump_vtbl_record - dump a &struct ubi_vtbl_record object. + * @r: the object to dump + * @idx: volume table index + */ +void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) +{ + int name_len = ubi16_to_cpu(r->name_len); + + dbg_msg("volume table record %d dump:", idx); + dbg_msg("reserved_pebs %d", ubi32_to_cpu(r->reserved_pebs)); + dbg_msg("alignment %d", ubi32_to_cpu(r->alignment)); + dbg_msg("data_pad %d", ubi32_to_cpu(r->data_pad)); + dbg_msg("vol_type %d", (int)r->vol_type); + dbg_msg("upd_marker %d", (int)r->upd_marker); + dbg_msg("name_len %d", name_len); + + if (r->name[0] == '\0') { + dbg_msg("name NULL"); + return; + } + + if (name_len <= UBI_VOL_NAME_MAX && + strnlen(&r->name[0], name_len + 1) == name_len) { + dbg_msg("name %s", &r->name[0]); + } else { + dbg_msg("1st 5 characters of the name: %c%c%c%c%c", + r->name[0], r->name[1], r->name[2], r->name[3], + r->name[4]); + } + dbg_msg("crc %#08x", ubi32_to_cpu(r->crc)); +} + +/** + * ubi_dbg_dump_sv - dump a &struct ubi_scan_volume object. + * @sv: the object to dump + */ +void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) +{ + dbg_msg("volume scanning information dump:"); + dbg_msg("vol_id %d", sv->vol_id); + dbg_msg("highest_lnum %d", sv->highest_lnum); + dbg_msg("leb_count %d", sv->leb_count); + dbg_msg("compat %d", sv->compat); + dbg_msg("vol_type %d", sv->vol_type); + dbg_msg("used_ebs %d", sv->used_ebs); + dbg_msg("last_data_size %d", sv->last_data_size); + dbg_msg("data_pad %d", sv->data_pad); +} + +/** + * ubi_dbg_dump_seb - dump a &struct ubi_scan_leb object. + * @seb: the object to dump + * @type: object type: 0 - not corrupted, 1 - corrupted + */ +void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type) +{ + dbg_msg("eraseblock scanning information dump:"); + dbg_msg("ec %d", seb->ec); + dbg_msg("pnum %d", seb->pnum); + if (type == 0) { + dbg_msg("lnum %d", seb->lnum); + dbg_msg("scrub %d", seb->scrub); + dbg_msg("sqnum %llu", seb->sqnum); + dbg_msg("leb_ver %u", seb->leb_ver); + } +} + +/** + * ubi_dbg_dump_mkvol_req - dump a &struct ubi_mkvol_req object. + * @req: the object to dump + */ +void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) +{ + char nm[17]; + + dbg_msg("volume creation request dump:"); + dbg_msg("vol_id %d", req->vol_id); + dbg_msg("alignment %d", req->alignment); + dbg_msg("bytes %lld", (long long)req->bytes); + dbg_msg("vol_type %d", req->vol_type); + dbg_msg("name_len %d", req->name_len); + + memcpy(nm, req->name, 16); + nm[16] = 0; + dbg_msg("the 1st 16 characters of the name: %s", nm); +} + +#define BYTES_PER_LINE 32 + +/** + * ubi_dbg_hexdump - dump a buffer. + * @ptr: the buffer to dump + * @size: buffer size which must be multiple of 4 bytes + */ +void ubi_dbg_hexdump(const void *ptr, int size) +{ + int i, k = 0, rows, columns; + const uint8_t *p = ptr; + + size = ALIGN(size, 4); + rows = size/BYTES_PER_LINE + size % BYTES_PER_LINE; + for (i = 0; i < rows; i++) { + int j; + + cond_resched(); + columns = min(size - k, BYTES_PER_LINE) / 4; + if (columns == 0) + break; + printk(KERN_DEBUG "%5d: ", i * BYTES_PER_LINE); + for (j = 0; j < columns; j++) { + int n, N; + + N = size - k > 4 ? 4 : size - k; + for (n = 0; n < N; n++) + printk("%02x", p[k++]); + printk(" "); + } + printk("\n"); + } +} + +#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h new file mode 100644 index 00000000000..f816ad9a36c --- /dev/null +++ b/drivers/mtd/ubi/debug.h @@ -0,0 +1,161 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_DEBUG_H__ +#define __UBI_DEBUG_H__ + +#ifdef CONFIG_MTD_UBI_DEBUG +#include <linux/random.h> + +#define ubi_assert(expr) BUG_ON(!(expr)) +#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) +#else +#define ubi_assert(expr) ({}) +#define dbg_err(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT +#define DBG_DISABLE_BGT 1 +#else +#define DBG_DISABLE_BGT 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG +/* Generic debugging message */ +#define dbg_msg(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG: %s: " fmt "\n", __FUNCTION__, ##__VA_ARGS__) + +#define ubi_dbg_dump_stack() dump_stack() + +struct ubi_ec_hdr; +struct ubi_vid_hdr; +struct ubi_volume; +struct ubi_vtbl_record; +struct ubi_scan_volume; +struct ubi_scan_leb; +struct ubi_mkvol_req; + +void ubi_dbg_print(int type, const char *func, const char *fmt, ...); +void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); +void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); +void ubi_dbg_dump_vol_info(const struct ubi_volume *vol); +void ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx); +void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); +void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); +void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); +void ubi_dbg_hexdump(const void *buf, int size); + +#else + +#define dbg_msg(fmt, ...) ({}) +#define ubi_dbg_dump_stack() ({}) +#define ubi_dbg_print(func, fmt, ...) ({}) +#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) +#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) +#define ubi_dbg_dump_vol_info(vol) ({}) +#define ubi_dbg_dump_vtbl_record(r, idx) ({}) +#define ubi_dbg_dump_sv(sv) ({}) +#define ubi_dbg_dump_seb(seb, type) ({}) +#define ubi_dbg_dump_mkvol_req(req) ({}) +#define ubi_dbg_hexdump(buf, size) ({}) + +#endif /* CONFIG_MTD_UBI_DEBUG_MSG */ + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA +/* Messages from the eraseblock association unit */ +#define dbg_eba(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG eba: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_eba(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL +/* Messages from the wear-leveling unit */ +#define dbg_wl(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG wl: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_wl(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO +/* Messages from the input/output unit */ +#define dbg_io(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG io: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_io(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD +/* Initialization and build messages */ +#define dbg_bld(fmt, ...) \ + printk(KERN_DEBUG "UBI DBG bld: %s: " fmt "\n", __FUNCTION__, \ + ##__VA_ARGS__) +#else +#define dbg_bld(fmt, ...) ({}) +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS +/** + * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. + * + * Returns non-zero if a bit-flip should be emulated, otherwise returns zero. + */ +static inline int ubi_dbg_is_bitflip(void) +{ + return !(random32() % 200); +} +#else +#define ubi_dbg_is_bitflip() 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES +/** + * ubi_dbg_is_write_failure - if it is time to emulate a write failure. + * + * Returns non-zero if a write failure should be emulated, otherwise returns + * zero. + */ +static inline int ubi_dbg_is_write_failure(void) +{ + return !(random32() % 500); +} +#else +#define ubi_dbg_is_write_failure() 0 +#endif + +#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES +/** + * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. + * + * Returns non-zero if an erase failure should be emulated, otherwise returns + * zero. + */ +static inline int ubi_dbg_is_erase_failure(void) +{ + return !(random32() % 400); +} +#else +#define ubi_dbg_is_erase_failure() 0 +#endif + +#endif /* !__UBI_DEBUG_H__ */ diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c new file mode 100644 index 00000000000..d847ee1da3d --- /dev/null +++ b/drivers/mtd/ubi/eba.c @@ -0,0 +1,1241 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * The UBI Eraseblock Association (EBA) unit. + * + * This unit is responsible for I/O to/from logical eraseblock. + * + * Although in this implementation the EBA table is fully kept and managed in + * RAM, which assumes poor scalability, it might be (partially) maintained on + * flash in future implementations. + * + * The EBA unit implements per-logical eraseblock locking. Before accessing a + * logical eraseblock it is locked for reading or writing. The per-logical + * eraseblock locking is implemented by means of the lock tree. The lock tree + * is an RB-tree which refers all the currently locked logical eraseblocks. The + * lock tree elements are &struct ltree_entry objects. They are indexed by + * (@vol_id, @lnum) pairs. + * + * EBA also maintains the global sequence counter which is incremented each + * time a logical eraseblock is mapped to a physical eraseblock and it is + * stored in the volume identifier header. This means that each VID header has + * a unique sequence number. The sequence number is only increased an we assume + * 64 bits is enough to never overflow. + */ + +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/err.h> +#include "ubi.h" + +/** + * struct ltree_entry - an entry in the lock tree. + * @rb: links RB-tree nodes + * @vol_id: volume ID of the locked logical eraseblock + * @lnum: locked logical eraseblock number + * @users: how many tasks are using this logical eraseblock or wait for it + * @mutex: read/write mutex to implement read/write access serialization to + * the (@vol_id, @lnum) logical eraseblock + * + * When a logical eraseblock is being locked - corresponding &struct ltree_entry + * object is inserted to the lock tree (@ubi->ltree). + */ +struct ltree_entry { + struct rb_node rb; + int vol_id; + int lnum; + int users; + struct rw_semaphore mutex; +}; + +/* Slab cache for lock-tree entries */ +static struct kmem_cache *ltree_slab; + +/** + * next_sqnum - get next sequence number. + * @ubi: UBI device description object + * + * This function returns next sequence number to use, which is just the current + * global sequence counter value. It also increases the global sequence + * counter. + */ +static unsigned long long next_sqnum(struct ubi_device *ubi) +{ + unsigned long long sqnum; + + spin_lock(&ubi->ltree_lock); + sqnum = ubi->global_sqnum++; + spin_unlock(&ubi->ltree_lock); + + return sqnum; +} + +/** + * ubi_get_compat - get compatibility flags of a volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function returns compatibility flags for an internal volume. User + * volumes have no compatibility flags, so %0 is returned. + */ +static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) +{ + if (vol_id == UBI_LAYOUT_VOL_ID) + return UBI_LAYOUT_VOLUME_COMPAT; + return 0; +} + +/** + * ltree_lookup - look up the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function returns a pointer to the corresponding &struct ltree_entry + * object if the logical eraseblock is locked and %NULL if it is not. + * @ubi->ltree_lock has to be locked. + */ +static struct ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, + int lnum) +{ + struct rb_node *p; + + p = ubi->ltree.rb_node; + while (p) { + struct ltree_entry *le; + + le = rb_entry(p, struct ltree_entry, rb); + + if (vol_id < le->vol_id) + p = p->rb_left; + else if (vol_id > le->vol_id) + p = p->rb_right; + else { + if (lnum < le->lnum) + p = p->rb_left; + else if (lnum > le->lnum) + p = p->rb_right; + else + return le; + } + } + + return NULL; +} + +/** + * ltree_add_entry - add new entry to the lock tree. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the + * lock tree. If such entry is already there, its usage counter is increased. + * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation + * failed. + */ +static struct ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id, + int lnum) +{ + struct ltree_entry *le, *le1, *le_free; + + le = kmem_cache_alloc(ltree_slab, GFP_KERNEL); + if (!le) + return ERR_PTR(-ENOMEM); + + le->vol_id = vol_id; + le->lnum = lnum; + + spin_lock(&ubi->ltree_lock); + le1 = ltree_lookup(ubi, vol_id, lnum); + + if (le1) { + /* + * This logical eraseblock is already locked. The newly + * allocated lock entry is not needed. + */ + le_free = le; + le = le1; + } else { + struct rb_node **p, *parent = NULL; + + /* + * No lock entry, add the newly allocated one to the + * @ubi->ltree RB-tree. + */ + le_free = NULL; + + p = &ubi->ltree.rb_node; + while (*p) { + parent = *p; + le1 = rb_entry(parent, struct ltree_entry, rb); + + if (vol_id < le1->vol_id) + p = &(*p)->rb_left; + else if (vol_id > le1->vol_id) + p = &(*p)->rb_right; + else { + ubi_assert(lnum != le1->lnum); + if (lnum < le1->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&le->rb, parent, p); + rb_insert_color(&le->rb, &ubi->ltree); + } + le->users += 1; + spin_unlock(&ubi->ltree_lock); + + if (le_free) + kmem_cache_free(ltree_slab, le_free); + + return le; +} + +/** + * leb_read_lock - lock logical eraseblock for reading. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for reading. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_read(&le->mutex); + return 0; +} + +/** + * leb_read_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + int free = 0; + struct ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + free = 1; + } + spin_unlock(&ubi->ltree_lock); + + up_read(&le->mutex); + if (free) + kmem_cache_free(ltree_slab, le); +} + +/** + * leb_write_lock - lock logical eraseblock for writing. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function locks a logical eraseblock for writing. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) +{ + struct ltree_entry *le; + + le = ltree_add_entry(ubi, vol_id, lnum); + if (IS_ERR(le)) + return PTR_ERR(le); + down_write(&le->mutex); + return 0; +} + +/** + * leb_write_unlock - unlock logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + */ +static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) +{ + int free; + struct ltree_entry *le; + + spin_lock(&ubi->ltree_lock); + le = ltree_lookup(ubi, vol_id, lnum); + le->users -= 1; + ubi_assert(le->users >= 0); + if (le->users == 0) { + rb_erase(&le->rb, &ubi->ltree); + free = 1; + } else + free = 0; + spin_unlock(&ubi->ltree_lock); + + up_write(&le->mutex); + if (free) + kmem_cache_free(ltree_slab, le); +} + +/** + * ubi_eba_unmap_leb - un-map logical eraseblock. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules corresponding + * physical eraseblock for erasure. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum) +{ + int idx = vol_id2idx(ubi, vol_id), err, pnum; + struct ubi_volume *vol = ubi->volumes[idx]; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum < 0) + /* This logical eraseblock is already unmapped */ + goto out_unlock; + + dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); + + vol->eba_tbl[lnum] = UBI_LEB_UNMAPPED; + err = ubi_wl_put_peb(ubi, pnum, 0); + +out_unlock: + leb_write_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * ubi_eba_read_leb - read data. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: buffer to store the read data + * @offset: offset from where to read + * @len: how many bytes to read + * @check: data CRC check flag + * + * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF + * bytes. The @check flag only makes sense for static volumes and forces + * eraseblock data CRC checking. + * + * In case of success this function returns zero. In case of a static volume, + * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be + * returned for any volume type if an ECC error was detected by the MTD device + * driver. Other negative error cored may be returned in case of other errors. + */ +int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, + int offset, int len, int check) +{ + int err, pnum, scrub = 0, idx = vol_id2idx(ubi, vol_id); + struct ubi_vid_hdr *vid_hdr; + struct ubi_volume *vol = ubi->volumes[idx]; + uint32_t crc, crc1; + + err = leb_read_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum < 0) { + /* + * The logical eraseblock is not mapped, fill the whole buffer + * with 0xFF bytes. The exception is static volumes for which + * it is an error to read unmapped logical eraseblocks. + */ + dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)", + len, offset, vol_id, lnum); + leb_read_unlock(ubi, vol_id, lnum); + ubi_assert(vol->vol_type != UBI_STATIC_VOLUME); + memset(buf, 0xFF, len); + return 0; + } + + dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + check = 0; + +retry: + if (check) { + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) { + err = -ENOMEM; + goto out_unlock; + } + + err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); + if (err && err != UBI_IO_BITFLIPS) { + if (err > 0) { + /* + * The header is either absent or corrupted. + * The former case means there is a bug - + * switch to read-only mode just in case. + * The latter case means a real corruption - we + * may try to recover data. FIXME: but this is + * not implemented. + */ + if (err == UBI_IO_BAD_VID_HDR) { + ubi_warn("bad VID header at PEB %d, LEB" + "%d:%d", pnum, vol_id, lnum); + err = -EBADMSG; + } else + ubi_ro_mode(ubi); + } + goto out_free; + } else if (err == UBI_IO_BITFLIPS) + scrub = 1; + + ubi_assert(lnum < ubi32_to_cpu(vid_hdr->used_ebs)); + ubi_assert(len == ubi32_to_cpu(vid_hdr->data_size)); + + crc = ubi32_to_cpu(vid_hdr->data_crc); + ubi_free_vid_hdr(ubi, vid_hdr); + } + + err = ubi_io_read_data(ubi, buf, pnum, offset, len); + if (err) { + if (err == UBI_IO_BITFLIPS) { + scrub = 1; + err = 0; + } else if (err == -EBADMSG) { + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + goto out_unlock; + scrub = 1; + if (!check) { + ubi_msg("force data checking"); + check = 1; + goto retry; + } + } else + goto out_unlock; + } + + if (check) { + crc1 = crc32(UBI_CRC32_INIT, buf, len); + if (crc1 != crc) { + ubi_warn("CRC error: calculated %#08x, must be %#08x", + crc1, crc); + err = -EBADMSG; + goto out_unlock; + } + } + + if (scrub) + err = ubi_wl_scrub_peb(ubi, pnum); + + leb_read_unlock(ubi, vol_id, lnum); + return err; + +out_free: + ubi_free_vid_hdr(ubi, vid_hdr); +out_unlock: + leb_read_unlock(ubi, vol_id, lnum); + return err; +} + +/** + * recover_peb - recover from write failure. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to recover + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data which was not written because of the write failure + * @offset: offset of the failed write + * @len: how many bytes should have been written + * + * This function is called in case of a write failure and moves all good data + * from the potentially bad physical eraseblock to a good physical eraseblock. + * This function also writes the data which was not written due to the failure. + * Returns new physical eraseblock number in case of success, and a negative + * error code in case of failure. + */ +static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, + const void *buf, int offset, int len) +{ + int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0; + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + unsigned char *new_buf; + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) { + return -ENOMEM; + } + +retry: + new_pnum = ubi_wl_get_peb(ubi, UBI_UNKNOWN); + if (new_pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + return new_pnum; + } + + ubi_msg("recover PEB %d, move data to PEB %d", pnum, new_pnum); + + err = ubi_io_read_vid_hdr(ubi, pnum, vid_hdr, 1); + if (err && err != UBI_IO_BITFLIPS) { + if (err > 0) + err = -EIO; + goto out_put; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr); + if (err) + goto write_error; + + data_size = offset + len; + new_buf = kmalloc(data_size, GFP_KERNEL); + if (!new_buf) { + err = -ENOMEM; + goto out_put; + } + memset(new_buf + offset, 0xFF, len); + + /* Read everything before the area where the write failure happened */ + if (offset > 0) { + err = ubi_io_read_data(ubi, new_buf, pnum, 0, offset); + if (err && err != UBI_IO_BITFLIPS) { + kfree(new_buf); + goto out_put; + } + } + + memcpy(new_buf + offset, buf, len); + + err = ubi_io_write_data(ubi, new_buf, new_pnum, 0, data_size); + if (err) { + kfree(new_buf); + goto write_error; + } + + kfree(new_buf); + ubi_free_vid_hdr(ubi, vid_hdr); + + vol->eba_tbl[lnum] = new_pnum; + ubi_wl_put_peb(ubi, pnum, 1); + + ubi_msg("data was successfully recovered"); + return 0; + +out_put: + ubi_wl_put_peb(ubi, new_pnum, 1); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + /* + * Bad luck? This physical eraseblock is bad too? Crud. Let's try to + * get another one. + */ + ubi_warn("failed to write to PEB %d", new_pnum); + ubi_wl_put_peb(ubi, new_pnum, 1); + if (++tries > UBI_IO_RETRIES) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + ubi_msg("try again"); + goto retry; +} + +/** + * ubi_eba_write_leb - write data to dynamic volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: the data to write + * @offset: offset within the logical eraseblock where to write + * @len: how many bytes to write + * @dtype: data type + * + * This function writes data to logical eraseblock @lnum of a dynamic volume + * @vol_id. Returns zero in case of success and a negative error code in case + * of failure. In case of error, it is possible that something was still + * written to the flash media, but may be some garbage. + */ +int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int offset, int len, int dtype) +{ + int idx = vol_id2idx(ubi, vol_id), err, pnum, tries = 0; + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + + if (ubi->ro_mode) + return -EROFS; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) + return err; + + pnum = vol->eba_tbl[lnum]; + if (pnum >= 0) { + dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + err = ubi_io_write_data(ubi, buf, pnum, offset, len); + if (err) { + ubi_warn("failed to write data to PEB %d", pnum); + if (err == -EIO && ubi->bad_allowed) + err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len); + if (err) + ubi_ro_mode(ubi); + } + leb_write_unlock(ubi, vol_id, lnum); + return err; + } + + /* + * The logical eraseblock is not mapped. We have to get a free physical + * eraseblock and write the volume identifier header there first. + */ + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) { + leb_write_unlock(ubi, vol_id, lnum); + return -ENOMEM; + } + + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_ubi32(vol_id); + vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return pnum; + } + + dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d", + len, offset, vol_id, lnum, pnum); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, offset, len); + if (err) { + ubi_warn("failed to write %d bytes at offset %d of LEB %d:%d, " + "PEB %d", len, offset, vol_id, lnum, pnum); + goto write_error; + } + + vol->eba_tbl[lnum] = pnum; + + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + /* + * Fortunately, this is the first write operation to this physical + * eraseblock, so just put it and request a new one. We assume that if + * this physical eraseblock went bad, the erase code will handle that. + */ + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} + +/** + * ubi_eba_write_leb_st - write data to static volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write + * @dtype: data type + * @used_ebs: how many logical eraseblocks will this volume contain + * + * This function writes data to logical eraseblock @lnum of static volume + * @vol_id. The @used_ebs argument should contain total number of logical + * eraseblock in this static volume. + * + * When writing to the last logical eraseblock, the @len argument doesn't have + * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent + * to the real data size, although the @buf buffer has to contain the + * alignment. In all other cases, @len has to be aligned. + * + * It is prohibited to write more then once to logical eraseblocks of static + * volumes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype, int used_ebs) +{ + int err, pnum, tries = 0, data_size = len; + int idx = vol_id2idx(ubi, vol_id); + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + if (lnum == used_ebs - 1) + /* If this is the last LEB @len may be unaligned */ + len = ALIGN(data_size, ubi->min_io_size); + else + ubi_assert(len % ubi->min_io_size == 0); + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_ubi32(vol_id); + vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, data_size); + vid_hdr->vol_type = UBI_VID_STATIC; + vid_hdr->data_size = cpu_to_ubi32(data_size); + vid_hdr->used_ebs = cpu_to_ubi32(used_ebs); + vid_hdr->data_crc = cpu_to_ubi32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return pnum; + } + + dbg_eba("write VID hdr and %d bytes at LEB %d:%d, PEB %d, used_ebs %d", + len, vol_id, lnum, pnum, used_ebs); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn("failed to write %d bytes of data to PEB %d", + len, pnum); + goto write_error; + } + + ubi_assert(vol->eba_tbl[lnum] < 0); + vol->eba_tbl[lnum] = pnum; + + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} + +/* + * ubi_eba_atomic_leb_change - change logical eraseblock atomically. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data to write + * @len: how many bytes to write + * @dtype: data type + * + * This function changes the contents of a logical eraseblock atomically. @buf + * has to contain new logical eraseblock data, and @len - the length of the + * data, which has to be aligned. This function guarantees that in case of an + * unclean reboot the old contents is preserved. Returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype) +{ + int err, pnum, tries = 0, idx = vol_id2idx(ubi, vol_id); + struct ubi_volume *vol = ubi->volumes[idx]; + struct ubi_vid_hdr *vid_hdr; + uint32_t crc; + + if (ubi->ro_mode) + return -EROFS; + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + err = leb_write_lock(ubi, vol_id, lnum); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + vid_hdr->vol_id = cpu_to_ubi32(vol_id); + vid_hdr->lnum = cpu_to_ubi32(lnum); + vid_hdr->compat = ubi_get_compat(ubi, vol_id); + vid_hdr->data_pad = cpu_to_ubi32(vol->data_pad); + + crc = crc32(UBI_CRC32_INIT, buf, len); + vid_hdr->vol_type = UBI_VID_STATIC; + vid_hdr->data_size = cpu_to_ubi32(len); + vid_hdr->copy_flag = 1; + vid_hdr->data_crc = cpu_to_ubi32(crc); + +retry: + pnum = ubi_wl_get_peb(ubi, dtype); + if (pnum < 0) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return pnum; + } + + dbg_eba("change LEB %d:%d, PEB %d, write VID hdr to PEB %d", + vol_id, lnum, vol->eba_tbl[lnum], pnum); + + err = ubi_io_write_vid_hdr(ubi, pnum, vid_hdr); + if (err) { + ubi_warn("failed to write VID header to LEB %d:%d, PEB %d", + vol_id, lnum, pnum); + goto write_error; + } + + err = ubi_io_write_data(ubi, buf, pnum, 0, len); + if (err) { + ubi_warn("failed to write %d bytes of data to PEB %d", + len, pnum); + goto write_error; + } + + err = ubi_wl_put_peb(ubi, vol->eba_tbl[lnum], 1); + if (err) { + ubi_free_vid_hdr(ubi, vid_hdr); + leb_write_unlock(ubi, vol_id, lnum); + return err; + } + + vol->eba_tbl[lnum] = pnum; + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + +write_error: + if (err != -EIO || !ubi->bad_allowed) { + /* + * This flash device does not admit of bad eraseblocks or + * something nasty and unexpected happened. Switch to read-only + * mode just in case. + */ + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + err = ubi_wl_put_peb(ubi, pnum, 1); + if (err || ++tries > UBI_IO_RETRIES) { + ubi_ro_mode(ubi); + leb_write_unlock(ubi, vol_id, lnum); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + } + + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + ubi_msg("try another PEB"); + goto retry; +} + +/** + * ltree_entry_ctor - lock tree entries slab cache constructor. + * @obj: the lock-tree entry to construct + * @cache: the lock tree entry slab cache + * @flags: constructor flags + */ +static void ltree_entry_ctor(void *obj, struct kmem_cache *cache, + unsigned long flags) +{ + struct ltree_entry *le = obj; + + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) != + SLAB_CTOR_CONSTRUCTOR) + return; + + le->users = 0; + init_rwsem(&le->mutex); +} + +/** + * ubi_eba_copy_leb - copy logical eraseblock. + * @ubi: UBI device description object + * @from: physical eraseblock number from where to copy + * @to: physical eraseblock number where to copy + * @vid_hdr: VID header of the @from physical eraseblock + * + * This function copies logical eraseblock from physical eraseblock @from to + * physical eraseblock @to. The @vid_hdr buffer may be changed by this + * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation + * was canceled because bit-flips were detected at the target PEB, and a + * negative error code in case of failure. + */ +int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr) +{ + int err, vol_id, lnum, data_size, aldata_size, pnum, idx; + struct ubi_volume *vol; + uint32_t crc; + void *buf, *buf1 = NULL; + + vol_id = ubi32_to_cpu(vid_hdr->vol_id); + lnum = ubi32_to_cpu(vid_hdr->lnum); + + dbg_eba("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); + + if (vid_hdr->vol_type == UBI_VID_STATIC) { + data_size = ubi32_to_cpu(vid_hdr->data_size); + aldata_size = ALIGN(data_size, ubi->min_io_size); + } else + data_size = aldata_size = + ubi->leb_size - ubi32_to_cpu(vid_hdr->data_pad); + + buf = kmalloc(aldata_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* + * We do not want anybody to write to this logical eraseblock while we + * are moving it, so we lock it. + */ + err = leb_write_lock(ubi, vol_id, lnum); + if (err) { + kfree(buf); + return err; + } + + /* + * But the logical eraseblock might have been put by this time. + * Cancel if it is true. + */ + idx = vol_id2idx(ubi, vol_id); + + /* + * We may race with volume deletion/re-size, so we have to hold + * @ubi->volumes_lock. + */ + spin_lock(&ubi->volumes_lock); + vol = ubi->volumes[idx]; + if (!vol) { + dbg_eba("volume %d was removed meanwhile", vol_id); + spin_unlock(&ubi->volumes_lock); + goto out_unlock; + } + + pnum = vol->eba_tbl[lnum]; + if (pnum != from) { + dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " + "PEB %d, cancel", vol_id, lnum, from, pnum); + spin_unlock(&ubi->volumes_lock); + goto out_unlock; + } + spin_unlock(&ubi->volumes_lock); + + /* OK, now the LEB is locked and we can safely start moving it */ + + dbg_eba("read %d bytes of data", aldata_size); + err = ubi_io_read_data(ubi, buf, from, 0, aldata_size); + if (err && err != UBI_IO_BITFLIPS) { + ubi_warn("error %d while reading data from PEB %d", + err, from); + goto out_unlock; + } + + /* + * Now we have got to calculate how much data we have to to copy. In + * case of a static volume it is fairly easy - the VID header contains + * the data size. In case of a dynamic volume it is more difficult - we + * have to read the contents, cut 0xFF bytes from the end and copy only + * the first part. We must do this to avoid writing 0xFF bytes as it + * may have some side-effects. And not only this. It is important not + * to include those 0xFFs to CRC because later the they may be filled + * by data. + */ + if (vid_hdr->vol_type == UBI_VID_DYNAMIC) + aldata_size = data_size = + ubi_calc_data_len(ubi, buf, data_size); + + cond_resched(); + crc = crc32(UBI_CRC32_INIT, buf, data_size); + cond_resched(); + + /* + * It may turn out to me that the whole @from physical eraseblock + * contains only 0xFF bytes. Then we have to only write the VID header + * and do not write any data. This also means we should not set + * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc. + */ + if (data_size > 0) { + vid_hdr->copy_flag = 1; + vid_hdr->data_size = cpu_to_ubi32(data_size); + vid_hdr->data_crc = cpu_to_ubi32(crc); + } + vid_hdr->sqnum = cpu_to_ubi64(next_sqnum(ubi)); + + err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); + if (err) + goto out_unlock; + + cond_resched(); + + /* Read the VID header back and check if it was written correctly */ + err = ubi_io_read_vid_hdr(ubi, to, vid_hdr, 1); + if (err) { + if (err != UBI_IO_BITFLIPS) + ubi_warn("cannot read VID header back from PEB %d", to); + goto out_unlock; + } + + if (data_size > 0) { + err = ubi_io_write_data(ubi, buf, to, 0, aldata_size); + if (err) + goto out_unlock; + + /* + * We've written the data and are going to read it back to make + * sure it was written correctly. + */ + buf1 = kmalloc(aldata_size, GFP_KERNEL); + if (!buf1) { + err = -ENOMEM; + goto out_unlock; + } + + cond_resched(); + + err = ubi_io_read_data(ubi, buf1, to, 0, aldata_size); + if (err) { + if (err != UBI_IO_BITFLIPS) + ubi_warn("cannot read data back from PEB %d", + to); + goto out_unlock; + } + + cond_resched(); + + if (memcmp(buf, buf1, aldata_size)) { + ubi_warn("read data back from PEB %d - it is different", + to); + goto out_unlock; + } + } + + ubi_assert(vol->eba_tbl[lnum] == from); + vol->eba_tbl[lnum] = to; + + leb_write_unlock(ubi, vol_id, lnum); + kfree(buf); + kfree(buf1); + + return 0; + +out_unlock: + leb_write_unlock(ubi, vol_id, lnum); + kfree(buf); + kfree(buf1); + return err; +} + +/** + * ubi_eba_init_scan - initialize the EBA unit using scanning information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int i, j, err, num_volumes; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + struct ubi_scan_leb *seb; + struct rb_node *rb; + + dbg_eba("initialize EBA unit"); + + spin_lock_init(&ubi->ltree_lock); + ubi->ltree = RB_ROOT; + + if (ubi_devices_cnt == 0) { + ltree_slab = kmem_cache_create("ubi_ltree_slab", + sizeof(struct ltree_entry), 0, + 0, <ree_entry_ctor, NULL); + if (!ltree_slab) + return -ENOMEM; + } + + ubi->global_sqnum = si->max_sqnum + 1; + num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + + for (i = 0; i < num_volumes; i++) { + vol = ubi->volumes[i]; + if (!vol) + continue; + + cond_resched(); + + vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), + GFP_KERNEL); + if (!vol->eba_tbl) { + err = -ENOMEM; + goto out_free; + } + + for (j = 0; j < vol->reserved_pebs; j++) + vol->eba_tbl[j] = UBI_LEB_UNMAPPED; + + sv = ubi_scan_find_sv(si, idx2vol_id(ubi, i)); + if (!sv) + continue; + + ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { + if (seb->lnum >= vol->reserved_pebs) + /* + * This may happen in case of an unclean reboot + * during re-size. + */ + ubi_scan_move_to_list(sv, seb, &si->erase); + vol->eba_tbl[seb->lnum] = seb->pnum; + } + } + + if (ubi->bad_allowed) { + ubi_calculate_reserved(ubi); + + if (ubi->avail_pebs < ubi->beb_rsvd_level) { + /* No enough free physical eraseblocks */ + ubi->beb_rsvd_pebs = ubi->avail_pebs; + ubi_warn("cannot reserve enough PEBs for bad PEB " + "handling, reserved %d, need %d", + ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); + } else + ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; + + ubi->avail_pebs -= ubi->beb_rsvd_pebs; + ubi->rsvd_pebs += ubi->beb_rsvd_pebs; + } + + dbg_eba("EBA unit is initialized"); + return 0; + +out_free: + for (i = 0; i < num_volumes; i++) { + if (!ubi->volumes[i]) + continue; + kfree(ubi->volumes[i]->eba_tbl); + } + if (ubi_devices_cnt == 0) + kmem_cache_destroy(ltree_slab); + return err; +} + +/** + * ubi_eba_close - close EBA unit. + * @ubi: UBI device description object + */ +void ubi_eba_close(const struct ubi_device *ubi) +{ + int i, num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; + + dbg_eba("close EBA unit"); + + for (i = 0; i < num_volumes; i++) { + if (!ubi->volumes[i]) + continue; + kfree(ubi->volumes[i]->eba_tbl); + } + if (ubi_devices_cnt == 1) + kmem_cache_destroy(ltree_slab); +} diff --git a/drivers/mtd/ubi/gluebi.c b/drivers/mtd/ubi/gluebi.c new file mode 100644 index 00000000000..fc9478d605f --- /dev/null +++ b/drivers/mtd/ubi/gluebi.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём), Joern Engel + */ + +/* + * This file includes implementation of fake MTD devices for each UBI volume. + * This sounds strange, but it is in fact quite useful to make MTD-oriented + * software (including all the legacy software) to work on top of UBI. + * + * Gluebi emulates MTD devices of "MTD_UBIVOLUME" type. Their minimal I/O unit + * size (mtd->writesize) is equivalent to the UBI minimal I/O unit. The + * eraseblock size is equivalent to the logical eraseblock size of the volume. + */ + +#include <asm/div64.h> +#include "ubi.h" + +/** + * gluebi_get_device - get MTD device reference. + * @mtd: the MTD device description object + * + * This function is called every time the MTD device is being opened and + * implements the MTD get_device() operation. Returns zero in case of success + * and a negative error code in case of failure. + */ +static int gluebi_get_device(struct mtd_info *mtd) +{ + struct ubi_volume *vol; + + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + + /* + * We do not introduce locks for gluebi reference count because the + * get_device()/put_device() calls are already serialized at MTD. + */ + if (vol->gluebi_refcount > 0) { + /* + * The MTD device is already referenced and this is just one + * more reference. MTD allows many users to open the same + * volume simultaneously and do not distinguish between + * readers/writers/exclusive openers as UBI does. So we do not + * open the UBI volume again - just increase the reference + * counter and return. + */ + vol->gluebi_refcount += 1; + return 0; + } + + /* + * This is the first reference to this UBI volume via the MTD device + * interface. Open the corresponding volume in read-write mode. + */ + vol->gluebi_desc = ubi_open_volume(vol->ubi->ubi_num, vol->vol_id, + UBI_READWRITE); + if (IS_ERR(vol->gluebi_desc)) + return PTR_ERR(vol->gluebi_desc); + vol->gluebi_refcount += 1; + return 0; +} + +/** + * gluebi_put_device - put MTD device reference. + * @mtd: the MTD device description object + * + * This function is called every time the MTD device is being put. Returns + * zero in case of success and a negative error code in case of failure. + */ +static void gluebi_put_device(struct mtd_info *mtd) +{ + struct ubi_volume *vol; + + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + vol->gluebi_refcount -= 1; + ubi_assert(vol->gluebi_refcount >= 0); + if (vol->gluebi_refcount == 0) + ubi_close_volume(vol->gluebi_desc); +} + +/** + * gluebi_read - read operation of emulated MTD devices. + * @mtd: MTD device description object + * @from: absolute offset from where to read + * @len: how many bytes to read + * @retlen: count of read bytes is returned here + * @buf: buffer to store the read data + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int gluebi_read(struct mtd_info *mtd, loff_t from, size_t len, + size_t *retlen, unsigned char *buf) +{ + int err = 0, lnum, offs, total_read; + struct ubi_volume *vol; + struct ubi_device *ubi; + uint64_t tmp = from; + + dbg_msg("read %zd bytes from offset %lld", len, from); + + if (len < 0 || from < 0 || from + len > mtd->size) + return -EINVAL; + + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + ubi = vol->ubi; + + offs = do_div(tmp, mtd->erasesize); + lnum = tmp; + + total_read = len; + while (total_read) { + size_t to_read = mtd->erasesize - offs; + + if (to_read > total_read) + to_read = total_read; + + err = ubi_eba_read_leb(ubi, vol->vol_id, lnum, buf, offs, + to_read, 0); + if (err) + break; + + lnum += 1; + offs = 0; + total_read -= to_read; + buf += to_read; + } + + *retlen = len - total_read; + return err; +} + +/** + * gluebi_write - write operation of emulated MTD devices. + * @mtd: MTD device description object + * @to: absolute offset where to write + * @len: how many bytes to write + * @retlen: count of written bytes is returned here + * @buf: buffer with data to write + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, + size_t *retlen, const u_char *buf) +{ + int err = 0, lnum, offs, total_written; + struct ubi_volume *vol; + struct ubi_device *ubi; + uint64_t tmp = to; + + dbg_msg("write %zd bytes to offset %lld", len, to); + + if (len < 0 || to < 0 || len + to > mtd->size) + return -EINVAL; + + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + ubi = vol->ubi; + + if (ubi->ro_mode) + return -EROFS; + + offs = do_div(tmp, mtd->erasesize); + lnum = tmp; + + if (len % mtd->writesize || offs % mtd->writesize) + return -EINVAL; + + total_written = len; + while (total_written) { + size_t to_write = mtd->erasesize - offs; + + if (to_write > total_written) + to_write = total_written; + + err = ubi_eba_write_leb(ubi, vol->vol_id, lnum, buf, offs, + to_write, UBI_UNKNOWN); + if (err) + break; + + lnum += 1; + offs = 0; + total_written -= to_write; + buf += to_write; + } + + *retlen = len - total_written; + return err; +} + +/** + * gluebi_erase - erase operation of emulated MTD devices. + * @mtd: the MTD device description object + * @instr: the erase operation description + * + * This function calls the erase callback when finishes. Returns zero in case + * of success and a negative error code in case of failure. + */ +static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr) +{ + int err, i, lnum, count; + struct ubi_volume *vol; + struct ubi_device *ubi; + + dbg_msg("erase %u bytes at offset %u", instr->len, instr->addr); + + if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) + return -EINVAL; + + if (instr->len < 0 || instr->addr + instr->len > mtd->size) + return -EINVAL; + + if (instr->addr % mtd->writesize || instr->len % mtd->writesize) + return -EINVAL; + + lnum = instr->addr / mtd->erasesize; + count = instr->len / mtd->erasesize; + + vol = container_of(mtd, struct ubi_volume, gluebi_mtd); + ubi = vol->ubi; + + if (ubi->ro_mode) + return -EROFS; + + for (i = 0; i < count; i++) { + err = ubi_eba_unmap_leb(ubi, vol->vol_id, lnum + i); + if (err) + goto out_err; + } + + /* + * MTD erase operations are synchronous, so we have to make sure the + * physical eraseblock is wiped out. + */ + err = ubi_wl_flush(ubi); + if (err) + goto out_err; + + instr->state = MTD_ERASE_DONE; + mtd_erase_callback(instr); + return 0; + +out_err: + instr->state = MTD_ERASE_FAILED; + instr->fail_addr = lnum * mtd->erasesize; + return err; +} + +/** + * ubi_create_gluebi - initialize gluebi for an UBI volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function is called when an UBI volume is created in order to create + * corresponding fake MTD device. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol) +{ + struct mtd_info *mtd = &vol->gluebi_mtd; + + mtd->name = kmemdup(vol->name, vol->name_len + 1, GFP_KERNEL); + if (!mtd->name) + return -ENOMEM; + + mtd->type = MTD_UBIVOLUME; + if (!ubi->ro_mode) + mtd->flags = MTD_WRITEABLE; + mtd->writesize = ubi->min_io_size; + mtd->owner = THIS_MODULE; + mtd->size = vol->usable_leb_size * vol->reserved_pebs; + mtd->erasesize = vol->usable_leb_size; + mtd->read = gluebi_read; + mtd->write = gluebi_write; + mtd->erase = gluebi_erase; + mtd->get_device = gluebi_get_device; + mtd->put_device = gluebi_put_device; + + if (add_mtd_device(mtd)) { + ubi_err("cannot not add MTD device\n"); + kfree(mtd->name); + return -ENFILE; + } + + dbg_msg("added mtd%d (\"%s\"), size %u, EB size %u", + mtd->index, mtd->name, mtd->size, mtd->erasesize); + return 0; +} + +/** + * ubi_destroy_gluebi - close gluebi for an UBI volume. + * @vol: volume description object + * + * This function is called when an UBI volume is removed in order to remove + * corresponding fake MTD device. Returns zero in case of success and a + * negative error code in case of failure. + */ +int ubi_destroy_gluebi(struct ubi_volume *vol) +{ + int err; + struct mtd_info *mtd = &vol->gluebi_mtd; + + dbg_msg("remove mtd%d", mtd->index); + err = del_mtd_device(mtd); + if (err) + return err; + kfree(mtd->name); + return 0; +} diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c new file mode 100644 index 00000000000..438914d0515 --- /dev/null +++ b/drivers/mtd/ubi/io.c @@ -0,0 +1,1259 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * UBI input/output unit. + * + * This unit provides a uniform way to work with all kinds of the underlying + * MTD devices. It also implements handy functions for reading and writing UBI + * headers. + * + * We are trying to have a paranoid mindset and not to trust to what we read + * from the flash media in order to be more secure and robust. So this unit + * validates every single header it reads from the flash media. + * + * Some words about how the eraseblock headers are stored. + * + * The erase counter header is always stored at offset zero. By default, the + * VID header is stored after the EC header at the closest aligned offset + * (i.e. aligned to the minimum I/O unit size). Data starts next to the VID + * header at the closest aligned offset. But this default layout may be + * changed. For example, for different reasons (e.g., optimization) UBI may be + * asked to put the VID header at further offset, and even at an unaligned + * offset. Of course, if the offset of the VID header is unaligned, UBI adds + * proper padding in front of it. Data offset may also be changed but it has to + * be aligned. + * + * About minimal I/O units. In general, UBI assumes flash device model where + * there is only one minimal I/O unit size. E.g., in case of NOR flash it is 1, + * in case of NAND flash it is a NAND page, etc. This is reported by MTD in the + * @ubi->mtd->writesize field. But as an exception, UBI admits of using another + * (smaller) minimal I/O unit size for EC and VID headers to make it possible + * to do different optimizations. + * + * This is extremely useful in case of NAND flashes which admit of several + * write operations to one NAND page. In this case UBI can fit EC and VID + * headers at one NAND page. Thus, UBI may use "sub-page" size as the minimal + * I/O unit for the headers (the @ubi->hdrs_min_io_size field). But it still + * reports NAND page size (@ubi->min_io_size) as a minimal I/O unit for the UBI + * users. + * + * Example: some Samsung NANDs with 2KiB pages allow 4x 512-byte writes, so + * although the minimal I/O unit is 2K, UBI uses 512 bytes for EC and VID + * headers. + * + * Q: why not just to treat sub-page as a minimal I/O unit of this flash + * device, e.g., make @ubi->min_io_size = 512 in the example above? + * + * A: because when writing a sub-page, MTD still writes a full 2K page but the + * bytes which are no relevant to the sub-page are 0xFF. So, basically, writing + * 4x512 sub-pages is 4 times slower then writing one 2KiB NAND page. Thus, we + * prefer to use sub-pages only for EV and VID headers. + * + * As it was noted above, the VID header may start at a non-aligned offset. + * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page, + * the VID header may reside at offset 1984 which is the last 64 bytes of the + * last sub-page (EC header is always at offset zero). This causes some + * difficulties when reading and writing VID headers. + * + * Suppose we have a 64-byte buffer and we read a VID header at it. We change + * the data and want to write this VID header out. As we can only write in + * 512-byte chunks, we have to allocate one more buffer and copy our VID header + * to offset 448 of this buffer. + * + * The I/O unit does the following trick in order to avoid this extra copy. + * It always allocates a @ubi->vid_hdr_alsize bytes buffer for the VID header + * and returns a pointer to offset @ubi->vid_hdr_shift of this buffer. When the + * VID header is being written out, it shifts the VID header pointer back and + * writes the whole sub-page. + */ + +#include <linux/crc32.h> +#include <linux/err.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum); +static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); +static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr); +static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); +static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr); +static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum, + int offset, int len); +#else +#define paranoid_check_not_bad(ubi, pnum) 0 +#define paranoid_check_peb_ec_hdr(ubi, pnum) 0 +#define paranoid_check_ec_hdr(ubi, pnum, ec_hdr) 0 +#define paranoid_check_peb_vid_hdr(ubi, pnum) 0 +#define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0 +#define paranoid_check_all_ff(ubi, pnum, offset, len) 0 +#endif + +/** + * ubi_io_read - read data from a physical eraseblock. + * @ubi: UBI device description object + * @buf: buffer where to store the read data + * @pnum: physical eraseblock number to read from + * @offset: offset within the physical eraseblock from where to read + * @len: how many bytes to read + * + * This function reads data from offset @offset of physical eraseblock @pnum + * and stores the read data in the @buf buffer. The following return codes are + * possible: + * + * o %0 if all the requested data were successfully read; + * o %UBI_IO_BITFLIPS if all the requested data were successfully read, but + * correctable bit-flips were detected; this is harmless but may indicate + * that this eraseblock may become bad soon (but do not have to); + * o %-EBADMSG if the MTD subsystem reported about data data integrity + * problems, for example it can me an ECC error in case of NAND; this most + * probably means that the data is corrupted; + * o %-EIO if some I/O error occurred; + * o other negative error codes in case of other errors. + */ +int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, + int len) +{ + int err, retries = 0; + size_t read; + loff_t addr; + + dbg_io("read %d bytes from PEB %d:%d", len, pnum, offset); + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); + ubi_assert(len > 0); + + err = paranoid_check_not_bad(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + + addr = (loff_t)pnum * ubi->peb_size + offset; +retry: + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + if (err) { + if (err == -EUCLEAN) { + /* + * -EUCLEAN is reported if there was a bit-flip which + * was corrected, so this is harmless. + */ + ubi_msg("fixable bit-flip detected at PEB %d", pnum); + ubi_assert(len == read); + return UBI_IO_BITFLIPS; + } + + if (read != len && retries++ < UBI_IO_RETRIES) { + dbg_io("error %d while reading %d bytes from PEB %d:%d, " + "read only %zd bytes, retry", + err, len, pnum, offset, read); + yield(); + goto retry; + } + + ubi_err("error %d while reading %d bytes from PEB %d:%d, " + "read %zd bytes", err, len, pnum, offset, read); + ubi_dbg_dump_stack(); + } else { + ubi_assert(len == read); + + if (ubi_dbg_is_bitflip()) { + dbg_msg("bit-flip (emulated)"); + err = UBI_IO_BITFLIPS; + } + } + + return err; +} + +/** + * ubi_io_write - write data to a physical eraseblock. + * @ubi: UBI device description object + * @buf: buffer with the data to write + * @pnum: physical eraseblock number to write to + * @offset: offset within the physical eraseblock where to write + * @len: how many bytes to write + * + * This function writes @len bytes of data from buffer @buf to offset @offset + * of physical eraseblock @pnum. If all the data were successfully written, + * zero is returned. If an error occurred, this function returns a negative + * error code. If %-EIO is returned, the physical eraseblock most probably went + * bad. + * + * Note, in case of an error, it is possible that something was still written + * to the flash media, but may be some garbage. + */ +int ubi_io_write(const struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len) +{ + int err; + size_t written; + loff_t addr; + + dbg_io("write %d bytes to PEB %d:%d", len, pnum, offset); + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + ubi_assert(offset >= 0 && offset + len <= ubi->peb_size); + ubi_assert(offset % ubi->hdrs_min_io_size == 0); + ubi_assert(len > 0 && len % ubi->hdrs_min_io_size == 0); + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + /* The below has to be compiled out if paranoid checks are disabled */ + + err = paranoid_check_not_bad(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + + /* The area we are writing to has to contain all 0xFF bytes */ + err = paranoid_check_all_ff(ubi, pnum, offset, len); + if (err) + return err > 0 ? -EINVAL : err; + + if (offset >= ubi->leb_start) { + /* + * We write to the data area of the physical eraseblock. Make + * sure it has valid EC and VID headers. + */ + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + err = paranoid_check_peb_vid_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL : err; + } + + if (ubi_dbg_is_write_failure()) { + dbg_err("cannot write %d bytes to PEB %d:%d " + "(emulated)", len, pnum, offset); + ubi_dbg_dump_stack(); + return -EIO; + } + + addr = (loff_t)pnum * ubi->peb_size + offset; + err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf); + if (err) { + ubi_err("error %d while writing %d bytes to PEB %d:%d, written" + " %zd bytes", err, len, pnum, offset, written); + ubi_dbg_dump_stack(); + } else + ubi_assert(written == len); + + return err; +} + +/** + * erase_callback - MTD erasure call-back. + * @ei: MTD erase information object. + * + * Note, even though MTD erase interface is asynchronous, all the current + * implementations are synchronous anyway. + */ +static void erase_callback(struct erase_info *ei) +{ + wake_up_interruptible((wait_queue_head_t *)ei->priv); +} + +/** + * do_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to erase + * + * This function synchronously erases physical eraseblock @pnum and returns + * zero in case of success and a negative error code in case of failure. If + * %-EIO is returned, the physical eraseblock most probably went bad. + */ +static int do_sync_erase(const struct ubi_device *ubi, int pnum) +{ + int err, retries = 0; + struct erase_info ei; + wait_queue_head_t wq; + + dbg_io("erase PEB %d", pnum); + +retry: + init_waitqueue_head(&wq); + memset(&ei, 0, sizeof(struct erase_info)); + + ei.mtd = ubi->mtd; + ei.addr = pnum * ubi->peb_size; + ei.len = ubi->peb_size; + ei.callback = erase_callback; + ei.priv = (unsigned long)&wq; + + err = ubi->mtd->erase(ubi->mtd, &ei); + if (err) { + if (retries++ < UBI_IO_RETRIES) { + dbg_io("error %d while erasing PEB %d, retry", + err, pnum); + yield(); + goto retry; + } + ubi_err("cannot erase PEB %d, error %d", pnum, err); + ubi_dbg_dump_stack(); + return err; + } + + err = wait_event_interruptible(wq, ei.state == MTD_ERASE_DONE || + ei.state == MTD_ERASE_FAILED); + if (err) { + ubi_err("interrupted PEB %d erasure", pnum); + return -EINTR; + } + + if (ei.state == MTD_ERASE_FAILED) { + if (retries++ < UBI_IO_RETRIES) { + dbg_io("error while erasing PEB %d, retry", pnum); + yield(); + goto retry; + } + ubi_err("cannot erase PEB %d", pnum); + ubi_dbg_dump_stack(); + return -EIO; + } + + err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size); + if (err) + return err > 0 ? -EINVAL : err; + + if (ubi_dbg_is_erase_failure() && !err) { + dbg_err("cannot erase PEB %d (emulated)", pnum); + return -EIO; + } + + return 0; +} + +/** + * check_pattern - check if buffer contains only a certain byte pattern. + * @buf: buffer to check + * @patt: the pattern to check + * @size: buffer size in bytes + * + * This function returns %1 in there are only @patt bytes in @buf, and %0 if + * something else was also found. + */ +static int check_pattern(const void *buf, uint8_t patt, int size) +{ + int i; + + for (i = 0; i < size; i++) + if (((const uint8_t *)buf)[i] != patt) + return 0; + return 1; +} + +/* Patterns to write to a physical eraseblock when torturing it */ +static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; + +/** + * torture_peb - test a supposedly bad physical eraseblock. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to test + * + * This function returns %-EIO if the physical eraseblock did not pass the + * test, a positive number of erase operations done if the test was + * successfully passed, and other negative error codes in case of other errors. + */ +static int torture_peb(const struct ubi_device *ubi, int pnum) +{ + void *buf; + int err, i, patt_count; + + buf = kmalloc(ubi->peb_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + patt_count = ARRAY_SIZE(patterns); + ubi_assert(patt_count > 0); + + for (i = 0; i < patt_count; i++) { + err = do_sync_erase(ubi, pnum); + if (err) + goto out; + + /* Make sure the PEB contains only 0xFF bytes */ + err = ubi_io_read(ubi, buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = check_pattern(buf, 0xFF, ubi->peb_size); + if (err == 0) { + ubi_err("erased PEB %d, but a non-0xFF byte found", + pnum); + err = -EIO; + goto out; + } + + /* Write a pattern and check it */ + memset(buf, patterns[i], ubi->peb_size); + err = ubi_io_write(ubi, buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + memset(buf, ~patterns[i], ubi->peb_size); + err = ubi_io_read(ubi, buf, pnum, 0, ubi->peb_size); + if (err) + goto out; + + err = check_pattern(buf, patterns[i], ubi->peb_size); + if (err == 0) { + ubi_err("pattern %x checking failed for PEB %d", + patterns[i], pnum); + err = -EIO; + goto out; + } + } + + err = patt_count; + +out: + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) + /* + * If a bit-flip or data integrity error was detected, the test + * has not passed because it happened on a freshly erased + * physical eraseblock which means something is wrong with it. + */ + err = -EIO; + kfree(buf); + return err; +} + +/** + * ubi_io_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to erase + * @torture: if this physical eraseblock has to be tortured + * + * This function synchronously erases physical eraseblock @pnum. If @torture + * flag is not zero, the physical eraseblock is checked by means of writing + * different patterns to it and reading them back. If the torturing is enabled, + * the physical eraseblock is erased more then once. + * + * This function returns the number of erasures made in case of success, %-EIO + * if the erasure failed or the torturing test failed, and other negative error + * codes in case of other errors. Note, %-EIO means that the physical + * eraseblock is bad. + */ +int ubi_io_sync_erase(const struct ubi_device *ubi, int pnum, int torture) +{ + int err, ret = 0; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = paranoid_check_not_bad(ubi, pnum); + if (err != 0) + return err > 0 ? -EINVAL : err; + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + if (torture) { + ret = torture_peb(ubi, pnum); + if (ret < 0) + return ret; + } + + err = do_sync_erase(ubi, pnum); + if (err) + return err; + + return ret + 1; +} + +/** + * ubi_io_is_bad - check if a physical eraseblock is bad. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns a positive number if the physical eraseblock is bad, + * zero if not, and a negative error code if an error occurred. + */ +int ubi_io_is_bad(const struct ubi_device *ubi, int pnum) +{ + struct mtd_info *mtd = ubi->mtd; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->bad_allowed) { + int ret; + + ret = mtd->block_isbad(mtd, (loff_t)pnum * ubi->peb_size); + if (ret < 0) + ubi_err("error %d while checking if PEB %d is bad", + ret, pnum); + else if (ret) + dbg_io("PEB %d is bad", pnum); + return ret; + } + + return 0; +} + +/** + * ubi_io_mark_bad - mark a physical eraseblock as bad. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to mark + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum) +{ + int err; + struct mtd_info *mtd = ubi->mtd; + + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + + if (!ubi->bad_allowed) + return 0; + + err = mtd->block_markbad(mtd, (loff_t)pnum * ubi->peb_size); + if (err) + ubi_err("cannot mark PEB %d bad, error %d", pnum, err); + return err; +} + +/** + * validate_ec_hdr - validate an erase counter header. + * @ubi: UBI device description object + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header is OK, and %1 if + * not. + */ +static int validate_ec_hdr(const struct ubi_device *ubi, + const struct ubi_ec_hdr *ec_hdr) +{ + long long ec; + int vid_hdr_offset, leb_start; + + ec = ubi64_to_cpu(ec_hdr->ec); + vid_hdr_offset = ubi32_to_cpu(ec_hdr->vid_hdr_offset); + leb_start = ubi32_to_cpu(ec_hdr->data_offset); + + if (ec_hdr->version != UBI_VERSION) { + ubi_err("node with incompatible UBI version found: " + "this UBI version is %d, image version is %d", + UBI_VERSION, (int)ec_hdr->version); + goto bad; + } + + if (vid_hdr_offset != ubi->vid_hdr_offset) { + ubi_err("bad VID header offset %d, expected %d", + vid_hdr_offset, ubi->vid_hdr_offset); + goto bad; + } + + if (leb_start != ubi->leb_start) { + ubi_err("bad data offset %d, expected %d", + leb_start, ubi->leb_start); + goto bad; + } + + if (ec < 0 || ec > UBI_MAX_ERASECOUNTER) { + ubi_err("bad erase counter %lld", ec); + goto bad; + } + + return 0; + +bad: + ubi_err("bad EC header"); + ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_dbg_dump_stack(); + return 1; +} + +/** + * ubi_io_read_ec_hdr - read and check an erase counter header. + * @ubi: UBI device description object + * @pnum: physical eraseblock to read from + * @ec_hdr: a &struct ubi_ec_hdr object where to store the read erase counter + * header + * @verbose: be verbose if the header is corrupted or was not found + * + * This function reads erase counter header from physical eraseblock @pnum and + * stores it in @ec_hdr. This function also checks CRC checksum of the read + * erase counter header. The following codes may be returned: + * + * o %0 if the CRC checksum is correct and the header was successfully read; + * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected + * and corrected by the flash driver; this is harmless but may indicate that + * this eraseblock may become bad soon (but may be not); + * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error); + * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty; + * o a negative error code in case of failure. + */ +int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr, int verbose) +{ + int err, read_err = 0; + uint32_t crc, magic, hdr_crc; + + dbg_io("read EC header from PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (err) { + if (err != UBI_IO_BITFLIPS && err != -EBADMSG) + return err; + + /* + * We read all the data, but either a correctable bit-flip + * occurred, or MTD reported about some data integrity error, + * like an ECC error in case of NAND. The former is harmless, + * the later may mean that the read data is corrupted. But we + * have a CRC check-sum and we will detect this. If the EC + * header is still OK, we just report this as there was a + * bit-flip. + */ + read_err = err; + } + + magic = ubi32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + /* + * The magic field is wrong. Let's check if we have read all + * 0xFF. If yes, this physical eraseblock is assumed to be + * empty. + * + * But if there was a read error, we do not test it for all + * 0xFFs. Even if it does contain all 0xFFs, this error + * indicates that something is still wrong with this physical + * eraseblock and we anyway cannot treat it as empty. + */ + if (read_err != -EBADMSG && + check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { + /* The physical eraseblock is supposedly empty */ + + /* + * The below is just a paranoid check, it has to be + * compiled out if paranoid checks are disabled. + */ + err = paranoid_check_all_ff(ubi, pnum, 0, + ubi->peb_size); + if (err) + return err > 0 ? UBI_IO_BAD_EC_HDR : err; + + if (verbose) + ubi_warn("no EC header found at PEB %d, " + "only 0xFF bytes", pnum); + return UBI_IO_PEB_EMPTY; + } + + /* + * This is not a valid erase counter header, and these are not + * 0xFF bytes. Report that the header is corrupted. + */ + if (verbose) { + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_EC_HDR_MAGIC); + ubi_dbg_dump_ec_hdr(ec_hdr); + } + return UBI_IO_BAD_EC_HDR; + } + + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc); + + if (hdr_crc != crc) { + if (verbose) { + ubi_warn("bad EC header CRC at PEB %d, calculated %#08x," + " read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_ec_hdr(ec_hdr); + } + return UBI_IO_BAD_EC_HDR; + } + + /* And of course validate what has just been read from the media */ + err = validate_ec_hdr(ubi, ec_hdr); + if (err) { + ubi_err("validation failed for PEB %d", pnum); + return -EINVAL; + } + + return read_err ? UBI_IO_BITFLIPS : 0; +} + +/** + * ubi_io_write_ec_hdr - write an erase counter header. + * @ubi: UBI device description object + * @pnum: physical eraseblock to write to + * @ec_hdr: the erase counter header to write + * + * This function writes erase counter header described by @ec_hdr to physical + * eraseblock @pnum. It also fills most fields of @ec_hdr before writing, so + * the caller do not have to fill them. Callers must only fill the @ec_hdr->ec + * field. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If %-EIO is returned, the physical eraseblock most probably + * went bad. + */ +int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr) +{ + int err; + uint32_t crc; + + dbg_io("write EC header to PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + ec_hdr->magic = cpu_to_ubi32(UBI_EC_HDR_MAGIC); + ec_hdr->version = UBI_VERSION; + ec_hdr->vid_hdr_offset = cpu_to_ubi32(ubi->vid_hdr_offset); + ec_hdr->data_offset = cpu_to_ubi32(ubi->leb_start); + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + ec_hdr->hdr_crc = cpu_to_ubi32(crc); + + err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); + if (err) + return -EINVAL; + + err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize); + return err; +} + +/** + * validate_vid_hdr - validate a volume identifier header. + * @ubi: UBI device description object + * @vid_hdr: the volume identifier header to check + * + * This function checks that data stored in the volume identifier header + * @vid_hdr. Returns zero if the VID header is OK and %1 if not. + */ +static int validate_vid_hdr(const struct ubi_device *ubi, + const struct ubi_vid_hdr *vid_hdr) +{ + int vol_type = vid_hdr->vol_type; + int copy_flag = vid_hdr->copy_flag; + int vol_id = ubi32_to_cpu(vid_hdr->vol_id); + int lnum = ubi32_to_cpu(vid_hdr->lnum); + int compat = vid_hdr->compat; + int data_size = ubi32_to_cpu(vid_hdr->data_size); + int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); + int data_pad = ubi32_to_cpu(vid_hdr->data_pad); + int data_crc = ubi32_to_cpu(vid_hdr->data_crc); + int usable_leb_size = ubi->leb_size - data_pad; + + if (copy_flag != 0 && copy_flag != 1) { + dbg_err("bad copy_flag"); + goto bad; + } + + if (vol_id < 0 || lnum < 0 || data_size < 0 || used_ebs < 0 || + data_pad < 0) { + dbg_err("negative values"); + goto bad; + } + + if (vol_id >= UBI_MAX_VOLUMES && vol_id < UBI_INTERNAL_VOL_START) { + dbg_err("bad vol_id"); + goto bad; + } + + if (vol_id < UBI_INTERNAL_VOL_START && compat != 0) { + dbg_err("bad compat"); + goto bad; + } + + if (vol_id >= UBI_INTERNAL_VOL_START && compat != UBI_COMPAT_DELETE && + compat != UBI_COMPAT_RO && compat != UBI_COMPAT_PRESERVE && + compat != UBI_COMPAT_REJECT) { + dbg_err("bad compat"); + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { + dbg_err("bad vol_type"); + goto bad; + } + + if (data_pad >= ubi->leb_size / 2) { + dbg_err("bad data_pad"); + goto bad; + } + + if (vol_type == UBI_VID_STATIC) { + /* + * Although from high-level point of view static volumes may + * contain zero bytes of data, but no VID headers can contain + * zero at these fields, because they empty volumes do not have + * mapped logical eraseblocks. + */ + if (used_ebs == 0) { + dbg_err("zero used_ebs"); + goto bad; + } + if (data_size == 0) { + dbg_err("zero data_size"); + goto bad; + } + if (lnum < used_ebs - 1) { + if (data_size != usable_leb_size) { + dbg_err("bad data_size"); + goto bad; + } + } else if (lnum == used_ebs - 1) { + if (data_size == 0) { + dbg_err("bad data_size at last LEB"); + goto bad; + } + } else { + dbg_err("too high lnum"); + goto bad; + } + } else { + if (copy_flag == 0) { + if (data_crc != 0) { + dbg_err("non-zero data CRC"); + goto bad; + } + if (data_size != 0) { + dbg_err("non-zero data_size"); + goto bad; + } + } else { + if (data_size == 0) { + dbg_err("zero data_size of copy"); + goto bad; + } + } + if (used_ebs != 0) { + dbg_err("bad used_ebs"); + goto bad; + } + } + + return 0; + +bad: + ubi_err("bad VID header"); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_stack(); + return 1; +} + +/** + * ubi_io_read_vid_hdr - read and check a volume identifier header. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to read from + * @vid_hdr: &struct ubi_vid_hdr object where to store the read volume + * identifier header + * @verbose: be verbose if the header is corrupted or wasn't found + * + * This function reads the volume identifier header from physical eraseblock + * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read + * volume identifier header. The following codes may be returned: + * + * o %0 if the CRC checksum is correct and the header was successfully read; + * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected + * and corrected by the flash driver; this is harmless but may indicate that + * this eraseblock may become bad soon; + * o %UBI_IO_BAD_VID_HRD if the volume identifier header is corrupted (a CRC + * error detected); + * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID + * header there); + * o a negative error code in case of failure. + */ +int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr, int verbose) +{ + int err, read_err = 0; + uint32_t crc, magic, hdr_crc; + void *p; + + dbg_io("read VID header from PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); + if (err) { + if (err != UBI_IO_BITFLIPS && err != -EBADMSG) + return err; + + /* + * We read all the data, but either a correctable bit-flip + * occurred, or MTD reported about some data integrity error, + * like an ECC error in case of NAND. The former is harmless, + * the later may mean the read data is corrupted. But we have a + * CRC check-sum and we will identify this. If the VID header is + * still OK, we just report this as there was a bit-flip. + */ + read_err = err; + } + + magic = ubi32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { + /* + * If we have read all 0xFF bytes, the VID header probably does + * not exist and the physical eraseblock is assumed to be free. + * + * But if there was a read error, we do not test the data for + * 0xFFs. Even if it does contain all 0xFFs, this error + * indicates that something is still wrong with this physical + * eraseblock and it cannot be regarded as free. + */ + if (read_err != -EBADMSG && + check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { + /* The physical eraseblock is supposedly free */ + + /* + * The below is just a paranoid check, it has to be + * compiled out if paranoid checks are disabled. + */ + err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start, + ubi->leb_size); + if (err) + return err > 0 ? UBI_IO_BAD_VID_HDR : err; + + if (verbose) + ubi_warn("no VID header found at PEB %d, " + "only 0xFF bytes", pnum); + return UBI_IO_PEB_FREE; + } + + /* + * This is not a valid VID header, and these are not 0xFF + * bytes. Report that the header is corrupted. + */ + if (verbose) { + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_VID_HDR_MAGIC); + ubi_dbg_dump_vid_hdr(vid_hdr); + } + return UBI_IO_BAD_VID_HDR; + } + + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); + hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc); + + if (hdr_crc != crc) { + if (verbose) { + ubi_warn("bad CRC at PEB %d, calculated %#08x, " + "read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_vid_hdr(vid_hdr); + } + return UBI_IO_BAD_VID_HDR; + } + + /* Validate the VID header that we have just read */ + err = validate_vid_hdr(ubi, vid_hdr); + if (err) { + ubi_err("validation failed for PEB %d", pnum); + return -EINVAL; + } + + return read_err ? UBI_IO_BITFLIPS : 0; +} + +/** + * ubi_io_write_vid_hdr - write a volume identifier header. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to write to + * @vid_hdr: the volume identifier header to write + * + * This function writes the volume identifier header described by @vid_hdr to + * physical eraseblock @pnum. This function automatically fills the + * @vid_hdr->magic and the @vid_hdr->version fields, as well as calculates + * header CRC checksum and stores it at vid_hdr->hdr_crc. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If %-EIO is returned, the physical eraseblock probably went + * bad. + */ +int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr) +{ + int err; + uint32_t crc; + void *p; + + dbg_io("write VID header to PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) + return err > 0 ? -EINVAL: err; + + vid_hdr->magic = cpu_to_ubi32(UBI_VID_HDR_MAGIC); + vid_hdr->version = UBI_VERSION; + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); + vid_hdr->hdr_crc = cpu_to_ubi32(crc); + + err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); + if (err) + return -EINVAL; + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); + return err; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_not_bad - ensure that a physical eraseblock is not bad. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to check + * + * This function returns zero if the physical eraseblock is good, a positive + * number if it is bad and a negative error code if an error occurred. + */ +static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) +{ + int err; + + err = ubi_io_is_bad(ubi, pnum); + if (!err) + return err; + + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_stack(); + return err; +} + +/** + * paranoid_check_ec_hdr - check if an erase counter header is all right. + * @ubi: UBI device description object + * @pnum: physical eraseblock number the erase counter header belongs to + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header contains valid + * values, and %1 if not. + */ +static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr) +{ + int err; + uint32_t magic; + + magic = ubi32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + ubi_err("bad magic %#08x, must be %#08x", + magic, UBI_EC_HDR_MAGIC); + goto fail; + } + + err = validate_ec_hdr(ubi, ec_hdr); + if (err) { + ubi_err("paranoid check failed for PEB %d", pnum); + goto fail; + } + + return 0; + +fail: + ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_dbg_dump_stack(); + return 1; +} + +/** + * paranoid_check_peb_ec_hdr - check that the erase counter header of a + * physical eraseblock is in-place and is all right. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns zero if the erase counter header is all right, %1 if + * not, and a negative error code if an error occurred. + */ +static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) +{ + int err; + uint32_t crc, hdr_crc; + struct ubi_ec_hdr *ec_hdr; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); + if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + goto exit; + + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); + hdr_crc = ubi32_to_cpu(ec_hdr->hdr_crc); + if (hdr_crc != crc) { + ubi_err("bad CRC, calculated %#08x, read %#08x", crc, hdr_crc); + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_dbg_dump_stack(); + err = 1; + goto exit; + } + + err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); + +exit: + kfree(ec_hdr); + return err; +} + +/** + * paranoid_check_vid_hdr - check that a volume identifier header is all right. + * @ubi: UBI device description object + * @pnum: physical eraseblock number the volume identifier header belongs to + * @vid_hdr: the volume identifier header to check + * + * This function returns zero if the volume identifier header is all right, and + * %1 if not. + */ +static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr) +{ + int err; + uint32_t magic; + + magic = ubi32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { + ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x", + magic, pnum, UBI_VID_HDR_MAGIC); + goto fail; + } + + err = validate_vid_hdr(ubi, vid_hdr); + if (err) { + ubi_err("paranoid check failed for PEB %d", pnum); + goto fail; + } + + return err; + +fail: + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_stack(); + return 1; + +} + +/** + * paranoid_check_peb_vid_hdr - check that the volume identifier header of a + * physical eraseblock is in-place and is all right. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * + * This function returns zero if the volume identifier header is all right, + * %1 if not, and a negative error code if an error occurred. + */ +static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) +{ + int err; + uint32_t crc, hdr_crc; + struct ubi_vid_hdr *vid_hdr; + void *p; + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); + if (err && err != UBI_IO_BITFLIPS && err != -EBADMSG) + goto exit; + + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_EC_HDR_SIZE_CRC); + hdr_crc = ubi32_to_cpu(vid_hdr->hdr_crc); + if (hdr_crc != crc) { + ubi_err("bad VID header CRC at PEB %d, calculated %#08x, " + "read %#08x", pnum, crc, hdr_crc); + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_stack(); + err = 1; + goto exit; + } + + err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); + +exit: + ubi_free_vid_hdr(ubi, vid_hdr); + return err; +} + +/** + * paranoid_check_all_ff - check that a region of flash is empty. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @offset: the starting offset within the physical eraseblock to check + * @len: the length of the region to check + * + * This function returns zero if only 0xFF bytes are present at offset + * @offset of the physical eraseblock @pnum, %1 if not, and a negative error + * code if an error occurred. + */ +static int paranoid_check_all_ff(const struct ubi_device *ubi, int pnum, + int offset, int len) +{ + size_t read; + int err; + void *buf; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + + buf = kzalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + if (err && err != -EUCLEAN) { + ubi_err("error %d while reading %d bytes from PEB %d:%d, " + "read %zd bytes", err, len, pnum, offset, read); + goto error; + } + + err = check_pattern(buf, 0xFF, len); + if (err == 0) { + ubi_err("flash region at PEB %d:%d, length %d does not " + "contain all 0xFF bytes", pnum, offset, len); + goto fail; + } + + kfree(buf); + return 0; + +fail: + ubi_err("paranoid check failed for PEB %d", pnum); + dbg_msg("hex dump of the %d-%d region", offset, offset + len); + ubi_dbg_hexdump(buf, len); + err = 1; +error: + ubi_dbg_dump_stack(); + kfree(buf); + return err; +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c new file mode 100644 index 00000000000..d352c4575c3 --- /dev/null +++ b/drivers/mtd/ubi/kapi.c @@ -0,0 +1,575 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* This file mostly implements UBI kernel API functions */ + +#include <linux/module.h> +#include <linux/err.h> +#include <asm/div64.h> +#include "ubi.h" + +/** + * ubi_get_device_info - get information about UBI device. + * @ubi_num: UBI device number + * @di: the information is stored here + * + * This function returns %0 in case of success and a %-ENODEV if there is no + * such UBI device. + */ +int ubi_get_device_info(int ubi_num, struct ubi_device_info *di) +{ + const struct ubi_device *ubi; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || + !ubi_devices[ubi_num]) { + module_put(THIS_MODULE); + return -ENODEV; + } + + ubi = ubi_devices[ubi_num]; + di->ubi_num = ubi->ubi_num; + di->leb_size = ubi->leb_size; + di->min_io_size = ubi->min_io_size; + di->ro_mode = ubi->ro_mode; + di->cdev = MKDEV(ubi->major, 0); + module_put(THIS_MODULE); + return 0; +} +EXPORT_SYMBOL_GPL(ubi_get_device_info); + +/** + * ubi_get_volume_info - get information about UBI volume. + * @desc: volume descriptor + * @vi: the information is stored here + */ +void ubi_get_volume_info(struct ubi_volume_desc *desc, + struct ubi_volume_info *vi) +{ + const struct ubi_volume *vol = desc->vol; + const struct ubi_device *ubi = vol->ubi; + + vi->vol_id = vol->vol_id; + vi->ubi_num = ubi->ubi_num; + vi->size = vol->reserved_pebs; + vi->used_bytes = vol->used_bytes; + vi->vol_type = vol->vol_type; + vi->corrupted = vol->corrupted; + vi->upd_marker = vol->upd_marker; + vi->alignment = vol->alignment; + vi->usable_leb_size = vol->usable_leb_size; + vi->name_len = vol->name_len; + vi->name = vol->name; + vi->cdev = MKDEV(ubi->major, vi->vol_id + 1); +} +EXPORT_SYMBOL_GPL(ubi_get_volume_info); + +/** + * ubi_open_volume - open UBI volume. + * @ubi_num: UBI device number + * @vol_id: volume ID + * @mode: open mode + * + * The @mode parameter specifies if the volume should be opened in read-only + * mode, read-write mode, or exclusive mode. The exclusive mode guarantees that + * nobody else will be able to open this volume. UBI allows to have many volume + * readers and one writer at a time. + * + * If a static volume is being opened for the first time since boot, it will be + * checked by this function, which means it will be fully read and the CRC + * checksum of each logical eraseblock will be checked. + * + * This function returns volume descriptor in case of success and a negative + * error code in case of failure. + */ +struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode) +{ + int err; + struct ubi_volume_desc *desc; + struct ubi_device *ubi = ubi_devices[ubi_num]; + struct ubi_volume *vol; + + dbg_msg("open device %d volume %d, mode %d", ubi_num, vol_id, mode); + + err = -ENODEV; + if (!try_module_get(THIS_MODULE)) + return ERR_PTR(err); + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi) + goto out_put; + + err = -EINVAL; + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + goto out_put; + if (mode != UBI_READONLY && mode != UBI_READWRITE && + mode != UBI_EXCLUSIVE) + goto out_put; + + desc = kmalloc(sizeof(struct ubi_volume_desc), GFP_KERNEL); + if (!desc) { + err = -ENOMEM; + goto out_put; + } + + spin_lock(&ubi->volumes_lock); + vol = ubi->volumes[vol_id]; + if (!vol) { + err = -ENODEV; + goto out_unlock; + } + + err = -EBUSY; + switch (mode) { + case UBI_READONLY: + if (vol->exclusive) + goto out_unlock; + vol->readers += 1; + break; + + case UBI_READWRITE: + if (vol->exclusive || vol->writers > 0) + goto out_unlock; + vol->writers += 1; + break; + + case UBI_EXCLUSIVE: + if (vol->exclusive || vol->writers || vol->readers) + goto out_unlock; + vol->exclusive = 1; + break; + } + spin_unlock(&ubi->volumes_lock); + + desc->vol = vol; + desc->mode = mode; + + /* + * To prevent simultaneous checks of the same volume we use @vtbl_mutex, + * although it is not the purpose it was introduced for. + */ + mutex_lock(&ubi->vtbl_mutex); + if (!vol->checked) { + /* This is the first open - check the volume */ + err = ubi_check_volume(ubi, vol_id); + if (err < 0) { + mutex_unlock(&ubi->vtbl_mutex); + ubi_close_volume(desc); + return ERR_PTR(err); + } + if (err == 1) { + ubi_warn("volume %d on UBI device %d is corrupted", + vol_id, ubi->ubi_num); + vol->corrupted = 1; + } + vol->checked = 1; + } + mutex_unlock(&ubi->vtbl_mutex); + return desc; + +out_unlock: + spin_unlock(&ubi->volumes_lock); + kfree(desc); +out_put: + module_put(THIS_MODULE); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(ubi_open_volume); + +/** + * ubi_open_volume_nm - open UBI volume by name. + * @ubi_num: UBI device number + * @name: volume name + * @mode: open mode + * + * This function is similar to 'ubi_open_volume()', but opens a volume by name. + */ +struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, + int mode) +{ + int i, vol_id = -1, len; + struct ubi_volume_desc *ret; + struct ubi_device *ubi; + + dbg_msg("open volume %s, mode %d", name, mode); + + if (!name) + return ERR_PTR(-EINVAL); + + len = strnlen(name, UBI_VOL_NAME_MAX + 1); + if (len > UBI_VOL_NAME_MAX) + return ERR_PTR(-EINVAL); + + ret = ERR_PTR(-ENODEV); + if (!try_module_get(THIS_MODULE)) + return ret; + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES || !ubi_devices[ubi_num]) + goto out_put; + + ubi = ubi_devices[ubi_num]; + + spin_lock(&ubi->volumes_lock); + /* Walk all volumes of this UBI device */ + for (i = 0; i < ubi->vtbl_slots; i++) { + struct ubi_volume *vol = ubi->volumes[i]; + + if (vol && len == vol->name_len && !strcmp(name, vol->name)) { + vol_id = i; + break; + } + } + spin_unlock(&ubi->volumes_lock); + + if (vol_id < 0) + goto out_put; + + ret = ubi_open_volume(ubi_num, vol_id, mode); + +out_put: + module_put(THIS_MODULE); + return ret; +} +EXPORT_SYMBOL_GPL(ubi_open_volume_nm); + +/** + * ubi_close_volume - close UBI volume. + * @desc: volume descriptor + */ +void ubi_close_volume(struct ubi_volume_desc *desc) +{ + struct ubi_volume *vol = desc->vol; + + dbg_msg("close volume %d, mode %d", vol->vol_id, desc->mode); + + spin_lock(&vol->ubi->volumes_lock); + switch (desc->mode) { + case UBI_READONLY: + vol->readers -= 1; + break; + case UBI_READWRITE: + vol->writers -= 1; + break; + case UBI_EXCLUSIVE: + vol->exclusive = 0; + } + spin_unlock(&vol->ubi->volumes_lock); + + kfree(desc); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(ubi_close_volume); + +/** + * ubi_leb_read - read data. + * @desc: volume descriptor + * @lnum: logical eraseblock number to read from + * @buf: buffer where to store the read data + * @offset: offset within the logical eraseblock to read from + * @len: how many bytes to read + * @check: whether UBI has to check the read data's CRC or not. + * + * This function reads data from offset @offset of logical eraseblock @lnum and + * stores the data at @buf. When reading from static volumes, @check specifies + * whether the data has to be checked or not. If yes, the whole logical + * eraseblock will be read and its CRC checksum will be checked (i.e., the CRC + * checksum is per-eraseblock). So checking may substantially slow down the + * read speed. The @check argument is ignored for dynamic volumes. + * + * In case of success, this function returns zero. In case of failure, this + * function returns a negative error code. + * + * %-EBADMSG error code is returned: + * o for both static and dynamic volumes if MTD driver has detected a data + * integrity problem (unrecoverable ECC checksum mismatch in case of NAND); + * o for static volumes in case of data CRC mismatch. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF error code. + */ +int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err, vol_id = vol->vol_id; + + dbg_msg("read %d bytes from LEB %d:%d:%d", len, vol_id, lnum, offset); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots || lnum < 0 || + lnum >= vol->used_ebs || offset < 0 || len < 0 || + offset + len > vol->usable_leb_size) + return -EINVAL; + + if (vol->vol_type == UBI_STATIC_VOLUME && lnum == vol->used_ebs - 1 && + offset + len > vol->last_eb_bytes) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + if (len == 0) + return 0; + + err = ubi_eba_read_leb(ubi, vol_id, lnum, buf, offset, len, check); + if (err && err == -EBADMSG && vol->vol_type == UBI_STATIC_VOLUME) { + ubi_warn("mark volume %d as corrupted", vol_id); + vol->corrupted = 1; + } + + return err; +} +EXPORT_SYMBOL_GPL(ubi_leb_read); + +/** + * ubi_leb_write - write data. + * @desc: volume descriptor + * @lnum: logical eraseblock number to write to + * @buf: data to write + * @offset: offset within the logical eraseblock where to write + * @len: how many bytes to write + * @dtype: expected data type + * + * This function writes @len bytes of data from @buf to offset @offset of + * logical eraseblock @lnum. The @dtype argument describes expected lifetime of + * the data. + * + * This function takes care of physical eraseblock write failures. If write to + * the physical eraseblock write operation fails, the logical eraseblock is + * re-mapped to another physical eraseblock, the data is recovered, and the + * write finishes. UBI has a pool of reserved physical eraseblocks for this. + * + * If all the data were successfully written, zero is returned. If an error + * occurred and UBI has not been able to recover from it, this function returns + * a negative error code. Note, in case of an error, it is possible that + * something was still written to the flash media, but that may be some + * garbage. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF code. + */ +int ubi_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, + int offset, int len, int dtype) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + + dbg_msg("write %d bytes to LEB %d:%d:%d", len, vol_id, lnum, offset); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + return -EINVAL; + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs || offset < 0 || len < 0 || + offset + len > vol->usable_leb_size || offset % ubi->min_io_size || + len % ubi->min_io_size) + return -EINVAL; + + if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && + dtype != UBI_UNKNOWN) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + if (len == 0) + return 0; + + return ubi_eba_write_leb(ubi, vol_id, lnum, buf, offset, len, dtype); +} +EXPORT_SYMBOL_GPL(ubi_leb_write); + +/* + * ubi_leb_change - change logical eraseblock atomically. + * @desc: volume descriptor + * @lnum: logical eraseblock number to change + * @buf: data to write + * @len: how many bytes to write + * @dtype: expected data type + * + * This function changes the contents of a logical eraseblock atomically. @buf + * has to contain new logical eraseblock data, and @len - the length of the + * data, which has to be aligned. The length may be shorter then the logical + * eraseblock size, ant the logical eraseblock may be appended to more times + * later on. This function guarantees that in case of an unclean reboot the old + * contents is preserved. Returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, + int len, int dtype) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + + dbg_msg("atomically write %d bytes to LEB %d:%d", len, vol_id, lnum); + + if (vol_id < 0 || vol_id >= ubi->vtbl_slots) + return -EINVAL; + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs || len < 0 || + len > vol->usable_leb_size || len % ubi->min_io_size) + return -EINVAL; + + if (dtype != UBI_LONGTERM && dtype != UBI_SHORTTERM && + dtype != UBI_UNKNOWN) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + if (len == 0) + return 0; + + return ubi_eba_atomic_leb_change(ubi, vol_id, lnum, buf, len, dtype); +} +EXPORT_SYMBOL_GPL(ubi_leb_change); + +/** + * ubi_leb_erase - erase logical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and synchronously erases the + * correspondent physical eraseblock. Returns zero in case of success and a + * negative error code in case of failure. + * + * If the volume is damaged because of an interrupted update this function just + * returns immediately with %-EBADF code. + */ +int ubi_leb_erase(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int err, vol_id = vol->vol_id; + + dbg_msg("erase LEB %d:%d", vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + err = ubi_eba_unmap_leb(ubi, vol_id, lnum); + if (err) + return err; + + return ubi_wl_flush(ubi); +} +EXPORT_SYMBOL_GPL(ubi_leb_erase); + +/** + * ubi_leb_unmap - un-map logical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function un-maps logical eraseblock @lnum and schedules the + * corresponding physical eraseblock for erasure, so that it will eventually be + * physically erased in background. This operation is much faster then the + * erase operation. + * + * Unlike erase, the un-map operation does not guarantee that the logical + * eraseblock will contain all 0xFF bytes when UBI is initialized again. For + * example, if several logical eraseblocks are un-mapped, and an unclean reboot + * happens after this, the logical eraseblocks will not necessarily be + * un-mapped again when this MTD device is attached. They may actually be + * mapped to the same physical eraseblocks again. So, this function has to be + * used with care. + * + * In other words, when un-mapping a logical eraseblock, UBI does not store + * any information about this on the flash media, it just marks the logical + * eraseblock as "un-mapped" in RAM. If UBI is detached before the physical + * eraseblock is physically erased, it will be mapped again to the same logical + * eraseblock when the MTD device is attached again. + * + * The main and obvious use-case of this function is when the contents of a + * logical eraseblock has to be re-written. Then it is much more efficient to + * first un-map it, then write new data, rather then first erase it, then write + * new data. Note, once new data has been written to the logical eraseblock, + * UBI guarantees that the old contents has gone forever. In other words, if an + * unclean reboot happens after the logical eraseblock has been un-mapped and + * then written to, it will contain the last written data. + * + * This function returns zero in case of success and a negative error code in + * case of failure. If the volume is damaged because of an interrupted update + * this function just returns immediately with %-EBADF code. + */ +int ubi_leb_unmap(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int vol_id = vol->vol_id; + + dbg_msg("unmap LEB %d:%d", vol_id, lnum); + + if (desc->mode == UBI_READONLY || vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + return ubi_eba_unmap_leb(ubi, vol_id, lnum); +} +EXPORT_SYMBOL_GPL(ubi_leb_unmap); + +/** + * ubi_is_mapped - check if logical eraseblock is mapped. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * + * This function checks if logical eraseblock @lnum is mapped to a physical + * eraseblock. If a logical eraseblock is un-mapped, this does not necessarily + * mean it will still be un-mapped after the UBI device is re-attached. The + * logical eraseblock may become mapped to the physical eraseblock it was last + * mapped to. + * + * This function returns %1 if the LEB is mapped, %0 if not, and a negative + * error code in case of failure. If the volume is damaged because of an + * interrupted update this function just returns immediately with %-EBADF error + * code. + */ +int ubi_is_mapped(struct ubi_volume_desc *desc, int lnum) +{ + struct ubi_volume *vol = desc->vol; + + dbg_msg("test LEB %d:%d", vol->vol_id, lnum); + + if (lnum < 0 || lnum >= vol->reserved_pebs) + return -EINVAL; + + if (vol->upd_marker) + return -EBADF; + + return vol->eba_tbl[lnum] >= 0; +} +EXPORT_SYMBOL_GPL(ubi_is_mapped); diff --git a/drivers/mtd/ubi/misc.c b/drivers/mtd/ubi/misc.c new file mode 100644 index 00000000000..38d4e6757dc --- /dev/null +++ b/drivers/mtd/ubi/misc.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* Here we keep miscellaneous functions which are used all over the UBI code */ + +#include "ubi.h" + +/** + * calc_data_len - calculate how much real data is stored in a buffer. + * @ubi: UBI device description object + * @buf: a buffer with the contents of the physical eraseblock + * @length: the buffer length + * + * This function calculates how much "real data" is stored in @buf and returnes + * the length. Continuous 0xFF bytes at the end of the buffer are not + * considered as "real data". + */ +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, + int length) +{ + int i; + + ubi_assert(length % ubi->min_io_size == 0); + + for (i = length - 1; i >= 0; i--) + if (((const uint8_t *)buf)[i] != 0xFF) + break; + + /* The resulting length must be aligned to the minimum flash I/O size */ + length = ALIGN(i + 1, ubi->min_io_size); + return length; +} + +/** + * ubi_check_volume - check the contents of a static volume. + * @ubi: UBI device description object + * @vol_id: ID of the volume to check + * + * This function checks if static volume @vol_id is corrupted by fully reading + * it and checking data CRC. This function returns %0 if the volume is not + * corrupted, %1 if it is corrupted and a negative error code in case of + * failure. Dynamic volumes are not checked and zero is returned immediately. + */ +int ubi_check_volume(struct ubi_device *ubi, int vol_id) +{ + void *buf; + int err = 0, i; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + if (vol->vol_type != UBI_STATIC_VOLUME) + return 0; + + buf = kmalloc(vol->usable_leb_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; i < vol->used_ebs; i++) { + int size; + + if (i == vol->used_ebs - 1) + size = vol->last_eb_bytes; + else + size = vol->usable_leb_size; + + err = ubi_eba_read_leb(ubi, vol_id, i, buf, 0, size, 1); + if (err) { + if (err == -EBADMSG) + err = 1; + break; + } + } + + kfree(buf); + return err; +} + +/** + * ubi_calculate_rsvd_pool - calculate how many PEBs must be reserved for bad + * eraseblock handling. + * @ubi: UBI device description object + */ +void ubi_calculate_reserved(struct ubi_device *ubi) +{ + ubi->beb_rsvd_level = ubi->good_peb_count/100; + ubi->beb_rsvd_level *= CONFIG_MTD_UBI_BEB_RESERVE; + if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS) + ubi->beb_rsvd_level = MIN_RESEVED_PEBS; +} diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c new file mode 100644 index 00000000000..473f3200b86 --- /dev/null +++ b/drivers/mtd/ubi/scan.c @@ -0,0 +1,1368 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * UBI scanning unit. + * + * This unit is responsible for scanning the flash media, checking UBI + * headers and providing complete information about the UBI flash image. + * + * The scanning information is reoresented by a &struct ubi_scan_info' object. + * Information about found volumes is represented by &struct ubi_scan_volume + * objects which are kept in volume RB-tree with root at the @volumes field. + * The RB-tree is indexed by the volume ID. + * + * Found logical eraseblocks are represented by &struct ubi_scan_leb objects. + * These objects are kept in per-volume RB-trees with the root at the + * corresponding &struct ubi_scan_volume object. To put it differently, we keep + * an RB-tree of per-volume objects and each of these objects is the root of + * RB-tree of per-eraseblock objects. + * + * Corrupted physical eraseblocks are put to the @corr list, free physical + * eraseblocks are put to the @free list and the physical eraseblock to be + * erased are put to the @erase list. + */ + +#include <linux/err.h> +#include <linux/crc32.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static int paranoid_check_si(const struct ubi_device *ubi, + struct ubi_scan_info *si); +#else +#define paranoid_check_si(ubi, si) 0 +#endif + +/* Temporary variables used during scanning */ +static struct ubi_ec_hdr *ech; +static struct ubi_vid_hdr *vidh; + +int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec, + struct list_head *list) +{ + struct ubi_scan_leb *seb; + + if (list == &si->free) + dbg_bld("add to free: PEB %d, EC %d", pnum, ec); + else if (list == &si->erase) + dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); + else if (list == &si->corr) + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); + else if (list == &si->alien) + dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); + else + BUG(); + + seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); + if (!seb) + return -ENOMEM; + + seb->pnum = pnum; + seb->ec = ec; + list_add_tail(&seb->u.list, list); + return 0; +} + +/** + * commit_to_mean_value - commit intermediate results to the final mean erase + * counter value. + * @si: scanning information + * + * This is a helper function which calculates partial mean erase counter mean + * value and adds it to the resulting mean value. As we can work only in + * integer arithmetic and we want to calculate the mean value of erase counter + * accurately, we first sum erase counter values in @si->ec_sum variable and + * count these components in @si->ec_count. If this temporary @si->ec_sum is + * going to overflow, we calculate the partial mean value + * (@si->ec_sum/@si->ec_count) and add it to @si->mean_ec. + */ +static void commit_to_mean_value(struct ubi_scan_info *si) +{ + si->ec_sum /= si->ec_count; + if (si->ec_sum % si->ec_count >= si->ec_count / 2) + si->mean_ec += 1; + si->mean_ec += si->ec_sum; +} + +/** + * validate_vid_hdr - check that volume identifier header is correct and + * consistent. + * @vid_hdr: the volume identifier header to check + * @sv: information about the volume this logical eraseblock belongs to + * @pnum: physical eraseblock number the VID header came from + * + * This function checks that data stored in @vid_hdr is consistent. Returns + * non-zero if an inconsistency was found and zero if not. + * + * Note, UBI does sanity check of everything it reads from the flash media. + * Most of the checks are done in the I/O unit. Here we check that the + * information in the VID header is consistent to the information in other VID + * headers of the same volume. + */ +static int validate_vid_hdr(const struct ubi_vid_hdr *vid_hdr, + const struct ubi_scan_volume *sv, int pnum) +{ + int vol_type = vid_hdr->vol_type; + int vol_id = ubi32_to_cpu(vid_hdr->vol_id); + int used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); + int data_pad = ubi32_to_cpu(vid_hdr->data_pad); + + if (sv->leb_count != 0) { + int sv_vol_type; + + /* + * This is not the first logical eraseblock belonging to this + * volume. Ensure that the data in its VID header is consistent + * to the data in previous logical eraseblock headers. + */ + + if (vol_id != sv->vol_id) { + dbg_err("inconsistent vol_id"); + goto bad; + } + + if (sv->vol_type == UBI_STATIC_VOLUME) + sv_vol_type = UBI_VID_STATIC; + else + sv_vol_type = UBI_VID_DYNAMIC; + + if (vol_type != sv_vol_type) { + dbg_err("inconsistent vol_type"); + goto bad; + } + + if (used_ebs != sv->used_ebs) { + dbg_err("inconsistent used_ebs"); + goto bad; + } + + if (data_pad != sv->data_pad) { + dbg_err("inconsistent data_pad"); + goto bad; + } + } + + return 0; + +bad: + ubi_err("inconsistent VID header at PEB %d", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_sv(sv); + return -EINVAL; +} + +/** + * add_volume - add volume to the scanning information. + * @si: scanning information + * @vol_id: ID of the volume to add + * @pnum: physical eraseblock number + * @vid_hdr: volume identifier header + * + * If the volume corresponding to the @vid_hdr logical eraseblock is already + * present in the scanning information, this function does nothing. Otherwise + * it adds corresponding volume to the scanning information. Returns a pointer + * to the scanning volume object in case of success and a negative error code + * in case of failure. + */ +static struct ubi_scan_volume *add_volume(struct ubi_scan_info *si, int vol_id, + int pnum, + const struct ubi_vid_hdr *vid_hdr) +{ + struct ubi_scan_volume *sv; + struct rb_node **p = &si->volumes.rb_node, *parent = NULL; + + ubi_assert(vol_id == ubi32_to_cpu(vid_hdr->vol_id)); + + /* Walk the volume RB-tree to look if this volume is already present */ + while (*p) { + parent = *p; + sv = rb_entry(parent, struct ubi_scan_volume, rb); + + if (vol_id == sv->vol_id) + return sv; + + if (vol_id > sv->vol_id) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + /* The volume is absent - add it */ + sv = kmalloc(sizeof(struct ubi_scan_volume), GFP_KERNEL); + if (!sv) + return ERR_PTR(-ENOMEM); + + sv->highest_lnum = sv->leb_count = 0; + si->max_sqnum = 0; + sv->vol_id = vol_id; + sv->root = RB_ROOT; + sv->used_ebs = ubi32_to_cpu(vid_hdr->used_ebs); + sv->data_pad = ubi32_to_cpu(vid_hdr->data_pad); + sv->compat = vid_hdr->compat; + sv->vol_type = vid_hdr->vol_type == UBI_VID_DYNAMIC ? UBI_DYNAMIC_VOLUME + : UBI_STATIC_VOLUME; + if (vol_id > si->highest_vol_id) + si->highest_vol_id = vol_id; + + rb_link_node(&sv->rb, parent, p); + rb_insert_color(&sv->rb, &si->volumes); + si->vols_found += 1; + dbg_bld("added volume %d", vol_id); + return sv; +} + +/** + * compare_lebs - find out which logical eraseblock is newer. + * @ubi: UBI device description object + * @seb: first logical eraseblock to compare + * @pnum: physical eraseblock number of the second logical eraseblock to + * compare + * @vid_hdr: volume identifier header of the second logical eraseblock + * + * This function compares 2 copies of a LEB and informs which one is newer. In + * case of success this function returns a positive value, in case of failure, a + * negative error code is returned. The success return codes use the following + * bits: + * o bit 0 is cleared: the first PEB (described by @seb) is newer then the + * second PEB (described by @pnum and @vid_hdr); + * o bit 0 is set: the second PEB is newer; + * o bit 1 is cleared: no bit-flips were detected in the newer LEB; + * o bit 1 is set: bit-flips were detected in the newer LEB; + * o bit 2 is cleared: the older LEB is not corrupted; + * o bit 2 is set: the older LEB is corrupted. + */ +static int compare_lebs(const struct ubi_device *ubi, + const struct ubi_scan_leb *seb, int pnum, + const struct ubi_vid_hdr *vid_hdr) +{ + void *buf; + int len, err, second_is_newer, bitflips = 0, corrupted = 0; + uint32_t data_crc, crc; + struct ubi_vid_hdr *vidh = NULL; + unsigned long long sqnum2 = ubi64_to_cpu(vid_hdr->sqnum); + + if (seb->sqnum == 0 && sqnum2 == 0) { + long long abs, v1 = seb->leb_ver, v2 = ubi32_to_cpu(vid_hdr->leb_ver); + + /* + * UBI constantly increases the logical eraseblock version + * number and it can overflow. Thus, we have to bear in mind + * that versions that are close to %0xFFFFFFFF are less then + * versions that are close to %0. + * + * The UBI WL unit guarantees that the number of pending tasks + * is not greater then %0x7FFFFFFF. So, if the difference + * between any two versions is greater or equivalent to + * %0x7FFFFFFF, there was an overflow and the logical + * eraseblock with lower version is actually newer then the one + * with higher version. + * + * FIXME: but this is anyway obsolete and will be removed at + * some point. + */ + + dbg_bld("using old crappy leb_ver stuff"); + + abs = v1 - v2; + if (abs < 0) + abs = -abs; + + if (abs < 0x7FFFFFFF) + /* Non-overflow situation */ + second_is_newer = (v2 > v1); + else + second_is_newer = (v2 < v1); + } else + /* Obviously the LEB with lower sequence counter is older */ + second_is_newer = sqnum2 > seb->sqnum; + + /* + * Now we know which copy is newer. If the copy flag of the PEB with + * newer version is not set, then we just return, otherwise we have to + * check data CRC. For the second PEB we already have the VID header, + * for the first one - we'll need to re-read it from flash. + * + * FIXME: this may be optimized so that we wouldn't read twice. + */ + + if (second_is_newer) { + if (!vid_hdr->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("second PEB %d is newer, copy_flag is unset", + pnum); + return 1; + } + } else { + pnum = seb->pnum; + + vidh = ubi_zalloc_vid_hdr(ubi); + if (!vidh) + return -ENOMEM; + + err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); + if (err) { + if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else { + dbg_err("VID of PEB %d header is bad, but it " + "was OK earlier", pnum); + if (err > 0) + err = -EIO; + + goto out_free_vidh; + } + } + + if (!vidh->copy_flag) { + /* It is not a copy, so it is newer */ + dbg_bld("first PEB %d is newer, copy_flag is unset", + pnum); + err = bitflips << 1; + goto out_free_vidh; + } + + vid_hdr = vidh; + } + + /* Read the data of the copy and check the CRC */ + + len = ubi32_to_cpu(vid_hdr->data_size); + buf = kmalloc(len, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto out_free_vidh; + } + + err = ubi_io_read_data(ubi, buf, pnum, 0, len); + if (err && err != UBI_IO_BITFLIPS) + goto out_free_buf; + + data_crc = ubi32_to_cpu(vid_hdr->data_crc); + crc = crc32(UBI_CRC32_INIT, buf, len); + if (crc != data_crc) { + dbg_bld("PEB %d CRC error: calculated %#08x, must be %#08x", + pnum, crc, data_crc); + corrupted = 1; + bitflips = 0; + second_is_newer = !second_is_newer; + } else { + dbg_bld("PEB %d CRC is OK", pnum); + bitflips = !!err; + } + + kfree(buf); + ubi_free_vid_hdr(ubi, vidh); + + if (second_is_newer) + dbg_bld("second PEB %d is newer, copy_flag is set", pnum); + else + dbg_bld("first PEB %d is newer, copy_flag is set", pnum); + + return second_is_newer | (bitflips << 1) | (corrupted << 2); + +out_free_buf: + kfree(buf); +out_free_vidh: + ubi_free_vid_hdr(ubi, vidh); + ubi_assert(err < 0); + return err; +} + +/** + * ubi_scan_add_used - add information about a physical eraseblock to the + * scanning information. + * @ubi: UBI device description object + * @si: scanning information + * @pnum: the physical eraseblock number + * @ec: erase counter + * @vid_hdr: the volume identifier header + * @bitflips: if bit-flips were detected when this physical eraseblock was read + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, + int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, + int bitflips) +{ + int err, vol_id, lnum; + uint32_t leb_ver; + unsigned long long sqnum; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb; + struct rb_node **p, *parent = NULL; + + vol_id = ubi32_to_cpu(vid_hdr->vol_id); + lnum = ubi32_to_cpu(vid_hdr->lnum); + sqnum = ubi64_to_cpu(vid_hdr->sqnum); + leb_ver = ubi32_to_cpu(vid_hdr->leb_ver); + + dbg_bld("PEB %d, LEB %d:%d, EC %d, sqnum %llu, ver %u, bitflips %d", + pnum, vol_id, lnum, ec, sqnum, leb_ver, bitflips); + + sv = add_volume(si, vol_id, pnum, vid_hdr); + if (IS_ERR(sv) < 0) + return PTR_ERR(sv); + + /* + * Walk the RB-tree of logical eraseblocks of volume @vol_id to look + * if this is the first instance of this logical eraseblock or not. + */ + p = &sv->root.rb_node; + while (*p) { + int cmp_res; + + parent = *p; + seb = rb_entry(parent, struct ubi_scan_leb, u.rb); + if (lnum != seb->lnum) { + if (lnum < seb->lnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + continue; + } + + /* + * There is already a physical eraseblock describing the same + * logical eraseblock present. + */ + + dbg_bld("this LEB already exists: PEB %d, sqnum %llu, " + "LEB ver %u, EC %d", seb->pnum, seb->sqnum, + seb->leb_ver, seb->ec); + + /* + * Make sure that the logical eraseblocks have different + * versions. Otherwise the image is bad. + */ + if (seb->leb_ver == leb_ver && leb_ver != 0) { + ubi_err("two LEBs with same version %u", leb_ver); + ubi_dbg_dump_seb(seb, 0); + ubi_dbg_dump_vid_hdr(vid_hdr); + return -EINVAL; + } + + /* + * Make sure that the logical eraseblocks have different + * sequence numbers. Otherwise the image is bad. + * + * FIXME: remove 'sqnum != 0' check when leb_ver is removed. + */ + if (seb->sqnum == sqnum && sqnum != 0) { + ubi_err("two LEBs with same sequence number %llu", + sqnum); + ubi_dbg_dump_seb(seb, 0); + ubi_dbg_dump_vid_hdr(vid_hdr); + return -EINVAL; + } + + /* + * Now we have to drop the older one and preserve the newer + * one. + */ + cmp_res = compare_lebs(ubi, seb, pnum, vid_hdr); + if (cmp_res < 0) + return cmp_res; + + if (cmp_res & 1) { + /* + * This logical eraseblock is newer then the one + * found earlier. + */ + err = validate_vid_hdr(vid_hdr, sv, pnum); + if (err) + return err; + + if (cmp_res & 4) + err = ubi_scan_add_to_list(si, seb->pnum, + seb->ec, &si->corr); + else + err = ubi_scan_add_to_list(si, seb->pnum, + seb->ec, &si->erase); + if (err) + return err; + + seb->ec = ec; + seb->pnum = pnum; + seb->scrub = ((cmp_res & 2) || bitflips); + seb->sqnum = sqnum; + seb->leb_ver = leb_ver; + + if (sv->highest_lnum == lnum) + sv->last_data_size = + ubi32_to_cpu(vid_hdr->data_size); + + return 0; + } else { + /* + * This logical eraseblock is older then the one found + * previously. + */ + if (cmp_res & 4) + return ubi_scan_add_to_list(si, pnum, ec, + &si->corr); + else + return ubi_scan_add_to_list(si, pnum, ec, + &si->erase); + } + } + + /* + * We've met this logical eraseblock for the first time, add it to the + * scanning information. + */ + + err = validate_vid_hdr(vid_hdr, sv, pnum); + if (err) + return err; + + seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); + if (!seb) + return -ENOMEM; + + seb->ec = ec; + seb->pnum = pnum; + seb->lnum = lnum; + seb->sqnum = sqnum; + seb->scrub = bitflips; + seb->leb_ver = leb_ver; + + if (sv->highest_lnum <= lnum) { + sv->highest_lnum = lnum; + sv->last_data_size = ubi32_to_cpu(vid_hdr->data_size); + } + + if (si->max_sqnum < sqnum) + si->max_sqnum = sqnum; + + sv->leb_count += 1; + rb_link_node(&seb->u.rb, parent, p); + rb_insert_color(&seb->u.rb, &sv->root); + return 0; +} + +/** + * ubi_scan_find_sv - find information about a particular volume in the + * scanning information. + * @si: scanning information + * @vol_id: the requested volume ID + * + * This function returns a pointer to the volume description or %NULL if there + * are no data about this volume in the scanning information. + */ +struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, + int vol_id) +{ + struct ubi_scan_volume *sv; + struct rb_node *p = si->volumes.rb_node; + + while (p) { + sv = rb_entry(p, struct ubi_scan_volume, rb); + + if (vol_id == sv->vol_id) + return sv; + + if (vol_id > sv->vol_id) + p = p->rb_left; + else + p = p->rb_right; + } + + return NULL; +} + +/** + * ubi_scan_find_seb - find information about a particular logical + * eraseblock in the volume scanning information. + * @sv: a pointer to the volume scanning information + * @lnum: the requested logical eraseblock + * + * This function returns a pointer to the scanning logical eraseblock or %NULL + * if there are no data about it in the scanning volume information. + */ +struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv, + int lnum) +{ + struct ubi_scan_leb *seb; + struct rb_node *p = sv->root.rb_node; + + while (p) { + seb = rb_entry(p, struct ubi_scan_leb, u.rb); + + if (lnum == seb->lnum) + return seb; + + if (lnum > seb->lnum) + p = p->rb_left; + else + p = p->rb_right; + } + + return NULL; +} + +/** + * ubi_scan_rm_volume - delete scanning information about a volume. + * @si: scanning information + * @sv: the volume scanning information to delete + */ +void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv) +{ + struct rb_node *rb; + struct ubi_scan_leb *seb; + + dbg_bld("remove scanning information about volume %d", sv->vol_id); + + while ((rb = rb_first(&sv->root))) { + seb = rb_entry(rb, struct ubi_scan_leb, u.rb); + rb_erase(&seb->u.rb, &sv->root); + list_add_tail(&seb->u.list, &si->erase); + } + + rb_erase(&sv->rb, &si->volumes); + kfree(sv); + si->vols_found -= 1; +} + +/** + * ubi_scan_erase_peb - erase a physical eraseblock. + * @ubi: UBI device description object + * @si: scanning information + * @pnum: physical eraseblock number to erase; + * @ec: erase counter value to write (%UBI_SCAN_UNKNOWN_EC if it is unknown) + * + * This function erases physical eraseblock 'pnum', and writes the erase + * counter header to it. This function should only be used on UBI device + * initialization stages, when the EBA unit had not been yet initialized. This + * function returns zero in case of success and a negative error code in case + * of failure. + */ +int ubi_scan_erase_peb(const struct ubi_device *ubi, + const struct ubi_scan_info *si, int pnum, int ec) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + if ((long long)ec >= UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. + */ + ubi_err("erase counter overflow at PEB %d, EC %d", pnum, ec); + return -EINVAL; + } + + ec_hdr->ec = cpu_to_ubi64(ec); + + err = ubi_io_sync_erase(ubi, pnum, 0); + if (err < 0) + goto out_free; + + err = ubi_io_write_ec_hdr(ubi, pnum, ec_hdr); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * ubi_scan_get_free_peb - get a free physical eraseblock. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns a free physical eraseblock. It is supposed to be + * called on the UBI initialization stages when the wear-leveling unit is not + * initialized yet. This function picks a physical eraseblocks from one of the + * lists, writes the EC header if it is needed, and removes it from the list. + * + * This function returns scanning physical eraseblock information in case of + * success and an error code in case of failure. + */ +struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int err = 0, i; + struct ubi_scan_leb *seb; + + if (!list_empty(&si->free)) { + seb = list_entry(si->free.next, struct ubi_scan_leb, u.list); + list_del(&seb->u.list); + dbg_bld("return free PEB %d, EC %d", seb->pnum, seb->ec); + return seb; + } + + for (i = 0; i < 2; i++) { + struct list_head *head; + struct ubi_scan_leb *tmp_seb; + + if (i == 0) + head = &si->erase; + else + head = &si->corr; + + /* + * We try to erase the first physical eraseblock from the @head + * list and pick it if we succeed, or try to erase the + * next one if not. And so forth. We don't want to take care + * about bad eraseblocks here - they'll be handled later. + */ + list_for_each_entry_safe(seb, tmp_seb, head, u.list) { + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); + if (err) + continue; + + seb->ec += 1; + list_del(&seb->u.list); + dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); + return seb; + } + } + + ubi_err("no eraseblocks found"); + return ERR_PTR(-ENOSPC); +} + +/** + * process_eb - read UBI headers, check them and add corresponding data + * to the scanning information. + * @ubi: UBI device description object + * @si: scanning information + * @pnum: the physical eraseblock number + * + * This function returns a zero if the physical eraseblock was succesfully + * handled and a negative error code in case of failure. + */ +static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si, int pnum) +{ + long long ec; + int err, bitflips = 0, vol_id, ec_corr = 0; + + dbg_bld("scan PEB %d", pnum); + + /* Skip bad physical eraseblocks */ + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) + return err; + else if (err) { + /* + * FIXME: this is actually duty of the I/O unit to initialize + * this, but MTD does not provide enough information. + */ + si->bad_peb_count += 1; + return 0; + } + + err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (err < 0) + return err; + else if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else if (err == UBI_IO_PEB_EMPTY) + return ubi_scan_add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, + &si->erase); + else if (err == UBI_IO_BAD_EC_HDR) { + /* + * We have to also look at the VID header, possibly it is not + * corrupted. Set %bitflips flag in order to make this PEB be + * moved and EC be re-created. + */ + ec_corr = 1; + ec = UBI_SCAN_UNKNOWN_EC; + bitflips = 1; + } + + si->is_empty = 0; + + if (!ec_corr) { + /* Make sure UBI version is OK */ + if (ech->version != UBI_VERSION) { + ubi_err("this UBI version is %d, image version is %d", + UBI_VERSION, (int)ech->version); + return -EINVAL; + } + + ec = ubi64_to_cpu(ech->ec); + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. The EC headers have 64 bits + * reserved, but we anyway make use of only 31 bit + * values, as this seems to be enough for any existing + * flash. Upgrade UBI and use 64-bit erase counters + * internally. + */ + ubi_err("erase counter overflow, max is %d", + UBI_MAX_ERASECOUNTER); + ubi_dbg_dump_ec_hdr(ech); + return -EINVAL; + } + } + + /* OK, we've done with the EC header, let's look at the VID header */ + + err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); + if (err < 0) + return err; + else if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else if (err == UBI_IO_BAD_VID_HDR || + (err == UBI_IO_PEB_FREE && ec_corr)) { + /* VID header is corrupted */ + err = ubi_scan_add_to_list(si, pnum, ec, &si->corr); + if (err) + return err; + goto adjust_mean_ec; + } else if (err == UBI_IO_PEB_FREE) { + /* No VID header - the physical eraseblock is free */ + err = ubi_scan_add_to_list(si, pnum, ec, &si->free); + if (err) + return err; + goto adjust_mean_ec; + } + + vol_id = ubi32_to_cpu(vidh->vol_id); + if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOL_ID) { + int lnum = ubi32_to_cpu(vidh->lnum); + + /* Unsupported internal volume */ + switch (vidh->compat) { + case UBI_COMPAT_DELETE: + ubi_msg("\"delete\" compatible internal volume %d:%d" + " found, remove it", vol_id, lnum); + err = ubi_scan_add_to_list(si, pnum, ec, &si->corr); + if (err) + return err; + break; + + case UBI_COMPAT_RO: + ubi_msg("read-only compatible internal volume %d:%d" + " found, switch to read-only mode", + vol_id, lnum); + ubi->ro_mode = 1; + break; + + case UBI_COMPAT_PRESERVE: + ubi_msg("\"preserve\" compatible internal volume %d:%d" + " found", vol_id, lnum); + err = ubi_scan_add_to_list(si, pnum, ec, &si->alien); + if (err) + return err; + si->alien_peb_count += 1; + return 0; + + case UBI_COMPAT_REJECT: + ubi_err("incompatible internal volume %d:%d found", + vol_id, lnum); + return -EINVAL; + } + } + + /* Both UBI headers seem to be fine */ + err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips); + if (err) + return err; + +adjust_mean_ec: + if (!ec_corr) { + if (si->ec_sum + ec < ec) { + commit_to_mean_value(si); + si->ec_sum = 0; + si->ec_count = 0; + } else { + si->ec_sum += ec; + si->ec_count += 1; + } + + if (ec > si->max_ec) + si->max_ec = ec; + if (ec < si->min_ec) + si->min_ec = ec; + } + + return 0; +} + +/** + * ubi_scan - scan an MTD device. + * @ubi: UBI device description object + * + * This function does full scanning of an MTD device and returns complete + * information about it. In case of failure, an error code is returned. + */ +struct ubi_scan_info *ubi_scan(struct ubi_device *ubi) +{ + int err, pnum; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb; + struct ubi_scan_info *si; + + si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL); + if (!si) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&si->corr); + INIT_LIST_HEAD(&si->free); + INIT_LIST_HEAD(&si->erase); + INIT_LIST_HEAD(&si->alien); + si->volumes = RB_ROOT; + si->is_empty = 1; + + err = -ENOMEM; + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) + goto out_si; + + vidh = ubi_zalloc_vid_hdr(ubi); + if (!vidh) + goto out_ech; + + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + cond_resched(); + + dbg_msg("process PEB %d", pnum); + err = process_eb(ubi, si, pnum); + if (err < 0) + goto out_vidh; + } + + dbg_msg("scanning is finished"); + + /* Finish mean erase counter calculations */ + if (si->ec_count) + commit_to_mean_value(si); + + if (si->is_empty) + ubi_msg("empty MTD device detected"); + + /* + * In case of unknown erase counter we use the mean erase counter + * value. + */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + } + + list_for_each_entry(seb, &si->free, u.list) { + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + } + + list_for_each_entry(seb, &si->corr, u.list) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + list_for_each_entry(seb, &si->erase, u.list) + if (seb->ec == UBI_SCAN_UNKNOWN_EC) + seb->ec = si->mean_ec; + + err = paranoid_check_si(ubi, si); + if (err) { + if (err > 0) + err = -EINVAL; + goto out_vidh; + } + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); + + return si; + +out_vidh: + ubi_free_vid_hdr(ubi, vidh); +out_ech: + kfree(ech); +out_si: + ubi_scan_destroy_si(si); + return ERR_PTR(err); +} + +/** + * destroy_sv - free the scanning volume information + * @sv: scanning volume information + * + * This function destroys the volume RB-tree (@sv->root) and the scanning + * volume information. + */ +static void destroy_sv(struct ubi_scan_volume *sv) +{ + struct ubi_scan_leb *seb; + struct rb_node *this = sv->root.rb_node; + + while (this) { + if (this->rb_left) + this = this->rb_left; + else if (this->rb_right) + this = this->rb_right; + else { + seb = rb_entry(this, struct ubi_scan_leb, u.rb); + this = rb_parent(this); + if (this) { + if (this->rb_left == &seb->u.rb) + this->rb_left = NULL; + else + this->rb_right = NULL; + } + + kfree(seb); + } + } + kfree(sv); +} + +/** + * ubi_scan_destroy_si - destroy scanning information. + * @si: scanning information + */ +void ubi_scan_destroy_si(struct ubi_scan_info *si) +{ + struct ubi_scan_leb *seb, *seb_tmp; + struct ubi_scan_volume *sv; + struct rb_node *rb; + + list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) { + list_del(&seb->u.list); + kfree(seb); + } + + /* Destroy the volume RB-tree */ + rb = si->volumes.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + sv = rb_entry(rb, struct ubi_scan_volume, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &sv->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + destroy_sv(sv); + } + } + + kfree(si); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_si - check if the scanning information is correct and + * consistent. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero if the scanning information is all right, %1 if + * not and a negative error code if an error occurred. + */ +static int paranoid_check_si(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int pnum, err, vols_found = 0; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb, *last_seb; + uint8_t *buf; + + /* + * At first, check that scanning information is ok. + */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + int leb_count = 0; + + cond_resched(); + + vols_found += 1; + + if (si->is_empty) { + ubi_err("bad is_empty flag"); + goto bad_sv; + } + + if (sv->vol_id < 0 || sv->highest_lnum < 0 || + sv->leb_count < 0 || sv->vol_type < 0 || sv->used_ebs < 0 || + sv->data_pad < 0 || sv->last_data_size < 0) { + ubi_err("negative values"); + goto bad_sv; + } + + if (sv->vol_id >= UBI_MAX_VOLUMES && + sv->vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("bad vol_id"); + goto bad_sv; + } + + if (sv->vol_id > si->highest_vol_id) { + ubi_err("highest_vol_id is %d, but vol_id %d is there", + si->highest_vol_id, sv->vol_id); + goto out; + } + + if (sv->vol_type != UBI_DYNAMIC_VOLUME && + sv->vol_type != UBI_STATIC_VOLUME) { + ubi_err("bad vol_type"); + goto bad_sv; + } + + if (sv->data_pad > ubi->leb_size / 2) { + ubi_err("bad data_pad"); + goto bad_sv; + } + + last_seb = NULL; + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); + + last_seb = seb; + leb_count += 1; + + if (seb->pnum < 0 || seb->ec < 0) { + ubi_err("negative values"); + goto bad_seb; + } + + if (seb->ec < si->min_ec) { + ubi_err("bad si->min_ec (%d), %d found", + si->min_ec, seb->ec); + goto bad_seb; + } + + if (seb->ec > si->max_ec) { + ubi_err("bad si->max_ec (%d), %d found", + si->max_ec, seb->ec); + goto bad_seb; + } + + if (seb->pnum >= ubi->peb_count) { + ubi_err("too high PEB number %d, total PEBs %d", + seb->pnum, ubi->peb_count); + goto bad_seb; + } + + if (sv->vol_type == UBI_STATIC_VOLUME) { + if (seb->lnum >= sv->used_ebs) { + ubi_err("bad lnum or used_ebs"); + goto bad_seb; + } + } else { + if (sv->used_ebs != 0) { + ubi_err("non-zero used_ebs"); + goto bad_seb; + } + } + + if (seb->lnum > sv->highest_lnum) { + ubi_err("incorrect highest_lnum or lnum"); + goto bad_seb; + } + } + + if (sv->leb_count != leb_count) { + ubi_err("bad leb_count, %d objects in the tree", + leb_count); + goto bad_sv; + } + + if (!last_seb) + continue; + + seb = last_seb; + + if (seb->lnum != sv->highest_lnum) { + ubi_err("bad highest_lnum"); + goto bad_seb; + } + } + + if (vols_found != si->vols_found) { + ubi_err("bad si->vols_found %d, should be %d", + si->vols_found, vols_found); + goto out; + } + + /* Check that scanning information is correct */ + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + last_seb = NULL; + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + int vol_type; + + cond_resched(); + + last_seb = seb; + + err = ubi_io_read_vid_hdr(ubi, seb->pnum, vidh, 1); + if (err && err != UBI_IO_BITFLIPS) { + ubi_err("VID header is not OK (%d)", err); + if (err > 0) + err = -EIO; + return err; + } + + vol_type = vidh->vol_type == UBI_VID_DYNAMIC ? + UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + if (sv->vol_type != vol_type) { + ubi_err("bad vol_type"); + goto bad_vid_hdr; + } + + if (seb->sqnum != ubi64_to_cpu(vidh->sqnum)) { + ubi_err("bad sqnum %llu", seb->sqnum); + goto bad_vid_hdr; + } + + if (sv->vol_id != ubi32_to_cpu(vidh->vol_id)) { + ubi_err("bad vol_id %d", sv->vol_id); + goto bad_vid_hdr; + } + + if (sv->compat != vidh->compat) { + ubi_err("bad compat %d", vidh->compat); + goto bad_vid_hdr; + } + + if (seb->lnum != ubi32_to_cpu(vidh->lnum)) { + ubi_err("bad lnum %d", seb->lnum); + goto bad_vid_hdr; + } + + if (sv->used_ebs != ubi32_to_cpu(vidh->used_ebs)) { + ubi_err("bad used_ebs %d", sv->used_ebs); + goto bad_vid_hdr; + } + + if (sv->data_pad != ubi32_to_cpu(vidh->data_pad)) { + ubi_err("bad data_pad %d", sv->data_pad); + goto bad_vid_hdr; + } + + if (seb->leb_ver != ubi32_to_cpu(vidh->leb_ver)) { + ubi_err("bad leb_ver %u", seb->leb_ver); + goto bad_vid_hdr; + } + } + + if (!last_seb) + continue; + + if (sv->highest_lnum != ubi32_to_cpu(vidh->lnum)) { + ubi_err("bad highest_lnum %d", sv->highest_lnum); + goto bad_vid_hdr; + } + + if (sv->last_data_size != ubi32_to_cpu(vidh->data_size)) { + ubi_err("bad last_data_size %d", sv->last_data_size); + goto bad_vid_hdr; + } + } + + /* + * Make sure that all the physical eraseblocks are in one of the lists + * or trees. + */ + buf = kmalloc(ubi->peb_count, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + memset(buf, 1, ubi->peb_count); + for (pnum = 0; pnum < ubi->peb_count; pnum++) { + err = ubi_io_is_bad(ubi, pnum); + if (err < 0) + return err; + else if (err) + buf[pnum] = 0; + } + + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) + buf[seb->pnum] = 0; + + list_for_each_entry(seb, &si->free, u.list) + buf[seb->pnum] = 0; + + list_for_each_entry(seb, &si->corr, u.list) + buf[seb->pnum] = 0; + + list_for_each_entry(seb, &si->erase, u.list) + buf[seb->pnum] = 0; + + list_for_each_entry(seb, &si->alien, u.list) + buf[seb->pnum] = 0; + + err = 0; + for (pnum = 0; pnum < ubi->peb_count; pnum++) + if (buf[pnum]) { + ubi_err("PEB %d is not referred", pnum); + err = 1; + } + + kfree(buf); + if (err) + goto out; + return 0; + +bad_seb: + ubi_err("bad scanning information about LEB %d", seb->lnum); + ubi_dbg_dump_seb(seb, 0); + ubi_dbg_dump_sv(sv); + goto out; + +bad_sv: + ubi_err("bad scanning information about volume %d", sv->vol_id); + ubi_dbg_dump_sv(sv); + goto out; + +bad_vid_hdr: + ubi_err("bad scanning information about volume %d", sv->vol_id); + ubi_dbg_dump_sv(sv); + ubi_dbg_dump_vid_hdr(vidh); + +out: + ubi_dbg_dump_stack(); + return 1; +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/scan.h b/drivers/mtd/ubi/scan.h new file mode 100644 index 00000000000..3949f6192c7 --- /dev/null +++ b/drivers/mtd/ubi/scan.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_SCAN_H__ +#define __UBI_SCAN_H__ + +/* The erase counter value for this physical eraseblock is unknown */ +#define UBI_SCAN_UNKNOWN_EC (-1) + +/** + * struct ubi_scan_leb - scanning information about a physical eraseblock. + * @ec: erase counter (%UBI_SCAN_UNKNOWN_EC if it is unknown) + * @pnum: physical eraseblock number + * @lnum: logical eraseblock number + * @scrub: if this physical eraseblock needs scrubbing + * @sqnum: sequence number + * @u: unions RB-tree or @list links + * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects + * @u.list: link in one of the eraseblock lists + * @leb_ver: logical eraseblock version (obsolete) + * + * One object of this type is allocated for each physical eraseblock during + * scanning. + */ +struct ubi_scan_leb { + int ec; + int pnum; + int lnum; + int scrub; + unsigned long long sqnum; + union { + struct rb_node rb; + struct list_head list; + } u; + uint32_t leb_ver; +}; + +/** + * struct ubi_scan_volume - scanning information about a volume. + * @vol_id: volume ID + * @highest_lnum: highest logical eraseblock number in this volume + * @leb_count: number of logical eraseblocks in this volume + * @vol_type: volume type + * @used_ebs: number of used logical eraseblocks in this volume (only for + * static volumes) + * @last_data_size: amount of data in the last logical eraseblock of this + * volume (always equivalent to the usable logical eraseblock size in case of + * dynamic volumes) + * @data_pad: how many bytes at the end of logical eraseblocks of this volume + * are not used (due to volume alignment) + * @compat: compatibility flags of this volume + * @rb: link in the volume RB-tree + * @root: root of the RB-tree containing all the eraseblock belonging to this + * volume (&struct ubi_scan_leb objects) + * + * One object of this type is allocated for each volume during scanning. + */ +struct ubi_scan_volume { + int vol_id; + int highest_lnum; + int leb_count; + int vol_type; + int used_ebs; + int last_data_size; + int data_pad; + int compat; + struct rb_node rb; + struct rb_root root; +}; + +/** + * struct ubi_scan_info - UBI scanning information. + * @volumes: root of the volume RB-tree + * @corr: list of corrupted physical eraseblocks + * @free: list of free physical eraseblocks + * @erase: list of physical eraseblocks which have to be erased + * @alien: list of physical eraseblocks which should not be used by UBI (e.g., + * @bad_peb_count: count of bad physical eraseblocks + * those belonging to "preserve"-compatible internal volumes) + * @vols_found: number of volumes found during scanning + * @highest_vol_id: highest volume ID + * @alien_peb_count: count of physical eraseblocks in the @alien list + * @is_empty: flag indicating whether the MTD device is empty or not + * @min_ec: lowest erase counter value + * @max_ec: highest erase counter value + * @max_sqnum: highest sequence number value + * @mean_ec: mean erase counter value + * @ec_sum: a temporary variable used when calculating @mean_ec + * @ec_count: a temporary variable used when calculating @mean_ec + * + * This data structure contains the result of scanning and may be used by other + * UBI units to build final UBI data structures, further error-recovery and so + * on. + */ +struct ubi_scan_info { + struct rb_root volumes; + struct list_head corr; + struct list_head free; + struct list_head erase; + struct list_head alien; + int bad_peb_count; + int vols_found; + int highest_vol_id; + int alien_peb_count; + int is_empty; + int min_ec; + int max_ec; + unsigned long long max_sqnum; + int mean_ec; + int ec_sum; + int ec_count; +}; + +struct ubi_device; +struct ubi_vid_hdr; + +/* + * ubi_scan_move_to_list - move a physical eraseblock from the volume tree to a + * list. + * + * @sv: volume scanning information + * @seb: scanning eraseblock infprmation + * @list: the list to move to + */ +static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv, + struct ubi_scan_leb *seb, + struct list_head *list) +{ + rb_erase(&seb->u.rb, &sv->root); + list_add_tail(&seb->u.list, list); +} + +int ubi_scan_add_to_list(struct ubi_scan_info *si, int pnum, int ec, + struct list_head *list); +int ubi_scan_add_used(const struct ubi_device *ubi, struct ubi_scan_info *si, + int pnum, int ec, const struct ubi_vid_hdr *vid_hdr, + int bitflips); +struct ubi_scan_volume *ubi_scan_find_sv(const struct ubi_scan_info *si, + int vol_id); +struct ubi_scan_leb *ubi_scan_find_seb(const struct ubi_scan_volume *sv, + int lnum); +void ubi_scan_rm_volume(struct ubi_scan_info *si, struct ubi_scan_volume *sv); +struct ubi_scan_leb *ubi_scan_get_free_peb(const struct ubi_device *ubi, + struct ubi_scan_info *si); +int ubi_scan_erase_peb(const struct ubi_device *ubi, + const struct ubi_scan_info *si, int pnum, int ec); +struct ubi_scan_info *ubi_scan(struct ubi_device *ubi); +void ubi_scan_destroy_si(struct ubi_scan_info *si); + +#endif /* !__UBI_SCAN_H__ */ diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h new file mode 100644 index 00000000000..feb647f108f --- /dev/null +++ b/drivers/mtd/ubi/ubi.h @@ -0,0 +1,535 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +#ifndef __UBI_UBI_H__ +#define __UBI_UBI_H__ + +#include <linux/init.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +#include <linux/spinlock.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/device.h> +#include <linux/string.h> +#include <linux/mtd/mtd.h> + +#include <mtd/ubi-header.h> +#include <linux/mtd/ubi.h> + +#include "scan.h" +#include "debug.h" + +/* Maximum number of supported UBI devices */ +#define UBI_MAX_DEVICES 32 + +/* UBI name used for character devices, sysfs, etc */ +#define UBI_NAME_STR "ubi" + +/* Normal UBI messages */ +#define ubi_msg(fmt, ...) printk(KERN_NOTICE "UBI: " fmt "\n", ##__VA_ARGS__) +/* UBI warning messages */ +#define ubi_warn(fmt, ...) printk(KERN_WARNING "UBI warning: %s: " fmt "\n", \ + __FUNCTION__, ##__VA_ARGS__) +/* UBI error messages */ +#define ubi_err(fmt, ...) printk(KERN_ERR "UBI error: %s: " fmt "\n", \ + __FUNCTION__, ##__VA_ARGS__) + +/* Lowest number PEBs reserved for bad PEB handling */ +#define MIN_RESEVED_PEBS 2 + +/* Background thread name pattern */ +#define UBI_BGT_NAME_PATTERN "ubi_bgt%dd" + +/* This marker in the EBA table means that the LEB is um-mapped */ +#define UBI_LEB_UNMAPPED -1 + +/* + * In case of errors, UBI tries to repeat the operation several times before + * returning error. The below constant defines how many times UBI re-tries. + */ +#define UBI_IO_RETRIES 3 + +/* + * Error codes returned by the I/O unit. + * + * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only + * 0xFF bytes + * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a + * valid erase counter header, and the rest are %0xFF bytes + * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) + * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or + * CRC) + * UBI_IO_BITFLIPS: bit-flips were detected and corrected + */ +enum { + UBI_IO_PEB_EMPTY = 1, + UBI_IO_PEB_FREE, + UBI_IO_BAD_EC_HDR, + UBI_IO_BAD_VID_HDR, + UBI_IO_BITFLIPS +}; + +extern int ubi_devices_cnt; +extern struct ubi_device *ubi_devices[]; + +struct ubi_volume_desc; + +/** + * struct ubi_volume - UBI volume description data structure. + * @dev: device object to make use of the the Linux device model + * @cdev: character device object to create character device + * @ubi: reference to the UBI device description object + * @vol_id: volume ID + * @readers: number of users holding this volume in read-only mode + * @writers: number of users holding this volume in read-write mode + * @exclusive: whether somebody holds this volume in exclusive mode + * @removed: if the volume was removed + * @checked: if this static volume was checked + * + * @reserved_pebs: how many physical eraseblocks are reserved for this volume + * @vol_type: volume type (%UBI_DYNAMIC_VOLUME or %UBI_STATIC_VOLUME) + * @usable_leb_size: logical eraseblock size without padding + * @used_ebs: how many logical eraseblocks in this volume contain data + * @last_eb_bytes: how many bytes are stored in the last logical eraseblock + * @used_bytes: how many bytes of data this volume contains + * @upd_marker: non-zero if the update marker is set for this volume + * @corrupted: non-zero if the volume is corrupted (static volumes only) + * @alignment: volume alignment + * @data_pad: how many bytes are not used at the end of physical eraseblocks to + * satisfy the requested alignment + * @name_len: volume name length + * @name: volume name + * + * @updating: whether the volume is being updated + * @upd_ebs: how many eraseblocks are expected to be updated + * @upd_bytes: how many bytes are expected to be received + * @upd_received: how many update bytes were already received + * @upd_buf: update buffer which is used to collect update data + * + * @eba_tbl: EBA table of this volume (LEB->PEB mapping) + * + * @gluebi_desc: gluebi UBI volume descriptor + * @gluebi_refcount: reference count of the gluebi MTD device + * @gluebi_mtd: MTD device description object of the gluebi MTD device + * + * The @corrupted field indicates that the volume's contents is corrupted. + * Since UBI protects only static volumes, this field is not relevant to + * dynamic volumes - it is user's responsibility to assure their data + * integrity. + * + * The @upd_marker flag indicates that this volume is either being updated at + * the moment or is damaged because of an unclean reboot. + */ +struct ubi_volume { + struct device dev; + struct cdev cdev; + struct ubi_device *ubi; + int vol_id; + int readers; + int writers; + int exclusive; + int removed; + int checked; + + int reserved_pebs; + int vol_type; + int usable_leb_size; + int used_ebs; + int last_eb_bytes; + long long used_bytes; + int upd_marker; + int corrupted; + int alignment; + int data_pad; + int name_len; + char name[UBI_VOL_NAME_MAX+1]; + + int updating; + int upd_ebs; + long long upd_bytes; + long long upd_received; + void *upd_buf; + + int *eba_tbl; + +#ifdef CONFIG_MTD_UBI_GLUEBI + /* Gluebi-related stuff may be compiled out */ + struct ubi_volume_desc *gluebi_desc; + int gluebi_refcount; + struct mtd_info gluebi_mtd; +#endif +}; + +/** + * struct ubi_volume_desc - descriptor of the UBI volume returned when it is + * opened. + * @vol: reference to the corresponding volume description object + * @mode: open mode (%UBI_READONLY, %UBI_READWRITE, or %UBI_EXCLUSIVE) + */ +struct ubi_volume_desc { + struct ubi_volume *vol; + int mode; +}; + +struct ubi_wl_entry; + +/** + * struct ubi_device - UBI device description structure + * @dev: class device object to use the the Linux device model + * @cdev: character device object to create character device + * @ubi_num: UBI device number + * @ubi_name: UBI device name + * @major: character device major number + * @vol_count: number of volumes in this UBI device + * @volumes: volumes of this UBI device + * @volumes_lock: protects @volumes, @rsvd_pebs, @avail_pebs, beb_rsvd_pebs, + * @beb_rsvd_level, @bad_peb_count, @good_peb_count, @vol_count, @vol->readers, + * @vol->writers, @vol->exclusive, @vol->removed, @vol->mapping and + * @vol->eba_tbl. + * + * @rsvd_pebs: count of reserved physical eraseblocks + * @avail_pebs: count of available physical eraseblocks + * @beb_rsvd_pebs: how many physical eraseblocks are reserved for bad PEB + * handling + * @beb_rsvd_level: normal level of PEBs reserved for bad PEB handling + * + * @vtbl_slots: how many slots are available in the volume table + * @vtbl_size: size of the volume table in bytes + * @vtbl: in-RAM volume table copy + * + * @max_ec: current highest erase counter value + * @mean_ec: current mean erase counter value + * + * global_sqnum: global sequence number + * @ltree_lock: protects the lock tree and @global_sqnum + * @ltree: the lock tree + * @vtbl_mutex: protects on-flash volume table + * + * @used: RB-tree of used physical eraseblocks + * @free: RB-tree of free physical eraseblocks + * @scrub: RB-tree of physical eraseblocks which need scrubbing + * @prot: protection trees + * @prot.pnum: protection tree indexed by physical eraseblock numbers + * @prot.aec: protection tree indexed by absolute erase counter value + * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, + * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works + * fields + * @wl_scheduled: non-zero if the wear-leveling was scheduled + * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any + * physical eraseblock + * @abs_ec: absolute erase counter + * @move_from: physical eraseblock from where the data is being moved + * @move_to: physical eraseblock where the data is being moved to + * @move_from_put: if the "from" PEB was put + * @move_to_put: if the "to" PEB was put + * @works: list of pending works + * @works_count: count of pending works + * @bgt_thread: background thread description object + * @thread_enabled: if the background thread is enabled + * @bgt_name: background thread name + * + * @flash_size: underlying MTD device size (in bytes) + * @peb_count: count of physical eraseblocks on the MTD device + * @peb_size: physical eraseblock size + * @bad_peb_count: count of bad physical eraseblocks + * @good_peb_count: count of good physical eraseblocks + * @min_io_size: minimal input/output unit size of the underlying MTD device + * @hdrs_min_io_size: minimal I/O unit size used for VID and EC headers + * @ro_mode: if the UBI device is in read-only mode + * @leb_size: logical eraseblock size + * @leb_start: starting offset of logical eraseblocks within physical + * eraseblocks + * @ec_hdr_alsize: size of the EC header aligned to @hdrs_min_io_size + * @vid_hdr_alsize: size of the VID header aligned to @hdrs_min_io_size + * @vid_hdr_offset: starting offset of the volume identifier header (might be + * unaligned) + * @vid_hdr_aloffset: starting offset of the VID header aligned to + * @hdrs_min_io_size + * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset + * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or + * not + * @mtd: MTD device descriptor + */ +struct ubi_device { + struct cdev cdev; + struct device dev; + int ubi_num; + char ubi_name[sizeof(UBI_NAME_STR)+5]; + int major; + int vol_count; + struct ubi_volume *volumes[UBI_MAX_VOLUMES+UBI_INT_VOL_COUNT]; + spinlock_t volumes_lock; + + int rsvd_pebs; + int avail_pebs; + int beb_rsvd_pebs; + int beb_rsvd_level; + + int vtbl_slots; + int vtbl_size; + struct ubi_vtbl_record *vtbl; + struct mutex vtbl_mutex; + + int max_ec; + int mean_ec; + + /* EBA unit's stuff */ + unsigned long long global_sqnum; + spinlock_t ltree_lock; + struct rb_root ltree; + + /* Wear-leveling unit's stuff */ + struct rb_root used; + struct rb_root free; + struct rb_root scrub; + struct { + struct rb_root pnum; + struct rb_root aec; + } prot; + spinlock_t wl_lock; + int wl_scheduled; + struct ubi_wl_entry **lookuptbl; + unsigned long long abs_ec; + struct ubi_wl_entry *move_from; + struct ubi_wl_entry *move_to; + int move_from_put; + int move_to_put; + struct list_head works; + int works_count; + struct task_struct *bgt_thread; + int thread_enabled; + char bgt_name[sizeof(UBI_BGT_NAME_PATTERN)+2]; + + /* I/O unit's stuff */ + long long flash_size; + int peb_count; + int peb_size; + int bad_peb_count; + int good_peb_count; + int min_io_size; + int hdrs_min_io_size; + int ro_mode; + int leb_size; + int leb_start; + int ec_hdr_alsize; + int vid_hdr_alsize; + int vid_hdr_offset; + int vid_hdr_aloffset; + int vid_hdr_shift; + int bad_allowed; + struct mtd_info *mtd; +}; + +extern struct file_operations ubi_cdev_operations; +extern struct file_operations ubi_vol_cdev_operations; +extern struct class *ubi_class; + +/* vtbl.c */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec); +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si); + +/* vmt.c */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req); +int ubi_remove_volume(struct ubi_volume_desc *desc); +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs); +int ubi_add_volume(struct ubi_device *ubi, int vol_id); +void ubi_free_volume(struct ubi_device *ubi, int vol_id); + +/* upd.c */ +int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes); +int ubi_more_update_data(struct ubi_device *ubi, int vol_id, + const void __user *buf, int count); + +/* misc.c */ +int ubi_calc_data_len(const struct ubi_device *ubi, const void *buf, int length); +int ubi_check_volume(struct ubi_device *ubi, int vol_id); +void ubi_calculate_reserved(struct ubi_device *ubi); + +/* gluebi.c */ +#ifdef CONFIG_MTD_UBI_GLUEBI +int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol); +int ubi_destroy_gluebi(struct ubi_volume *vol); +#else +#define ubi_create_gluebi(ubi, vol) 0 +#define ubi_destroy_gluebi(vol) 0 +#endif + +/* eba.c */ +int ubi_eba_unmap_leb(struct ubi_device *ubi, int vol_id, int lnum); +int ubi_eba_read_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, + int offset, int len, int check); +int ubi_eba_write_leb(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int offset, int len, int dtype); +int ubi_eba_write_leb_st(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype, + int used_ebs); +int ubi_eba_atomic_leb_change(struct ubi_device *ubi, int vol_id, int lnum, + const void *buf, int len, int dtype); +int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, + struct ubi_vid_hdr *vid_hdr); +int ubi_eba_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); +void ubi_eba_close(const struct ubi_device *ubi); + +/* wl.c */ +int ubi_wl_get_peb(struct ubi_device *ubi, int dtype); +int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture); +int ubi_wl_flush(struct ubi_device *ubi); +int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum); +int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si); +void ubi_wl_close(struct ubi_device *ubi); + +/* io.c */ +int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, + int len); +int ubi_io_write(const struct ubi_device *ubi, const void *buf, int pnum, + int offset, int len); +int ubi_io_sync_erase(const struct ubi_device *ubi, int pnum, int torture); +int ubi_io_is_bad(const struct ubi_device *ubi, int pnum); +int ubi_io_mark_bad(const struct ubi_device *ubi, int pnum); +int ubi_io_read_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr, int verbose); +int ubi_io_write_ec_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr); +int ubi_io_read_vid_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr, int verbose); +int ubi_io_write_vid_hdr(const struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr); + +/* + * ubi_rb_for_each_entry - walk an RB-tree. + * @rb: a pointer to type 'struct rb_node' to to use as a loop counter + * @pos: a pointer to RB-tree entry type to use as a loop counter + * @root: RB-tree's root + * @member: the name of the 'struct rb_node' within the RB-tree entry + */ +#define ubi_rb_for_each_entry(rb, pos, root, member) \ + for (rb = rb_first(root), \ + pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \ + rb; \ + rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member)) + +/** + * ubi_zalloc_vid_hdr - allocate a volume identifier header object. + * @ubi: UBI device description object + * + * This function returns a pointer to the newly allocated and zero-filled + * volume identifier header object in case of success and %NULL in case of + * failure. + */ +static inline struct ubi_vid_hdr *ubi_zalloc_vid_hdr(const struct ubi_device *ubi) +{ + void *vid_hdr; + + vid_hdr = kzalloc(ubi->vid_hdr_alsize, GFP_KERNEL); + if (!vid_hdr) + return NULL; + + /* + * VID headers may be stored at un-aligned flash offsets, so we shift + * the pointer. + */ + return vid_hdr + ubi->vid_hdr_shift; +} + +/** + * ubi_free_vid_hdr - free a volume identifier header object. + * @ubi: UBI device description object + * @vid_hdr: the object to free + */ +static inline void ubi_free_vid_hdr(const struct ubi_device *ubi, + struct ubi_vid_hdr *vid_hdr) +{ + void *p = vid_hdr; + + if (!p) + return; + + kfree(p - ubi->vid_hdr_shift); +} + +/* + * This function is equivalent to 'ubi_io_read()', but @offset is relative to + * the beginning of the logical eraseblock, not to the beginning of the + * physical eraseblock. + */ +static inline int ubi_io_read_data(const struct ubi_device *ubi, void *buf, + int pnum, int offset, int len) +{ + ubi_assert(offset >= 0); + return ubi_io_read(ubi, buf, pnum, offset + ubi->leb_start, len); +} + +/* + * This function is equivalent to 'ubi_io_write()', but @offset is relative to + * the beginning of the logical eraseblock, not to the beginning of the + * physical eraseblock. + */ +static inline int ubi_io_write_data(const struct ubi_device *ubi, const void *buf, + int pnum, int offset, int len) +{ + ubi_assert(offset >= 0); + return ubi_io_write(ubi, buf, pnum, offset + ubi->leb_start, len); +} + +/** + * ubi_ro_mode - switch to read-only mode. + * @ubi: UBI device description object + */ +static inline void ubi_ro_mode(struct ubi_device *ubi) +{ + ubi->ro_mode = 1; + ubi_warn("switch to read-only mode"); +} + +/** + * vol_id2idx - get table index by volume ID. + * @ubi: UBI device description object + * @vol_id: volume ID + */ +static inline int vol_id2idx(const struct ubi_device *ubi, int vol_id) +{ + if (vol_id >= UBI_INTERNAL_VOL_START) + return vol_id - UBI_INTERNAL_VOL_START + ubi->vtbl_slots; + else + return vol_id; +} + +/** + * idx2vol_id - get volume ID by table index. + * @ubi: UBI device description object + * @idx: table index + */ +static inline int idx2vol_id(const struct ubi_device *ubi, int idx) +{ + if (idx >= ubi->vtbl_slots) + return idx - ubi->vtbl_slots + UBI_INTERNAL_VOL_START; + else + return idx; +} + +#endif /* !__UBI_UBI_H__ */ diff --git a/drivers/mtd/ubi/upd.c b/drivers/mtd/ubi/upd.c new file mode 100644 index 00000000000..8925b977e3d --- /dev/null +++ b/drivers/mtd/ubi/upd.c @@ -0,0 +1,348 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + * + * Jan 2007: Alexander Schmidt, hacked per-volume update. + */ + +/* + * This file contains implementation of the volume update functionality. + * + * The update operation is based on the per-volume update marker which is + * stored in the volume table. The update marker is set before the update + * starts, and removed after the update has been finished. So if the update was + * interrupted by an unclean re-boot or due to some other reasons, the update + * marker stays on the flash media and UBI finds it when it attaches the MTD + * device next time. If the update marker is set for a volume, the volume is + * treated as damaged and most I/O operations are prohibited. Only a new update + * operation is allowed. + * + * Note, in general it is possible to implement the update operation as a + * transaction with a roll-back capability. + */ + +#include <linux/err.h> +#include <asm/uaccess.h> +#include <asm/div64.h> +#include "ubi.h" + +/** + * set_update_marker - set update marker. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function sets the update marker flag for volume @vol_id. Returns zero + * in case of success and a negative error code in case of failure. + */ +static int set_update_marker(struct ubi_device *ubi, int vol_id) +{ + int err; + struct ubi_vtbl_record vtbl_rec; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("set update marker for volume %d", vol_id); + + if (vol->upd_marker) { + ubi_assert(ubi->vtbl[vol_id].upd_marker); + dbg_msg("already set"); + return 0; + } + + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec.upd_marker = 1; + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + vol->upd_marker = 1; + return err; +} + +/** + * clear_update_marker - clear update marker. + * @ubi: UBI device description object + * @vol_id: volume ID + * @bytes: new data size in bytes + * + * This function clears the update marker for volume @vol_id, sets new volume + * data size and clears the "corrupted" flag (static volumes only). Returns + * zero in case of success and a negative error code in case of failure. + */ +static int clear_update_marker(struct ubi_device *ubi, int vol_id, long long bytes) +{ + int err; + uint64_t tmp; + struct ubi_vtbl_record vtbl_rec; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("clear update marker for volume %d", vol_id); + + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + ubi_assert(vol->upd_marker && vtbl_rec.upd_marker); + vtbl_rec.upd_marker = 0; + + if (vol->vol_type == UBI_STATIC_VOLUME) { + vol->corrupted = 0; + vol->used_bytes = tmp = bytes; + vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size); + vol->used_ebs = tmp; + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + vol->upd_marker = 0; + return err; +} + +/** + * ubi_start_update - start volume update. + * @ubi: UBI device description object + * @vol_id: volume ID + * @bytes: update bytes + * + * This function starts volume update operation. If @bytes is zero, the volume + * is just wiped out. Returns zero in case of success and a negative error code + * in case of failure. + */ +int ubi_start_update(struct ubi_device *ubi, int vol_id, long long bytes) +{ + int i, err; + uint64_t tmp; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("start update of volume %d, %llu bytes", vol_id, bytes); + vol->updating = 1; + + err = set_update_marker(ubi, vol_id); + if (err) + return err; + + /* Before updating - wipe out the volume */ + for (i = 0; i < vol->reserved_pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol_id, i); + if (err) + return err; + } + + if (bytes == 0) { + err = clear_update_marker(ubi, vol_id, 0); + if (err) + return err; + err = ubi_wl_flush(ubi); + if (!err) + vol->updating = 0; + } + + vol->upd_buf = kmalloc(ubi->leb_size, GFP_KERNEL); + if (!vol->upd_buf) + return -ENOMEM; + + tmp = bytes; + vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size); + vol->upd_ebs += tmp; + vol->upd_bytes = bytes; + vol->upd_received = 0; + return 0; +} + +/** + * write_leb - write update data. + * @ubi: UBI device description object + * @vol_id: volume ID + * @lnum: logical eraseblock number + * @buf: data to write + * @len: data size + * @used_ebs: how many logical eraseblocks will this volume contain (static + * volumes only) + * + * This function writes update data to corresponding logical eraseblock. In + * case of dynamic volume, this function checks if the data contains 0xFF bytes + * at the end. If yes, the 0xFF bytes are cut and not written. So if the whole + * buffer contains only 0xFF bytes, the LEB is left unmapped. + * + * The reason why we skip the trailing 0xFF bytes in case of dynamic volume is + * that we want to make sure that more data may be appended to the logical + * eraseblock in future. Indeed, writing 0xFF bytes may have side effects and + * this PEB won't be writable anymore. So if one writes the file-system image + * to the UBI volume where 0xFFs mean free space - UBI makes sure this free + * space is writable after the update. + * + * We do not do this for static volumes because they are read-only. But this + * also cannot be done because we have to store per-LEB CRC and the correct + * data length. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int write_leb(struct ubi_device *ubi, int vol_id, int lnum, void *buf, + int len, int used_ebs) +{ + int err, l; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + l = ALIGN(len, ubi->min_io_size); + memset(buf + len, 0xFF, l - len); + + l = ubi_calc_data_len(ubi, buf, l); + if (l == 0) { + dbg_msg("all %d bytes contain 0xFF - skip", len); + return 0; + } + if (len != l) + dbg_msg("skip last %d bytes (0xFF)", len - l); + + err = ubi_eba_write_leb(ubi, vol_id, lnum, buf, 0, l, + UBI_UNKNOWN); + } else { + /* + * When writing static volume, and this is the last logical + * eraseblock, the length (@len) does not have to be aligned to + * the minimal flash I/O unit. The 'ubi_eba_write_leb_st()' + * function accepts exact (unaligned) length and stores it in + * the VID header. And it takes care of proper alignment by + * padding the buffer. Here we just make sure the padding will + * contain zeros, not random trash. + */ + memset(buf + len, 0, vol->usable_leb_size - len); + err = ubi_eba_write_leb_st(ubi, vol_id, lnum, buf, len, + UBI_UNKNOWN, used_ebs); + } + + return err; +} + +/** + * ubi_more_update_data - write more update data. + * @vol: volume description object + * @buf: write data (user-space memory buffer) + * @count: how much bytes to write + * + * This function writes more data to the volume which is being updated. It may + * be called arbitrary number of times until all of the update data arrive. + * This function returns %0 in case of success, number of bytes written during + * the last call if the whole volume update was successfully finished, and a + * negative error code in case of failure. + */ +int ubi_more_update_data(struct ubi_device *ubi, int vol_id, + const void __user *buf, int count) +{ + uint64_t tmp; + struct ubi_volume *vol = ubi->volumes[vol_id]; + int lnum, offs, err = 0, len, to_write = count; + + dbg_msg("write %d of %lld bytes, %lld already passed", + count, vol->upd_bytes, vol->upd_received); + + if (ubi->ro_mode) + return -EROFS; + + tmp = vol->upd_received; + offs = do_div(tmp, vol->usable_leb_size); + lnum = tmp; + + if (vol->upd_received + count > vol->upd_bytes) + to_write = count = vol->upd_bytes - vol->upd_received; + + /* + * When updating volumes, we accumulate whole logical eraseblock of + * data and write it at once. + */ + if (offs != 0) { + /* + * This is a write to the middle of the logical eraseblock. We + * copy the data to our update buffer and wait for more data or + * flush it if the whole eraseblock is written or the update + * is finished. + */ + + len = vol->usable_leb_size - offs; + if (len > count) + len = count; + + err = copy_from_user(vol->upd_buf + offs, buf, len); + if (err) + return -EFAULT; + + if (offs + len == vol->usable_leb_size || + vol->upd_received + len == vol->upd_bytes) { + int flush_len = offs + len; + + /* + * OK, we gathered either the whole eraseblock or this + * is the last chunk, it's time to flush the buffer. + */ + ubi_assert(flush_len <= vol->usable_leb_size); + err = write_leb(ubi, vol_id, lnum, vol->upd_buf, + flush_len, vol->upd_ebs); + if (err) + return err; + } + + vol->upd_received += len; + count -= len; + buf += len; + lnum += 1; + } + + /* + * If we've got more to write, let's continue. At this point we know we + * are starting from the beginning of an eraseblock. + */ + while (count) { + if (count > vol->usable_leb_size) + len = vol->usable_leb_size; + else + len = count; + + err = copy_from_user(vol->upd_buf, buf, len); + if (err) + return -EFAULT; + + if (len == vol->usable_leb_size || + vol->upd_received + len == vol->upd_bytes) { + err = write_leb(ubi, vol_id, lnum, vol->upd_buf, len, + vol->upd_ebs); + if (err) + break; + } + + vol->upd_received += len; + count -= len; + lnum += 1; + buf += len; + } + + ubi_assert(vol->upd_received <= vol->upd_bytes); + if (vol->upd_received == vol->upd_bytes) { + /* The update is finished, clear the update marker */ + err = clear_update_marker(ubi, vol_id, vol->upd_bytes); + if (err) + return err; + err = ubi_wl_flush(ubi); + if (err == 0) { + err = to_write; + kfree(vol->upd_buf); + vol->updating = 0; + } + } + + return err; +} diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c new file mode 100644 index 00000000000..622d0d18952 --- /dev/null +++ b/drivers/mtd/ubi/vmt.c @@ -0,0 +1,809 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file contains implementation of volume creation, deletion, updating and + * resizing. + */ + +#include <linux/err.h> +#include <asm/div64.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_check_volumes(struct ubi_device *ubi); +#else +#define paranoid_check_volumes(ubi) +#endif + +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf); + +/* Device attributes corresponding to files in '/<sysfs>/class/ubi/ubiX_Y' */ +static struct device_attribute vol_reserved_ebs = + __ATTR(reserved_ebs, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_type = + __ATTR(type, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_name = + __ATTR(name, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_corrupted = + __ATTR(corrupted, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_alignment = + __ATTR(alignment, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_usable_eb_size = + __ATTR(usable_eb_size, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_data_bytes = + __ATTR(data_bytes, S_IRUGO, vol_attribute_show, NULL); +static struct device_attribute vol_upd_marker = + __ATTR(upd_marker, S_IRUGO, vol_attribute_show, NULL); + +/* + * "Show" method for files in '/<sysfs>/class/ubi/ubiX_Y/'. + * + * Consider a situation: + * A. process 1 opens a sysfs file related to volume Y, say + * /<sysfs>/class/ubi/ubiX_Y/reserved_ebs; + * B. process 2 removes volume Y; + * C. process 1 starts reading the /<sysfs>/class/ubi/ubiX_Y/reserved_ebs file; + * + * What we want to do in a situation like that is to return error when the file + * is read. This is done by means of the 'removed' flag and the 'vol_lock' of + * the UBI volume description object. + */ +static ssize_t vol_attribute_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int ret; + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + + spin_lock(&vol->ubi->volumes_lock); + if (vol->removed) { + spin_unlock(&vol->ubi->volumes_lock); + return -ENODEV; + } + if (attr == &vol_reserved_ebs) + ret = sprintf(buf, "%d\n", vol->reserved_pebs); + else if (attr == &vol_type) { + const char *tp; + tp = vol->vol_type == UBI_DYNAMIC_VOLUME ? "dynamic" : "static"; + ret = sprintf(buf, "%s\n", tp); + } else if (attr == &vol_name) + ret = sprintf(buf, "%s\n", vol->name); + else if (attr == &vol_corrupted) + ret = sprintf(buf, "%d\n", vol->corrupted); + else if (attr == &vol_alignment) + ret = sprintf(buf, "%d\n", vol->alignment); + else if (attr == &vol_usable_eb_size) { + ret = sprintf(buf, "%d\n", vol->usable_leb_size); + } else if (attr == &vol_data_bytes) + ret = sprintf(buf, "%lld\n", vol->used_bytes); + else if (attr == &vol_upd_marker) + ret = sprintf(buf, "%d\n", vol->upd_marker); + else + BUG(); + spin_unlock(&vol->ubi->volumes_lock); + return ret; +} + +/* Release method for volume devices */ +static void vol_release(struct device *dev) +{ + struct ubi_volume *vol = container_of(dev, struct ubi_volume, dev); + ubi_assert(vol->removed); + kfree(vol); +} + +/** + * volume_sysfs_init - initialize sysfs for new volume. + * @ubi: UBI device description object + * @vol: volume description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + * + * Note, this function does not free allocated resources in case of failure - + * the caller does it. This is because this would cause release() here and the + * caller would oops. + */ +static int volume_sysfs_init(struct ubi_device *ubi, struct ubi_volume *vol) +{ + int err; + + err = device_create_file(&vol->dev, &vol_reserved_ebs); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_type); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_name); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_corrupted); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_alignment); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_usable_eb_size); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_data_bytes); + if (err) + return err; + err = device_create_file(&vol->dev, &vol_upd_marker); + if (err) + return err; + return 0; +} + +/** + * volume_sysfs_close - close sysfs for a volume. + * @vol: volume description object + */ +static void volume_sysfs_close(struct ubi_volume *vol) +{ + device_remove_file(&vol->dev, &vol_upd_marker); + device_remove_file(&vol->dev, &vol_data_bytes); + device_remove_file(&vol->dev, &vol_usable_eb_size); + device_remove_file(&vol->dev, &vol_alignment); + device_remove_file(&vol->dev, &vol_corrupted); + device_remove_file(&vol->dev, &vol_name); + device_remove_file(&vol->dev, &vol_type); + device_remove_file(&vol->dev, &vol_reserved_ebs); + device_unregister(&vol->dev); +} + +/** + * ubi_create_volume - create volume. + * @ubi: UBI device description object + * @req: volume creation request + * + * This function creates volume described by @req. If @req->vol_id id + * %UBI_VOL_NUM_AUTO, this function automatically assigne ID to the new volume + * and saves it in @req->vol_id. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) +{ + int i, err, vol_id = req->vol_id; + struct ubi_volume *vol; + struct ubi_vtbl_record vtbl_rec; + uint64_t bytes; + + if (ubi->ro_mode) + return -EROFS; + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + spin_lock(&ubi->volumes_lock); + + if (vol_id == UBI_VOL_NUM_AUTO) { + /* Find unused volume ID */ + dbg_msg("search for vacant volume ID"); + for (i = 0; i < ubi->vtbl_slots; i++) + if (!ubi->volumes[i]) { + vol_id = i; + break; + } + + if (vol_id == UBI_VOL_NUM_AUTO) { + dbg_err("out of volume IDs"); + err = -ENFILE; + goto out_unlock; + } + req->vol_id = vol_id; + } + + dbg_msg("volume ID %d, %llu bytes, type %d, name %s", + vol_id, (unsigned long long)req->bytes, + (int)req->vol_type, req->name); + + /* Ensure that this volume does not exist */ + err = -EEXIST; + if (ubi->volumes[vol_id]) { + dbg_err("volume %d already exists", vol_id); + goto out_unlock; + } + + /* Ensure that the name is unique */ + for (i = 0; i < ubi->vtbl_slots; i++) + if (ubi->volumes[i] && + ubi->volumes[i]->name_len == req->name_len && + strcmp(ubi->volumes[i]->name, req->name) == 0) { + dbg_err("volume \"%s\" exists (ID %d)", req->name, i); + goto out_unlock; + } + + /* Calculate how many eraseblocks are requested */ + vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; + bytes = req->bytes; + if (do_div(bytes, vol->usable_leb_size)) + vol->reserved_pebs = 1; + vol->reserved_pebs += bytes; + + /* Reserve physical eraseblocks */ + if (vol->reserved_pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); + spin_unlock(&ubi->volumes_lock); + err = -ENOSPC; + goto out_unlock; + } + ubi->avail_pebs -= vol->reserved_pebs; + ubi->rsvd_pebs += vol->reserved_pebs; + + vol->vol_id = vol_id; + vol->alignment = req->alignment; + vol->data_pad = ubi->leb_size % vol->alignment; + vol->vol_type = req->vol_type; + vol->name_len = req->name_len; + memcpy(vol->name, req->name, vol->name_len + 1); + vol->exclusive = 1; + vol->ubi = ubi; + ubi->volumes[vol_id] = vol; + spin_unlock(&ubi->volumes_lock); + + /* + * Finish all pending erases because there may be some LEBs belonging + * to the same volume ID. + */ + err = ubi_wl_flush(ubi); + if (err) + goto out_acc; + + vol->eba_tbl = kmalloc(vol->reserved_pebs * sizeof(int), GFP_KERNEL); + if (!vol->eba_tbl) { + err = -ENOMEM; + goto out_acc; + } + + for (i = 0; i < vol->reserved_pebs; i++) + vol->eba_tbl[i] = UBI_LEB_UNMAPPED; + + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + } else { + bytes = vol->used_bytes; + vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); + vol->used_ebs = bytes; + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol_id + 1), 1); + if (err) { + ubi_err("cannot add character device for volume %d", vol_id); + goto out_mapping; + } + + err = ubi_create_gluebi(ubi, vol); + if (err) + goto out_cdev; + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); + vol->dev.class = ubi_class; + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) + goto out_gluebi; + + err = volume_sysfs_init(ubi, vol); + if (err) + goto out_sysfs; + + /* Fill volume table record */ + memset(&vtbl_rec, 0, sizeof(struct ubi_vtbl_record)); + vtbl_rec.reserved_pebs = cpu_to_ubi32(vol->reserved_pebs); + vtbl_rec.alignment = cpu_to_ubi32(vol->alignment); + vtbl_rec.data_pad = cpu_to_ubi32(vol->data_pad); + vtbl_rec.name_len = cpu_to_ubi16(vol->name_len); + if (vol->vol_type == UBI_DYNAMIC_VOLUME) + vtbl_rec.vol_type = UBI_VID_DYNAMIC; + else + vtbl_rec.vol_type = UBI_VID_STATIC; + memcpy(vtbl_rec.name, vol->name, vol->name_len + 1); + + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_sysfs; + + spin_lock(&ubi->volumes_lock); + ubi->vol_count += 1; + vol->exclusive = 0; + spin_unlock(&ubi->volumes_lock); + + paranoid_check_volumes(ubi); + return 0; + +out_gluebi: + err = ubi_destroy_gluebi(vol); +out_cdev: + cdev_del(&vol->cdev); +out_mapping: + kfree(vol->eba_tbl); +out_acc: + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= vol->reserved_pebs; + ubi->avail_pebs += vol->reserved_pebs; +out_unlock: + spin_unlock(&ubi->volumes_lock); + kfree(vol); + return err; + + /* + * We are registered, so @vol is destroyed in the release function and + * we have to de-initialize differently. + */ +out_sysfs: + err = ubi_destroy_gluebi(vol); + cdev_del(&vol->cdev); + kfree(vol->eba_tbl); + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= vol->reserved_pebs; + ubi->avail_pebs += vol->reserved_pebs; + spin_unlock(&ubi->volumes_lock); + volume_sysfs_close(vol); + return err; +} + +/** + * ubi_remove_volume - remove volume. + * @desc: volume descriptor + * + * This function removes volume described by @desc. The volume has to be opened + * in "exclusive" mode. Returns zero in case of success and a negative error + * code in case of failure. + */ +int ubi_remove_volume(struct ubi_volume_desc *desc) +{ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; + + dbg_msg("remove UBI volume %d", vol_id); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + + if (ubi->ro_mode) + return -EROFS; + + err = ubi_destroy_gluebi(vol); + if (err) + return err; + + err = ubi_change_vtbl_record(ubi, vol_id, NULL); + if (err) + return err; + + for (i = 0; i < vol->reserved_pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol_id, i); + if (err) + return err; + } + + spin_lock(&ubi->volumes_lock); + vol->removed = 1; + ubi->volumes[vol_id] = NULL; + spin_unlock(&ubi->volumes_lock); + + kfree(vol->eba_tbl); + vol->eba_tbl = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); + kfree(desc); + + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= reserved_pebs; + ubi->avail_pebs += reserved_pebs; + i = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + if (i > 0) { + i = ubi->avail_pebs >= i ? i : ubi->avail_pebs; + ubi->avail_pebs -= i; + ubi->rsvd_pebs += i; + ubi->beb_rsvd_pebs += i; + if (i > 0) + ubi_msg("reserve more %d PEBs", i); + } + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); + + paranoid_check_volumes(ubi); + module_put(THIS_MODULE); + return 0; +} + +/** + * ubi_resize_volume - re-size volume. + * @desc: volume descriptor + * @reserved_pebs: new size in physical eraseblocks + * + * This function returns zero in case of success, and a negative error code in + * case of failure. + */ +int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) +{ + int i, err, pebs, *new_mapping; + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + struct ubi_vtbl_record vtbl_rec; + int vol_id = vol->vol_id; + + if (ubi->ro_mode) + return -EROFS; + + dbg_msg("re-size volume %d to from %d to %d PEBs", + vol_id, vol->reserved_pebs, reserved_pebs); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + + if (vol->vol_type == UBI_STATIC_VOLUME && + reserved_pebs < vol->used_ebs) { + dbg_err("too small size %d, %d LEBs contain data", + reserved_pebs, vol->used_ebs); + return -EINVAL; + } + + /* If the size is the same, we have nothing to do */ + if (reserved_pebs == vol->reserved_pebs) + return 0; + + new_mapping = kmalloc(reserved_pebs * sizeof(int), GFP_KERNEL); + if (!new_mapping) + return -ENOMEM; + + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = UBI_LEB_UNMAPPED; + + /* Reserve physical eraseblocks */ + pebs = reserved_pebs - vol->reserved_pebs; + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + if (pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs: requested %d, available %d", + pebs, ubi->avail_pebs); + spin_unlock(&ubi->volumes_lock); + err = -ENOSPC; + goto out_free; + } + ubi->avail_pebs -= pebs; + ubi->rsvd_pebs += pebs; + for (i = 0; i < vol->reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + /* Change volume table record */ + memcpy(&vtbl_rec, &ubi->vtbl[vol_id], sizeof(struct ubi_vtbl_record)); + vtbl_rec.reserved_pebs = cpu_to_ubi32(reserved_pebs); + err = ubi_change_vtbl_record(ubi, vol_id, &vtbl_rec); + if (err) + goto out_acc; + + if (pebs < 0) { + for (i = 0; i < -pebs; i++) { + err = ubi_eba_unmap_leb(ubi, vol_id, reserved_pebs + i); + if (err) + goto out_acc; + } + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs += pebs; + ubi->avail_pebs -= pebs; + pebs = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs; + if (pebs > 0) { + pebs = ubi->avail_pebs >= pebs ? pebs : ubi->avail_pebs; + ubi->avail_pebs -= pebs; + ubi->rsvd_pebs += pebs; + ubi->beb_rsvd_pebs += pebs; + if (pebs > 0) + ubi_msg("reserve more %d PEBs", pebs); + } + for (i = 0; i < reserved_pebs; i++) + new_mapping[i] = vol->eba_tbl[i]; + kfree(vol->eba_tbl); + vol->eba_tbl = new_mapping; + spin_unlock(&ubi->volumes_lock); + } + + vol->reserved_pebs = reserved_pebs; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + } + + paranoid_check_volumes(ubi); + return 0; + +out_acc: + if (pebs > 0) { + spin_lock(&ubi->volumes_lock); + ubi->rsvd_pebs -= pebs; + ubi->avail_pebs += pebs; + spin_unlock(&ubi->volumes_lock); + } +out_free: + kfree(new_mapping); + return err; +} + +/** + * ubi_add_volume - add volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function adds an existin volume and initializes all its data + * structures. Returnes zero in case of success and a negative error code in + * case of failure. + */ +int ubi_add_volume(struct ubi_device *ubi, int vol_id) +{ + int err; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("add volume %d", vol_id); + ubi_dbg_dump_vol_info(vol); + ubi_assert(vol); + + /* Register character device for the volume */ + cdev_init(&vol->cdev, &ubi_vol_cdev_operations); + vol->cdev.owner = THIS_MODULE; + err = cdev_add(&vol->cdev, MKDEV(ubi->major, vol->vol_id + 1), 1); + if (err) { + ubi_err("cannot add character device for volume %d", vol_id); + return err; + } + + err = ubi_create_gluebi(ubi, vol); + if (err) + goto out_cdev; + + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = MKDEV(ubi->major, vol->vol_id + 1); + vol->dev.class = ubi_class; + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) + goto out_gluebi; + + err = volume_sysfs_init(ubi, vol); + if (err) { + cdev_del(&vol->cdev); + err = ubi_destroy_gluebi(vol); + volume_sysfs_close(vol); + return err; + } + + paranoid_check_volumes(ubi); + return 0; + +out_gluebi: + err = ubi_destroy_gluebi(vol); +out_cdev: + cdev_del(&vol->cdev); + return err; +} + +/** + * ubi_free_volume - free volume. + * @ubi: UBI device description object + * @vol_id: volume ID + * + * This function frees all resources for volume @vol_id but does not remove it. + * Used only when the UBI device is detached. + */ +void ubi_free_volume(struct ubi_device *ubi, int vol_id) +{ + int err; + struct ubi_volume *vol = ubi->volumes[vol_id]; + + dbg_msg("free volume %d", vol_id); + ubi_assert(vol); + + vol->removed = 1; + err = ubi_destroy_gluebi(vol); + ubi->volumes[vol_id] = NULL; + cdev_del(&vol->cdev); + volume_sysfs_close(vol); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_volume - check volume information. + * @ubi: UBI device description object + * @vol_id: volume ID + */ +static void paranoid_check_volume(const struct ubi_device *ubi, int vol_id) +{ + int idx = vol_id2idx(ubi, vol_id); + int reserved_pebs, alignment, data_pad, vol_type, name_len, upd_marker; + const struct ubi_volume *vol = ubi->volumes[idx]; + long long n; + const char *name; + + reserved_pebs = ubi32_to_cpu(ubi->vtbl[vol_id].reserved_pebs); + + if (!vol) { + if (reserved_pebs) { + ubi_err("no volume info, but volume exists"); + goto fail; + } + return; + } + + if (vol->reserved_pebs < 0 || vol->alignment < 0 || vol->data_pad < 0 || + vol->name_len < 0) { + ubi_err("negative values"); + goto fail; + } + if (vol->alignment > ubi->leb_size || vol->alignment == 0) { + ubi_err("bad alignment"); + goto fail; + } + + n = vol->alignment % ubi->min_io_size; + if (vol->alignment != 1 && n) { + ubi_err("alignment is not multiple of min I/O unit"); + goto fail; + } + + n = ubi->leb_size % vol->alignment; + if (vol->data_pad != n) { + ubi_err("bad data_pad, has to be %lld", n); + goto fail; + } + + if (vol->vol_type != UBI_DYNAMIC_VOLUME && + vol->vol_type != UBI_STATIC_VOLUME) { + ubi_err("bad vol_type"); + goto fail; + } + + if (vol->upd_marker != 0 && vol->upd_marker != 1) { + ubi_err("bad upd_marker"); + goto fail; + } + + if (vol->upd_marker && vol->corrupted) { + dbg_err("update marker and corrupted simultaneously"); + goto fail; + } + + if (vol->reserved_pebs > ubi->good_peb_count) { + ubi_err("too large reserved_pebs"); + goto fail; + } + + n = ubi->leb_size - vol->data_pad; + if (vol->usable_leb_size != ubi->leb_size - vol->data_pad) { + ubi_err("bad usable_leb_size, has to be %lld", n); + goto fail; + } + + if (vol->name_len > UBI_VOL_NAME_MAX) { + ubi_err("too long volume name, max is %d", UBI_VOL_NAME_MAX); + goto fail; + } + + if (!vol->name) { + ubi_err("NULL volume name"); + goto fail; + } + + n = strnlen(vol->name, vol->name_len + 1); + if (n != vol->name_len) { + ubi_err("bad name_len %lld", n); + goto fail; + } + + n = vol->used_ebs * vol->usable_leb_size; + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + if (vol->corrupted != 0) { + ubi_err("corrupted dynamic volume"); + goto fail; + } + if (vol->used_ebs != vol->reserved_pebs) { + ubi_err("bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes != vol->usable_leb_size) { + ubi_err("bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes != n) { + ubi_err("bad used_bytes"); + goto fail; + } + } else { + if (vol->corrupted != 0 && vol->corrupted != 1) { + ubi_err("bad corrupted"); + goto fail; + } + if (vol->used_ebs < 0 || vol->used_ebs > vol->reserved_pebs) { + ubi_err("bad used_ebs"); + goto fail; + } + if (vol->last_eb_bytes < 0 || + vol->last_eb_bytes > vol->usable_leb_size) { + ubi_err("bad last_eb_bytes"); + goto fail; + } + if (vol->used_bytes < 0 || vol->used_bytes > n || + vol->used_bytes < n - vol->usable_leb_size) { + ubi_err("bad used_bytes"); + goto fail; + } + } + + alignment = ubi32_to_cpu(ubi->vtbl[vol_id].alignment); + data_pad = ubi32_to_cpu(ubi->vtbl[vol_id].data_pad); + name_len = ubi16_to_cpu(ubi->vtbl[vol_id].name_len); + upd_marker = ubi->vtbl[vol_id].upd_marker; + name = &ubi->vtbl[vol_id].name[0]; + if (ubi->vtbl[vol_id].vol_type == UBI_VID_DYNAMIC) + vol_type = UBI_DYNAMIC_VOLUME; + else + vol_type = UBI_STATIC_VOLUME; + + if (alignment != vol->alignment || data_pad != vol->data_pad || + upd_marker != vol->upd_marker || vol_type != vol->vol_type || + name_len!= vol->name_len || strncmp(name, vol->name, name_len)) { + ubi_err("volume info is different"); + goto fail; + } + + return; + +fail: + ubi_err("paranoid check failed"); + ubi_dbg_dump_vol_info(vol); + ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); + BUG(); +} + +/** + * paranoid_check_volumes - check information about all volumes. + * @ubi: UBI device description object + */ +static void paranoid_check_volumes(struct ubi_device *ubi) +{ + int i; + + mutex_lock(&ubi->vtbl_mutex); + spin_lock(&ubi->volumes_lock); + for (i = 0; i < ubi->vtbl_slots; i++) + paranoid_check_volume(ubi, i); + spin_unlock(&ubi->volumes_lock); + mutex_unlock(&ubi->vtbl_mutex); +} +#endif diff --git a/drivers/mtd/ubi/vtbl.c b/drivers/mtd/ubi/vtbl.c new file mode 100644 index 00000000000..b6fd6bbd941 --- /dev/null +++ b/drivers/mtd/ubi/vtbl.c @@ -0,0 +1,809 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * Copyright (c) Nokia Corporation, 2006, 2007 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Author: Artem Bityutskiy (Битюцкий Артём) + */ + +/* + * This file includes volume table manipulation code. The volume table is an + * on-flash table containing volume meta-data like name, number of reserved + * physical eraseblocks, type, etc. The volume table is stored in the so-called + * "layout volume". + * + * The layout volume is an internal volume which is organized as follows. It + * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical + * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each + * other. This redundancy guarantees robustness to unclean reboots. The volume + * table is basically an array of volume table records. Each record contains + * full information about the volume and protected by a CRC checksum. + * + * The volume table is changed, it is first changed in RAM. Then LEB 0 is + * erased, and the updated volume table is written back to LEB 0. Then same for + * LEB 1. This scheme guarantees recoverability from unclean reboots. + * + * In this UBI implementation the on-flash volume table does not contain any + * information about how many data static volumes contain. This information may + * be found from the scanning data. + * + * But it would still be beneficial to store this information in the volume + * table. For example, suppose we have a static volume X, and all its physical + * eraseblocks became bad for some reasons. Suppose we are attaching the + * corresponding MTD device, the scanning has found no logical eraseblocks + * corresponding to the volume X. According to the volume table volume X does + * exist. So we don't know whether it is just empty or all its physical + * eraseblocks went bad. So we cannot alarm the user about this corruption. + * + * The volume table also stores so-called "update marker", which is used for + * volume updates. Before updating the volume, the update marker is set, and + * after the update operation is finished, the update marker is cleared. So if + * the update operation was interrupted (e.g. by an unclean reboot) - the + * update marker is still there and we know that the volume's contents is + * damaged. + */ + +#include <linux/crc32.h> +#include <linux/err.h> +#include <asm/div64.h> +#include "ubi.h" + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static void paranoid_vtbl_check(const struct ubi_device *ubi); +#else +#define paranoid_vtbl_check(ubi) +#endif + +/* Empty volume table record */ +static struct ubi_vtbl_record empty_vtbl_record; + +/** + * ubi_change_vtbl_record - change volume table record. + * @ubi: UBI device description object + * @idx: table index to change + * @vtbl_rec: new volume table record + * + * This function changes volume table record @idx. If @vtbl_rec is %NULL, empty + * volume table record is written. The caller does not have to calculate CRC of + * the record as it is done by this function. Returns zero in case of success + * and a negative error code in case of failure. + */ +int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, + struct ubi_vtbl_record *vtbl_rec) +{ + int i, err; + uint32_t crc; + + ubi_assert(idx >= 0 && idx < ubi->vtbl_slots); + + if (!vtbl_rec) + vtbl_rec = &empty_vtbl_record; + else { + crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC); + vtbl_rec->crc = cpu_to_ubi32(crc); + } + + dbg_msg("change record %d", idx); + ubi_dbg_dump_vtbl_record(vtbl_rec, idx); + + mutex_lock(&ubi->vtbl_mutex); + memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record)); + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + err = ubi_eba_unmap_leb(ubi, UBI_LAYOUT_VOL_ID, i); + if (err) { + mutex_unlock(&ubi->vtbl_mutex); + return err; + } + err = ubi_eba_write_leb(ubi, UBI_LAYOUT_VOL_ID, i, ubi->vtbl, 0, + ubi->vtbl_size, UBI_LONGTERM); + if (err) { + mutex_unlock(&ubi->vtbl_mutex); + return err; + } + } + + paranoid_vtbl_check(ubi); + mutex_unlock(&ubi->vtbl_mutex); + return ubi_wl_flush(ubi); +} + +/** + * vol_til_check - check if volume table is not corrupted and contains sensible + * data. + * + * @ubi: UBI device description object + * @vtbl: volume table + * + * This function returns zero if @vtbl is all right, %1 if CRC is incorrect, + * and %-EINVAL if it contains inconsistent data. + */ +static int vtbl_check(const struct ubi_device *ubi, + const struct ubi_vtbl_record *vtbl) +{ + int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len; + int upd_marker; + uint32_t crc; + const char *name; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); + alignment = ubi32_to_cpu(vtbl[i].alignment); + data_pad = ubi32_to_cpu(vtbl[i].data_pad); + upd_marker = vtbl[i].upd_marker; + vol_type = vtbl[i].vol_type; + name_len = ubi16_to_cpu(vtbl[i].name_len); + name = &vtbl[i].name[0]; + + crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC); + if (ubi32_to_cpu(vtbl[i].crc) != crc) { + ubi_err("bad CRC at record %u: %#08x, not %#08x", + i, crc, ubi32_to_cpu(vtbl[i].crc)); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + return 1; + } + + if (reserved_pebs == 0) { + if (memcmp(&vtbl[i], &empty_vtbl_record, + UBI_VTBL_RECORD_SIZE)) { + dbg_err("bad empty record"); + goto bad; + } + continue; + } + + if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 || + name_len < 0) { + dbg_err("negative values"); + goto bad; + } + + if (alignment > ubi->leb_size || alignment == 0) { + dbg_err("bad alignment"); + goto bad; + } + + n = alignment % ubi->min_io_size; + if (alignment != 1 && n) { + dbg_err("alignment is not multiple of min I/O unit"); + goto bad; + } + + n = ubi->leb_size % alignment; + if (data_pad != n) { + dbg_err("bad data_pad, has to be %d", n); + goto bad; + } + + if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) { + dbg_err("bad vol_type"); + goto bad; + } + + if (upd_marker != 0 && upd_marker != 1) { + dbg_err("bad upd_marker"); + goto bad; + } + + if (reserved_pebs > ubi->good_peb_count) { + dbg_err("too large reserved_pebs, good PEBs %d", + ubi->good_peb_count); + goto bad; + } + + if (name_len > UBI_VOL_NAME_MAX) { + dbg_err("too long volume name, max %d", + UBI_VOL_NAME_MAX); + goto bad; + } + + if (name[0] == '\0') { + dbg_err("NULL volume name"); + goto bad; + } + + if (name_len != strnlen(name, name_len + 1)) { + dbg_err("bad name_len"); + goto bad; + } + } + + /* Checks that all names are unique */ + for (i = 0; i < ubi->vtbl_slots - 1; i++) { + for (n = i + 1; n < ubi->vtbl_slots; n++) { + int len1 = ubi16_to_cpu(vtbl[i].name_len); + int len2 = ubi16_to_cpu(vtbl[n].name_len); + + if (len1 > 0 && len1 == len2 && + !strncmp(vtbl[i].name, vtbl[n].name, len1)) { + ubi_err("volumes %d and %d have the same name" + " \"%s\"", i, n, vtbl[i].name); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + ubi_dbg_dump_vtbl_record(&vtbl[n], n); + return -EINVAL; + } + } + } + + return 0; + +bad: + ubi_err("volume table check failed, record %d", i); + ubi_dbg_dump_vtbl_record(&vtbl[i], i); + return -EINVAL; +} + +/** + * create_vtbl - create a copy of volume table. + * @ubi: UBI device description object + * @si: scanning information + * @copy: number of the volume table copy + * @vtbl: contents of the volume table + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int create_vtbl(const struct ubi_device *ubi, struct ubi_scan_info *si, + int copy, void *vtbl) +{ + int err, tries = 0; + static struct ubi_vid_hdr *vid_hdr; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *new_seb, *old_seb = NULL; + + ubi_msg("create volume table (copy #%d)", copy + 1); + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + /* + * Check if there is a logical eraseblock which would have to contain + * this volume table copy was found during scanning. It has to be wiped + * out. + */ + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); + if (sv) + old_seb = ubi_scan_find_seb(sv, copy); + +retry: + new_seb = ubi_scan_get_free_peb(ubi, si); + if (IS_ERR(new_seb)) { + err = PTR_ERR(new_seb); + goto out_free; + } + + vid_hdr->vol_type = UBI_VID_DYNAMIC; + vid_hdr->vol_id = cpu_to_ubi32(UBI_LAYOUT_VOL_ID); + vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT; + vid_hdr->data_size = vid_hdr->used_ebs = + vid_hdr->data_pad = cpu_to_ubi32(0); + vid_hdr->lnum = cpu_to_ubi32(copy); + vid_hdr->sqnum = cpu_to_ubi64(++si->max_sqnum); + vid_hdr->leb_ver = cpu_to_ubi32(old_seb ? old_seb->leb_ver + 1: 0); + + /* The EC header is already there, write the VID header */ + err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr); + if (err) + goto write_error; + + /* Write the layout volume contents */ + err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size); + if (err) + goto write_error; + + /* + * And add it to the scanning information. Don't delete the old + * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'. + */ + err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec, + vid_hdr, 0); + kfree(new_seb); + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +write_error: + kfree(new_seb); + /* May be this physical eraseblock went bad, try to pick another one */ + if (++tries <= 5) { + err = ubi_scan_add_to_list(si, new_seb->pnum, new_seb->ec, + &si->corr); + if (!err) + goto retry; + } +out_free: + ubi_free_vid_hdr(ubi, vid_hdr); + return err; + +} + +/** + * process_lvol - process the layout volume. + * @ubi: UBI device description object + * @si: scanning information + * @sv: layout volume scanning information + * + * This function is responsible for reading the layout volume, ensuring it is + * not corrupted, and recovering from corruptions if needed. Returns volume + * table in case of success and a negative error code in case of failure. + */ +static struct ubi_vtbl_record *process_lvol(const struct ubi_device *ubi, + struct ubi_scan_info *si, + struct ubi_scan_volume *sv) +{ + int err; + struct rb_node *rb; + struct ubi_scan_leb *seb; + struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL }; + int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1}; + + /* + * UBI goes through the following steps when it changes the layout + * volume: + * a. erase LEB 0; + * b. write new data to LEB 0; + * c. erase LEB 1; + * d. write new data to LEB 1. + * + * Before the change, both LEBs contain the same data. + * + * Due to unclean reboots, the contents of LEB 0 may be lost, but there + * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not. + * Similarly, LEB 1 may be lost, but there should be LEB 0. And + * finally, unclean reboots may result in a situation when neither LEB + * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB + * 0 contains more recent information. + * + * So the plan is to first check LEB 0. Then + * a. if LEB 0 is OK, it must be containing the most resent data; then + * we compare it with LEB 1, and if they are different, we copy LEB + * 0 to LEB 1; + * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1 + * to LEB 0. + */ + + dbg_msg("check layout volume"); + + /* Read both LEB 0 and LEB 1 into memory */ + ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) { + leb[seb->lnum] = kzalloc(ubi->vtbl_size, GFP_KERNEL); + if (!leb[seb->lnum]) { + err = -ENOMEM; + goto out_free; + } + + err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0, + ubi->vtbl_size); + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) + /* Scrub the PEB later */ + seb->scrub = 1; + else if (err) + goto out_free; + } + + err = -EINVAL; + if (leb[0]) { + leb_corrupted[0] = vtbl_check(ubi, leb[0]); + if (leb_corrupted[0] < 0) + goto out_free; + } + + if (!leb_corrupted[0]) { + /* LEB 0 is OK */ + if (leb[1]) + leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size); + if (leb_corrupted[1]) { + ubi_warn("volume table copy #2 is corrupted"); + err = create_vtbl(ubi, si, 1, leb[0]); + if (err) + goto out_free; + ubi_msg("volume table was restored"); + } + + /* Both LEB 1 and LEB 2 are OK and consistent */ + kfree(leb[1]); + return leb[0]; + } else { + /* LEB 0 is corrupted or does not exist */ + if (leb[1]) { + leb_corrupted[1] = vtbl_check(ubi, leb[1]); + if (leb_corrupted[1] < 0) + goto out_free; + } + if (leb_corrupted[1]) { + /* Both LEB 0 and LEB 1 are corrupted */ + ubi_err("both volume tables are corrupted"); + goto out_free; + } + + ubi_warn("volume table copy #1 is corrupted"); + err = create_vtbl(ubi, si, 0, leb[1]); + if (err) + goto out_free; + ubi_msg("volume table was restored"); + + kfree(leb[0]); + return leb[1]; + } + +out_free: + kfree(leb[0]); + kfree(leb[1]); + return ERR_PTR(err); +} + +/** + * create_empty_lvol - create empty layout volume. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns volume table contents in case of success and a + * negative error code in case of failure. + */ +static struct ubi_vtbl_record *create_empty_lvol(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int i; + struct ubi_vtbl_record *vtbl; + + vtbl = kzalloc(ubi->vtbl_size, GFP_KERNEL); + if (!vtbl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < ubi->vtbl_slots; i++) + memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE); + + for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) { + int err; + + err = create_vtbl(ubi, si, i, vtbl); + if (err) { + kfree(vtbl); + return ERR_PTR(err); + } + } + + return vtbl; +} + +/** + * init_volumes - initialize volume information for existing volumes. + * @ubi: UBI device description object + * @si: scanning information + * @vtbl: volume table + * + * This function allocates volume description objects for existing volumes. + * Returns zero in case of success and a negative error code in case of + * failure. + */ +static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si, + const struct ubi_vtbl_record *vtbl) +{ + int i, reserved_pebs = 0; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + + for (i = 0; i < ubi->vtbl_slots; i++) { + cond_resched(); + + if (ubi32_to_cpu(vtbl[i].reserved_pebs) == 0) + continue; /* Empty record */ + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = ubi32_to_cpu(vtbl[i].reserved_pebs); + vol->alignment = ubi32_to_cpu(vtbl[i].alignment); + vol->data_pad = ubi32_to_cpu(vtbl[i].data_pad); + vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? + UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + vol->name_len = ubi16_to_cpu(vtbl[i].name_len); + vol->usable_leb_size = ubi->leb_size - vol->data_pad; + memcpy(vol->name, vtbl[i].name, vol->name_len); + vol->name[vol->name_len] = '\0'; + vol->vol_id = i; + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[i] = vol; + ubi->vol_count += 1; + vol->ubi = ubi; + reserved_pebs += vol->reserved_pebs; + + /* + * In case of dynamic volume UBI knows nothing about how many + * data is stored there. So assume the whole volume is used. + */ + if (vol->vol_type == UBI_DYNAMIC_VOLUME) { + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->usable_leb_size; + vol->used_bytes = vol->used_ebs * vol->usable_leb_size; + continue; + } + + /* Static volumes only */ + sv = ubi_scan_find_sv(si, i); + if (!sv) { + /* + * No eraseblocks belonging to this volume found. We + * don't actually know whether this static volume is + * completely corrupted or just contains no data. And + * we cannot know this as long as data size is not + * stored on flash. So we just assume the volume is + * empty. FIXME: this should be handled. + */ + continue; + } + + if (sv->leb_count != sv->used_ebs) { + /* + * We found a static volume which misses several + * eraseblocks. Treat it as corrupted. + */ + ubi_warn("static volume %d misses %d LEBs - corrupted", + sv->vol_id, sv->used_ebs - sv->leb_count); + vol->corrupted = 1; + continue; + } + + vol->used_ebs = sv->used_ebs; + vol->used_bytes = (vol->used_ebs - 1) * vol->usable_leb_size; + vol->used_bytes += sv->last_data_size; + vol->last_eb_bytes = sv->last_data_size; + } + + vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL); + if (!vol) + return -ENOMEM; + + vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS; + vol->alignment = 1; + vol->vol_type = UBI_DYNAMIC_VOLUME; + vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1; + memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1); + vol->usable_leb_size = ubi->leb_size; + vol->used_ebs = vol->reserved_pebs; + vol->last_eb_bytes = vol->reserved_pebs; + vol->used_bytes = vol->used_ebs * (ubi->leb_size - vol->data_pad); + vol->vol_id = UBI_LAYOUT_VOL_ID; + + ubi_assert(!ubi->volumes[i]); + ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol; + reserved_pebs += vol->reserved_pebs; + ubi->vol_count += 1; + vol->ubi = ubi; + + if (reserved_pebs > ubi->avail_pebs) + ubi_err("not enough PEBs, required %d, available %d", + reserved_pebs, ubi->avail_pebs); + ubi->rsvd_pebs += reserved_pebs; + ubi->avail_pebs -= reserved_pebs; + + return 0; +} + +/** + * check_sv - check volume scanning information. + * @vol: UBI volume description object + * @sv: volume scanning information + * + * This function returns zero if the volume scanning information is consistent + * to the data read from the volume tabla, and %-EINVAL if not. + */ +static int check_sv(const struct ubi_volume *vol, + const struct ubi_scan_volume *sv) +{ + if (sv->highest_lnum >= vol->reserved_pebs) { + dbg_err("bad highest_lnum"); + goto bad; + } + if (sv->leb_count > vol->reserved_pebs) { + dbg_err("bad leb_count"); + goto bad; + } + if (sv->vol_type != vol->vol_type) { + dbg_err("bad vol_type"); + goto bad; + } + if (sv->used_ebs > vol->reserved_pebs) { + dbg_err("bad used_ebs"); + goto bad; + } + if (sv->data_pad != vol->data_pad) { + dbg_err("bad data_pad"); + goto bad; + } + return 0; + +bad: + ubi_err("bad scanning information"); + ubi_dbg_dump_sv(sv); + ubi_dbg_dump_vol_info(vol); + return -EINVAL; +} + +/** + * check_scanning_info - check that scanning information. + * @ubi: UBI device description object + * @si: scanning information + * + * Even though we protect on-flash data by CRC checksums, we still don't trust + * the media. This function ensures that scanning information is consistent to + * the information read from the volume table. Returns zero if the scanning + * information is OK and %-EINVAL if it is not. + */ +static int check_scanning_info(const struct ubi_device *ubi, + struct ubi_scan_info *si) +{ + int err, i; + struct ubi_scan_volume *sv; + struct ubi_volume *vol; + + if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) { + ubi_err("scanning found %d volumes, maximum is %d + %d", + si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots); + return -EINVAL; + } + + if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&& + si->highest_vol_id < UBI_INTERNAL_VOL_START) { + ubi_err("too large volume ID %d found by scanning", + si->highest_vol_id); + return -EINVAL; + } + + + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) { + cond_resched(); + + sv = ubi_scan_find_sv(si, i); + vol = ubi->volumes[i]; + if (!vol) { + if (sv) + ubi_scan_rm_volume(si, sv); + continue; + } + + if (vol->reserved_pebs == 0) { + ubi_assert(i < ubi->vtbl_slots); + + if (!sv) + continue; + + /* + * During scanning we found a volume which does not + * exist according to the information in the volume + * table. This must have happened due to an unclean + * reboot while the volume was being removed. Discard + * these eraseblocks. + */ + ubi_msg("finish volume %d removal", sv->vol_id); + ubi_scan_rm_volume(si, sv); + } else if (sv) { + err = check_sv(vol, sv); + if (err) + return err; + } + } + + return 0; +} + +/** + * ubi_read_volume_table - read volume table. + * information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function reads volume table, checks it, recover from errors if needed, + * or creates it if needed. Returns zero in case of success and a negative + * error code in case of failure. + */ +int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int i, err; + struct ubi_scan_volume *sv; + + empty_vtbl_record.crc = cpu_to_ubi32(0xf116c36b); + + /* + * The number of supported volumes is limited by the eraseblock size + * and by the UBI_MAX_VOLUMES constant. + */ + ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE; + if (ubi->vtbl_slots > UBI_MAX_VOLUMES) + ubi->vtbl_slots = UBI_MAX_VOLUMES; + + ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE; + ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size); + + sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOL_ID); + if (!sv) { + /* + * No logical eraseblocks belonging to the layout volume were + * found. This could mean that the flash is just empty. In + * this case we create empty layout volume. + * + * But if flash is not empty this must be a corruption or the + * MTD device just contains garbage. + */ + if (si->is_empty) { + ubi->vtbl = create_empty_lvol(ubi, si); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } else { + ubi_err("the layout volume was not found"); + return -EINVAL; + } + } else { + if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) { + /* This must not happen with proper UBI images */ + dbg_err("too many LEBs (%d) in layout volume", + sv->leb_count); + return -EINVAL; + } + + ubi->vtbl = process_lvol(ubi, si, sv); + if (IS_ERR(ubi->vtbl)) + return PTR_ERR(ubi->vtbl); + } + + ubi->avail_pebs = ubi->good_peb_count; + + /* + * The layout volume is OK, initialize the corresponding in-RAM data + * structures. + */ + err = init_volumes(ubi, si, ubi->vtbl); + if (err) + goto out_free; + + /* + * Get sure that the scanning information is consistent to the + * information stored in the volume table. + */ + err = check_scanning_info(ubi, si); + if (err) + goto out_free; + + return 0; + +out_free: + kfree(ubi->vtbl); + for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) + if (ubi->volumes[i]) { + kfree(ubi->volumes[i]); + ubi->volumes[i] = NULL; + } + return err; +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_vtbl_check - check volume table. + * @ubi: UBI device description object + */ +static void paranoid_vtbl_check(const struct ubi_device *ubi) +{ + if (vtbl_check(ubi, ubi->vtbl)) { + ubi_err("paranoid check failed"); + BUG(); + } +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c new file mode 100644 index 00000000000..9ecaf77eca9 --- /dev/null +++ b/drivers/mtd/ubi/wl.c @@ -0,0 +1,1671 @@ +/* + * Copyright (c) International Business Machines Corp., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: Artem Bityutskiy (Битюцкий Артём), Thomas Gleixner + */ + +/* + * UBI wear-leveling unit. + * + * This unit is responsible for wear-leveling. It works in terms of physical + * eraseblocks and erase counters and knows nothing about logical eraseblocks, + * volumes, etc. From this unit's perspective all physical eraseblocks are of + * two types - used and free. Used physical eraseblocks are those that were + * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are + * those that were put by the 'ubi_wl_put_peb()' function. + * + * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter + * header. The rest of the physical eraseblock contains only 0xFF bytes. + * + * When physical eraseblocks are returned to the WL unit by means of the + * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is + * done asynchronously in context of the per-UBI device background thread, + * which is also managed by the WL unit. + * + * The wear-leveling is ensured by means of moving the contents of used + * physical eraseblocks with low erase counter to free physical eraseblocks + * with high erase counter. + * + * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick + * an "optimal" physical eraseblock. For example, when it is known that the + * physical eraseblock will be "put" soon because it contains short-term data, + * the WL unit may pick a free physical eraseblock with low erase counter, and + * so forth. + * + * If the WL unit fails to erase a physical eraseblock, it marks it as bad. + * + * This unit is also responsible for scrubbing. If a bit-flip is detected in a + * physical eraseblock, it has to be moved. Technically this is the same as + * moving it for wear-leveling reasons. + * + * As it was said, for the UBI unit all physical eraseblocks are either "free" + * or "used". Free eraseblock are kept in the @wl->free RB-tree, while used + * eraseblocks are kept in a set of different RB-trees: @wl->used, + * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. + * + * Note, in this implementation, we keep a small in-RAM object for each physical + * eraseblock. This is surely not a scalable solution. But it appears to be good + * enough for moderately large flashes and it is simple. In future, one may + * re-work this unit and make it more scalable. + * + * At the moment this unit does not utilize the sequence number, which was + * introduced relatively recently. But it would be wise to do this because the + * sequence number of a logical eraseblock characterizes how old is it. For + * example, when we move a PEB with low erase counter, and we need to pick the + * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we + * pick target PEB with an average EC if our PEB is not very "old". This is a + * room for future re-works of the WL unit. + * + * FIXME: looks too complex, should be simplified (later). + */ + +#include <linux/slab.h> +#include <linux/crc32.h> +#include <linux/freezer.h> +#include <linux/kthread.h> +#include "ubi.h" + +/* Number of physical eraseblocks reserved for wear-leveling purposes */ +#define WL_RESERVED_PEBS 1 + +/* + * How many erase cycles are short term, unknown, and long term physical + * eraseblocks protected. + */ +#define ST_PROTECTION 16 +#define U_PROTECTION 10 +#define LT_PROTECTION 4 + +/* + * Maximum difference between two erase counters. If this threshold is + * exceeded, the WL unit starts moving data from used physical eraseblocks with + * low erase counter to free physical eraseblocks with high erase counter. + */ +#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD + +/* + * When a physical eraseblock is moved, the WL unit has to pick the target + * physical eraseblock to move to. The simplest way would be just to pick the + * one with the highest erase counter. But in certain workloads this could lead + * to an unlimited wear of one or few physical eraseblock. Indeed, imagine a + * situation when the picked physical eraseblock is constantly erased after the + * data is written to it. So, we have a constant which limits the highest erase + * counter of the free physical eraseblock to pick. Namely, the WL unit does + * not pick eraseblocks with erase counter greater then the lowest erase + * counter plus %WL_FREE_MAX_DIFF. + */ +#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) + +/* + * Maximum number of consecutive background thread failures which is enough to + * switch to read-only mode. + */ +#define WL_MAX_FAILURES 32 + +/** + * struct ubi_wl_entry - wear-leveling entry. + * @rb: link in the corresponding RB-tree + * @ec: erase counter + * @pnum: physical eraseblock number + * + * Each physical eraseblock has a corresponding &struct wl_entry object which + * may be kept in different RB-trees. + */ +struct ubi_wl_entry { + struct rb_node rb; + int ec; + int pnum; +}; + +/** + * struct ubi_wl_prot_entry - PEB protection entry. + * @rb_pnum: link in the @wl->prot.pnum RB-tree + * @rb_aec: link in the @wl->prot.aec RB-tree + * @abs_ec: the absolute erase counter value when the protection ends + * @e: the wear-leveling entry of the physical eraseblock under protection + * + * When the WL unit returns a physical eraseblock, the physical eraseblock is + * protected from being moved for some "time". For this reason, the physical + * eraseblock is not directly moved from the @wl->free tree to the @wl->used + * tree. There is one more tree in between where this physical eraseblock is + * temporarily stored (@wl->prot). + * + * All this protection stuff is needed because: + * o we don't want to move physical eraseblocks just after we have given them + * to the user; instead, we first want to let users fill them up with data; + * + * o there is a chance that the user will put the physical eraseblock very + * soon, so it makes sense not to move it for some time, but wait; this is + * especially important in case of "short term" physical eraseblocks. + * + * Physical eraseblocks stay protected only for limited time. But the "time" is + * measured in erase cycles in this case. This is implemented with help of the + * absolute erase counter (@wl->abs_ec). When it reaches certain value, the + * physical eraseblocks are moved from the protection trees (@wl->prot.*) to + * the @wl->used tree. + * + * Protected physical eraseblocks are searched by physical eraseblock number + * (when they are put) and by the absolute erase counter (to check if it is + * time to move them to the @wl->used tree). So there are actually 2 RB-trees + * storing the protected physical eraseblocks: @wl->prot.pnum and + * @wl->prot.aec. They are referred to as the "protection" trees. The + * first one is indexed by the physical eraseblock number. The second one is + * indexed by the absolute erase counter. Both trees store + * &struct ubi_wl_prot_entry objects. + * + * Each physical eraseblock has 2 main states: free and used. The former state + * corresponds to the @wl->free tree. The latter state is split up on several + * sub-states: + * o the WL movement is allowed (@wl->used tree); + * o the WL movement is temporarily prohibited (@wl->prot.pnum and + * @wl->prot.aec trees); + * o scrubbing is needed (@wl->scrub tree). + * + * Depending on the sub-state, wear-leveling entries of the used physical + * eraseblocks may be kept in one of those trees. + */ +struct ubi_wl_prot_entry { + struct rb_node rb_pnum; + struct rb_node rb_aec; + unsigned long long abs_ec; + struct ubi_wl_entry *e; +}; + +/** + * struct ubi_work - UBI work description data structure. + * @list: a link in the list of pending works + * @func: worker function + * @priv: private data of the worker function + * + * @e: physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * The @func pointer points to the worker function. If the @cancel argument is + * not zero, the worker has to free the resources and exit immediately. The + * worker has to return zero in case of success and a negative error code in + * case of failure. + */ +struct ubi_work { + struct list_head list; + int (*func)(struct ubi_device *ubi, struct ubi_work *wrk, int cancel); + /* The below fields are only relevant to erasure works */ + struct ubi_wl_entry *e; + int torture; +}; + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID +static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec); +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root); +#else +#define paranoid_check_ec(ubi, pnum, ec) 0 +#define paranoid_check_in_wl_tree(e, root) +#endif + +/* Slab cache for wear-leveling entries */ +static struct kmem_cache *wl_entries_slab; + +/** + * tree_empty - a helper function to check if an RB-tree is empty. + * @root: the root of the tree + * + * This function returns non-zero if the RB-tree is empty and zero if not. + */ +static inline int tree_empty(struct rb_root *root) +{ + return root->rb_node == NULL; +} + +/** + * wl_tree_add - add a wear-leveling entry to a WL RB-tree. + * @e: the wear-leveling entry to add + * @root: the root of the tree + * + * Note, we use (erase counter, physical eraseblock number) pairs as keys in + * the @ubi->used and @ubi->free RB-trees. + */ +static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node **p, *parent = NULL; + + p = &root->rb_node; + while (*p) { + struct ubi_wl_entry *e1; + + parent = *p; + e1 = rb_entry(parent, struct ubi_wl_entry, rb); + + if (e->ec < e1->ec) + p = &(*p)->rb_left; + else if (e->ec > e1->ec) + p = &(*p)->rb_right; + else { + ubi_assert(e->pnum != e1->pnum); + if (e->pnum < e1->pnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + } + + rb_link_node(&e->rb, parent, p); + rb_insert_color(&e->rb, root); +} + + +/* + * Helper functions to add and delete wear-leveling entries from different + * trees. + */ + +static void free_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e) +{ + wl_tree_add(e, &ubi->free); +} +static inline void used_tree_add(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + wl_tree_add(e, &ubi->used); +} +static inline void scrub_tree_add(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + wl_tree_add(e, &ubi->scrub); +} +static inline void free_tree_del(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + paranoid_check_in_wl_tree(e, &ubi->free); + rb_erase(&e->rb, &ubi->free); +} +static inline void used_tree_del(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + paranoid_check_in_wl_tree(e, &ubi->used); + rb_erase(&e->rb, &ubi->used); +} +static inline void scrub_tree_del(struct ubi_device *ubi, + struct ubi_wl_entry *e) +{ + paranoid_check_in_wl_tree(e, &ubi->scrub); + rb_erase(&e->rb, &ubi->scrub); +} + +/** + * do_work - do one pending work. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int do_work(struct ubi_device *ubi) +{ + int err; + struct ubi_work *wrk; + + spin_lock(&ubi->wl_lock); + + if (list_empty(&ubi->works)) { + spin_unlock(&ubi->wl_lock); + return 0; + } + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + spin_unlock(&ubi->wl_lock); + + /* + * Call the worker function. Do not touch the work structure + * after this call as it will have been freed or reused by that + * time by the worker function. + */ + err = wrk->func(ubi, wrk, 0); + if (err) + ubi_err("work failed with error code %d", err); + + spin_lock(&ubi->wl_lock); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + spin_unlock(&ubi->wl_lock); + return err; +} + +/** + * produce_free_peb - produce a free physical eraseblock. + * @ubi: UBI device description object + * + * This function tries to make a free PEB by means of synchronous execution of + * pending works. This may be needed if, for example the background thread is + * disabled. Returns zero in case of success and a negative error code in case + * of failure. + */ +static int produce_free_peb(struct ubi_device *ubi) +{ + int err; + + spin_lock(&ubi->wl_lock); + while (tree_empty(&ubi->free)) { + spin_unlock(&ubi->wl_lock); + + dbg_wl("do one work synchronously"); + err = do_work(ubi); + if (err) + return err; + + spin_lock(&ubi->wl_lock); + } + spin_unlock(&ubi->wl_lock); + + return 0; +} + +/** + * in_wl_tree - check if wear-leveling entry is present in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns non-zero if @e is in the @root RB-tree and zero if it + * is not. + */ +static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) +{ + struct rb_node *p; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, rb); + + if (e->pnum == e1->pnum) { + ubi_assert(e == e1); + return 1; + } + + if (e->ec < e1->ec) + p = p->rb_left; + else if (e->ec > e1->ec) + p = p->rb_right; + else { + ubi_assert(e->pnum != e1->pnum); + if (e->pnum < e1->pnum) + p = p->rb_left; + else + p = p->rb_right; + } + } + + return 0; +} + +/** + * prot_tree_add - add physical eraseblock to protection trees. + * @ubi: UBI device description object + * @e: the physical eraseblock to add + * @pe: protection entry object to use + * @abs_ec: absolute erase counter value when this physical eraseblock has + * to be removed from the protection trees. + * + * @wl->lock has to be locked. + */ +static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, + struct ubi_wl_prot_entry *pe, int abs_ec) +{ + struct rb_node **p, *parent = NULL; + struct ubi_wl_prot_entry *pe1; + + pe->e = e; + pe->abs_ec = ubi->abs_ec + abs_ec; + + p = &ubi->prot.pnum.rb_node; + while (*p) { + parent = *p; + pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); + + if (e->pnum < pe1->e->pnum) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&pe->rb_pnum, parent, p); + rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); + + p = &ubi->prot.aec.rb_node; + parent = NULL; + while (*p) { + parent = *p; + pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); + + if (pe->abs_ec < pe1->abs_ec) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&pe->rb_aec, parent, p); + rb_insert_color(&pe->rb_aec, &ubi->prot.aec); +} + +/** + * find_wl_entry - find wear-leveling entry closest to certain erase counter. + * @root: the RB-tree where to look for + * @max: highest possible erase counter + * + * This function looks for a wear leveling entry with erase counter closest to + * @max and less then @max. + */ +static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) +{ + struct rb_node *p; + struct ubi_wl_entry *e; + + e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); + max += e->ec; + + p = root->rb_node; + while (p) { + struct ubi_wl_entry *e1; + + e1 = rb_entry(p, struct ubi_wl_entry, rb); + if (e1->ec >= max) + p = p->rb_left; + else { + p = p->rb_right; + e = e1; + } + } + + return e; +} + +/** + * ubi_wl_get_peb - get a physical eraseblock. + * @ubi: UBI device description object + * @dtype: type of data which will be stored in this physical eraseblock + * + * This function returns a physical eraseblock in case of success and a + * negative error code in case of failure. Might sleep. + */ +int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) +{ + int err, protect, medium_ec; + struct ubi_wl_entry *e, *first, *last; + struct ubi_wl_prot_entry *pe; + + ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || + dtype == UBI_UNKNOWN); + + pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_KERNEL); + if (!pe) + return -ENOMEM; + +retry: + spin_lock(&ubi->wl_lock); + if (tree_empty(&ubi->free)) { + if (ubi->works_count == 0) { + ubi_assert(list_empty(&ubi->works)); + ubi_err("no free eraseblocks"); + spin_unlock(&ubi->wl_lock); + kfree(pe); + return -ENOSPC; + } + spin_unlock(&ubi->wl_lock); + + err = produce_free_peb(ubi); + if (err < 0) { + kfree(pe); + return err; + } + goto retry; + } + + switch (dtype) { + case UBI_LONGTERM: + /* + * For long term data we pick a physical eraseblock + * with high erase counter. But the highest erase + * counter we can pick is bounded by the the lowest + * erase counter plus %WL_FREE_MAX_DIFF. + */ + e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + protect = LT_PROTECTION; + break; + case UBI_UNKNOWN: + /* + * For unknown data we pick a physical eraseblock with + * medium erase counter. But we by no means can pick a + * physical eraseblock with erase counter greater or + * equivalent than the lowest erase counter plus + * %WL_FREE_MAX_DIFF. + */ + first = rb_entry(rb_first(&ubi->free), + struct ubi_wl_entry, rb); + last = rb_entry(rb_last(&ubi->free), + struct ubi_wl_entry, rb); + + if (last->ec - first->ec < WL_FREE_MAX_DIFF) + e = rb_entry(ubi->free.rb_node, + struct ubi_wl_entry, rb); + else { + medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; + e = find_wl_entry(&ubi->free, medium_ec); + } + protect = U_PROTECTION; + break; + case UBI_SHORTTERM: + /* + * For short term data we pick a physical eraseblock + * with the lowest erase counter as we expect it will + * be erased soon. + */ + e = rb_entry(rb_first(&ubi->free), + struct ubi_wl_entry, rb); + protect = ST_PROTECTION; + break; + default: + protect = 0; + e = NULL; + BUG(); + } + + /* + * Move the physical eraseblock to the protection trees where it will + * be protected from being moved for some time. + */ + free_tree_del(ubi, e); + prot_tree_add(ubi, e, pe, protect); + + dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); + spin_unlock(&ubi->wl_lock); + + return e->pnum; +} + +/** + * prot_tree_del - remove a physical eraseblock from the protection trees + * @ubi: UBI device description object + * @pnum: the physical eraseblock to remove + */ +static void prot_tree_del(struct ubi_device *ubi, int pnum) +{ + struct rb_node *p; + struct ubi_wl_prot_entry *pe = NULL; + + p = ubi->prot.pnum.rb_node; + while (p) { + + pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); + + if (pnum == pe->e->pnum) + break; + + if (pnum < pe->e->pnum) + p = p->rb_left; + else + p = p->rb_right; + } + + ubi_assert(pe->e->pnum == pnum); + rb_erase(&pe->rb_aec, &ubi->prot.aec); + rb_erase(&pe->rb_pnum, &ubi->prot.pnum); + kfree(pe); +} + +/** + * sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @e: the the physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) +{ + int err; + struct ubi_ec_hdr *ec_hdr; + unsigned long long ec = e->ec; + + dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); + + err = paranoid_check_ec(ubi, e->pnum, e->ec); + if (err > 0) + return -EINVAL; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_sync_erase(ubi, e->pnum, torture); + if (err < 0) + goto out_free; + + ec += err; + if (ec > UBI_MAX_ERASECOUNTER) { + /* + * Erase counter overflow. Upgrade UBI and use 64-bit + * erase counters internally. + */ + ubi_err("erase counter overflow at PEB %d, EC %llu", + e->pnum, ec); + err = -EINVAL; + goto out_free; + } + + dbg_wl("erased PEB %d, new EC %llu", e->pnum, ec); + + ec_hdr->ec = cpu_to_ubi64(ec); + + err = ubi_io_write_ec_hdr(ubi, e->pnum, ec_hdr); + if (err) + goto out_free; + + e->ec = ec; + spin_lock(&ubi->wl_lock); + if (e->ec > ubi->max_ec) + ubi->max_ec = e->ec; + spin_unlock(&ubi->wl_lock); + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * check_protection_over - check if it is time to stop protecting some + * physical eraseblocks. + * @ubi: UBI device description object + * + * This function is called after each erase operation, when the absolute erase + * counter is incremented, to check if some physical eraseblock have not to be + * protected any longer. These physical eraseblocks are moved from the + * protection trees to the used tree. + */ +static void check_protection_over(struct ubi_device *ubi) +{ + struct ubi_wl_prot_entry *pe; + + /* + * There may be several protected physical eraseblock to remove, + * process them all. + */ + while (1) { + spin_lock(&ubi->wl_lock); + if (tree_empty(&ubi->prot.aec)) { + spin_unlock(&ubi->wl_lock); + break; + } + + pe = rb_entry(rb_first(&ubi->prot.aec), + struct ubi_wl_prot_entry, rb_aec); + + if (pe->abs_ec > ubi->abs_ec) { + spin_unlock(&ubi->wl_lock); + break; + } + + dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", + pe->e->pnum, ubi->abs_ec, pe->abs_ec); + rb_erase(&pe->rb_aec, &ubi->prot.aec); + rb_erase(&pe->rb_pnum, &ubi->prot.pnum); + used_tree_add(ubi, pe->e); + spin_unlock(&ubi->wl_lock); + + kfree(pe); + cond_resched(); + } +} + +/** + * schedule_ubi_work - schedule a work. + * @ubi: UBI device description object + * @wrk: the work to schedule + * + * This function enqueues a work defined by @wrk to the tail of the pending + * works list. + */ +static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) +{ + spin_lock(&ubi->wl_lock); + list_add_tail(&wrk->list, &ubi->works); + ubi_assert(ubi->works_count >= 0); + ubi->works_count += 1; + if (ubi->thread_enabled) + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); +} + +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int cancel); + +/** + * schedule_erase - schedule an erase work. + * @ubi: UBI device description object + * @e: the WL entry of the physical eraseblock to erase + * @torture: if the physical eraseblock has to be tortured + * + * This function returns zero in case of success and a %-ENOMEM in case of + * failure. + */ +static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, + int torture) +{ + struct ubi_work *wl_wrk; + + dbg_wl("schedule erasure of PEB %d, EC %d, torture %d", + e->pnum, e->ec, torture); + + wl_wrk = kmalloc(sizeof(struct ubi_work), GFP_KERNEL); + if (!wl_wrk) + return -ENOMEM; + + wl_wrk->func = &erase_worker; + wl_wrk->e = e; + wl_wrk->torture = torture; + + schedule_ubi_work(ubi, wl_wrk); + return 0; +} + +/** + * wear_leveling_worker - wear-leveling worker function. + * @ubi: UBI device description object + * @wrk: the work object + * @cancel: non-zero if the worker has to free memory and exit + * + * This function copies a more worn out physical eraseblock to a less worn out + * one. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, + int cancel) +{ + int err, put = 0; + struct ubi_wl_entry *e1, *e2; + struct ubi_vid_hdr *vid_hdr; + + kfree(wrk); + + if (cancel) + return 0; + + vid_hdr = ubi_zalloc_vid_hdr(ubi); + if (!vid_hdr) + return -ENOMEM; + + spin_lock(&ubi->wl_lock); + + /* + * Only one WL worker at a time is supported at this implementation, so + * make sure a PEB is not being moved already. + */ + if (ubi->move_to || tree_empty(&ubi->free) || + (tree_empty(&ubi->used) && tree_empty(&ubi->scrub))) { + /* + * Only one WL worker at a time is supported at this + * implementation, so if a LEB is already being moved, cancel. + * + * No free physical eraseblocks? Well, we cancel wear-leveling + * then. It will be triggered again when a free physical + * eraseblock appears. + * + * No used physical eraseblocks? They must be temporarily + * protected from being moved. They will be moved to the + * @ubi->used tree later and the wear-leveling will be + * triggered again. + */ + dbg_wl("cancel WL, a list is empty: free %d, used %d", + tree_empty(&ubi->free), tree_empty(&ubi->used)); + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + } + + if (tree_empty(&ubi->scrub)) { + /* + * Now pick the least worn-out used physical eraseblock and a + * highly worn-out free physical eraseblock. If the erase + * counters differ much enough, start wear-leveling. + */ + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { + dbg_wl("no WL needed: min used EC %d, max free EC %d", + e1->ec, e2->ec); + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; + } + used_tree_del(ubi, e1); + dbg_wl("move PEB %d EC %d to PEB %d EC %d", + e1->pnum, e1->ec, e2->pnum, e2->ec); + } else { + e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + scrub_tree_del(ubi, e1); + dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); + } + + free_tree_del(ubi, e2); + ubi_assert(!ubi->move_from && !ubi->move_to); + ubi_assert(!ubi->move_to_put && !ubi->move_from_put); + ubi->move_from = e1; + ubi->move_to = e2; + spin_unlock(&ubi->wl_lock); + + /* + * Now we are going to copy physical eraseblock @e1->pnum to @e2->pnum. + * We so far do not know which logical eraseblock our physical + * eraseblock (@e1) belongs to. We have to read the volume identifier + * header first. + */ + + err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { + if (err == UBI_IO_PEB_FREE) { + /* + * We are trying to move PEB without a VID header. UBI + * always write VID headers shortly after the PEB was + * given, so we have a situation when it did not have + * chance to write it down because it was preempted. + * Just re-schedule the work, so that next time it will + * likely have the VID header in place. + */ + dbg_wl("PEB %d has no VID header", e1->pnum); + err = 0; + } else { + ubi_err("error %d while reading VID header from PEB %d", + err, e1->pnum); + if (err > 0) + err = -EIO; + } + goto error; + } + + err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); + if (err) { + if (err == UBI_IO_BITFLIPS) + err = 0; + goto error; + } + + ubi_free_vid_hdr(ubi, vid_hdr); + spin_lock(&ubi->wl_lock); + if (!ubi->move_to_put) + used_tree_add(ubi, e2); + else + put = 1; + ubi->move_from = ubi->move_to = NULL; + ubi->move_from_put = ubi->move_to_put = 0; + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + + if (put) { + /* + * Well, the target PEB was put meanwhile, schedule it for + * erasure. + */ + dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); + err = schedule_erase(ubi, e2, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e2); + ubi_ro_mode(ubi); + } + } + + err = schedule_erase(ubi, e1, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e1); + ubi_ro_mode(ubi); + } + + dbg_wl("done"); + return err; + + /* + * Some error occurred. @e1 was not changed, so return it back. @e2 + * might be changed, schedule it for erasure. + */ +error: + if (err) + dbg_wl("error %d occurred, cancel operation", err); + ubi_assert(err <= 0); + + ubi_free_vid_hdr(ubi, vid_hdr); + spin_lock(&ubi->wl_lock); + ubi->wl_scheduled = 0; + if (ubi->move_from_put) + put = 1; + else + used_tree_add(ubi, e1); + ubi->move_from = ubi->move_to = NULL; + ubi->move_from_put = ubi->move_to_put = 0; + spin_unlock(&ubi->wl_lock); + + if (put) { + /* + * Well, the target PEB was put meanwhile, schedule it for + * erasure. + */ + dbg_wl("PEB %d was put meanwhile, erase", e1->pnum); + err = schedule_erase(ubi, e1, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e1); + ubi_ro_mode(ubi); + } + } + + err = schedule_erase(ubi, e2, 0); + if (err) { + kmem_cache_free(wl_entries_slab, e2); + ubi_ro_mode(ubi); + } + + yield(); + return err; +} + +/** + * ensure_wear_leveling - schedule wear-leveling if it is needed. + * @ubi: UBI device description object + * + * This function checks if it is time to start wear-leveling and schedules it + * if yes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +static int ensure_wear_leveling(struct ubi_device *ubi) +{ + int err = 0; + struct ubi_wl_entry *e1; + struct ubi_wl_entry *e2; + struct ubi_work *wrk; + + spin_lock(&ubi->wl_lock); + if (ubi->wl_scheduled) + /* Wear-leveling is already in the work queue */ + goto out_unlock; + + /* + * If the ubi->scrub tree is not empty, scrubbing is needed, and the + * the WL worker has to be scheduled anyway. + */ + if (tree_empty(&ubi->scrub)) { + if (tree_empty(&ubi->used) || tree_empty(&ubi->free)) + /* No physical eraseblocks - no deal */ + goto out_unlock; + + /* + * We schedule wear-leveling only if the difference between the + * lowest erase counter of used physical eraseblocks and a high + * erase counter of free physical eraseblocks is greater then + * %UBI_WL_THRESHOLD. + */ + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); + e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); + + if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) + goto out_unlock; + dbg_wl("schedule wear-leveling"); + } else + dbg_wl("schedule scrubbing"); + + ubi->wl_scheduled = 1; + spin_unlock(&ubi->wl_lock); + + wrk = kmalloc(sizeof(struct ubi_work), GFP_KERNEL); + if (!wrk) { + err = -ENOMEM; + goto out_cancel; + } + + wrk->func = &wear_leveling_worker; + schedule_ubi_work(ubi, wrk); + return err; + +out_cancel: + spin_lock(&ubi->wl_lock); + ubi->wl_scheduled = 0; +out_unlock: + spin_unlock(&ubi->wl_lock); + return err; +} + +/** + * erase_worker - physical eraseblock erase worker function. + * @ubi: UBI device description object + * @wl_wrk: the work object + * @cancel: non-zero if the worker has to free memory and exit + * + * This function erases a physical eraseblock and perform torture testing if + * needed. It also takes care about marking the physical eraseblock bad if + * needed. Returns zero in case of success and a negative error code in case of + * failure. + */ +static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + int cancel) +{ + int err; + struct ubi_wl_entry *e = wl_wrk->e; + int pnum = e->pnum; + + if (cancel) { + dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); + kfree(wl_wrk); + kmem_cache_free(wl_entries_slab, e); + return 0; + } + + dbg_wl("erase PEB %d EC %d", pnum, e->ec); + + err = sync_erase(ubi, e, wl_wrk->torture); + if (!err) { + /* Fine, we've erased it successfully */ + kfree(wl_wrk); + + spin_lock(&ubi->wl_lock); + ubi->abs_ec += 1; + free_tree_add(ubi, e); + spin_unlock(&ubi->wl_lock); + + /* + * One more erase operation has happened, take care about protected + * physical eraseblocks. + */ + check_protection_over(ubi); + + /* And take care about wear-leveling */ + err = ensure_wear_leveling(ubi); + return err; + } + + kfree(wl_wrk); + kmem_cache_free(wl_entries_slab, e); + + if (err != -EIO) { + /* + * If this is not %-EIO, we have no idea what to do. Scheduling + * this physical eraseblock for erasure again would cause + * errors again and again. Well, lets switch to RO mode. + */ + ubi_ro_mode(ubi); + return err; + } + + /* It is %-EIO, the PEB went bad */ + + if (!ubi->bad_allowed) { + ubi_err("bad physical eraseblock %d detected", pnum); + ubi_ro_mode(ubi); + err = -EIO; + } else { + int need; + + spin_lock(&ubi->volumes_lock); + need = ubi->beb_rsvd_level - ubi->beb_rsvd_pebs + 1; + if (need > 0) { + need = ubi->avail_pebs >= need ? need : ubi->avail_pebs; + ubi->avail_pebs -= need; + ubi->rsvd_pebs += need; + ubi->beb_rsvd_pebs += need; + if (need > 0) + ubi_msg("reserve more %d PEBs", need); + } + + if (ubi->beb_rsvd_pebs == 0) { + spin_unlock(&ubi->volumes_lock); + ubi_err("no reserved physical eraseblocks"); + ubi_ro_mode(ubi); + return -EIO; + } + + spin_unlock(&ubi->volumes_lock); + ubi_msg("mark PEB %d as bad", pnum); + + err = ubi_io_mark_bad(ubi, pnum); + if (err) { + ubi_ro_mode(ubi); + return err; + } + + spin_lock(&ubi->volumes_lock); + ubi->beb_rsvd_pebs -= 1; + ubi->bad_peb_count += 1; + ubi->good_peb_count -= 1; + ubi_calculate_reserved(ubi); + if (ubi->beb_rsvd_pebs == 0) + ubi_warn("last PEB from the reserved pool was used"); + spin_unlock(&ubi->volumes_lock); + } + + return err; +} + +/** + * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling + * unit. + * @ubi: UBI device description object + * @pnum: physical eraseblock to return + * @torture: if this physical eraseblock has to be tortured + * + * This function is called to return physical eraseblock @pnum to the pool of + * free physical eraseblocks. The @torture flag has to be set if an I/O error + * occurred to this @pnum and it has to be tested. This function returns zero + * in case of success and a negative error code in case of failure. + */ +int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) +{ + int err; + struct ubi_wl_entry *e; + + dbg_wl("PEB %d", pnum); + ubi_assert(pnum >= 0); + ubi_assert(pnum < ubi->peb_count); + + spin_lock(&ubi->wl_lock); + + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from) { + /* + * User is putting the physical eraseblock which was selected to + * be moved. It will be scheduled for erasure in the + * wear-leveling worker. + */ + dbg_wl("PEB %d is being moved", pnum); + ubi_assert(!ubi->move_from_put); + ubi->move_from_put = 1; + spin_unlock(&ubi->wl_lock); + return 0; + } else if (e == ubi->move_to) { + /* + * User is putting the physical eraseblock which was selected + * as the target the data is moved to. It may happen if the EBA + * unit already re-mapped the LEB but the WL unit did has not + * put the PEB to the "used" tree. + */ + dbg_wl("PEB %d is the target of data moving", pnum); + ubi_assert(!ubi->move_to_put); + ubi->move_to_put = 1; + spin_unlock(&ubi->wl_lock); + return 0; + } else { + if (in_wl_tree(e, &ubi->used)) + used_tree_del(ubi, e); + else if (in_wl_tree(e, &ubi->scrub)) + scrub_tree_del(ubi, e); + else + prot_tree_del(ubi, e->pnum); + } + spin_unlock(&ubi->wl_lock); + + err = schedule_erase(ubi, e, torture); + if (err) { + spin_lock(&ubi->wl_lock); + used_tree_add(ubi, e); + spin_unlock(&ubi->wl_lock); + } + + return err; +} + +/** + * ubi_wl_scrub_peb - schedule a physical eraseblock for scrubbing. + * @ubi: UBI device description object + * @pnum: the physical eraseblock to schedule + * + * If a bit-flip in a physical eraseblock is detected, this physical eraseblock + * needs scrubbing. This function schedules a physical eraseblock for + * scrubbing which is done in background. This function returns zero in case of + * success and a negative error code in case of failure. + */ +int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum) +{ + struct ubi_wl_entry *e; + + ubi_msg("schedule PEB %d for scrubbing", pnum); + +retry: + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; + if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) { + spin_unlock(&ubi->wl_lock); + return 0; + } + + if (e == ubi->move_to) { + /* + * This physical eraseblock was used to move data to. The data + * was moved but the PEB was not yet inserted to the proper + * tree. We should just wait a little and let the WL worker + * proceed. + */ + spin_unlock(&ubi->wl_lock); + dbg_wl("the PEB %d is not in proper tree, retry", pnum); + yield(); + goto retry; + } + + if (in_wl_tree(e, &ubi->used)) + used_tree_del(ubi, e); + else + prot_tree_del(ubi, pnum); + + scrub_tree_add(ubi, e); + spin_unlock(&ubi->wl_lock); + + /* + * Technically scrubbing is the same as wear-leveling, so it is done + * by the WL worker. + */ + return ensure_wear_leveling(ubi); +} + +/** + * ubi_wl_flush - flush all pending works. + * @ubi: UBI device description object + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubi_wl_flush(struct ubi_device *ubi) +{ + int err, pending_count; + + pending_count = ubi->works_count; + + dbg_wl("flush (%d pending works)", pending_count); + + /* + * Erase while the pending works queue is not empty, but not more then + * the number of currently pending works. + */ + while (pending_count-- > 0) { + err = do_work(ubi); + if (err) + return err; + } + + return 0; +} + +/** + * tree_destroy - destroy an RB-tree. + * @root: the root of the tree to destroy + */ +static void tree_destroy(struct rb_root *root) +{ + struct rb_node *rb; + struct ubi_wl_entry *e; + + rb = root->rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + e = rb_entry(rb, struct ubi_wl_entry, rb); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &e->rb) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + kmem_cache_free(wl_entries_slab, e); + } + } +} + +/** + * ubi_thread - UBI background thread. + * @u: the UBI device description object pointer + */ +static int ubi_thread(void *u) +{ + int failures = 0; + struct ubi_device *ubi = u; + + ubi_msg("background thread \"%s\" started, PID %d", + ubi->bgt_name, current->pid); + + for (;;) { + int err; + + if (kthread_should_stop()) + goto out; + + if (try_to_freeze()) + continue; + + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works) || ubi->ro_mode || + !ubi->thread_enabled) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock(&ubi->wl_lock); + schedule(); + continue; + } + spin_unlock(&ubi->wl_lock); + + err = do_work(ubi); + if (err) { + ubi_err("%s: work failed with error code %d", + ubi->bgt_name, err); + if (failures++ > WL_MAX_FAILURES) { + /* + * Too many failures, disable the thread and + * switch to read-only mode. + */ + ubi_msg("%s: %d consecutive failures", + ubi->bgt_name, WL_MAX_FAILURES); + ubi_ro_mode(ubi); + break; + } + } else + failures = 0; + + cond_resched(); + } + +out: + dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); + return 0; +} + +/** + * cancel_pending - cancel all pending works. + * @ubi: UBI device description object + */ +static void cancel_pending(struct ubi_device *ubi) +{ + while (!list_empty(&ubi->works)) { + struct ubi_work *wrk; + + wrk = list_entry(ubi->works.next, struct ubi_work, list); + list_del(&wrk->list); + wrk->func(ubi, wrk, 1); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); + } +} + +/** + * ubi_wl_init_scan - initialize the wear-leveling unit using scanning + * information. + * @ubi: UBI device description object + * @si: scanning information + * + * This function returns zero in case of success, and a negative error code in + * case of failure. + */ +int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) +{ + int err; + struct rb_node *rb1, *rb2; + struct ubi_scan_volume *sv; + struct ubi_scan_leb *seb, *tmp; + struct ubi_wl_entry *e; + + + ubi->used = ubi->free = ubi->scrub = RB_ROOT; + ubi->prot.pnum = ubi->prot.aec = RB_ROOT; + spin_lock_init(&ubi->wl_lock); + ubi->max_ec = si->max_ec; + INIT_LIST_HEAD(&ubi->works); + + sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); + + ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); + if (IS_ERR(ubi->bgt_thread)) { + err = PTR_ERR(ubi->bgt_thread); + ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, + err); + return err; + } + + if (ubi_devices_cnt == 0) { + wl_entries_slab = kmem_cache_create("ubi_wl_entry_slab", + sizeof(struct ubi_wl_entry), + 0, 0, NULL, NULL); + if (!wl_entries_slab) + return -ENOMEM; + } + + err = -ENOMEM; + ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); + if (!ubi->lookuptbl) + goto out_free; + + list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { + kmem_cache_free(wl_entries_slab, e); + goto out_free; + } + } + + list_for_each_entry(seb, &si->free, u.list) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi_assert(e->ec >= 0); + free_tree_add(ubi, e); + ubi->lookuptbl[e->pnum] = e; + } + + list_for_each_entry(seb, &si->corr, u.list) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (schedule_erase(ubi, e, 0)) { + kmem_cache_free(wl_entries_slab, e); + goto out_free; + } + } + + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); + + e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + if (!e) + goto out_free; + + e->pnum = seb->pnum; + e->ec = seb->ec; + ubi->lookuptbl[e->pnum] = e; + if (!seb->scrub) { + dbg_wl("add PEB %d EC %d to the used tree", + e->pnum, e->ec); + used_tree_add(ubi, e); + } else { + dbg_wl("add PEB %d EC %d to the scrub tree", + e->pnum, e->ec); + scrub_tree_add(ubi, e); + } + } + } + + if (WL_RESERVED_PEBS > ubi->avail_pebs) { + ubi_err("no enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, WL_RESERVED_PEBS); + goto out_free; + } + ubi->avail_pebs -= WL_RESERVED_PEBS; + ubi->rsvd_pebs += WL_RESERVED_PEBS; + + /* Schedule wear-leveling if needed */ + err = ensure_wear_leveling(ubi); + if (err) + goto out_free; + + return 0; + +out_free: + cancel_pending(ubi); + tree_destroy(&ubi->used); + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + kfree(ubi->lookuptbl); + if (ubi_devices_cnt == 0) + kmem_cache_destroy(wl_entries_slab); + return err; +} + +/** + * protection_trees_destroy - destroy the protection RB-trees. + * @ubi: UBI device description object + */ +static void protection_trees_destroy(struct ubi_device *ubi) +{ + struct rb_node *rb; + struct ubi_wl_prot_entry *pe; + + rb = ubi->prot.aec.rb_node; + while (rb) { + if (rb->rb_left) + rb = rb->rb_left; + else if (rb->rb_right) + rb = rb->rb_right; + else { + pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); + + rb = rb_parent(rb); + if (rb) { + if (rb->rb_left == &pe->rb_aec) + rb->rb_left = NULL; + else + rb->rb_right = NULL; + } + + kmem_cache_free(wl_entries_slab, pe->e); + kfree(pe); + } + } +} + +/** + * ubi_wl_close - close the wear-leveling unit. + * @ubi: UBI device description object + */ +void ubi_wl_close(struct ubi_device *ubi) +{ + dbg_wl("disable \"%s\"", ubi->bgt_name); + if (ubi->bgt_thread) + kthread_stop(ubi->bgt_thread); + + dbg_wl("close the UBI wear-leveling unit"); + + cancel_pending(ubi); + protection_trees_destroy(ubi); + tree_destroy(&ubi->used); + tree_destroy(&ubi->free); + tree_destroy(&ubi->scrub); + kfree(ubi->lookuptbl); + if (ubi_devices_cnt == 1) + kmem_cache_destroy(wl_entries_slab); +} + +#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID + +/** + * paranoid_check_ec - make sure that the erase counter of a physical eraseblock + * is correct. + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @ec: the erase counter to check + * + * This function returns zero if the erase counter of physical eraseblock @pnum + * is equivalent to @ec, %1 if not, and a negative error code if an error + * occurred. + */ +static int paranoid_check_ec(const struct ubi_device *ubi, int pnum, int ec) +{ + int err; + long long read_ec; + struct ubi_ec_hdr *ec_hdr; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ec_hdr) + return -ENOMEM; + + err = ubi_io_read_ec_hdr(ubi, pnum, ec_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { + /* The header does not have to exist */ + err = 0; + goto out_free; + } + + read_ec = ubi64_to_cpu(ec_hdr->ec); + if (ec != read_ec) { + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_err("read EC is %lld, should be %d", read_ec, ec); + ubi_dbg_dump_stack(); + err = 1; + } else + err = 0; + +out_free: + kfree(ec_hdr); + return err; +} + +/** + * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present + * in a WL RB-tree. + * @e: the wear-leveling entry to check + * @root: the root of the tree + * + * This function returns zero if @e is in the @root RB-tree and %1 if it + * is not. + */ +static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root) +{ + if (in_wl_tree(e, root)) + return 0; + + ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ", + e->pnum, e->ec, root); + ubi_dbg_dump_stack(); + return 1; +} + +#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ |