From 7b6c32daec3bff380ced6822002bc352bdf2c982 Mon Sep 17 00:00:00 2001 From: Xiaochuan-Xu Date: Mon, 15 Dec 2008 21:07:41 +0800 Subject: UBI: simplify PEB protection code UBI has 2 RB-trees to implement PEB protection, which is too much for simply prevent PEB from being moved for some time. This patch implements this using lists. The benefits: 1. No need to allocate protection entry on each PEB get. 2. No need to maintain balanced trees and walk them. Signed-off-by: Xiaochuan-Xu Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 364 ++++++++++++++++++++------------------------------- 1 file changed, 140 insertions(+), 224 deletions(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 0279bf9dc72..14901cb82c1 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -22,7 +22,7 @@ * UBI wear-leveling sub-system. * * This sub-system is responsible for wear-leveling. It works in terms of - * physical* eraseblocks and erase counters and knows nothing about logical + * physical eraseblocks and erase counters and knows nothing about logical * eraseblocks, volumes, etc. From this sub-system's perspective all physical * eraseblocks are of two types - used and free. Used physical eraseblocks are * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical @@ -55,8 +55,39 @@ * * As it was said, for the UBI sub-system all physical eraseblocks are either * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while - * used eraseblocks are kept in a set of different RB-trees: @wl->used, - * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. + * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or + * (temporarily) in the @wl->pq queue. + * + * When the WL sub-system returns a physical eraseblock, the physical + * eraseblock is protected from being moved for some "time". For this reason, + * the physical eraseblock is not directly moved from the @wl->free tree to the + * @wl->used tree. There is a protection queue in between where this + * physical eraseblock is temporarily stored (@wl->pq). + * + * All this protection stuff is needed because: + * o we don't want to move physical eraseblocks just after we have given them + * to the user; instead, we first want to let users fill them up with data; + * + * o there is a chance that the user will put the physical eraseblock very + * soon, so it makes sense not to move it for some time, but wait; this is + * especially important in case of "short term" physical eraseblocks. + * + * Physical eraseblocks stay protected only for limited time. But the "time" is + * measured in erase cycles in this case. This is implemented with help of the + * protection queue. Eraseblocks are put to the tail of this queue when they + * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the + * head of the queue on each erase operation (for any eraseblock). So the + * length of the queue defines how may (global) erase cycles PEBs are protected. + * + * To put it differently, each physical eraseblock has 2 main states: free and + * used. The former state corresponds to the @wl->free tree. The latter state + * is split up on several sub-states: + * o the WL movement is allowed (@wl->used tree); + * o the WL movement is temporarily prohibited (@wl->pq queue); + * o scrubbing is needed (@wl->scrub tree). + * + * Depending on the sub-state, wear-leveling entries of the used physical + * eraseblocks may be kept in one of those structures. * * Note, in this implementation, we keep a small in-RAM object for each physical * eraseblock. This is surely not a scalable solution. But it appears to be good @@ -70,9 +101,6 @@ * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we * pick target PEB with an average EC if our PEB is not very "old". This is a * room for future re-works of the WL sub-system. - * - * Note: the stuff with protection trees looks too complex and is difficult to - * understand. Should be fixed. */ #include @@ -84,14 +112,6 @@ /* Number of physical eraseblocks reserved for wear-leveling purposes */ #define WL_RESERVED_PEBS 1 -/* - * How many erase cycles are short term, unknown, and long term physical - * eraseblocks protected. - */ -#define ST_PROTECTION 16 -#define U_PROTECTION 10 -#define LT_PROTECTION 4 - /* * Maximum difference between two erase counters. If this threshold is * exceeded, the WL sub-system starts moving data from used physical @@ -119,65 +139,10 @@ */ #define WL_MAX_FAILURES 32 -/** - * struct ubi_wl_prot_entry - PEB protection entry. - * @rb_pnum: link in the @wl->prot.pnum RB-tree - * @rb_aec: link in the @wl->prot.aec RB-tree - * @abs_ec: the absolute erase counter value when the protection ends - * @e: the wear-leveling entry of the physical eraseblock under protection - * - * When the WL sub-system returns a physical eraseblock, the physical - * eraseblock is protected from being moved for some "time". For this reason, - * the physical eraseblock is not directly moved from the @wl->free tree to the - * @wl->used tree. There is one more tree in between where this physical - * eraseblock is temporarily stored (@wl->prot). - * - * All this protection stuff is needed because: - * o we don't want to move physical eraseblocks just after we have given them - * to the user; instead, we first want to let users fill them up with data; - * - * o there is a chance that the user will put the physical eraseblock very - * soon, so it makes sense not to move it for some time, but wait; this is - * especially important in case of "short term" physical eraseblocks. - * - * Physical eraseblocks stay protected only for limited time. But the "time" is - * measured in erase cycles in this case. This is implemented with help of the - * absolute erase counter (@wl->abs_ec). When it reaches certain value, the - * physical eraseblocks are moved from the protection trees (@wl->prot.*) to - * the @wl->used tree. - * - * Protected physical eraseblocks are searched by physical eraseblock number - * (when they are put) and by the absolute erase counter (to check if it is - * time to move them to the @wl->used tree). So there are actually 2 RB-trees - * storing the protected physical eraseblocks: @wl->prot.pnum and - * @wl->prot.aec. They are referred to as the "protection" trees. The - * first one is indexed by the physical eraseblock number. The second one is - * indexed by the absolute erase counter. Both trees store - * &struct ubi_wl_prot_entry objects. - * - * Each physical eraseblock has 2 main states: free and used. The former state - * corresponds to the @wl->free tree. The latter state is split up on several - * sub-states: - * o the WL movement is allowed (@wl->used tree); - * o the WL movement is temporarily prohibited (@wl->prot.pnum and - * @wl->prot.aec trees); - * o scrubbing is needed (@wl->scrub tree). - * - * Depending on the sub-state, wear-leveling entries of the used physical - * eraseblocks may be kept in one of those trees. - */ -struct ubi_wl_prot_entry { - struct rb_node rb_pnum; - struct rb_node rb_aec; - unsigned long long abs_ec; - struct ubi_wl_entry *e; -}; - /** * struct ubi_work - UBI work description data structure. * @list: a link in the list of pending works * @func: worker function - * @priv: private data of the worker function * @e: physical eraseblock to erase * @torture: if the physical eraseblock has to be tortured * @@ -198,9 +163,11 @@ struct ubi_work { static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root); +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e); #else #define paranoid_check_ec(ubi, pnum, ec) 0 #define paranoid_check_in_wl_tree(e, root) +#define paranoid_check_in_pq(ubi, e) 0 #endif /** @@ -355,49 +322,24 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root) } /** - * prot_tree_add - add physical eraseblock to protection trees. + * prot_queue_add - add physical eraseblock to the protection queue. * @ubi: UBI device description object * @e: the physical eraseblock to add - * @pe: protection entry object to use - * @ec: for how many erase operations this PEB should be protected * - * @wl->lock has to be locked. + * This function adds @e to the tail of the protection queue @ubi->pq, where + * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be + * temporarily protected from the wear-leveling worker. Note, @wl->lock has to + * be locked. */ -static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, - struct ubi_wl_prot_entry *pe, int ec) +static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e) { - struct rb_node **p, *parent = NULL; - struct ubi_wl_prot_entry *pe1; + int pq_tail = ubi->pq_head - 1; - pe->e = e; - pe->abs_ec = ubi->abs_ec + ec; - - p = &ubi->prot.pnum.rb_node; - while (*p) { - parent = *p; - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum); - - if (e->pnum < pe1->e->pnum) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&pe->rb_pnum, parent, p); - rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum); - - p = &ubi->prot.aec.rb_node; - parent = NULL; - while (*p) { - parent = *p; - pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec); - - if (pe->abs_ec < pe1->abs_ec) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&pe->rb_aec, parent, p); - rb_insert_color(&pe->rb_aec, &ubi->prot.aec); + if (pq_tail < 0) + pq_tail = UBI_PROT_QUEUE_LEN - 1; + ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN); + list_add_tail(&e->u.list, &ubi->pq[pq_tail]); + dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec); } /** @@ -442,17 +384,12 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) */ int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) { - int err, protect, medium_ec; + int err, medium_ec; struct ubi_wl_entry *e, *first, *last; - struct ubi_wl_prot_entry *pe; ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || dtype == UBI_UNKNOWN); - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); - if (!pe) - return -ENOMEM; - retry: spin_lock(&ubi->wl_lock); if (!ubi->free.rb_node) { @@ -460,16 +397,13 @@ retry: ubi_assert(list_empty(&ubi->works)); ubi_err("no free eraseblocks"); spin_unlock(&ubi->wl_lock); - kfree(pe); return -ENOSPC; } spin_unlock(&ubi->wl_lock); err = produce_free_peb(ubi); - if (err < 0) { - kfree(pe); + if (err < 0) return err; - } goto retry; } @@ -482,7 +416,6 @@ retry: * %WL_FREE_MAX_DIFF. */ e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); - protect = LT_PROTECTION; break; case UBI_UNKNOWN: /* @@ -502,7 +435,6 @@ retry: medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; e = find_wl_entry(&ubi->free, medium_ec); } - protect = U_PROTECTION; break; case UBI_SHORTTERM: /* @@ -510,63 +442,45 @@ retry: * lowest erase counter as we expect it will be erased soon. */ e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb); - protect = ST_PROTECTION; break; default: - protect = 0; - e = NULL; BUG(); } + paranoid_check_in_wl_tree(e, &ubi->free); + /* - * Move the physical eraseblock to the protection trees where it will + * Move the physical eraseblock to the protection queue where it will * be protected from being moved for some time. */ - paranoid_check_in_wl_tree(e, &ubi->free); rb_erase(&e->u.rb, &ubi->free); - prot_tree_add(ubi, e, pe, protect); - - dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect); + dbg_wl("PEB %d EC %d", e->pnum, e->ec); + prot_queue_add(ubi, e); spin_unlock(&ubi->wl_lock); - return e->pnum; } /** - * prot_tree_del - remove a physical eraseblock from the protection trees + * prot_queue_del - remove a physical eraseblock from the protection queue. * @ubi: UBI device description object * @pnum: the physical eraseblock to remove * - * This function returns PEB @pnum from the protection trees and returns zero - * in case of success and %-ENODEV if the PEB was not found in the protection - * trees. + * This function deletes PEB @pnum from the protection queue and returns zero + * in case of success and %-ENODEV if the PEB was not found. */ -static int prot_tree_del(struct ubi_device *ubi, int pnum) +static int prot_queue_del(struct ubi_device *ubi, int pnum) { - struct rb_node *p; - struct ubi_wl_prot_entry *pe = NULL; - - p = ubi->prot.pnum.rb_node; - while (p) { - - pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); - - if (pnum == pe->e->pnum) - goto found; + struct ubi_wl_entry *e; - if (pnum < pe->e->pnum) - p = p->rb_left; - else - p = p->rb_right; - } + e = ubi->lookuptbl[pnum]; + if (!e) + return -ENODEV; - return -ENODEV; + if (paranoid_check_in_pq(ubi, e)) + return -ENODEV; -found: - ubi_assert(pe->e->pnum == pnum); - rb_erase(&pe->rb_aec, &ubi->prot.aec); - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); - kfree(pe); + list_del(&e->u.list); + dbg_wl("deleted PEB %d from the protection queue", e->pnum); return 0; } @@ -632,47 +546,47 @@ out_free: } /** - * check_protection_over - check if it is time to stop protecting some PEBs. + * serve_prot_queue - check if it is time to stop protecting PEBs. * @ubi: UBI device description object * - * This function is called after each erase operation, when the absolute erase - * counter is incremented, to check if some physical eraseblock have not to be - * protected any longer. These physical eraseblocks are moved from the - * protection trees to the used tree. + * This function is called after each erase operation and removes PEBs from the + * tail of the protection queue. These PEBs have been protected for long enough + * and should be moved to the used tree. */ -static void check_protection_over(struct ubi_device *ubi) +static void serve_prot_queue(struct ubi_device *ubi) { - struct ubi_wl_prot_entry *pe; + struct ubi_wl_entry *e, *tmp; + int count; /* * There may be several protected physical eraseblock to remove, * process them all. */ - while (1) { - spin_lock(&ubi->wl_lock); - if (!ubi->prot.aec.rb_node) { - spin_unlock(&ubi->wl_lock); - break; - } - - pe = rb_entry(rb_first(&ubi->prot.aec), - struct ubi_wl_prot_entry, rb_aec); +repeat: + count = 0; + spin_lock(&ubi->wl_lock); + list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) { + dbg_wl("PEB %d EC %d protection over, move to used tree", + e->pnum, e->ec); - if (pe->abs_ec > ubi->abs_ec) { + list_del(&e->u.list); + wl_tree_add(e, &ubi->used); + if (count++ > 32) { + /* + * Let's be nice and avoid holding the spinlock for + * too long. + */ spin_unlock(&ubi->wl_lock); - break; + cond_resched(); + goto repeat; } - - dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu", - pe->e->pnum, ubi->abs_ec, pe->abs_ec); - rb_erase(&pe->rb_aec, &ubi->prot.aec); - rb_erase(&pe->rb_pnum, &ubi->prot.pnum); - wl_tree_add(pe->e, &ubi->used); - spin_unlock(&ubi->wl_lock); - - kfree(pe); - cond_resched(); } + + ubi->pq_head += 1; + if (ubi->pq_head == UBI_PROT_QUEUE_LEN) + ubi->pq_head = 0; + ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN); + spin_unlock(&ubi->wl_lock); } /** @@ -680,8 +594,8 @@ static void check_protection_over(struct ubi_device *ubi) * @ubi: UBI device description object * @wrk: the work to schedule * - * This function enqueues a work defined by @wrk to the tail of the pending - * works list. + * This function adds a work defined by @wrk to the tail of the pending works + * list. */ static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) { @@ -740,7 +654,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int cancel) { int err, scrubbing = 0, torture = 0; - struct ubi_wl_prot_entry *uninitialized_var(pe); struct ubi_wl_entry *e1, *e2; struct ubi_vid_hdr *vid_hdr; @@ -857,23 +770,17 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, * The LEB has not been moved because the volume is being * deleted or the PEB has been put meanwhile. We should prevent * this PEB from being selected for wear-leveling movement - * again, so put it to the protection tree. + * again, so put it to the protection queue. */ dbg_wl("canceled moving PEB %d", e1->pnum); ubi_assert(err == 1); - pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); - if (!pe) { - err = -ENOMEM; - goto out_error; - } - ubi_free_vid_hdr(ubi, vid_hdr); vid_hdr = NULL; spin_lock(&ubi->wl_lock); - prot_tree_add(ubi, e1, pe, U_PROTECTION); + prot_queue_add(ubi, e1); ubi_assert(!ubi->move_to_put); ubi->move_from = ubi->move_to = NULL; ubi->wl_scheduled = 0; @@ -1075,7 +982,6 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, kfree(wl_wrk); spin_lock(&ubi->wl_lock); - ubi->abs_ec += 1; wl_tree_add(e, &ubi->free); spin_unlock(&ubi->wl_lock); @@ -1083,7 +989,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, * One more erase operation has happened, take care about * protected physical eraseblocks. */ - check_protection_over(ubi); + serve_prot_queue(ubi); /* And take care about wear-leveling */ err = ensure_wear_leveling(ubi); @@ -1220,7 +1126,7 @@ retry: paranoid_check_in_wl_tree(e, &ubi->scrub); rb_erase(&e->u.rb, &ubi->scrub); } else { - err = prot_tree_del(ubi, e->pnum); + err = prot_queue_del(ubi, e->pnum); if (err) { ubi_err("PEB %d not found", pnum); ubi_ro_mode(ubi); @@ -1284,7 +1190,7 @@ retry: } else { int err; - err = prot_tree_del(ubi, e->pnum); + err = prot_queue_del(ubi, e->pnum); if (err) { ubi_err("PEB %d not found", pnum); ubi_ro_mode(ubi); @@ -1315,7 +1221,7 @@ int ubi_wl_flush(struct ubi_device *ubi) int err; /* - * Erase while the pending works queue is not empty, but not more then + * Erase while the pending works queue is not empty, but not more than * the number of currently pending works. */ dbg_wl("flush (%d pending works)", ubi->works_count); @@ -1461,15 +1367,13 @@ static void cancel_pending(struct ubi_device *ubi) */ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) { - int err; + int err, i; struct rb_node *rb1, *rb2; struct ubi_scan_volume *sv; struct ubi_scan_leb *seb, *tmp; struct ubi_wl_entry *e; - ubi->used = ubi->free = ubi->scrub = RB_ROOT; - ubi->prot.pnum = ubi->prot.aec = RB_ROOT; spin_lock_init(&ubi->wl_lock); mutex_init(&ubi->move_mutex); init_rwsem(&ubi->work_sem); @@ -1483,6 +1387,10 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) if (!ubi->lookuptbl) return err; + for (i = 0; i < UBI_PROT_QUEUE_LEN; i++) + INIT_LIST_HEAD(&ubi->pq[i]); + ubi->pq_head = 0; + list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { cond_resched(); @@ -1577,33 +1485,18 @@ out_free: } /** - * protection_trees_destroy - destroy the protection RB-trees. + * protection_queue_destroy - destroy the protection queue. * @ubi: UBI device description object */ -static void protection_trees_destroy(struct ubi_device *ubi) +static void protection_queue_destroy(struct ubi_device *ubi) { - struct rb_node *rb; - struct ubi_wl_prot_entry *pe; + int i; + struct ubi_wl_entry *e, *tmp; - rb = ubi->prot.aec.rb_node; - while (rb) { - if (rb->rb_left) - rb = rb->rb_left; - else if (rb->rb_right) - rb = rb->rb_right; - else { - pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec); - - rb = rb_parent(rb); - if (rb) { - if (rb->rb_left == &pe->rb_aec) - rb->rb_left = NULL; - else - rb->rb_right = NULL; - } - - kmem_cache_free(ubi_wl_entry_slab, pe->e); - kfree(pe); + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) { + list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) { + list_del(&e->u.list); + kmem_cache_free(ubi_wl_entry_slab, e); } } } @@ -1616,7 +1509,7 @@ void ubi_wl_close(struct ubi_device *ubi) { dbg_wl("close the WL sub-system"); cancel_pending(ubi); - protection_trees_destroy(ubi); + protection_queue_destroy(ubi); tree_destroy(&ubi->used); tree_destroy(&ubi->free); tree_destroy(&ubi->scrub); @@ -1686,4 +1579,27 @@ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, return 1; } +/** + * paranoid_check_in_pq - check if wear-leveling entry is in the protection + * queue. + * @ubi: UBI device description object + * @e: the wear-leveling entry to check + * + * This function returns zero if @e is in @ubi->pq and %1 if it is not. + */ +static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) +{ + struct ubi_wl_entry *p; + int i; + + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) + list_for_each_entry(p, &ubi->pq[i], u.list) + if (p == e) + return 0; + + ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue", + e->pnum, e->ec); + ubi_dbg_dump_stack(); + return 1; +} #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ -- cgit v1.2.3