From 2bd632165c1f783888bd4cbed95f2f304829159b Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 16:57:38 -0800 Subject: ocfs2/trivial: Remove trailing whitespaces Patch removes trailing whitespaces. Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmapi.h | 2 +- fs/ocfs2/dlm/dlmast.c | 2 +- fs/ocfs2/dlm/dlmconvert.c | 2 +- fs/ocfs2/dlm/dlmdomain.c | 2 +- fs/ocfs2/dlm/dlmlock.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 38 +++++++++++++++++++------------------- fs/ocfs2/dlm/dlmrecovery.c | 38 +++++++++++++++++++------------------- fs/ocfs2/dlm/dlmunlock.c | 8 ++++---- 8 files changed, 47 insertions(+), 47 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index b5786a787fa..3cfa114aa39 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h @@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err); mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \ } while (0) -#define DLM_LKSB_UNUSED1 0x01 +#define DLM_LKSB_UNUSED1 0x01 #define DLM_LKSB_PUT_LVB 0x02 #define DLM_LKSB_GET_LVB 0x04 #define DLM_LKSB_UNUSED2 0x08 diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 01cf8cc3d28..dccc439fa08 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -123,7 +123,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) dlm_lock_put(lock); /* free up the reserved bast that we are cancelling. * guaranteed that this will not be the last reserved - * ast because *both* an ast and a bast were reserved + * ast because *both* an ast and a bast were reserved * to get to this point. the res->spinlock will not be * taken here */ dlm_lockres_release_ast(dlm, res); diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index ca96bce50e1..f283bce776b 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -396,7 +396,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm, /* instead of logging the same network error over * and over, sleep here and wait for the heartbeat * to notice the node is dead. times out after 5s. */ - dlm_wait_for_node_death(dlm, res->owner, + dlm_wait_for_node_death(dlm, res->owner, DLM_NODE_DEATH_WAIT_MAX); ret = DLM_RECOVERING; mlog(0, "node %u died so returning DLM_RECOVERING " diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 0334000676d..988c9055fd4 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data, } /* Once the dlm ctxt is marked as leaving then we don't want - * to be put in someone's domain map. + * to be put in someone's domain map. * Also, explicitly disallow joining at certain troublesome * times (ie. during recovery). */ if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) { diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 437698e9465..73333777267 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm, } dlm_revert_pending_lock(res, lock); dlm_lock_put(lock); - } else if (dlm_is_recovery_lock(res->lockname.name, + } else if (dlm_is_recovery_lock(res->lockname.name, res->lockname.len)) { /* special case for the $RECOVERY lock. * there will never be an AST delivered to put diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 03ccf9a7b1f..a659606dcb9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up) struct dlm_master_list_entry *mle; assert_spin_locked(&dlm->spinlock); - + list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) { if (node_up) dlm_mle_node_up(dlm, mle, NULL, idx); @@ -833,7 +833,7 @@ lookup: __dlm_insert_mle(dlm, mle); /* still holding the dlm spinlock, check the recovery map - * to see if there are any nodes that still need to be + * to see if there are any nodes that still need to be * considered. these will not appear in the mle nodemap * but they might own this lockres. wait on them. */ bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); @@ -883,7 +883,7 @@ redo_request: msleep(500); } continue; - } + } dlm_kick_recovery_thread(dlm); msleep(1000); @@ -939,8 +939,8 @@ wait: res->lockname.name, blocked); if (++tries > 20) { mlog(ML_ERROR, "%s:%.*s: spinning on " - "dlm_wait_for_lock_mastery, blocked=%d\n", - dlm->name, res->lockname.len, + "dlm_wait_for_lock_mastery, blocked=%d\n", + dlm->name, res->lockname.len, res->lockname.name, blocked); dlm_print_one_lock_resource(res); dlm_print_one_mle(mle); @@ -1029,7 +1029,7 @@ recheck: ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked); b = (mle->type == DLM_MLE_BLOCK); if ((*blocked && !b) || (!*blocked && b)) { - mlog(0, "%s:%.*s: status change: old=%d new=%d\n", + mlog(0, "%s:%.*s: status change: old=%d new=%d\n", dlm->name, res->lockname.len, res->lockname.name, *blocked, b); *blocked = b; @@ -1602,7 +1602,7 @@ send_response: } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); - ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, + ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, DLM_ASSERT_MASTER_MLE_CLEANUP); if (ret < 0) { mlog(ML_ERROR, "failed to dispatch assert master work\n"); @@ -1701,7 +1701,7 @@ again: if (r & DLM_ASSERT_RESPONSE_REASSERT) { mlog(0, "%.*s: node %u create mles on other " - "nodes and requests a re-assert\n", + "nodes and requests a re-assert\n", namelen, lockname, to); reassert = 1; } @@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&dlm->master_lock); spin_unlock(&dlm->spinlock); goto done; - } + } } } spin_unlock(&dlm->master_lock); @@ -1883,7 +1883,7 @@ ok: int extra_ref = 0; int nn = -1; int rr, err = 0; - + spin_lock(&mle->spinlock); if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION) extra_ref = 1; @@ -1891,7 +1891,7 @@ ok: /* MASTER mle: if any bits set in the response map * then the calling node needs to re-assert to clear * up nodes that this node contacted */ - while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, + while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES, nn+1)) < O2NM_MAX_NODES) { if (nn != dlm->node_num && nn != assert->node_idx) master_request = 1; @@ -2002,7 +2002,7 @@ kill: __dlm_print_one_lock_resource(res); spin_unlock(&res->spinlock); spin_unlock(&dlm->spinlock); - *ret_data = (void *)res; + *ret_data = (void *)res; dlm_put(dlm); return -EINVAL; } @@ -2040,10 +2040,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm, item->u.am.request_from = request_from; item->u.am.flags = flags; - if (ignore_higher) - mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, + if (ignore_higher) + mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len, res->lockname.name); - + spin_lock(&dlm->work_lock); list_add_tail(&item->list, &dlm->work_list); spin_unlock(&dlm->work_lock); @@ -2133,7 +2133,7 @@ put: * think that $RECOVERY is currently mastered by a dead node. If so, * we wait a short time to allow that node to get notified by its own * heartbeat stack, then check again. All $RECOVERY lock resources - * mastered by dead nodes are purged when the hearbeat callback is + * mastered by dead nodes are purged when the hearbeat callback is * fired, so we can know for sure that it is safe to continue once * the node returns a live node or no node. */ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, @@ -2174,7 +2174,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm, ret = -EAGAIN; } spin_unlock(&dlm->spinlock); - mlog(0, "%s: reco lock master is %u\n", dlm->name, + mlog(0, "%s: reco lock master is %u\n", dlm->name, master); break; } @@ -2602,7 +2602,7 @@ fail: mlog(0, "%s:%.*s: timed out during migration\n", dlm->name, res->lockname.len, res->lockname.name); - /* avoid hang during shutdown when migrating lockres + /* avoid hang during shutdown when migrating lockres * to a node which also goes down */ if (dlm_is_node_dead(dlm, target)) { mlog(0, "%s:%.*s: expected migration " @@ -2738,7 +2738,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm, can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING); spin_unlock(&res->spinlock); - /* target has died, so make the caller break out of the + /* target has died, so make the caller break out of the * wait_event, but caller must recheck the domain_map */ spin_lock(&dlm->spinlock); if (!test_bit(mig_target, dlm->domain_map)) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 2f9e4e19a4f..57736d3ea7b 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm, if (lock->ml.node == dead_node) { mlog(0, "AHA! there was " "a $RECOVERY lock for dead " - "node %u (%s)!\n", + "node %u (%s)!\n", dead_node, dlm->name); list_del_init(&lock->list); dlm_lock_put(lock); @@ -1839,7 +1839,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, * the lvb. */ memcpy(res->lvb, mres->lvb, DLM_LVB_LEN); } else { - /* otherwise, the node is sending its + /* otherwise, the node is sending its * most recent valid lvb info */ BUG_ON(ml->type != LKM_EXMODE && ml->type != LKM_PRMODE); @@ -2114,7 +2114,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm, assert_spin_locked(&res->spinlock); if (res->owner == dlm->node_num) - /* if this node owned the lockres, and if the dead node + /* if this node owned the lockres, and if the dead node * had an EX when he died, blank out the lvb */ search_node = dead_node; else { @@ -2152,7 +2152,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, /* this node is the lockres master: * 1) remove any stale locks for the dead node - * 2) if the dead node had an EX when he died, blank out the lvb + * 2) if the dead node had an EX when he died, blank out the lvb */ assert_spin_locked(&dlm->spinlock); assert_spin_locked(&res->spinlock); @@ -2260,7 +2260,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) } spin_unlock(&res->spinlock); continue; - } + } spin_lock(&res->spinlock); /* zero the lvb if necessary */ dlm_revalidate_lvb(dlm, res, dead_node); @@ -2411,7 +2411,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st) * this function on each node racing to become the recovery * master will not stop attempting this until either: * a) this node gets the EX (and becomes the recovery master), - * or b) dlm->reco.new_master gets set to some nodenum + * or b) dlm->reco.new_master gets set to some nodenum * != O2NM_INVALID_NODE_NUM (another node will do the reco). * so each time a recovery master is needed, the entire cluster * will sync at this point. if the new master dies, that will @@ -2424,7 +2424,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm) mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n", dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num); -again: +again: memset(&lksb, 0, sizeof(lksb)); ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY, @@ -2437,8 +2437,8 @@ again: if (ret == DLM_NORMAL) { mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n", dlm->name, dlm->node_num); - - /* got the EX lock. check to see if another node + + /* got the EX lock. check to see if another node * just became the reco master */ if (dlm_reco_master_ready(dlm)) { mlog(0, "%s: got reco EX lock, but %u will " @@ -2451,12 +2451,12 @@ again: /* see if recovery was already finished elsewhere */ spin_lock(&dlm->spinlock); if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { - status = -EINVAL; + status = -EINVAL; mlog(0, "%s: got reco EX lock, but " "node got recovered already\n", dlm->name); if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) { mlog(ML_ERROR, "%s: new master is %u " - "but no dead node!\n", + "but no dead node!\n", dlm->name, dlm->reco.new_master); BUG(); } @@ -2468,7 +2468,7 @@ again: * set the master and send the messages to begin recovery */ if (!status) { mlog(0, "%s: dead=%u, this=%u, sending " - "begin_reco now\n", dlm->name, + "begin_reco now\n", dlm->name, dlm->reco.dead_node, dlm->node_num); status = dlm_send_begin_reco_message(dlm, dlm->reco.dead_node); @@ -2501,7 +2501,7 @@ again: mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n", dlm->name, dlm->node_num); /* another node is master. wait on - * reco.new_master != O2NM_INVALID_NODE_NUM + * reco.new_master != O2NM_INVALID_NODE_NUM * for at most one second */ wait_event_timeout(dlm->dlm_reco_thread_wq, dlm_reco_master_ready(dlm), @@ -2599,7 +2599,7 @@ retry: } if (ret < 0) { struct dlm_lock_resource *res; - /* this is now a serious problem, possibly ENOMEM + /* this is now a serious problem, possibly ENOMEM * in the network stack. must retry */ mlog_errno(ret); mlog(ML_ERROR, "begin reco of dlm %s to node %u " @@ -2612,7 +2612,7 @@ retry: } else { mlog(ML_ERROR, "recovery lock not found\n"); } - /* sleep for a bit in hopes that we can avoid + /* sleep for a bit in hopes that we can avoid * another ENOMEM */ msleep(100); goto retry; @@ -2664,7 +2664,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data, } if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) { mlog(ML_NOTICE, "%s: dead_node previously set to %u, " - "node %u changing it to %u\n", dlm->name, + "node %u changing it to %u\n", dlm->name, dlm->reco.dead_node, br->node_idx, br->dead_node); } dlm_set_reco_master(dlm, br->node_idx); @@ -2730,8 +2730,8 @@ stage2: if (ret < 0) { mlog_errno(ret); if (dlm_is_host_down(ret)) { - /* this has no effect on this recovery - * session, so set the status to zero to + /* this has no effect on this recovery + * session, so set the status to zero to * finish out the last recovery */ mlog(ML_ERROR, "node %u went down after this " "node finished recovery.\n", nodenum); @@ -2768,7 +2768,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, mlog(0, "%s: node %u finalizing recovery stage%d of " "node %u (%u:%u)\n", dlm->name, fr->node_idx, stage, fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master); - + spin_lock(&dlm->spinlock); if (dlm->reco.new_master != fr->node_idx) { diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 00f53b2aea7..49e29ecd020 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -190,8 +190,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm, actions &= ~(DLM_UNLOCK_REMOVE_LOCK| DLM_UNLOCK_REGRANT_LOCK| DLM_UNLOCK_CLEAR_CONVERT_TYPE); - } else if (status == DLM_RECOVERING || - status == DLM_MIGRATING || + } else if (status == DLM_RECOVERING || + status == DLM_MIGRATING || status == DLM_FORWARD) { /* must clear the actions because this unlock * is about to be retried. cannot free or do @@ -661,14 +661,14 @@ retry: if (call_ast) { mlog(0, "calling unlockast(%p, %d)\n", data, status); if (is_master) { - /* it is possible that there is one last bast + /* it is possible that there is one last bast * pending. make sure it is flushed, then * call the unlockast. * not an issue if this is a mastered remotely, * since this lock has been removed from the * lockres queues and cannot be found. */ dlm_kick_thread(dlm, NULL); - wait_event(dlm->ast_wq, + wait_event(dlm->ast_wq, dlm_lock_basts_flushed(dlm, lock)); } (*unlockast)(data, status); -- cgit v1.2.3 From 71656fa6ec10473eb9b646c10a2173fdea2f83c9 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 16:57:39 -0800 Subject: ocfs2/dlm: Ignore LVBs of locks in the Blocked list During lock resource migration, o2dlm fills the packet with a LVB from the first valid lock. For sanity, it ensures that the other valid locks have the same LVB. If not, it BUGs. The valid locks are ones that have granted EX or PR lock levels and are either on the Granted or Converting lists. Locks in the Blocked list cannot have a valid LVB. This patch ensures that we skip the locks in the Blocked list. Fixes oss bugzilla#1202 http://oss.oracle.com/bugzilla/show_bug.cgi?id=1202 Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 48 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 14 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 57736d3ea7b..9d67894cda6 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres, mres->master = master; } +static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock, + struct dlm_migratable_lockres *mres, + int queue) +{ + if (!lock->lksb) + return; + + /* Ignore lvb in all locks in the blocked list */ + if (queue == DLM_BLOCKED_LIST) + return; + + /* Only consider lvbs in locks with granted EX or PR lock levels */ + if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE) + return; + + if (dlm_lvb_is_empty(mres->lvb)) { + memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); + return; + } + + /* Ensure the lvb copied for migration matches in other valid locks */ + if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN)) + return; + + mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, " + "node=%u\n", + dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), + dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), + lock->lockres->lockname.len, lock->lockres->lockname.name, + lock->ml.node); + dlm_print_one_lock_resource(lock->lockres); + BUG(); +} /* returns 1 if this lock fills the network structure, * 0 otherwise */ @@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock, ml->list = queue; if (lock->lksb) { ml->flags = lock->lksb->flags; - /* send our current lvb */ - if (ml->type == LKM_EXMODE || - ml->type == LKM_PRMODE) { - /* if it is already set, this had better be a PR - * and it has to match */ - if (!dlm_lvb_is_empty(mres->lvb) && - (ml->type == LKM_EXMODE || - memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) { - mlog(ML_ERROR, "mismatched lvbs!\n"); - dlm_print_one_lock_resource(lock->lockres); - BUG(); - } - memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN); - } + dlm_prepare_lvb_for_migration(lock, mres, queue); } ml->node = lock->ml.node; mres->num_locks++; -- cgit v1.2.3 From 26636bf6b2010aa84c54d577231e017ba71493d0 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 16:57:40 -0800 Subject: ocfs2/dlm: Print more messages during lock migration When a lock resource is migrated, the dlm compares the migrated locks with that that was already existing on the new node. If the comparison fails, it BUGs. This patch prints more messages when the comparison fails inorder to help with the root cause analyis. http://oss.oracle.com/bugzilla/show_bug.cgi?id=1206 This does not fix bz1206. However, if we run into it again, we will have more information to chew on. Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9d67894cda6..cfb2ae9ab53 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1750,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, struct dlm_lock *lock = NULL; u8 from = O2NM_MAX_NODES; unsigned int added = 0; + __be64 c; mlog(0, "running %d locks for this lockres\n", mres->num_locks); for (i=0; inum_locks; i++) { @@ -1797,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, /* lock is always created locally first, and * destroyed locally last. it must be on the list */ if (!lock) { - __be64 c = ml->cookie; - mlog(ML_ERROR, "could not find local lock " - "with cookie %u:%llu!\n", + c = ml->cookie; + mlog(ML_ERROR, "Could not find local lock " + "with cookie %u:%llu, node %u, " + "list %u, flags 0x%x, type %d, " + "conv %d, highest blocked %d\n", dlm_get_lock_cookie_node(be64_to_cpu(c)), - dlm_get_lock_cookie_seq(be64_to_cpu(c))); + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + ml->node, ml->list, ml->flags, ml->type, + ml->convert_type, ml->highest_blocked); + __dlm_print_one_lock_resource(res); + BUG(); + } + + if (lock->ml.node != ml->node) { + c = lock->ml.cookie; + mlog(ML_ERROR, "Mismatched node# in lock " + "cookie %u:%llu, name %.*s, node %u\n", + dlm_get_lock_cookie_node(be64_to_cpu(c)), + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + res->lockname.len, res->lockname.name, + lock->ml.node); + c = ml->cookie; + mlog(ML_ERROR, "Migrate lock cookie %u:%llu, " + "node %u, list %u, flags 0x%x, type %d, " + "conv %d, highest blocked %d\n", + dlm_get_lock_cookie_node(be64_to_cpu(c)), + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + ml->node, ml->list, ml->flags, ml->type, + ml->convert_type, ml->highest_blocked); __dlm_print_one_lock_resource(res); BUG(); } - BUG_ON(lock->ml.node != ml->node); if (tmpq != queue) { - mlog(0, "lock was on %u instead of %u for %.*s\n", - j, ml->list, res->lockname.len, res->lockname.name); + c = ml->cookie; + mlog(0, "Lock cookie %u:%llu was on list %u " + "instead of list %u for %.*s\n", + dlm_get_lock_cookie_node(be64_to_cpu(c)), + dlm_get_lock_cookie_seq(be64_to_cpu(c)), + j, ml->list, res->lockname.len, + res->lockname.name); + __dlm_print_one_lock_resource(res); spin_unlock(&res->spinlock); continue; } @@ -1906,7 +1936,7 @@ skip_lvb: spin_lock(&res->spinlock); list_for_each_entry(lock, queue, list) { if (lock->ml.cookie == ml->cookie) { - __be64 c = lock->ml.cookie; + c = lock->ml.cookie; mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already " "exists on this lockres!\n", dlm->name, res->lockname.len, res->lockname.name, -- cgit v1.2.3 From cd34edd8cf80b507bb84b3f0c2988fe05099ffb5 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 25 Jan 2010 17:58:30 -0800 Subject: ocfs2/dlm: Handle EAGAIN for compatibility - v2 Mainline commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8 made the dlm_begin_reco_handler() return -EAGAIN instead of EAGAIN. As this error is transmitted over the wire, we want the receiver, dlm_send_begin_reco_message(), to understand both the older EAGAIN and the newer -EAGAIN, to allow rolling upgrade of the cluster nodes. Signed-off-by: Sunil Mushran Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index cfb2ae9ab53..ad712211d4e 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2639,7 +2639,13 @@ retry: "begin reco msg (%d)\n", dlm->name, nodenum, ret); ret = 0; } - if (ret == -EAGAIN) { + + /* + * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8, + * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN. + * We are handling both for compatibility reasons. + */ + if (ret == -EAGAIN || ret == EAGAIN) { mlog(0, "%s: trying to start recovery of node " "%u, but node %u is waiting for last recovery " "to complete, backoff for a bit\n", dlm->name, -- cgit v1.2.3 From cda70ba8c05a8661f882862c4699a31d215ab151 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Mon, 1 Feb 2010 17:34:58 -0800 Subject: ocfs2/dlm: Remove BUG_ON in dlm recovery when freeing locks of a dead node During recovery, the dlm frees the locks for the dead node. If it finds a lock in a resource for the dead node, it expects that node to also have a ref in that lock resource. If not, it BUGs. ossbz#1175 was filed with the above BUG. Now, while it is correct that we should be expecting the ref, I see no reason why we have to BUG. After all, we are freeing up the lock and clearing the ref. This patch replaces the BUG_ON with a printk(). Hopefully, that will give us more clues next time this happens. http://oss.oracle.com/bugzilla/show_bug.cgi?id=1175 Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index ad712211d4e..344bcf90cbf 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2243,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm, mlog(0, "%s:%.*s: freed %u locks for dead node %u, " "dropping ref from lockres\n", dlm->name, res->lockname.len, res->lockname.name, freed, dead_node); - BUG_ON(!test_bit(dead_node, res->refmap)); + if(!test_bit(dead_node, res->refmap)) { + mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, " + "but ref was not set\n", dlm->name, + res->lockname.len, res->lockname.name, freed, dead_node); + __dlm_print_one_lock_resource(res); + } dlm_lockres_clear_refmap_bit(dead_node, res); } else if (test_bit(dead_node, res->refmap)) { mlog(0, "%s:%.*s: dead node %u had a ref, but had " -- cgit v1.2.3 From 86a06abab0ffbb9d8ce2b7f6b6652412ce2d2c36 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 5 Feb 2010 17:55:56 -0800 Subject: ocfs2/dlm: Fix printing of lockname The debug call printing the name of the lock resource was chopping off the last character. This patch fixes the problem. Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmdebug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 42b0bad7a61..0cd24cf5439 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) assert_spin_locked(&res->spinlock); stringify_lockname(res->lockname.name, res->lockname.len, - buf, sizeof(buf) - 1); + buf, sizeof(buf)); printk("lockres: %s, owner=%u, state=%u\n", buf, res->owner, res->state); printk(" last used: %lu, refcnt: %u, on purge list: %s\n", -- cgit v1.2.3