/* -*- mode: c; c-basic-offset: 8; -*- * vim: noexpandtab sw=8 ts=8 sts=0: * * dlmdebug.c * * debug functionality for the dlm * * Copyright (C) 2004 Oracle. All rights reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 021110-1307, USA. * */ #include #include #include #include #include #include #include #include "cluster/heartbeat.h" #include "cluster/nodemanager.h" #include "cluster/tcp.h" #include "dlmapi.h" #include "dlmcommon.h" #include "dlmdomain.h" #include "dlmdebug.h" #define MLOG_MASK_PREFIX ML_DLM #include "cluster/masklog.h" void dlm_print_one_lock_resource(struct dlm_lock_resource *res) { mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", res->lockname.len, res->lockname.name, res->owner, res->state); spin_lock(&res->spinlock); __dlm_print_one_lock_resource(res); spin_unlock(&res->spinlock); } static void dlm_print_lockres_refmap(struct dlm_lock_resource *res) { int bit; assert_spin_locked(&res->spinlock); mlog(ML_NOTICE, " refmap nodes: [ "); bit = 0; while (1) { bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit); if (bit >= O2NM_MAX_NODES) break; printk("%u ", bit); bit++; } printk("], inflight=%u\n", res->inflight_locks); } void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) { struct list_head *iter2; struct dlm_lock *lock; assert_spin_locked(&res->spinlock); mlog(ML_NOTICE, "lockres: %.*s, owner=%u, state=%u\n", res->lockname.len, res->lockname.name, res->owner, res->state); mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n", res->last_used, list_empty(&res->purge) ? "no" : "yes"); dlm_print_lockres_refmap(res); mlog(ML_NOTICE, " granted queue: \n"); list_for_each(iter2, &res->granted) { lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", lock->ml.type, lock->ml.convert_type, lock->ml.node, dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', lock->bast_pending ? 'y' : 'n'); spin_unlock(&lock->spinlock); } mlog(ML_NOTICE, " converting queue: \n"); list_for_each(iter2, &res->converting) { lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", lock->ml.type, lock->ml.convert_type, lock->ml.node, dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', lock->bast_pending ? 'y' : 'n'); spin_unlock(&lock->spinlock); } mlog(ML_NOTICE, " blocked queue: \n"); list_for_each(iter2, &res->blocked) { lock = list_entry(iter2, struct dlm_lock, list); spin_lock(&lock->spinlock); mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, " "cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n", lock->ml.type, lock->ml.convert_type, lock->ml.node, dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)), dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)), list_empty(&lock->ast_list) ? 'y' : 'n', lock->ast_pending ? 'y' : 'n', list_empty(&lock->bast_list) ? 'y' : 'n', lock->bast_pending ? 'y' : 'n'); spin_unlock(&lock->spinlock); } } void dlm_print_one_lock(struct dlm_lock *lockid) { dlm_print_one_lock_resource(lockid->lockres); } EXPORT_SYMBOL_GPL(dlm_print_one_lock); #if 0 void dlm_dump_lock_resources(struct dlm_ctxt *dlm) { struct dlm_lock_resource *res; struct hlist_node *iter; struct hlist_head *bucket; int i; mlog(ML_NOTICE, "struct dlm_ctxt: %s, node=%u, key=%u\n", dlm->name, dlm->node_num, dlm->key); if (!dlm || !dlm->name) { mlog(ML_ERROR, "dlm=%p\n", dlm); return; } spin_lock(&dlm->spinlock); for (i=0; ispinlock); } #endif /* 0 */ static const char *dlm_errnames[] = { [DLM_NORMAL] = "DLM_NORMAL", [DLM_GRANTED] = "DLM_GRANTED", [DLM_DENIED] = "DLM_DENIED", [DLM_DENIED_NOLOCKS] = "DLM_DENIED_NOLOCKS", [DLM_WORKING] = "DLM_WORKING", [DLM_BLOCKED] = "DLM_BLOCKED", [DLM_BLOCKED_ORPHAN] = "DLM_BLOCKED_ORPHAN", [DLM_DENIED_GRACE_PERIOD] = "DLM_DENIED_GRACE_PERIOD", [DLM_SYSERR] = "DLM_SYSERR", [DLM_NOSUPPORT] = "DLM_NOSUPPORT", [DLM_CANCELGRANT] = "DLM_CANCELGRANT", [DLM_IVLOCKID] = "DLM_IVLOCKID", [DLM_SYNC] = "DLM_SYNC", [DLM_BADTYPE] = "DLM_BADTYPE", [DLM_BADRESOURCE] = "DLM_BADRESOURCE", [DLM_MAXHANDLES] = "DLM_MAXHANDLES", [DLM_NOCLINFO] = "DLM_NOCLINFO", [DLM_NOLOCKMGR] = "DLM_NOLOCKMGR", [DLM_NOPURGED] = "DLM_NOPURGED", [DLM_BADARGS] = "DLM_BADARGS", [DLM_VOID] = "DLM_VOID", [DLM_NOTQUEUED] = "DLM_NOTQUEUED", [DLM_IVBUFLEN] = "DLM_IVBUFLEN", [DLM_CVTUNGRANT] = "DLM_CVTUNGRANT", [DLM_BADPARAM] = "DLM_BADPARAM", [DLM_VALNOTVALID] = "DLM_VALNOTVALID", [DLM_REJECTED] = "DLM_REJECTED", [DLM_ABORT] = "DLM_ABORT", [DLM_CANCEL] = "DLM_CANCEL", [DLM_IVRESHANDLE] = "DLM_IVRESHANDLE", [DLM_DEADLOCK] = "DLM_DEADLOCK", [DLM_DENIED_NOASTS] = "DLM_DENIED_NOASTS", [DLM_FORWARD] = "DLM_FORWARD", [DLM_TIMEOUT] = "DLM_TIMEOUT", [DLM_IVGROUPID] = "DLM_IVGROUPID", [DLM_VERS_CONFLICT] = "DLM_VERS_CONFLICT", [DLM_BAD_DEVICE_PATH] = "DLM_BAD_DEVICE_PATH", [DLM_NO_DEVICE_PERMISSION] = "DLM_NO_DEVICE_PERMISSION", [DLM_NO_CONTROL_DEVICE ] = "DLM_NO_CONTROL_DEVICE ", [DLM_RECOVERING] = "DLM_RECOVERING", [DLM_MIGRATING] = "DLM_MIGRATING", [DLM_MAXSTATS] = "DLM_MAXSTATS", }; static const char *dlm_errmsgs[] = { [DLM_NORMAL] = "request in progress", [DLM_GRANTED] = "request granted", [DLM_DENIED] = "request denied", [DLM_DENIED_NOLOCKS] = "request denied, out of system resources", [DLM_WORKING] = "async request in progress", [DLM_BLOCKED] = "lock request blocked", [DLM_BLOCKED_ORPHAN] = "lock request blocked by a orphan lock", [DLM_DENIED_GRACE_PERIOD] = "topological change in progress", [DLM_SYSERR] = "system error", [DLM_NOSUPPORT] = "unsupported", [DLM_CANCELGRANT] = "can't cancel convert: already granted", [DLM_IVLOCKID] = "bad lockid", [DLM_SYNC] = "synchronous request granted", [DLM_BADTYPE] = "bad resource type", [DLM_BADRESOURCE] = "bad resource handle", [DLM_MAXHANDLES] = "no more resource handles", [DLM_NOCLINFO] = "can't contact cluster manager", [DLM_NOLOCKMGR] = "can't contact lock manager", [DLM_NOPURGED] = "can't contact purge daemon", [DLM_BADARGS] = "bad api args", [DLM_VOID] = "no status", [DLM_NOTQUEUED] = "NOQUEUE was specified and request failed", [DLM_IVBUFLEN] = "invalid resource name length", [DLM_CVTUNGRANT] = "attempted to convert ungranted lock", [DLM_BADPARAM] = "invalid lock mode specified", [DLM_VALNOTVALID] = "value block has been invalidated", [DLM_REJECTED] = "request rejected, unrecognized client", [DLM_ABORT] = "blocked lock request cancelled", [DLM_CANCEL] = "conversion request cancelled", [DLM_IVRESHANDLE] = "invalid resource handle", [DLM_DEADLOCK] = "deadlock recovery refused this request", [DLM_DENIED_NOASTS] = "failed to allocate AST", [DLM_FORWARD] = "request must wait for primary's response", [DLM_TIMEOUT] = "timeout value for lock has expired", [DLM_IVGROUPID] = "invalid group specification", [DLM_VERS_CONFLICT] = "version conflicts prevent request handling", [DLM_BAD_DEVICE_PATH] = "Locks device does not exist or path wrong", [DLM_NO_DEVICE_PERMISSION] = "Client has insufficient perms for device", [DLM_NO_CONTROL_DEVICE] = "Cannot set options on opened device ", [DLM_RECOVERING] = "lock resource being recovered", [DLM_MIGRATING] = "lock resource being migrated", [DLM_MAXSTATS] = "invalid error number", }; const char *dlm_errmsg(enum dlm_status err) { if (err >= DLM_MAXSTATS || err < 0) return dlm_errmsgs[DLM_MAXSTATS]; return dlm_errmsgs[err]; } EXPORT_SYMBOL_GPL(dlm_errmsg); const char *dlm_errname(enum dlm_status err) { if (err >= DLM_MAXSTATS || err < 0) return dlm_errnames[DLM_MAXSTATS]; return dlm_errnames[err]; } EXPORT_SYMBOL_GPL(dlm_errname); #ifdef CONFIG_DEBUG_FS static struct dentry *dlm_debugfs_root = NULL; #define DLM_DEBUGFS_DIR "o2dlm" #define DLM_DEBUGFS_DLM_STATE "dlm_state" /* begin - utils funcs */ static void dlm_debug_free(struct kref *kref) { struct dlm_debug_ctxt *dc; dc = container_of(kref, struct dlm_debug_ctxt, debug_refcnt); kfree(dc); } void dlm_debug_put(struct dlm_debug_ctxt *dc) { if (dc) kref_put(&dc->debug_refcnt, dlm_debug_free); } static void dlm_debug_get(struct dlm_debug_ctxt *dc) { kref_get(&dc->debug_refcnt); } static int stringify_nodemap(unsigned long *nodemap, int maxnodes, char *buf, int len) { int out = 0; int i = -1; while ((i = find_next_bit(nodemap, maxnodes, i + 1)) < maxnodes) out += snprintf(buf + out, len - out, "%d ", i); return out; } static struct debug_buffer *debug_buffer_allocate(void) { struct debug_buffer *db = NULL; db = kzalloc(sizeof(struct debug_buffer), GFP_KERNEL); if (!db) goto bail; db->len = PAGE_SIZE; db->buf = kmalloc(db->len, GFP_KERNEL); if (!db->buf) goto bail; return db; bail: kfree(db); return NULL; } static ssize_t debug_buffer_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { struct debug_buffer *db = file->private_data; return simple_read_from_buffer(buf, nbytes, ppos, db->buf, db->len); } static loff_t debug_buffer_llseek(struct file *file, loff_t off, int whence) { struct debug_buffer *db = file->private_data; loff_t new = -1; switch (whence) { case 0: new = off; break; case 1: new = file->f_pos + off; break; } if (new < 0 || new > db->len) return -EINVAL; return (file->f_pos = new); } static int debug_buffer_release(struct inode *inode, struct file *file) { struct debug_buffer *db = (struct debug_buffer *)file->private_data; if (db) kfree(db->buf); kfree(db); return 0; } /* end - util funcs */ /* begin - debug state funcs */ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) { int out = 0; struct dlm_reco_node_data *node; char *state; int lres, rres, ures, tres; lres = atomic_read(&dlm->local_resources); rres = atomic_read(&dlm->remote_resources); ures = atomic_read(&dlm->unknown_resources); tres = lres + rres + ures; spin_lock(&dlm->spinlock); switch (dlm->dlm_state) { case DLM_CTXT_NEW: state = "NEW"; break; case DLM_CTXT_JOINED: state = "JOINED"; break; case DLM_CTXT_IN_SHUTDOWN: state = "SHUTDOWN"; break; case DLM_CTXT_LEAVING: state = "LEAVING"; break; default: state = "UNKNOWN"; break; } /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ out += snprintf(db->buf + out, db->len - out, "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); /* Thread Pid: xxx Node: xxx State: xxxxx */ out += snprintf(db->buf + out, db->len - out, "Thread Pid: %d Node: %d State: %s\n", dlm->dlm_thread_task->pid, dlm->node_num, state); /* Number of Joins: xxx Joining Node: xxx */ out += snprintf(db->buf + out, db->len - out, "Number of Joins: %d Joining Node: %d\n", dlm->num_joins, dlm->joining_node); /* Domain Map: xx xx xx */ out += snprintf(db->buf + out, db->len - out, "Domain Map: "); out += stringify_nodemap(dlm->domain_map, O2NM_MAX_NODES, db->buf + out, db->len - out); out += snprintf(db->buf + out, db->len - out, "\n"); /* Live Map: xx xx xx */ out += snprintf(db->buf + out, db->len - out, "Live Map: "); out += stringify_nodemap(dlm->live_nodes_map, O2NM_MAX_NODES, db->buf + out, db->len - out); out += snprintf(db->buf + out, db->len - out, "\n"); /* Mastered Resources Total: xxx Locally: xxx Remotely: ... */ out += snprintf(db->buf + out, db->len - out, "Mastered Resources Total: %d Locally: %d " "Remotely: %d Unknown: %d\n", tres, lres, rres, ures); /* Lists: Dirty=Empty Purge=InUse PendingASTs=Empty ... */ out += snprintf(db->buf + out, db->len - out, "Lists: Dirty=%s Purge=%s PendingASTs=%s " "PendingBASTs=%s Master=%s\n", (list_empty(&dlm->dirty_list) ? "Empty" : "InUse"), (list_empty(&dlm->purge_list) ? "Empty" : "InUse"), (list_empty(&dlm->pending_asts) ? "Empty" : "InUse"), (list_empty(&dlm->pending_basts) ? "Empty" : "InUse"), (list_empty(&dlm->master_list) ? "Empty" : "InUse")); /* Purge Count: xxx Refs: xxx */ out += snprintf(db->buf + out, db->len - out, "Purge Count: %d Refs: %d\n", dlm->purge_count, atomic_read(&dlm->dlm_refs.refcount)); /* Dead Node: xxx */ out += snprintf(db->buf + out, db->len - out, "Dead Node: %d\n", dlm->reco.dead_node); /* What about DLM_RECO_STATE_FINALIZE? */ if (dlm->reco.state == DLM_RECO_STATE_ACTIVE) state = "ACTIVE"; else state = "INACTIVE"; /* Recovery Pid: xxxx Master: xxx State: xxxx */ out += snprintf(db->buf + out, db->len - out, "Recovery Pid: %d Master: %d State: %s\n", dlm->dlm_reco_thread_task->pid, dlm->reco.new_master, state); /* Recovery Map: xx xx */ out += snprintf(db->buf + out, db->len - out, "Recovery Map: "); out += stringify_nodemap(dlm->recovery_map, O2NM_MAX_NODES, db->buf + out, db->len - out); out += snprintf(db->buf + out, db->len - out, "\n"); /* Recovery Node State: */ out += snprintf(db->buf + out, db->len - out, "Recovery Node State:\n"); list_for_each_entry(node, &dlm->reco.node_data, list) { switch (node->state) { case DLM_RECO_NODE_DATA_INIT: state = "INIT"; break; case DLM_RECO_NODE_DATA_REQUESTING: state = "REQUESTING"; break; case DLM_RECO_NODE_DATA_DEAD: state = "DEAD"; break; case DLM_RECO_NODE_DATA_RECEIVING: state = "RECEIVING"; break; case DLM_RECO_NODE_DATA_REQUESTED: state = "REQUESTED"; break; case DLM_RECO_NODE_DATA_DONE: state = "DONE"; break; case DLM_RECO_NODE_DATA_FINALIZE_SENT: state = "FINALIZE-SENT"; break; default: state = "BAD"; break; } out += snprintf(db->buf + out, db->len - out, "\t%u - %s\n", node->node_num, state); } spin_unlock(&dlm->spinlock); return out; } static int debug_state_open(struct inode *inode, struct file *file) { struct dlm_ctxt *dlm = inode->i_private; struct debug_buffer *db = NULL; db = debug_buffer_allocate(); if (!db) goto bail; db->len = debug_state_print(dlm, db); file->private_data = db; return 0; bail: return -ENOMEM; } static struct file_operations debug_state_fops = { .open = debug_state_open, .release = debug_buffer_release, .read = debug_buffer_read, .llseek = debug_buffer_llseek, }; /* end - debug state funcs */ /* files in subroot */ int dlm_debug_init(struct dlm_ctxt *dlm) { struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; /* for dumping dlm_ctxt */ dc->debug_state_dentry = debugfs_create_file(DLM_DEBUGFS_DLM_STATE, S_IFREG|S_IRUSR, dlm->dlm_debugfs_subroot, dlm, &debug_state_fops); if (!dc->debug_state_dentry) { mlog_errno(-ENOMEM); goto bail; } dlm_debug_get(dc); return 0; bail: dlm_debug_shutdown(dlm); return -ENOMEM; } void dlm_debug_shutdown(struct dlm_ctxt *dlm) { struct dlm_debug_ctxt *dc = dlm->dlm_debug_ctxt; if (dc) { if (dc->debug_state_dentry) debugfs_remove(dc->debug_state_dentry); dlm_debug_put(dc); } } /* subroot - domain dir */ int dlm_create_debugfs_subroot(struct dlm_ctxt *dlm) { dlm->dlm_debugfs_subroot = debugfs_create_dir(dlm->name, dlm_debugfs_root); if (!dlm->dlm_debugfs_subroot) { mlog_errno(-ENOMEM); goto bail; } dlm->dlm_debug_ctxt = kzalloc(sizeof(struct dlm_debug_ctxt), GFP_KERNEL); if (!dlm->dlm_debug_ctxt) { mlog_errno(-ENOMEM); goto bail; } kref_init(&dlm->dlm_debug_ctxt->debug_refcnt); return 0; bail: dlm_destroy_debugfs_subroot(dlm); return -ENOMEM; } void dlm_destroy_debugfs_subroot(struct dlm_ctxt *dlm) { if (dlm->dlm_debugfs_subroot) debugfs_remove(dlm->dlm_debugfs_subroot); } /* debugfs root */ int dlm_create_debugfs_root(void) { dlm_debugfs_root = debugfs_create_dir(DLM_DEBUGFS_DIR, NULL); if (!dlm_debugfs_root) { mlog_errno(-ENOMEM); return -ENOMEM; } return 0; } void dlm_destroy_debugfs_root(void) { if (dlm_debugfs_root) debugfs_remove(dlm_debugfs_root); } #endif /* CONFIG_DEBUG_FS */