/* * kvm trace * * It is designed to allow debugging traces of kvm to be generated * on UP / SMP machines. Each trace entry can be timestamped so that * it's possible to reconstruct a chronological record of trace events. * The implementation refers to blktrace kernel support. * * Copyright (c) 2008 Intel Corporation * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> * * Authors: Feng(Eric) Liu, eric.e.liu@intel.com * * Date: Feb 2008 */ #include <linux/module.h> #include <linux/relay.h> #include <linux/debugfs.h> #include <linux/ktime.h> #include <linux/kvm_host.h> #define KVM_TRACE_STATE_RUNNING (1 << 0) #define KVM_TRACE_STATE_PAUSE (1 << 1) #define KVM_TRACE_STATE_CLEARUP (1 << 2) struct kvm_trace { int trace_state; struct rchan *rchan; struct dentry *lost_file; atomic_t lost_records; }; static struct kvm_trace *kvm_trace; struct kvm_trace_probe { const char *name; const char *format; u32 timestamp_in; marker_probe_func *probe_func; }; static inline int calc_rec_size(int timestamp, int extra) { int rec_size = KVM_TRC_HEAD_SIZE; rec_size += extra; return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; } static void kvm_add_trace(void *probe_private, void *call_data, const char *format, va_list *args) { struct kvm_trace_probe *p = probe_private; struct kvm_trace *kt = kvm_trace; struct kvm_trace_rec rec; struct kvm_vcpu *vcpu; int i, size; u32 extra; if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) return; rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); vcpu = va_arg(*args, struct kvm_vcpu *); rec.pid = current->tgid; rec.vcpu_id = vcpu->vcpu_id; extra = va_arg(*args, u32); WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) | TRACE_REC_NUM_DATA_ARGS(extra); if (p->timestamp_in) { rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); for (i = 0; i < extra; i++) rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); } else { for (i = 0; i < extra; i++) rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); } size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); relay_write(kt->rchan, &rec, size); } static struct kvm_trace_probe kvm_trace_probes[] = { { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, }; static int lost_records_get(void *data, u64 *val) { struct kvm_trace *kt = data; *val = atomic_read(&kt->lost_records); return 0; } DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); /* * The relay channel is used in "no-overwrite" mode, it keeps trace of how * many times we encountered a full subbuffer, to tell user space app the * lost records there were. */ static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { struct kvm_trace *kt; if (!relay_buf_full(buf)) { if (!prev_subbuf) { /* * executed only once when the channel is opened * save metadata as first record */ subbuf_start_reserve(buf, sizeof(u32)); *(u32 *)subbuf = 0x12345678; } return 1; } kt = buf->chan->private_data; atomic_inc(&kt->lost_records); return 0; } static struct dentry *kvm_create_buf_file_callack(const char *filename, struct dentry *parent, int mode, struct rchan_buf *buf, int *is_global) { return debugfs_create_file(filename, mode, parent, buf, &relay_file_operations); } static int kvm_remove_buf_file_callback(struct dentry *dentry) { debugfs_remove(dentry); return 0; } static struct rchan_callbacks kvm_relay_callbacks = { .subbuf_start = kvm_subbuf_start_callback, .create_buf_file = kvm_create_buf_file_callack, .remove_buf_file = kvm_remove_buf_file_callback, }; static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) { struct kvm_trace *kt; int i, r = -ENOMEM; if (!kuts->buf_size || !kuts->buf_nr) return -EINVAL; kt = kzalloc(sizeof(*kt), GFP_KERNEL); if (!kt) goto err; r = -EIO; atomic_set(&kt->lost_records, 0); kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, kt, &kvm_trace_lost_ops); if (!kt->lost_file) goto err; kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, kuts->buf_nr, &kvm_relay_callbacks, kt); if (!kt->rchan) goto err; kvm_trace = kt; for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { struct kvm_trace_probe *p = &kvm_trace_probes[i]; r = marker_probe_register(p->name, p->format, p->probe_func, p); if (r) printk(KERN_INFO "Unable to register probe %s\n", p->name); } kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; return 0; err: if (kt) { if (kt->lost_file) debugfs_remove(kt->lost_file); if (kt->rchan) relay_close(kt->rchan); kfree(kt); } return r; } static int kvm_trace_enable(char __user *arg) { struct kvm_user_trace_setup kuts; int ret; ret = copy_from_user(&kuts, arg, sizeof(kuts)); if (ret) return -EFAULT; ret = do_kvm_trace_enable(&kuts); if (ret) return ret; return 0; } static int kvm_trace_pause(void) { struct kvm_trace *kt = kvm_trace; int r = -EINVAL; if (kt == NULL) return r; if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { kt->trace_state = KVM_TRACE_STATE_PAUSE; relay_flush(kt->rchan); r = 0; } return r; } void kvm_trace_cleanup(void) { struct kvm_trace *kt = kvm_trace; int i; if (kt == NULL) return; if (kt->trace_state == KVM_TRACE_STATE_RUNNING || kt->trace_state == KVM_TRACE_STATE_PAUSE) { kt->trace_state = KVM_TRACE_STATE_CLEARUP; for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { struct kvm_trace_probe *p = &kvm_trace_probes[i]; marker_probe_unregister(p->name, p->probe_func, p); } marker_synchronize_unregister(); relay_close(kt->rchan); debugfs_remove(kt->lost_file); kfree(kt); } } int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) { void __user *argp = (void __user *)arg; long r = -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EPERM; switch (ioctl) { case KVM_TRACE_ENABLE: r = kvm_trace_enable(argp); break; case KVM_TRACE_PAUSE: r = kvm_trace_pause(); break; case KVM_TRACE_DISABLE: r = 0; kvm_trace_cleanup(); break; } return r; }