aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorShailabh Nagar <nagar@watson.ibm.com>2006-07-14 00:24:41 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2006-07-14 21:53:56 -0700
commit6f44993fe1d7b2b097f6ac60cd5835c6f5ca0874 (patch)
tree0f349f4e6c28cc5d11b7083273543a294c437216
parentc757249af152c59fd74b85e52e8c090acb33d9c0 (diff)
[PATCH] per-task-delay-accounting: delay accounting usage of taskstats interface
Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar <nagar@us.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--include/linux/delayacct.h15
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/linux/taskstats.h55
-rw-r--r--include/linux/taskstats_kern.h1
-rw-r--r--init/Kconfig1
-rw-r--r--kernel/delayacct.c62
-rw-r--r--kernel/taskstats.c16
7 files changed, 144 insertions, 7 deletions
diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 0ecbf9aad8e..d955078a144 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -18,6 +18,7 @@
#define _LINUX_DELAYACCT_H
#include <linux/sched.h>
+#include <linux/taskstats_kern.h>
/*
* Per-task flags relevant to delay accounting
@@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct task_struct *);
extern void __delayacct_tsk_exit(struct task_struct *);
extern void __delayacct_blkio_start(void);
extern void __delayacct_blkio_end(void);
+extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
static inline void delayacct_set_flag(int flag)
{
@@ -74,6 +76,16 @@ static inline void delayacct_blkio_end(void)
__delayacct_blkio_end();
}
+static inline int delayacct_add_tsk(struct taskstats *d,
+ struct task_struct *tsk)
+{
+ if (likely(!delayacct_on))
+ return -EINVAL;
+ if (!tsk->delays)
+ return 0;
+ return __delayacct_add_tsk(d, tsk);
+}
+
#else
static inline void delayacct_set_flag(int flag)
{}
@@ -89,6 +101,9 @@ static inline void delayacct_blkio_start(void)
{}
static inline void delayacct_blkio_end(void)
{}
+static inline int delayacct_add_tsk(struct taskstats *d,
+ struct task_struct *tsk)
+{ return 0; }
#endif /* CONFIG_TASK_DELAY_ACCT */
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f751062d89a..3c5610ca0c9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -990,6 +990,7 @@ struct task_struct {
*/
struct pipe_inode_info *splice_pipe;
#ifdef CONFIG_TASK_DELAY_ACCT
+ spinlock_t delays_lock;
struct task_delay_info *delays;
#endif
};
diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 51f62759bea..c6aeca32348 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -34,7 +34,60 @@
struct taskstats {
/* Version 1 */
- __u64 version;
+ __u16 version;
+ __u16 padding[3]; /* Userspace should not interpret the padding
+ * field which can be replaced by useful
+ * fields if struct taskstats is extended.
+ */
+
+ /* Delay accounting fields start
+ *
+ * All values, until comment "Delay accounting fields end" are
+ * available only if delay accounting is enabled, even though the last
+ * few fields are not delays
+ *
+ * xxx_count is the number of delay values recorded
+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+ *
+ * xxx_delay_total wraps around to zero on overflow
+ * xxx_count incremented regardless of overflow
+ */
+
+ /* Delay waiting for cpu, while runnable
+ * count, delay_total NOT updated atomically
+ */
+ __u64 cpu_count;
+ __u64 cpu_delay_total;
+
+ /* Following four fields atomically updated using task->delays->lock */
+
+ /* Delay waiting for synchronous block I/O to complete
+ * does not account for delays in I/O submission
+ */
+ __u64 blkio_count;
+ __u64 blkio_delay_total;
+
+ /* Delay waiting for page fault I/O (swap in only) */
+ __u64 swapin_count;
+ __u64 swapin_delay_total;
+
+ /* cpu "wall-clock" running time
+ * On some architectures, value will adjust for cpu time stolen
+ * from the kernel in involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_real_total;
+
+ /* cpu "virtual" running time
+ * Uses time intervals seen by the kernel i.e. no adjustment
+ * for kernel's involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_virtual_total;
+ /* Delay accounting fields end */
+ /* version 1 ends here */
};
diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h
index bd0ecb969c2..fc9da2e2644 100644
--- a/include/linux/taskstats_kern.h
+++ b/include/linux/taskstats_kern.h
@@ -17,6 +17,7 @@ enum {
#ifdef CONFIG_TASKSTATS
extern kmem_cache_t *taskstats_cache;
+extern struct mutex taskstats_exit_mutex;
static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
struct taskstats **ptgidstats)
diff --git a/init/Kconfig b/init/Kconfig
index 56a7093b4e4..a099fc6526d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -173,6 +173,7 @@ config TASKSTATS
config TASK_DELAY_ACCT
bool "Enable per-task delay accounting (EXPERIMENTAL)"
+ depends on TASKSTATS
help
Collect information on time spent by a task waiting for system
resources like cpu, synchronous block I/O completion and swapping
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index 3546b0800f9..1be274a462c 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -41,6 +41,10 @@ void delayacct_init(void)
void __delayacct_tsk_init(struct task_struct *tsk)
{
+ spin_lock_init(&tsk->delays_lock);
+ /* No need to acquire tsk->delays_lock for allocation here unless
+ __delayacct_tsk_init called after tsk is attached to tasklist
+ */
tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
if (tsk->delays)
spin_lock_init(&tsk->delays->lock);
@@ -48,8 +52,11 @@ void __delayacct_tsk_init(struct task_struct *tsk)
void __delayacct_tsk_exit(struct task_struct *tsk)
{
- kmem_cache_free(delayacct_cache, tsk->delays);
+ struct task_delay_info *delays = tsk->delays;
+ spin_lock(&tsk->delays_lock);
tsk->delays = NULL;
+ spin_unlock(&tsk->delays_lock);
+ kmem_cache_free(delayacct_cache, delays);
}
/*
@@ -104,3 +111,56 @@ void __delayacct_blkio_end(void)
&current->delays->blkio_delay,
&current->delays->blkio_count);
}
+
+int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
+{
+ s64 tmp;
+ struct timespec ts;
+ unsigned long t1,t2,t3;
+
+ spin_lock(&tsk->delays_lock);
+
+ /* Though tsk->delays accessed later, early exit avoids
+ * unnecessary returning of other data
+ */
+ if (!tsk->delays)
+ goto done;
+
+ tmp = (s64)d->cpu_run_real_total;
+ cputime_to_timespec(tsk->utime + tsk->stime, &ts);
+ tmp += timespec_to_ns(&ts);
+ d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
+
+ /*
+ * No locking available for sched_info (and too expensive to add one)
+ * Mitigate by taking snapshot of values
+ */
+ t1 = tsk->sched_info.pcnt;
+ t2 = tsk->sched_info.run_delay;
+ t3 = tsk->sched_info.cpu_time;
+
+ d->cpu_count += t1;
+
+ jiffies_to_timespec(t2, &ts);
+ tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
+ d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
+
+ tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
+ d->cpu_run_virtual_total =
+ (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
+
+ /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
+
+ spin_lock(&tsk->delays->lock);
+ tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
+ d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
+ tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
+ d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
+ d->blkio_count += tsk->delays->blkio_count;
+ d->swapin_count += tsk->delays->swapin_count;
+ spin_unlock(&tsk->delays->lock);
+
+done:
+ spin_unlock(&tsk->delays_lock);
+ return 0;
+}
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 82ec9137d90..ea9506de3b8 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -18,13 +18,13 @@
#include <linux/kernel.h>
#include <linux/taskstats_kern.h>
+#include <linux/delayacct.h>
#include <net/genetlink.h>
#include <asm/atomic.h>
static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
static int family_registered;
kmem_cache_t *taskstats_cache;
-static DEFINE_MUTEX(taskstats_exit_mutex);
static struct genl_family family = {
.id = GENL_ID_GENERATE,
@@ -120,7 +120,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
* goto err;
*/
-err:
+ rc = delayacct_add_tsk(stats, tsk);
+ stats->version = TASKSTATS_VERSION;
+
+ /* Define err: label here if needed */
put_task_struct(tsk);
return rc;
@@ -152,8 +155,14 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
* break;
*/
+ rc = delayacct_add_tsk(stats, tsk);
+ if (rc)
+ break;
+
} while_each_thread(first, tsk);
read_unlock(&tasklist_lock);
+ stats->version = TASKSTATS_VERSION;
+
/*
* Accounting subsytems can also add calls here if they don't
@@ -233,8 +242,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
if (!family_registered || !tidstats)
return;
- mutex_lock(&taskstats_exit_mutex);
-
is_thread_group = !thread_group_empty(tsk);
rc = 0;
@@ -292,7 +299,6 @@ nla_put_failure:
err_skb:
nlmsg_free(rep_skb);
ret:
- mutex_unlock(&taskstats_exit_mutex);
return;
}