From 05169237b55058a3993fb4804d00b65dfa3e4a0c Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 4 Jun 2007 15:15:37 +1000 Subject: [POWERPC] spufs: Add support for SPU single stepping This patch adds support for SPU single stepping. The single step bit is set in the SPU when the current process is being single-stepped via ptrace. The spu then stops and returns with a specific flag set, and the syscall exit code will generate the SIGTRAP. Signed-off-by: Benjamin Herrenschmidt Acked-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/run.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 57626600b1a..6625ed2a7fd 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -142,8 +142,12 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc) runcntl = SPU_RUNCNTL_RUNNABLE; ctx->ops->runcntl_write(ctx, runcntl); } else { + unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL; spu_start_tick(ctx); ctx->ops->npc_write(ctx, *npc); + if (test_thread_flag(TIF_SINGLESTEP)) + mode = SPU_PRIVCNTL_MODE_SINGLE_STEP; + out_be64(&ctx->spu->priv2->spu_privcntl_RW, mode); ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); } @@ -334,7 +338,8 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, ret = spu_process_events(ctx); } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | - SPU_STATUS_STOPPED_BY_HALT))); + SPU_STATUS_STOPPED_BY_HALT | + SPU_STATUS_SINGLE_STEP))); ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); @@ -344,10 +349,15 @@ out2: if ((ret == 0) || ((ret == -ERESTARTSYS) && ((status & SPU_STATUS_STOPPED_BY_HALT) || + (status & SPU_STATUS_SINGLE_STEP) || ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) ret = status; + /* Note: we don't need to force_sig SIGTRAP on single-step + * since we have TIF_SINGLESTEP set, thus the kernel will do + * it upon return from the syscall anyway + */ if ((status & SPU_STATUS_STOPPED_BY_STOP) && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { force_sig(SIGTRAP, current); -- cgit v1.2.3 From cbe709c1683dd54a2ec2981c9e8415cb3176f4e0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 4 Jun 2007 15:15:38 +1000 Subject: [POWERPC] spufs: Add a "capabilities" file to spu contexts This adds a "capabilities" file to spu contexts consisting of a list of linefeed-separated capability names. The currently exposed capabilities are "sched" (the context is schedulable) and "step" (the context supports single stepping).
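(As a rough illustration of how a debugger or monitoring tool might consume the new file, here is a minimal userspace sketch. It is not part of the patch; the /spu mount point and the "myctx" context name are assumptions, since spufs can be mounted anywhere and contexts are named by their creator.)

/* Probe an spufs context's capabilities from userspace. */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[64];
	int can_sched = 0, can_step = 0;
	/* assumed paths: spufs mounted at /spu, context dir "myctx" */
	FILE *f = fopen("/spu/myctx/capabilities", "r");

	if (!f) {
		perror("capabilities");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {	/* one name per line */
		if (!strcmp(line, "sched\n"))
			can_sched = 1;
		else if (!strcmp(line, "step\n"))
			can_step = 1;
	}
	fclose(f);
	printf("schedulable: %d, single-steppable: %d\n", can_sched, can_step);
	return 0;
}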
Signed-off-by: Benjamin Herrenschmidt Acked-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/file.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index b1e7e2f8a2e..f1cecaaad98 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -39,6 +40,7 @@ #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) + static int spufs_mem_open(struct inode *inode, struct file *file) { @@ -1797,6 +1799,29 @@ static int spufs_info_open(struct inode *inode, struct file *file) return 0; } +static int spufs_caps_show(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + if (!(ctx->flags & SPU_CREATE_NOSCHED)) + seq_puts(s, "sched\n"); + if (!(ctx->flags & SPU_CREATE_ISOLATE)) + seq_puts(s, "step\n"); + return 0; +} + +static int spufs_caps_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_caps_fops = { + .open = spufs_caps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { @@ -2015,6 +2040,7 @@ static const struct file_operations spufs_proxydma_info_fops = { }; struct tree_descr spufs_dir_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, { "regs", &spufs_regs_fops, 0666, }, { "mbox", &spufs_mbox_fops, 0444, }, @@ -2050,6 +2076,7 @@ struct tree_descr spufs_dir_contents[] = { }; struct tree_descr spufs_dir_nosched_contents[] = { + { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, { "mbox", &spufs_mbox_fops, 0444, }, { "ibox", &spufs_ibox_fops, 0444, }, -- cgit v1.2.3 From 7a896dc5f4a369193256653535aa7e2b521c611d Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Fri, 29 Jun 2007 10:57:49 +1000 Subject: [POWERPC] spufs: fix building spufs/spu_save_dump.h Currently it fails with gcc from sdk 2.1 because of a spec change [1]. Maybe we should start using the definitions from spu_mfcio.h. [1] http://gcc.gnu.org/ml/gcc-patches/2006-11/msg01598.html Signed-off-by: Sebastian Siewior Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/spu_save.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/spu_save.c b/arch/powerpc/platforms/cell/spufs/spu_save.c index 196033b8a57..ae95cc1701e 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_save.c +++ b/arch/powerpc/platforms/cell/spufs/spu_save.c @@ -44,7 +44,7 @@ static inline void save_event_mask(void) * Read the SPU_RdEventMsk channel and save to the LSCSA. */ offset = LSCSA_QW_OFFSET(event_mask); - regs_spill[offset].slot[0] = spu_readch(SPU_RdEventStatMask); + regs_spill[offset].slot[0] = spu_readch(SPU_RdEventMask); } static inline void save_tag_mask(void) -- cgit v1.2.3 From be7031773eded128675de6da778234a935c8d8ea Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Fri, 29 Jun 2007 10:57:50 +1000 Subject: [POWERPC] spufs: Add bit definition Add a bit definition from the book, and replace one hex number with a symbol, for clarity.
Signed-off-by: Sebastian Siewior Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/run.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 6625ed2a7fd..3ba30cea764 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -29,7 +29,8 @@ static inline int spu_stopped(struct spu_context *ctx, u32 * stat) spu = ctx->spu; pte_fault = spu->dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED); - return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; + return (!(*stat & SPU_STATUS_RUNNING) || pte_fault || spu->class_0_pending) ? + 1 : 0; } static int spu_setup_isolated(struct spu_context *ctx) -- cgit v1.2.3 From 379018022071489a7dffee74db2a267465dab561 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:57:51 +1000 Subject: [POWERPC] spusched: Switch from workqueues to kthread + timer tick Get rid of the scheduler workqueues, which complicated things a lot, in favour of a dedicated spu scheduler thread that gets woken by a traditional scheduler tick. By default this scheduler tick runs at HZ / 10, i.e. one spu scheduler tick for every 10 cpu ticks. Currently the tick is not disabled when we have fewer contexts than available spus, but I will implement this later. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/context.c | 2 +- arch/powerpc/platforms/cell/spufs/run.c | 6 +- arch/powerpc/platforms/cell/spufs/sched.c | 142 ++++++++++++++++------------ arch/powerpc/platforms/cell/spufs/spufs.h | 12 +-- 4 files changed, 86 insertions(+), 76 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 7c51cb54bca..f084667e4f5 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -56,7 +56,7 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) ctx->rt_priority = current->rt_priority; ctx->policy = current->policy; ctx->prio = current->prio; - INIT_DELAYED_WORK(&ctx->sched_work, spu_sched_tick); + ctx->time_slice = SPU_DEF_TIMESLICE; goto out; out_free: kfree(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 3ba30cea764..89b02b6bfc5 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -144,7 +144,6 @@ static int spu_run_init(struct spu_context *ctx, u32 * npc) ctx->ops->runcntl_write(ctx, runcntl); } else { unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL; - spu_start_tick(ctx); ctx->ops->npc_write(ctx, *npc); if (test_thread_flag(TIF_SINGLESTEP)) mode = SPU_PRIVCNTL_MODE_SINGLE_STEP; @@ -160,7 +159,6 @@ static int spu_run_fini(struct spu_context *ctx, u32 * npc, { int ret = 0; - spu_stop_tick(ctx); *status = ctx->ops->status_read(ctx); *npc = ctx->ops->npc_read(ctx); spu_release(ctx); @@ -330,10 +328,8 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { ret = spu_reacquire_runnable(ctx, npc, &status); - if (ret) { - spu_stop_tick(ctx); + if (ret) goto out2; - } continue; } ret = spu_process_events(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c
b/arch/powerpc/platforms/cell/spufs/sched.c index 3b831e07f1e..d673353b6d3 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,8 @@ #define SPU_TIMESLICE (HZ) +#define SPUSCHED_TICK (HZ / 100) + struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; @@ -54,7 +57,8 @@ struct spu_prio_array { }; static struct spu_prio_array *spu_prio; -static struct workqueue_struct *spu_sched_wq; +static struct task_struct *spusched_task; +static struct timer_list spusched_timer; static inline int node_allowed(int node) { @@ -68,31 +72,6 @@ static inline int node_allowed(int node) return 1; } -void spu_start_tick(struct spu_context *ctx) -{ - if (ctx->policy == SCHED_RR) { - /* - * Make sure the exiting bit is cleared. - */ - clear_bit(SPU_SCHED_EXITING, &ctx->sched_flags); - mb(); - queue_delayed_work(spu_sched_wq, &ctx->sched_work, SPU_TIMESLICE); - } -} - -void spu_stop_tick(struct spu_context *ctx) -{ - if (ctx->policy == SCHED_RR) { - /* - * While the work can be rearming normally setting this flag - * makes sure it does not rearm itself anymore. - */ - set_bit(SPU_SCHED_EXITING, &ctx->sched_flags); - mb(); - cancel_delayed_work(&ctx->sched_work); - } -} - /** * spu_add_to_active_list - add spu to active list * @spu: spu to add to the active list @@ -104,6 +83,11 @@ static void spu_add_to_active_list(struct spu *spu) mutex_unlock(&spu_prio->active_mutex[spu->node]); } +static void __spu_remove_from_active_list(struct spu *spu) +{ + list_del_init(&spu->list); +} + /** * spu_remove_from_active_list - remove spu from active list * @spu: spu to remove from the active list @@ -113,7 +97,7 @@ static void spu_remove_from_active_list(struct spu *spu) int node = spu->node; mutex_lock(&spu_prio->active_mutex[node]); - list_del_init(&spu->list); + __spu_remove_from_active_list(spu); mutex_unlock(&spu_prio->active_mutex[node]); } @@ -161,7 +145,6 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu->timestamp = jiffies; spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); - spu_add_to_active_list(spu); ctx->state = SPU_STATE_RUNNABLE; } @@ -175,7 +158,6 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, spu->pid, spu->number, spu->node); - spu_remove_from_active_list(spu); spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); @@ -312,6 +294,7 @@ static struct spu *find_victim(struct spu_context *ctx) victim = NULL; goto restart; } + spu_remove_from_active_list(spu); spu_unbind_context(spu, victim); mutex_unlock(&victim->state_mutex); /* @@ -354,6 +337,7 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) spu = find_victim(ctx); if (spu) { spu_bind_context(spu, ctx); + spu_add_to_active_list(spu); return 0; } @@ -397,6 +381,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio); if (new || force) { + spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); spu_free(spu); if (new) @@ -437,51 +422,78 @@ void spu_yield(struct spu_context *ctx) } } -void spu_sched_tick(struct work_struct *work) +static void spusched_tick(struct spu_context *ctx) { - struct spu_context *ctx = - container_of(work, struct spu_context, sched_work.work); - int preempted; + if (ctx->policy != SCHED_RR || 
--ctx->time_slice) + return; /* - * If this context is being stopped avoid rescheduling from the - * scheduler tick because we would block on the state_mutex. - * The caller will yield the spu later on anyway. + * Unfortunately active_mutex ranks outside of state_mutex, so + * we have to trylock here. If we fail give the context another + * tick and try again. */ - if (test_bit(SPU_SCHED_EXITING, &ctx->sched_flags)) - return; - - mutex_lock(&ctx->state_mutex); - preempted = __spu_deactivate(ctx, 0, ctx->prio + 1); - mutex_unlock(&ctx->state_mutex); + if (mutex_trylock(&ctx->state_mutex)) { + struct spu_context *new = grab_runnable_context(ctx->prio + 1); + if (new) { + struct spu *spu = ctx->spu; - if (preempted) { - /* - * We need to break out of the wait loop in spu_run manually - * to ensure this context gets put on the runqueue again - * ASAP. - */ - wake_up(&ctx->stop_wq); + __spu_remove_from_active_list(spu); + spu_unbind_context(spu, ctx); + spu_free(spu); + wake_up(&new->stop_wq); + /* + * We need to break out of the wait loop in + * spu_run manually to ensure this context + * gets put on the runqueue again ASAP. + */ + wake_up(&ctx->stop_wq); + } + ctx->time_slice = SPU_DEF_TIMESLICE; + mutex_unlock(&ctx->state_mutex); } else { - spu_start_tick(ctx); + ctx->time_slice++; } } +static void spusched_wake(unsigned long data) +{ + mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + wake_up_process(spusched_task); +} + +static int spusched_thread(void *unused) +{ + struct spu *spu, *next; + int node; + + setup_timer(&spusched_timer, spusched_wake, 0); + __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + for (node = 0; node < MAX_NUMNODES; node++) { + mutex_lock(&spu_prio->active_mutex[node]); + list_for_each_entry_safe(spu, next, + &spu_prio->active_list[node], + list) + spusched_tick(spu->ctx); + mutex_unlock(&spu_prio->active_mutex[node]); + } + } + + del_timer_sync(&spusched_timer); + return 0; +} + int __init spu_sched_init(void) { int i; - spu_sched_wq = create_singlethread_workqueue("spusched"); - if (!spu_sched_wq) - return 1; - spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); - if (!spu_prio) { - printk(KERN_WARNING "%s: Unable to allocate priority queue.\n", - __FUNCTION__); - destroy_workqueue(spu_sched_wq); - return 1; - } + if (!spu_prio) + return -ENOMEM; + for (i = 0; i < MAX_PRIO; i++) { INIT_LIST_HEAD(&spu_prio->runq[i]); __clear_bit(i, spu_prio->bitmap); @@ -492,7 +504,14 @@ int __init spu_sched_init(void) INIT_LIST_HEAD(&spu_prio->active_list[i]); } spin_lock_init(&spu_prio->runq_lock); + + spusched_task = kthread_run(spusched_thread, NULL, "spusched"); + if (IS_ERR(spusched_task)) { + kfree(spu_prio); + return PTR_ERR(spusched_task); + } return 0; + } void __exit spu_sched_exit(void) @@ -500,6 +519,8 @@ void __exit spu_sched_exit(void) struct spu *spu, *tmp; int node; + kthread_stop(spusched_task); + for (node = 0; node < MAX_NUMNODES; node++) { mutex_lock(&spu_prio->active_mutex[node]); list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], @@ -510,5 +531,4 @@ void __exit spu_sched_exit(void) mutex_unlock(&spu_prio->active_mutex[node]); } kfree(spu_prio); - destroy_workqueue(spu_sched_wq); } diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 47617e8014a..8068171dfa9 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -31,6 +31,8 @@ 
#include #include +#define SPU_DEF_TIMESLICE 100 + /* The magic number for our file system */ enum { SPUFS_MAGIC = 0x23c9b64e, @@ -39,11 +41,6 @@ enum { struct spu_context_ops; struct spu_gang; -/* ctx->sched_flags */ -enum { - SPU_SCHED_EXITING = 0, -}; - struct spu_context { struct spu *spu; /* pointer to a physical SPU */ struct spu_state csa; /* SPU context save area. */ @@ -83,7 +80,7 @@ struct spu_context { /* scheduler fields */ struct list_head rq; - struct delayed_work sched_work; + unsigned int time_slice; unsigned long sched_flags; unsigned long rt_priority; int policy; @@ -200,9 +197,6 @@ void spu_acquire_saved(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); -void spu_start_tick(struct spu_context *ctx); -void spu_stop_tick(struct spu_context *ctx); -void spu_sched_tick(struct work_struct *work); int __init spu_sched_init(void); void __exit spu_sched_exit(void); -- cgit v1.2.3 From fe443ef2ac421c9c652e251e8733e2479d8e411a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:57:52 +1000 Subject: [POWERPC] spusched: Dynamic timeslicing for SCHED_OTHER Enable preemptive scheduling for non-RT contexts. We use the same algorithms as the CPU scheduler to calculate the time slice length, and for now we also use the same timeslice length as the CPU scheduler. This might not be enough for good performance and can be changed after some benchmarking. Note that currently we do not boost the priority for contexts waiting on the runqueue for a long time, so contexts with a higher nice value could starve ones with lower priority. This could easily be fixed once the rework of the spu lists that Luke and I discussed is done. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/context.c | 15 ++++++-- arch/powerpc/platforms/cell/spufs/sched.c | 54 ++++++++++++++++++++++++----- arch/powerpc/platforms/cell/spufs/spufs.h | 4 +-- 3 files changed, 58 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index f084667e4f5..c5ec7cfc24b 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -53,10 +53,19 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) INIT_LIST_HEAD(&ctx->rq); if (gang) spu_gang_add_ctx(gang, ctx); - ctx->rt_priority = current->rt_priority; + + /* + * We do our own priority calculations, so we normally want + * ->static_prio to start with. Unfortunately this field + * contains junk for threads with a realtime scheduling + * policy so we have to look at ->prio in this case.
+ */ + if (rt_prio(current->prio)) + ctx->prio = current->prio; + else + ctx->prio = current->static_prio; ctx->policy = current->policy; - ctx->prio = current->prio; - ctx->time_slice = SPU_DEF_TIMESLICE; + spu_set_timeslice(ctx); goto out; out_free: kfree(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index d673353b6d3..1b2916bdc1c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -44,10 +44,6 @@ #include #include "spufs.h" -#define SPU_TIMESLICE (HZ) - -#define SPUSCHED_TICK (HZ / 100) - struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; @@ -60,6 +56,46 @@ static struct spu_prio_array *spu_prio; static struct task_struct *spusched_task; static struct timer_list spusched_timer; +/* + * Priority of a normal, non-rt, non-niced'd process (aka nice level 0). + */ +#define NORMAL_PRIO 120 + +/* + * Frequency of the spu scheduler tick. By default we do one SPU scheduler + * tick for every 10 CPU scheduler ticks. + */ +#define SPUSCHED_TICK (10) + +/* + * These are the 'tuning knobs' of the scheduler: + * + * Minimum timeslice is 5 msecs (or 10 jiffies, whichever is larger), + * default timeslice is 100 msecs, maximum timeslice is 800 msecs. + */ +#define MIN_SPU_TIMESLICE max(5 * HZ / 100, 10) +#define DEF_SPU_TIMESLICE (100 * HZ / 100) + +#define MAX_USER_PRIO (MAX_PRIO - MAX_RT_PRIO) +#define SCALE_PRIO(x, prio) \ + max(x * (MAX_PRIO - prio) / (MAX_USER_PRIO / 2), MIN_SPU_TIMESLICE) + +/* + * scale user-nice values [ -20 ... 0 ... 19 ] to time slice values: + * [800ms ... 100ms ... 5ms] + * + * The higher a thread's priority, the bigger timeslices + * it gets during one round of execution. But even the lowest + * priority thread gets MIN_TIMESLICE worth of execution time. + */ +void spu_set_timeslice(struct spu_context *ctx) +{ + if (ctx->prio < NORMAL_PRIO) + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio); + else + ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio); +} + static inline int node_allowed(int node) { cpumask_t mask; @@ -265,8 +301,8 @@ static struct spu *find_victim(struct spu_context *ctx) list_for_each_entry(spu, &spu_prio->active_list[node], list) { struct spu_context *tmp = spu->ctx; - if (tmp->rt_priority < ctx->rt_priority && - (!victim || tmp->rt_priority < victim->rt_priority)) + if (tmp->prio > ctx->prio && + (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } mutex_unlock(&spu_prio->active_mutex[node]); @@ -333,7 +369,7 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) * If this is a realtime thread we try to get it running by * preempting a lower priority thread. 
*/ - if (!spu && ctx->rt_priority) + if (!spu && rt_prio(ctx->prio)) spu = find_victim(ctx); if (spu) { spu_bind_context(spu, ctx); spu_add_to_active_list(spu); return 0; } @@ -424,7 +460,7 @@ void spu_yield(struct spu_context *ctx) static void spusched_tick(struct spu_context *ctx) { - if (ctx->policy != SCHED_RR || --ctx->time_slice) + if (ctx->policy == SCHED_FIFO || --ctx->time_slice) return; /* @@ -448,7 +484,7 @@ static void spusched_tick(struct spu_context *ctx) */ wake_up(&ctx->stop_wq); } - ctx->time_slice = SPU_DEF_TIMESLICE; + spu_set_timeslice(ctx); mutex_unlock(&ctx->state_mutex); } else { ctx->time_slice++; } diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 8068171dfa9..fddc59c204b 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -31,8 +31,6 @@ #include #include -#define SPU_DEF_TIMESLICE 100 - /* The magic number for our file system */ enum { SPUFS_MAGIC = 0x23c9b64e, @@ -82,7 +80,6 @@ struct spu_context { struct list_head rq; unsigned int time_slice; unsigned long sched_flags; - unsigned long rt_priority; int policy; int prio; }; @@ -197,6 +194,7 @@ void spu_acquire_saved(struct spu_context *ctx); int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); +void spu_set_timeslice(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); -- cgit v1.2.3 From 60e242393346c1a9a64e7b14dfb7f613a737324f Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Jun 2007 10:57:53 +1000 Subject: [POWERPC] spusched: Fix timeslice calculations The current timeslice code mixes 'jiffies' up with 'spusched ticks'. This change correctly defines the number of time slices each SPE context is given, and clarifies the comment. This brings the default timeslice for SPE contexts into a reasonable range. Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/sched.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 1b2916bdc1c..6843a01b1a2 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -70,11 +70,11 @@ static struct timer_list spusched_timer; /* * These are the 'tuning knobs' of the scheduler: * - * Minimum timeslice is 5 msecs (or 10 jiffies, whichever is larger), - * default timeslice is 100 msecs, maximum timeslice is 800 msecs. + * Minimum timeslice is 5 msecs (or 1 spu scheduler tick, whichever is + * larger), default timeslice is 100 msecs, maximum timeslice is 800 msecs. */ -#define MIN_SPU_TIMESLICE max(5 * HZ / 100, 10) -#define DEF_SPU_TIMESLICE (100 * HZ / 100) +#define MIN_SPU_TIMESLICE max(5 * HZ / (1000 * SPUSCHED_TICK), 1) +#define DEF_SPU_TIMESLICE (100 * HZ / (1000 * SPUSCHED_TICK)) #define MAX_USER_PRIO (MAX_PRIO - MAX_RT_PRIO) #define SCALE_PRIO(x, prio) \ -- cgit v1.2.3 From f3f59bec0c7ad083e9c95a550bcb1e9ca27e25f4 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Jun 2007 10:57:54 +1000 Subject: [POWERPC] spusched: Print out scheduling tunables with DEBUG Print out a few scheduler tuning parameters when we've compiled with DEBUG defined.
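(To make the corrected tick/timeslice arithmetic in the timeslice fix above concrete, here is a small standalone sketch; it is not part of the patch. HZ = 1000 is assumed purely for the worked numbers, and the kernel's max() is replaced by a plain ternary so the program builds outside the kernel tree.)

#include <stdio.h>

#define HZ		1000	/* assumed for this example */
#define SPUSCHED_TICK	10	/* one spu tick every 10 cpu ticks */

/* timeslices are counted in spu scheduler ticks, not jiffies */
#define MIN_SPU_TIMESLICE \
	(5 * HZ / (1000 * SPUSCHED_TICK) > 1 ? \
	 5 * HZ / (1000 * SPUSCHED_TICK) : 1)
#define DEF_SPU_TIMESLICE	(100 * HZ / (1000 * SPUSCHED_TICK))

int main(void)
{
	/* one spu tick lasts SPUSCHED_TICK jiffies = 10 ms at HZ=1000 */
	printf("min: %d ticks = %d ms\n", MIN_SPU_TIMESLICE,
	       MIN_SPU_TIMESLICE * SPUSCHED_TICK * 1000 / HZ);
	printf("default: %d ticks = %d ms\n", DEF_SPU_TIMESLICE,
	       DEF_SPU_TIMESLICE * SPUSCHED_TICK * 1000 / HZ);
	return 0;
}

With these assumed numbers the program prints a 1-tick (10 ms) minimum and a 10-tick (100 ms) default, matching the updated comment in the patch.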
Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/sched.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 6843a01b1a2..002b40af4a7 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -546,6 +546,9 @@ int __init spu_sched_init(void) kfree(spu_prio); return PTR_ERR(spusched_task); } + + pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", + SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); return 0; } -- cgit v1.2.3 From 2cf2b3b49f10d2f4a0703070fc54ce1cd84a6cda Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:57:55 +1000 Subject: [POWERPC] spusched: Update scheduling parameters on every spu_run Update scheduling information on every spu_run to allow for setting threads to realtime priority just before running them. This requires some slightly ugly code in spufs_run_spu because we can just update the information unlocked if the spu is not runnable, but we need to acquire the active_mutex when it is runnable to protect against find_victim. This locking scheme requires opencoding spu_acquire_runnable in spufs_run_spu which actually is a nice cleanup all by itself. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/context.c | 11 ----------- arch/powerpc/platforms/cell/spufs/run.c | 19 ++++++++++++++++--- arch/powerpc/platforms/cell/spufs/sched.c | 27 +++++++++++++++++++++++++++ arch/powerpc/platforms/cell/spufs/spufs.h | 2 ++ 4 files changed, 45 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index c5ec7cfc24b..c778d9178e0 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -54,17 +54,6 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) if (gang) spu_gang_add_ctx(gang, ctx); - /* - * We do our own priority calculations, so we normally want - * ->static_prio to start with. Unfortunately this field - * contains junk for threads with a realtime scheduling - * policy so we have to look at ->prio in this case. - */ - if (rt_prio(current->prio)) - ctx->prio = current->prio; - else - ctx->prio = current->static_prio; - ctx->policy = current->policy; spu_set_timeslice(ctx); goto out; out_free: kfree(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 89b02b6bfc5..4e0db6ae0d5 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -301,9 +301,22 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, ctx->ops->master_start(ctx); ctx->event_return = 0; - ret = spu_acquire_runnable(ctx, 0); - if (ret) - return ret; + spu_acquire(ctx); + if (ctx->state == SPU_STATE_SAVED) { + __spu_update_sched_info(ctx); + + ret = spu_activate(ctx, 0); + if (ret) { + spu_release(ctx); + goto out; + } + } else { + /* + * We have to update the scheduling priority under active_mutex + * to protect against find_victim().
+ */ + spu_update_sched_info(ctx); + } ret = spu_run_init(ctx, npc); if (ret) { diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 002b40af4a7..3707c7fdbde 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -96,6 +96,33 @@ void spu_set_timeslice(struct spu_context *ctx) ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio); } +/* + * Update scheduling information from the owning thread. + */ +void __spu_update_sched_info(struct spu_context *ctx) +{ + /* + * We do our own priority calculations, so we normally want + * ->static_prio to start with. Unfortunately this field + * contains junk for threads with a realtime scheduling + * policy so we have to look at ->prio in this case. + */ + if (rt_prio(current->prio)) + ctx->prio = current->prio; + else + ctx->prio = current->static_prio; + ctx->policy = current->policy; +} + +void spu_update_sched_info(struct spu_context *ctx) +{ + int node = ctx->spu->node; + + mutex_lock(&spu_prio->active_mutex[node]); + __spu_update_sched_info(ctx); + mutex_unlock(&spu_prio->active_mutex[node]); +} + static inline int node_allowed(int node) { cpumask_t mask; diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index fddc59c204b..ff77f904fa3 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -195,6 +195,8 @@ int spu_activate(struct spu_context *ctx, unsigned long flags); void spu_deactivate(struct spu_context *ctx); void spu_yield(struct spu_context *ctx); void spu_set_timeslice(struct spu_context *ctx); +void spu_update_sched_info(struct spu_context *ctx); +void __spu_update_sched_info(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); -- cgit v1.2.3 From ea1ae5949d7fcd2e622226ba71741a0f43b6ef0a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:57:56 +1000 Subject: [POWERPC] spusched: fix cpu/node binding Add a cpus_allowed field to struct spu_context so that we always use the cpu mask of the owning thread instead of the one happening to call into the scheduler. Also use this information in grab_runnable_context to avoid spurious wakeups.
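(A minimal sketch, not part of the patch, of the mask intersection this introduces in node_allowed(): a context may run on a node only if its cpus_allowed mask covers at least one cpu of that node. Plain unsigned longs stand in for the kernel's cpumask_t, and the 4-cpus-per-node topology is an assumption for illustration.)

#include <stdio.h>

#define MAX_NODES 4

/* assumed topology: cpus 0-3 on node 0, cpus 4-7 on node 1, ... */
static unsigned long node_to_cpumask(int node)
{
	return 0xfUL << (node * 4);
}

static int node_allowed(unsigned long cpus_allowed, int node)
{
	return (cpus_allowed & node_to_cpumask(node)) != 0;
}

int main(void)
{
	unsigned long cpus_allowed = 0x30;	/* bound to cpus 4 and 5 */
	int node;

	for (node = 0; node < MAX_NODES; node++)
		printf("node %d allowed: %d\n", node,
		       node_allowed(cpus_allowed, node));
	return 0;	/* only node 1 is allowed here */
}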
Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/context.c | 2 +- arch/powerpc/platforms/cell/spufs/sched.c | 70 ++++++++++++++++++++--------- arch/powerpc/platforms/cell/spufs/spufs.h | 2 + 3 files changed, 52 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index c778d9178e0..6ff2a75589f 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -53,7 +53,7 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) INIT_LIST_HEAD(&ctx->rq); if (gang) spu_gang_add_ctx(gang, ctx); - + ctx->cpus_allowed = current->cpus_allowed; spu_set_timeslice(ctx); goto out; out_free: kfree(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 3707c7fdbde..69272620a6b 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -112,6 +112,16 @@ void __spu_update_sched_info(struct spu_context *ctx) else ctx->prio = current->static_prio; ctx->policy = current->policy; + + /* + * A lot of places that don't hold active_mutex poke into + * cpus_allowed, including grab_runnable_context which + * already holds the runq_lock. So abuse runq_lock + * to protect this field as well. + */ + spin_lock(&spu_prio->runq_lock); + ctx->cpus_allowed = current->cpus_allowed; + spin_unlock(&spu_prio->runq_lock); } void spu_update_sched_info(struct spu_context *ctx) @@ -123,16 +133,27 @@ void spu_update_sched_info(struct spu_context *ctx) mutex_unlock(&spu_prio->active_mutex[node]); } -static inline int node_allowed(int node) +static int __node_allowed(struct spu_context *ctx, int node) { - cpumask_t mask; + if (nr_cpus_node(node)) { + cpumask_t mask = node_to_cpumask(node); - if (!nr_cpus_node(node)) - return 0; - mask = node_to_cpumask(node); - if (!cpus_intersects(mask, current->cpus_allowed)) - return 0; - return 1; + if (cpus_intersects(mask, ctx->cpus_allowed)) + return 1; + } + + return 0; +} + +static int node_allowed(struct spu_context *ctx, int node) +{ + int rval; + + spin_lock(&spu_prio->runq_lock); + rval = __node_allowed(ctx, node); + spin_unlock(&spu_prio->runq_lock); + + return rval; } /** @@ -289,7 +310,7 @@ static struct spu *spu_get_idle(struct spu_context *ctx) for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; - if (!node_allowed(node)) + if (!node_allowed(ctx, node)) continue; spu = spu_alloc_node(node); if (spu) @@ -321,7 +342,7 @@ static struct spu *find_victim(struct spu_context *ctx) node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; - if (!node_allowed(node)) + if (!node_allowed(ctx, node)) continue; mutex_lock(&spu_prio->active_mutex[node]); @@ -416,23 +437,28 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) * Remove the highest priority context on the runqueue and return it * to the caller. Returns %NULL if no runnable context was found.
*/ -static struct spu_context *grab_runnable_context(int prio) +static struct spu_context *grab_runnable_context(int prio, int node) { - struct spu_context *ctx = NULL; + struct spu_context *ctx; int best; spin_lock(&spu_prio->runq_lock); best = sched_find_first_bit(spu_prio->bitmap); - if (best < prio) { + while (best < prio) { struct list_head *rq = &spu_prio->runq[best]; - BUG_ON(list_empty(rq)); - - ctx = list_entry(rq->next, struct spu_context, rq); - __spu_del_from_rq(ctx); + list_for_each_entry(ctx, rq, rq) { + /* XXX(hch): check for affinity here as well */ + if (__node_allowed(ctx, node)) { + __spu_del_from_rq(ctx); + goto found; + } + } + best++; } + ctx = NULL; + found: spin_unlock(&spu_prio->runq_lock); - return ctx; } @@ -442,7 +468,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) struct spu_context *new = NULL; if (spu) { - new = grab_runnable_context(max_prio); + new = grab_runnable_context(max_prio, spu->node); if (new || force) { spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); spu_free(spu); if (new) wake_up(&new->stop_wq); diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index ff77f904fa3..98d3c18b2b6 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -80,6 +81,7 @@ struct spu_context { struct list_head rq; unsigned int time_slice; unsigned long sched_flags; + cpumask_t cpus_allowed; int policy; int prio; }; -- cgit v1.2.3 From 46cbf93960e64f313f6e247cbca7afaa50e3ee2c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:57:57 +1000 Subject: [POWERPC] spusched: Catch nosched contexts in spu_deactivate spu_deactivate should never be called for nosched contexts. Put in a check so we can print a stacktrace and exit early in case it happens erroneously. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/sched.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 69272620a6b..a20e4e28858 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -491,6 +491,15 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) */ void spu_deactivate(struct spu_context *ctx) { + /* + * We must never reach this for a nosched context, + * but handle the case gracefully instead of panicking. + */ + if (ctx->flags & SPU_CREATE_NOSCHED) { + WARN_ON(1); + return; + } + __spu_deactivate(ctx, 1, MAX_PRIO); } -- cgit v1.2.3 From df09cf3e2cd597d373f3a6046df0e0a50881ea44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:57:58 +1000 Subject: [POWERPC] spusched: No preemption for nosched contexts And last but not least we need to make sure the scheduler tick never preempts a nosched context.
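(For reference, a nosched context is one created with the SPU_CREATE_NOSCHED flag, which keeps it pinned to an SPU outside the scheduler's control. A hypothetical userspace sketch follows; it is not part of the patch. spu_create(2) exists only on powerpc kernels with spufs, creating nosched contexts has typically required extra privileges such as CAP_SYS_NICE, and both the /spu path and the flag value are assumptions to be checked against the local headers.)

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef SPU_CREATE_NOSCHED
#define SPU_CREATE_NOSCHED 0x0004	/* assumed value, see <asm/spu.h> */
#endif

int main(void)
{
#ifdef __NR_spu_create
	/* spu_create(pathname, flags, mode) makes a new context dir */
	long fd = syscall(__NR_spu_create, "/spu/myctx",
			  SPU_CREATE_NOSCHED, 0755);

	if (fd < 0) {
		perror("spu_create");
		return 1;
	}
	printf("nosched context created, fd %ld\n", fd);
	return 0;
#else
	fprintf(stderr, "spu_create is powerpc-only\n");
	return 1;
#endif
}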
Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/sched.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index a20e4e28858..7bb5229b1e3 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -522,7 +522,12 @@ void spu_yield(struct spu_context *ctx) static void spusched_tick(struct spu_context *ctx) { - if (ctx->policy == SCHED_FIFO || --ctx->time_slice) + if (ctx->flags & SPU_CREATE_NOSCHED) + return; + if (ctx->policy == SCHED_FIFO) + return; + + if (--ctx->time_slice) return; /* -- cgit v1.2.3 From b8c295f90854d682018d74599efc258628be32e3 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Jun 2007 10:57:59 +1000 Subject: [POWERPC] spufs: Remove spufs_dir_inode_operations spufs_dir_inode_operations is exactly the same as simple_dir_inode_operations. Use that instead. Signed-off-by: Jeremy Kerr Signed-off-by: Arnd Bergmann Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 9807206e021..f37460e5bfd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -232,10 +232,6 @@ static int spufs_dir_close(struct inode *inode, struct file *file) return dcache_dir_close(inode, file); } -const struct inode_operations spufs_dir_inode_operations = { - .lookup = simple_lookup, -}; - const struct file_operations spufs_context_fops = { .open = dcache_dir_open, .release = spufs_dir_close, @@ -269,7 +265,7 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, goto out_iput; ctx->flags = flags; - inode->i_op = &spufs_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; if (flags & SPU_CREATE_NOSCHED) ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, @@ -386,7 +382,7 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, int mode) if (!gang) goto out_iput; - inode->i_op = &spufs_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; d_instantiate(dentry, inode); @@ -593,7 +589,7 @@ spufs_create_root(struct super_block *sb, void *data) if (!inode) goto out; - inode->i_op = &spufs_dir_inode_operations; + inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; SPUFS_I(inode)->i_ctx = NULL; -- cgit v1.2.3 From 7022543ee404880aab5c641e4983e237815edc35 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Jun 2007 10:58:00 +1000 Subject: [POWERPC] spufs: Trivial whitespace fixes Remove redundant whitespace in arch/powerpc/platforms/cell/spufs/ Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/file.c | 12 ++++++------ arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/powerpc/platforms/cell/spufs/spu_restore.c | 2 +- arch/powerpc/platforms/cell/spufs/spufs.h | 4 ++-- arch/powerpc/platforms/cell/spufs/switch.c | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/file.c 
b/arch/powerpc/platforms/cell/spufs/file.c index f1cecaaad98..2e84ed5ae67 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -218,12 +218,12 @@ unsigned long spufs_get_unmapped_area(struct file *file, unsigned long addr, #endif /* CONFIG_SPU_FS_64K_LS */ static const struct file_operations spufs_mem_fops = { - .open = spufs_mem_open, - .release = spufs_mem_release, - .read = spufs_mem_read, - .write = spufs_mem_write, - .llseek = generic_file_llseek, - .mmap = spufs_mem_mmap, + .open = spufs_mem_open, + .release = spufs_mem_release, + .read = spufs_mem_read, + .write = spufs_mem_write, + .llseek = generic_file_llseek, + .mmap = spufs_mem_mmap, #ifdef CONFIG_SPU_FS_64K_LS .get_unmapped_area = spufs_get_unmapped_area, #endif diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 7bb5229b1e3..4381dd00d23 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -536,7 +536,7 @@ static void spusched_tick(struct spu_context *ctx) * tick and try again. */ if (mutex_trylock(&ctx->state_mutex)) { - struct spu *spu = ctx->spu; + struct spu *spu = ctx->spu; struct spu_context *new; new = grab_runnable_context(ctx->prio + 1, spu->node); diff --git a/arch/powerpc/platforms/cell/spufs/spu_restore.c b/arch/powerpc/platforms/cell/spufs/spu_restore.c index 0bf723dcd67..4e19ed7a075 100644 --- a/arch/powerpc/platforms/cell/spufs/spu_restore.c +++ b/arch/powerpc/platforms/cell/spufs/spu_restore.c @@ -296,7 +296,7 @@ static inline void restore_complete(void) * This code deviates from the documented sequence in the * following aspects: * - * 1. The EA for LSCSA is passed from PPE in the + * 1. The EA for LSCSA is passed from PPE in the * signal notification channels. * 2. The register spill area is pulled by SPU * into LS, rather than pushed by PPE. diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 98d3c18b2b6..dab8858b699 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -78,7 +78,7 @@ struct spu_context { struct spu_gang *gang; /* scheduler fields */ - struct list_head rq; + struct list_head rq; unsigned int time_slice; unsigned long sched_flags; cpumask_t cpus_allowed; @@ -206,7 +206,7 @@ extern char *isolated_loader; /* * spufs_wait - * Same as wait_event_interruptible(), except that here + * Same as wait_event_interruptible(), except that here * we need to call spu_release(ctx) before sleeping, and * then spu_acquire(ctx) when awoken. */ diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 71a0b41adb8..881485847ac 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -70,7 +70,7 @@ } #endif /* debug */ -#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) +#define POLL_WHILE_FALSE(_c) POLL_WHILE_TRUE(!(_c)) static inline void acquire_spu_lock(struct spu *spu) { @@ -1930,7 +1930,7 @@ static void harvest(struct spu_state *prev, struct spu *spu) reset_spu_privcntl(prev, spu); /* Step 16. */ reset_spu_lslr(prev, spu); /* Step 17. */ setup_mfc_sr1(prev, spu); /* Step 18. */ - spu_invalidate_slbs(spu); /* Step 19. */ + spu_invalidate_slbs(spu); /* Step 19. */ reset_ch_part1(prev, spu); /* Step 20. */ reset_ch_part2(prev, spu); /* Step 21. */ enable_interrupts(prev, spu); /* Step 22. 
*/ -- cgit v1.2.3 From 476273adc7277333aed9963bc4dc9b39066d3038 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:58:01 +1000 Subject: [POWERPC] spufs: Add tid file The new tid file contains the ID of the thread currently running the context, if any. This is used so that the new spu-top and spu-ps tools can find the thread in /proc. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/file.c | 22 ++++++++++++++++++++++ arch/powerpc/platforms/cell/spufs/sched.c | 7 +++++++ arch/powerpc/platforms/cell/spufs/spufs.h | 3 +++ 3 files changed, 32 insertions(+) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 2e84ed5ae67..2bb51ca51a6 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2039,6 +2039,26 @@ static const struct file_operations spufs_proxydma_info_fops = { .read = spufs_proxydma_info_read, }; +static int spufs_show_tid(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + seq_printf(s, "%d\n", ctx->tid); + return 0; +} + +static int spufs_tid_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_tid, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_tid_fops = { + .open = spufs_tid_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + struct tree_descr spufs_dir_contents[] = { { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, @@ -2072,6 +2092,7 @@ struct tree_descr spufs_dir_contents[] = { { "wbox_info", &spufs_wbox_info_fops, 0444, }, { "dma_info", &spufs_dma_info_fops, 0444, }, { "proxydma_info", &spufs_proxydma_info_fops, 0444, }, + { "tid", &spufs_tid_fops, 0444, }, {}, }; @@ -2095,6 +2116,7 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "psmap", &spufs_psmap_fops, 0666, }, { "phys-id", &spufs_id_ops, 0666, }, { "object-id", &spufs_object_id_ops, 0666, }, + { "tid", &spufs_tid_fops, 0444, }, {}, }; diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 4381dd00d23..540067550e8 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -101,6 +101,13 @@ void spu_set_timeslice(struct spu_context *ctx) */ void __spu_update_sched_info(struct spu_context *ctx) { + /* + * 32-bit assignments are atomic on powerpc, and we don't care about + * memory ordering here because retrieving the controlling thread is + * per definition racy. + */ + ctx->tid = current->pid; + /* * We do our own priority calculations, so we normally want diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index dab8858b699..8ff16b4b6bd 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -77,6 +77,9 @@ struct spu_context { struct list_head gang_list; struct spu_gang *gang; + /* owner thread */ + pid_t tid; + /* scheduler fields */ struct list_head rq; unsigned int time_slice; -- cgit v1.2.3 From 65de66f0b8bcb7431d9df82cf32b002062b3a611 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:58:02 +1000 Subject: [POWERPC] spufs: Implement /proc/spu_loadavg Provide load average information for spu contexts.
The format is identical to /proc/loadavg, which is also where a lot of code and concepts is borrowed from. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/context.c | 7 ++ arch/powerpc/platforms/cell/spufs/sched.c | 127 ++++++++++++++++++++++++++-- arch/powerpc/platforms/cell/spufs/spufs.h | 1 + 3 files changed, 127 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 6ff2a75589f..f623d963fdc 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -23,10 +23,14 @@ #include #include #include +#include #include #include #include "spufs.h" + +atomic_t nr_spu_contexts = ATOMIC_INIT(0); + struct spu_context *alloc_spu_context(struct spu_gang *gang) { struct spu_context *ctx; @@ -55,6 +59,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) spu_gang_add_ctx(gang, ctx); ctx->cpus_allowed = current->cpus_allowed; spu_set_timeslice(ctx); + + atomic_inc(&nr_spu_contexts); goto out; out_free: kfree(ctx); @@ -74,6 +80,7 @@ void destroy_spu_context(struct kref *kref) if (ctx->gang) spu_gang_remove_ctx(ctx->gang, ctx); BUG_ON(!list_empty(&ctx->rq)); + atomic_dec(&nr_spu_contexts); kfree(ctx); } diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 540067550e8..9fc09306c9a 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -36,6 +36,9 @@ #include #include #include +#include +#include +#include #include #include @@ -50,8 +53,11 @@ struct spu_prio_array { spinlock_t runq_lock; struct list_head active_list[MAX_NUMNODES]; struct mutex active_mutex[MAX_NUMNODES]; + int nr_active[MAX_NUMNODES]; + int nr_waiting; }; +static unsigned long spu_avenrun[3]; static struct spu_prio_array *spu_prio; static struct task_struct *spusched_task; static struct timer_list spusched_timer; @@ -169,14 +175,18 @@ static int node_allowed(struct spu_context *ctx, int node) */ static void spu_add_to_active_list(struct spu *spu) { - mutex_lock(&spu_prio->active_mutex[spu->node]); - list_add_tail(&spu->list, &spu_prio->active_list[spu->node]); - mutex_unlock(&spu_prio->active_mutex[spu->node]); + int node = spu->node; + + mutex_lock(&spu_prio->active_mutex[node]); + spu_prio->nr_active[node]++; + list_add_tail(&spu->list, &spu_prio->active_list[node]); + mutex_unlock(&spu_prio->active_mutex[node]); } static void __spu_remove_from_active_list(struct spu *spu) { list_del_init(&spu->list); + spu_prio->nr_active[spu->node]--; } /** @@ -275,6 +285,7 @@ static void __spu_add_to_rq(struct spu_context *ctx) { int prio = ctx->prio; + spu_prio->nr_waiting++; list_add_tail(&ctx->rq, &spu_prio->runq[prio]); set_bit(prio, spu_prio->bitmap); } @@ -283,8 +294,10 @@ static void __spu_del_from_rq(struct spu_context *ctx) { int prio = ctx->prio; - if (!list_empty(&ctx->rq)) + if (!list_empty(&ctx->rq)) { list_del_init(&ctx->rq); + spu_prio->nr_waiting--; + } if (list_empty(&spu_prio->runq[prio])) clear_bit(prio, spu_prio->bitmap); } @@ -567,10 +580,56 @@ static void spusched_tick(struct spu_context *ctx) } } +/** + * count_active_contexts - count nr of active tasks + * + * Return the number of tasks currently running or waiting to run. + * + * Note that we don't take runq_lock / active_mutex here. 
Reading + * a single 32bit value is atomic on powerpc, and we don't care + * about memory ordering issues here. + */ +static unsigned long count_active_contexts(void) +{ + int nr_active = 0, node; + + for (node = 0; node < MAX_NUMNODES; node++) + nr_active += spu_prio->nr_active[node]; + nr_active += spu_prio->nr_waiting; + + return nr_active; +} + +/** + * spu_calc_load - given tick count, update the avenrun load estimates. + * @tick: tick count + * + * No locking against reading these values from userspace, as for + * the CPU loadavg code. + */ +static void spu_calc_load(unsigned long ticks) +{ + unsigned long active_tasks; /* fixed-point */ + static int count = LOAD_FREQ; + + count -= ticks; + + if (unlikely(count < 0)) { + active_tasks = count_active_contexts() * FIXED_1; + do { + CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks); + CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks); + CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); + count += LOAD_FREQ; + } while (count < 0); + } +} + static void spusched_wake(unsigned long data) { mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); wake_up_process(spusched_task); + spu_calc_load(SPUSCHED_TICK); } static int spusched_thread(void *unused) @@ -598,13 +657,52 @@ static int spusched_thread(void *unused) return 0; } +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static int show_spu_loadavg(struct seq_file *s, void *private) +{ + int a, b, c; + + a = spu_avenrun[0] + (FIXED_1/200); + b = spu_avenrun[1] + (FIXED_1/200); + c = spu_avenrun[2] + (FIXED_1/200); + + /* + * Note that last_pid doesn't really make much sense for the + * SPU loadavg (it even seems very odd on the CPU side..), + * but we include it here to have a 100% compatible interface. + */ + seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + count_active_contexts(), + atomic_read(&nr_spu_contexts), + current->nsproxy->pid_ns->last_pid); + return 0; +} + +static int spu_loadavg_open(struct inode *inode, struct file *file) +{ + return single_open(file, show_spu_loadavg, NULL); +} + +static const struct file_operations spu_loadavg_fops = { + .open = spu_loadavg_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + int __init spu_sched_init(void) { - int i; + struct proc_dir_entry *entry; + int err = -ENOMEM, i; spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); if (!spu_prio) - return -ENOMEM; + goto out; for (i = 0; i < MAX_PRIO; i++) { INIT_LIST_HEAD(&spu_prio->runq[i]); @@ -619,14 +717,25 @@ int __init spu_sched_init(void) spusched_task = kthread_run(spusched_thread, NULL, "spusched"); if (IS_ERR(spusched_task)) { - kfree(spu_prio); - return PTR_ERR(spusched_task); + err = PTR_ERR(spusched_task); + goto out_free_spu_prio; } + entry = create_proc_entry("spu_loadavg", 0, NULL); + if (!entry) + goto out_stop_kthread; + entry->proc_fops = &spu_loadavg_fops; + pr_debug("spusched: tick: %d, min ticks: %d, default ticks: %d\n", SPUSCHED_TICK, MIN_SPU_TIMESLICE, DEF_SPU_TIMESLICE); return 0; + out_stop_kthread: + kthread_stop(spusched_task); + out_free_spu_prio: + kfree(spu_prio); + out: + return err; } void __exit spu_sched_exit(void) @@ -634,6 +743,8 @@ void __exit spu_sched_exit(void) struct spu *spu, *tmp; int node; + remove_proc_entry("spu_loadavg", NULL); + kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h 
b/arch/powerpc/platforms/cell/spufs/spufs.h index 8ff16b4b6bd..7f5d0b2fdea 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -176,6 +176,7 @@ void spu_gang_add_ctx(struct spu_gang *gang, struct spu_context *ctx); int spufs_handle_class1(struct spu_context *ctx); /* context management */ +extern atomic_t nr_spu_contexts; static inline void spu_acquire(struct spu_context *ctx) { mutex_lock(&ctx->state_mutex); -- cgit v1.2.3 From e9f8a0b65ac716fd7974159240ce34bddea780b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:58:03 +1000 Subject: [POWERPC] spufs: Add stat file to spufs Export per-context statistics in spufs. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/context.c | 2 + arch/powerpc/platforms/cell/spufs/fault.c | 19 +++++-- arch/powerpc/platforms/cell/spufs/file.c | 79 +++++++++++++++++++++++++++++ arch/powerpc/platforms/cell/spufs/run.c | 4 ++ arch/powerpc/platforms/cell/spufs/sched.c | 19 ++++++- arch/powerpc/platforms/cell/spufs/spufs.h | 51 +++++++++++++++++++ 6 files changed, 170 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index f623d963fdc..6d7bd60f538 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -59,6 +59,8 @@ struct spu_context *alloc_spu_context(struct spu_gang *gang) spu_gang_add_ctx(gang, ctx); ctx->cpus_allowed = current->cpus_allowed; spu_set_timeslice(ctx); + ctx->stats.execution_state = SPUCTX_UTIL_USER; + ctx->stats.tstamp = jiffies; atomic_inc(&nr_spu_contexts); goto out; diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 0f75c07e29d..3a9e49a24ec 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -33,7 +33,8 @@ * function. Currently, there are a few corner cases that we haven't had * to handle fortunately. 
*/ -static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, unsigned long dsisr) +static int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea, + unsigned long dsisr, unsigned *flt) { struct vm_area_struct *vma; unsigned long is_write; @@ -73,7 +74,8 @@ good_area: goto bad_area; } ret = 0; - switch (handle_mm_fault(mm, vma, ea, is_write)) { + *flt = handle_mm_fault(mm, vma, ea, is_write); + switch (*flt) { case VM_FAULT_MINOR: current->min_flt++; break; @@ -153,6 +155,7 @@ int spufs_handle_class1(struct spu_context *ctx) { u64 ea, dsisr, access; unsigned long flags; + unsigned flt = 0; int ret; /* @@ -178,9 +181,13 @@ int spufs_handle_class1(struct spu_context *ctx) if (!(dsisr & (MFC_DSISR_PTE_NOT_FOUND | MFC_DSISR_ACCESS_DENIED))) return 0; + spuctx_switch_state(ctx, SPUCTX_UTIL_IOWAIT); + pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, dsisr, ctx->state); + ctx->stats.hash_flt++; + /* we must not hold the lock when entering spu_handle_mm_fault */ spu_release(ctx); @@ -192,7 +199,7 @@ int spufs_handle_class1(struct spu_context *ctx) /* hashing failed, so try the actual fault handler */ if (ret) - ret = spu_handle_mm_fault(current->mm, ea, dsisr); + ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt); spu_acquire(ctx); /* @@ -201,11 +208,17 @@ int spufs_handle_class1(struct spu_context *ctx) * In case of unhandled error report the problem to user space. */ if (!ret) { + if (flt == VM_FAULT_MINOR) + ctx->stats.min_flt++; + else + ctx->stats.maj_flt++; + if (ctx->spu) ctx->ops->restart_dma(ctx); } else spufs_handle_dma_error(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE); + spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); return ret; } EXPORT_SYMBOL_GPL(spufs_handle_class1); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 2bb51ca51a6..30f7b077f34 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -2059,6 +2059,83 @@ static const struct file_operations spufs_tid_fops = { .release = single_release, }; +static const char *ctx_state_names[] = { + "user", "system", "iowait", "loaded" +}; + +static unsigned long long spufs_acct_time(struct spu_context *ctx, + enum spuctx_execution_state state) +{ + unsigned long time = ctx->stats.times[state]; + + if (ctx->stats.execution_state == state) + time += jiffies - ctx->stats.tstamp; + + return jiffies_to_msecs(time); +} + +static unsigned long long spufs_slb_flts(struct spu_context *ctx) +{ + unsigned long long slb_flts = ctx->stats.slb_flt; + + if (ctx->state == SPU_STATE_RUNNABLE) { + slb_flts += (ctx->spu->stats.slb_flt - + ctx->stats.slb_flt_base); + } + + return slb_flts; +} + +static unsigned long long spufs_class2_intrs(struct spu_context *ctx) +{ + unsigned long long class2_intrs = ctx->stats.class2_intr; + + if (ctx->state == SPU_STATE_RUNNABLE) { + class2_intrs += (ctx->spu->stats.class2_intr - + ctx->stats.class2_intr_base); + } + + return class2_intrs; +} + + +static int spufs_show_stat(struct seq_file *s, void *private) +{ + struct spu_context *ctx = s->private; + + spu_acquire(ctx); + seq_printf(s, "%s %llu %llu %llu %llu " + "%llu %llu %llu %llu %llu %llu %llu %llu\n", + ctx_state_names[ctx->stats.execution_state], + spufs_acct_time(ctx, SPUCTX_UTIL_USER), + spufs_acct_time(ctx, SPUCTX_UTIL_SYSTEM), + spufs_acct_time(ctx, SPUCTX_UTIL_IOWAIT), + spufs_acct_time(ctx, SPUCTX_UTIL_LOADED), + ctx->stats.vol_ctx_switch, + ctx->stats.invol_ctx_switch, + spufs_slb_flts(ctx), + ctx->stats.hash_flt, + 
ctx->stats.min_flt, + ctx->stats.maj_flt, + spufs_class2_intrs(ctx), + ctx->stats.libassist); + spu_release(ctx); + return 0; +} + +static int spufs_stat_open(struct inode *inode, struct file *file) +{ + return single_open(file, spufs_show_stat, SPUFS_I(inode)->i_ctx); +} + +static const struct file_operations spufs_stat_fops = { + .open = spufs_stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + + struct tree_descr spufs_dir_contents[] = { { "capabilities", &spufs_caps_fops, 0444, }, { "mem", &spufs_mem_fops, 0666, }, @@ -2093,6 +2170,7 @@ struct tree_descr spufs_dir_contents[] = { { "dma_info", &spufs_dma_info_fops, 0444, }, { "proxydma_info", &spufs_proxydma_info_fops, 0444, }, { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, {}, }; @@ -2117,6 +2195,7 @@ struct tree_descr spufs_dir_nosched_contents[] = { { "phys-id", &spufs_id_ops, 0666, }, { "object-id", &spufs_object_id_ops, 0666, }, { "tid", &spufs_tid_fops, 0444, }, + { "stat", &spufs_stat_fops, 0444, }, {}, }; diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 4e0db6ae0d5..c69f63dd5f0 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -351,6 +351,10 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_SINGLE_STEP))); + if ((status & SPU_STATUS_STOPPED_BY_STOP) && + ((status >> SPU_STOP_STATUS_SHIFT) & 0x2100)) + ctx->stats.libassist++; + ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); spu_yield(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9fc09306c9a..bb16c22360d 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -229,6 +229,10 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) { pr_debug("%s: pid=%d SPU=%d NODE=%d\n", __FUNCTION__, current->pid, spu->number, spu->node); + + ctx->stats.slb_flt_base = spu->stats.slb_flt; + ctx->stats.class2_intr_base = spu->stats.class2_intr; + spu->ctx = ctx; spu->flags = 0; ctx->spu = spu; @@ -275,6 +279,11 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) ctx->spu = NULL; spu->flags = 0; spu->ctx = NULL; + + ctx->stats.slb_flt += + (spu->stats.slb_flt - ctx->stats.slb_flt_base); + ctx->stats.class2_intr += + (spu->stats.class2_intr - ctx->stats.class2_intr_base); } /** @@ -400,6 +409,7 @@ static struct spu *find_victim(struct spu_context *ctx) } spu_remove_from_active_list(spu); spu_unbind_context(spu, victim); + victim->stats.invol_ctx_switch++; mutex_unlock(&victim->state_mutex); /* * We need to break out of the wait loop in spu_run @@ -425,6 +435,7 @@ static struct spu *find_victim(struct spu_context *ctx) */ int spu_activate(struct spu_context *ctx, unsigned long flags) { + spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); if (ctx->spu) return 0; @@ -492,6 +503,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (new || force) { spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); + ctx->stats.vol_ctx_switch++; spu_free(spu); if (new) wake_up(&new->stop_wq); @@ -521,6 +533,7 @@ void spu_deactivate(struct spu_context *ctx) } __spu_deactivate(ctx, 1, MAX_PRIO); + spuctx_switch_state(ctx, SPUCTX_UTIL_USER); } /** @@ -535,7 +548,10 @@ void spu_yield(struct spu_context *ctx) { if (!(ctx->flags & SPU_CREATE_NOSCHED)) { mutex_lock(&ctx->state_mutex); - 
__spu_deactivate(ctx, 0, MAX_PRIO); + if (__spu_deactivate(ctx, 0, MAX_PRIO)) + spuctx_switch_state(ctx, SPUCTX_UTIL_USER); + else + spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); mutex_unlock(&ctx->state_mutex); } } @@ -564,6 +580,7 @@ static void spusched_tick(struct spu_context *ctx) __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); + ctx->stats.invol_ctx_switch++; spu_free(spu); wake_up(&new->stop_wq); /* diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 7f5d0b2fdea..cd2b54f6e37 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -40,6 +40,19 @@ enum { struct spu_context_ops; struct spu_gang; +/* + * This is the state for spu utilization reporting to userspace. + * Because this state is visible to userspace it must never change and needs + * to be kept strictly separate from any internal state kept by the kernel. + */ +enum spuctx_execution_state { + SPUCTX_UTIL_USER = 0, + SPUCTX_UTIL_SYSTEM, + SPUCTX_UTIL_IOWAIT, + SPUCTX_UTIL_LOADED, + SPUCTX_UTIL_MAX +}; + struct spu_context { struct spu *spu; /* pointer to a physical SPU */ struct spu_state csa; /* SPU context save area. */ @@ -87,6 +100,24 @@ struct spu_context { cpumask_t cpus_allowed; int policy; int prio; + + /* statistics */ + struct { + /* updates protected by ctx->state_mutex */ + enum spuctx_execution_state execution_state; + unsigned long tstamp; /* time of last ctx switch */ + unsigned long times[SPUCTX_UTIL_MAX]; + unsigned long long vol_ctx_switch; + unsigned long long invol_ctx_switch; + unsigned long long min_flt; + unsigned long long maj_flt; + unsigned long long hash_flt; + unsigned long long slb_flt; + unsigned long long slb_flt_base; /* # at last ctx switch */ + unsigned long long class2_intr; + unsigned long long class2_intr_base; /* # at last ctx switch */ + unsigned long long libassist; + } stats; }; struct spu_gang { @@ -256,4 +287,24 @@ struct spufs_coredump_reader { extern struct spufs_coredump_reader spufs_coredump_read[]; extern int spufs_coredump_num_notes; +/* + * This function is a little bit too large for an inline, but + * as fault.c is built into the kernel we can't move it out of + * line. + */ +static inline void spuctx_switch_state(struct spu_context *ctx, + enum spuctx_execution_state new_state) +{ + WARN_ON(!mutex_is_locked(&ctx->state_mutex)); + + if (ctx->stats.execution_state != new_state) { + unsigned long curtime = jiffies; + + ctx->stats.times[ctx->stats.execution_state] += + curtime - ctx->stats.tstamp; + ctx->stats.tstamp = curtime; + ctx->stats.execution_state = new_state; + } +} + #endif -- cgit v1.2.3 From 08c9692b168240729cf89c69c4ad722760a5690c Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 29 Jun 2007 10:58:04 +1000 Subject: [POWERPC] spufs: Fix libassist accounting We're currently too permissive with counting libassist calls - fix the check on the SPE stop-and-signal status. 
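[Editorial illustration, not part of the original patch: the old test, (code & 0x2100), is non-zero for any stop code that shares either bit with 0x2100, while the corrected test matches only the 0x21xx class. A minimal standalone C sketch with made-up stop codes:]

#include <stdio.h>

/* Sketch, not kernel code: demonstrates why masking with 0x2100
 * over-counts. Any code sharing bit 0x2000 or 0x0100 passes the old
 * test; the fixed test accepts only the 0x21xx stop-and-signal class
 * that the patch above counts as a library-assisted call. */
int main(void)
{
	unsigned int codes[] = { 0x2100, 0x2101, 0x3100, 0x0110, 0x2000 };
	int i;

	for (i = 0; i < 5; i++) {
		unsigned int c = codes[i];
		printf("code 0x%04x: old=%d new=%d\n", c,
		       (c & 0x2100) != 0,		/* too permissive */
		       (c & 0x3f00) == 0x2100);		/* fixed check */
	}
	return 0;
}

[Only 0x2100 and 0x2101 pass the fixed check; the other three are false positives under the old one.]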
Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/run.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index c69f63dd5f0..05cf815dbda 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -352,7 +352,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, SPU_STATUS_SINGLE_STEP))); if ((status & SPU_STATUS_STOPPED_BY_STOP) && - ((status >> SPU_STOP_STATUS_SHIFT) & 0x2100)) + (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100)) ctx->stats.libassist++; ctx->ops->master_stop(ctx); -- cgit v1.2.3 From c77239b8be74f775142d9dd01041e2ce864ba20d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:58:05 +1000 Subject: [POWERPC] spusched: Disable tick when not needed Only enable the scheduler tick if we have any context waiting to be scheduled. Signed-off-by: Christoph Hellwig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/sched.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index bb16c22360d..2fb0e63344c 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -294,9 +294,10 @@ static void __spu_add_to_rq(struct spu_context *ctx) { int prio = ctx->prio; - spu_prio->nr_waiting++; list_add_tail(&ctx->rq, &spu_prio->runq[prio]); set_bit(prio, spu_prio->bitmap); + if (!spu_prio->nr_waiting++) + __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); } static void __spu_del_from_rq(struct spu_context *ctx) @@ -304,11 +305,13 @@ static void __spu_del_from_rq(struct spu_context *ctx) int prio = ctx->prio; if (!list_empty(&ctx->rq)) { + if (!--spu_prio->nr_waiting) + del_timer(&spusched_timer); list_del_init(&ctx->rq); - spu_prio->nr_waiting--; + + if (list_empty(&spu_prio->runq[prio])) + clear_bit(prio, spu_prio->bitmap); } - if (list_empty(&spu_prio->runq[prio])) - clear_bit(prio, spu_prio->bitmap); } static void spu_prio_wait(struct spu_context *ctx) @@ -654,9 +657,6 @@ static int spusched_thread(void *unused) struct spu *spu, *next; int node; - setup_timer(&spusched_timer, spusched_wake, 0); - __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); - while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); @@ -670,7 +670,6 @@ static int spusched_thread(void *unused) } } - del_timer_sync(&spusched_timer); return 0; } @@ -732,6 +731,8 @@ int __init spu_sched_init(void) } spin_lock_init(&spu_prio->runq_lock); + setup_timer(&spusched_timer, spusched_wake, 0); + spusched_task = kthread_run(spusched_thread, NULL, "spusched"); if (IS_ERR(spusched_task)) { err = PTR_ERR(spusched_task); @@ -762,6 +763,7 @@ void __exit spu_sched_exit(void) remove_proc_entry("spu_loadavg", NULL); + del_timer_sync(&spusched_timer); kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { -- cgit v1.2.3 From 27449971e6907ff38bde7bbc4647e55bd7309fc3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:58:06 +1000 Subject: [POWERPC] spusched: Fix runqueue corruption spu_activate can be called from multiple threads at the same time on behalf of the same spu context. 
We need to make sure to only add it once to avoid runqueue corruption. Signed-off-by: Christoph Hellwig Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/sched.c | 37 +++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 2fb0e63344c..9fb3133268f 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -292,12 +292,25 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) */ static void __spu_add_to_rq(struct spu_context *ctx) { - int prio = ctx->prio; - - list_add_tail(&ctx->rq, &spu_prio->runq[prio]); - set_bit(prio, spu_prio->bitmap); - if (!spu_prio->nr_waiting++) - __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + /* + * Unfortunately this code path can be called from multiple threads + * on behalf of a single context due to the way the problem state + * mmap support works. + * + * Fortunately we need to wake up all these threads at the same time + * and can simply skip the runqueue addition for all but the first + * thread getting into this codepath. + * + * It's still quite hacky, and long-term we should proxy all other + * threads through the owner thread so that spu_run is in control + * of all the scheduling activity for a given context. + */ + if (list_empty(&ctx->rq)) { + list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); + set_bit(ctx->prio, spu_prio->bitmap); + if (!spu_prio->nr_waiting++) + __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + } } static void __spu_del_from_rq(struct spu_context *ctx) @@ -440,12 +453,18 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) { spuctx_switch_state(ctx, SPUCTX_UTIL_SYSTEM); - if (ctx->spu) - return 0; - do { struct spu *spu; + /* + * If there are multiple threads waiting for a single context + * only one actually binds the context while the others will + * only be able to acquire the state_mutex once the context + * already is in runnable state. + */ + if (ctx->spu) + return 0; + spu = spu_get_idle(ctx); /* * If this is a realtime thread we try to get it running by -- cgit v1.2.3 From fe2f896d67b89a409c366c9a69e30291ab124467 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 29 Jun 2007 10:58:07 +1000 Subject: [POWERPC] spufs: Add spu stats in sysfs Export spu statistics in sysfs.
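[Editorial illustration, not part of the original patch: a simplified, standalone rendering of the per-state time accounting that spuctx_switch_state and the spu_switch_state helper added below both implement, with jiffies replaced by an explicit 'now' argument and illustrative names. On every state change, the elapsed time is charged to the state being left.]

#include <stdio.h>

enum util_state { UTIL_USER, UTIL_SYSTEM, UTIL_IOWAIT, UTIL_LOADED, UTIL_MAX };

struct util_stats {
	enum util_state state;
	unsigned long tstamp;            /* time of last state change */
	unsigned long times[UTIL_MAX];   /* accumulated time per state */
};

/* Charge the elapsed time to the state being left, then switch. */
static void switch_state(struct util_stats *s, enum util_state new_state,
			 unsigned long now)
{
	if (s->state != new_state) {
		s->times[s->state] += now - s->tstamp;
		s->tstamp = now;
		s->state = new_state;
	}
}

int main(void)
{
	struct util_stats s = { UTIL_USER, 0, { 0 } };

	switch_state(&s, UTIL_SYSTEM, 10);	/* 10 ticks of USER */
	switch_state(&s, UTIL_LOADED, 25);	/* 15 ticks of SYSTEM */
	switch_state(&s, UTIL_USER, 100);	/* 75 ticks of LOADED */

	printf("user=%lu system=%lu loaded=%lu\n",
	       s.times[UTIL_USER], s.times[UTIL_SYSTEM], s.times[UTIL_LOADED]);
	return 0;
}

[Running it prints user=10 system=15 loaded=75.]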
Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/fault.c | 10 ++++++++++ arch/powerpc/platforms/cell/spufs/run.c | 3 ++- arch/powerpc/platforms/cell/spufs/sched.c | 10 +++++++++- arch/powerpc/platforms/cell/spufs/spufs.h | 13 +++++++++++++ 4 files changed, 34 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 3a9e49a24ec..e064d0c0d80 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -187,6 +187,10 @@ int spufs_handle_class1(struct spu_context *ctx) dsisr, ctx->state); ctx->stats.hash_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) { + ctx->spu->stats.hash_flt++; + spu_switch_state(ctx->spu, SPU_UTIL_IOWAIT); + } /* we must not hold the lock when entering spu_handle_mm_fault */ spu_release(ctx); @@ -212,6 +216,12 @@ int spufs_handle_class1(struct spu_context *ctx) ctx->stats.min_flt++; else ctx->stats.maj_flt++; + if (ctx->state == SPU_STATE_RUNNABLE) { + if (flt == VM_FAULT_MINOR) + ctx->spu->stats.min_flt++; + else + ctx->spu->stats.maj_flt++; + } if (ctx->spu) ctx->ops->restart_dma(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 05cf815dbda..58ae13b7de8 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -352,7 +352,8 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, SPU_STATUS_SINGLE_STEP))); if ((status & SPU_STATUS_STOPPED_BY_STOP) && - (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100)) + (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) && + (ctx->state == SPU_STATE_RUNNABLE)) ctx->stats.libassist++; ctx->ops->master_stop(ctx); diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 9fb3133268f..e5b4dd1db28 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -251,6 +251,7 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) spu_cpu_affinity_set(spu, raw_smp_processor_id()); spu_switch_notify(spu, ctx); ctx->state = SPU_STATE_RUNNABLE; + spu_switch_state(spu, SPU_UTIL_SYSTEM); } /** @@ -263,6 +264,8 @@ static void spu_unbind_context(struct spu *spu, struct spu_context *ctx) pr_debug("%s: unbind pid=%d SPU=%d NODE=%d\n", __FUNCTION__, spu->pid, spu->number, spu->node); + spu_switch_state(spu, SPU_UTIL_IDLE); + spu_switch_notify(spu, NULL); spu_unmap_mappings(ctx); spu_save(&ctx->csa, spu); @@ -426,6 +429,7 @@ static struct spu *find_victim(struct spu_context *ctx) spu_remove_from_active_list(spu); spu_unbind_context(spu, victim); victim->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; mutex_unlock(&victim->state_mutex); /* * We need to break out of the wait loop in spu_run @@ -526,6 +530,7 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); ctx->stats.vol_ctx_switch++; + spu->stats.vol_ctx_switch++; spu_free(spu); if (new) wake_up(&new->stop_wq); @@ -572,8 +577,10 @@ void spu_yield(struct spu_context *ctx) mutex_lock(&ctx->state_mutex); if (__spu_deactivate(ctx, 0, MAX_PRIO)) spuctx_switch_state(ctx, SPUCTX_UTIL_USER); - else + else { spuctx_switch_state(ctx, SPUCTX_UTIL_LOADED); + spu_switch_state(ctx->spu, SPU_UTIL_USER); + } 
mutex_unlock(&ctx->state_mutex); } } @@ -603,6 +610,7 @@ static void spusched_tick(struct spu_context *ctx) __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); ctx->stats.invol_ctx_switch++; + spu->stats.invol_ctx_switch++; spu_free(spu); wake_up(&new->stop_wq); /* diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index cd2b54f6e37..08b3530288a 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -307,4 +307,17 @@ static inline void spuctx_switch_state(struct spu_context *ctx, } } +static inline void spu_switch_state(struct spu *spu, + enum spuctx_execution_state new_state) +{ + if (spu->stats.utilization_state != new_state) { + unsigned long curtime = jiffies; + + spu->stats.times[spu->stats.utilization_state] += + curtime - spu->stats.tstamp; + spu->stats.tstamp = curtime; + spu->stats.utilization_state = new_state; + } +} + #endif -- cgit v1.2.3 From 933b0e35247ef0dbd1a078a0ba3705ddbbda129f Mon Sep 17 00:00:00 2001 From: Kazunori Asayama Date: Fri, 29 Jun 2007 10:58:08 +1000 Subject: [POWERPC] spufs: Fix lost events in poll/epoll on mfc When a task waits for I/O events on mfc in an SPU context using the poll/epoll syscalls, events can be lost, because spufs_mfc_poll calls poll_wait only after checking the MFC status, and because tagwait is updated non-atomically, outside the context lock. This fixes both problems. Signed-off-by: Kazunori Asayama Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/file.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 30f7b077f34..c2814ea96af 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1499,14 +1499,15 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, if (status) ret = status; } - spu_release(ctx); if (ret) - goto out; + goto out_unlock; ctx->tagwait |= 1 << cmd.tag; ret = size; +out_unlock: + spu_release(ctx); out: return ret; } @@ -1517,14 +1518,14 @@ static unsigned int spufs_mfc_poll(struct file *file,poll_table *wait) u32 free_elements, tagstatus; unsigned int mask; + poll_wait(file, &ctx->mfc_wq, wait); + spu_acquire(ctx); ctx->ops->set_mfc_query(ctx, ctx->tagwait, 2); free_elements = ctx->ops->get_mfc_free_elements(ctx); tagstatus = ctx->ops->read_mfc_tagstatus(ctx); spu_release(ctx); - poll_wait(file, &ctx->mfc_wq, wait); - mask = 0; if (free_elements & 0xffff) mask |= POLLOUT | POLLWRNORM; -- cgit v1.2.3 From 8d038e0433e3164e460c2daeca1ec6947a08f81a Mon Sep 17 00:00:00 2001 From: Kazunori Asayama Date: Fri, 29 Jun 2007 10:58:09 +1000 Subject: [POWERPC] spufs: Save dma_tagstatus_R in CSA The function backing_ops->read_mfc_tagstatus() doesn't return a correct value because the dma_tagstatus_R register isn't saved in the CSA. This fixes the problem.
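[Editorial illustration, not part of the original patch: the general rule behind the fix below, sketched with simplified structures and illustrative names rather than the kernel's layout. Any register that the backing ops serve to userspace while a context is scheduled out must be captured into the CSA at save time, even if it is never restored to hardware.]

#include <stdio.h>

/* Sketch only: while a context is scheduled out, backing ops answer
 * reads from the saved image (CSA), so save_csa() must copy every
 * register they expose -- here dma_tagstatus_R -- even though it is
 * never written back to the hardware on restore. */
struct problem_state { unsigned int dma_tagstatus_R; };
struct saved_csa { struct problem_state prob; };

static void save_csa(struct saved_csa *csa, const struct problem_state *hw)
{
	csa->prob.dma_tagstatus_R = hw->dma_tagstatus_R; /* the missing step */
}

static unsigned int backing_read_mfc_tagstatus(const struct saved_csa *csa)
{
	/* without the copy in save_csa(), this returns stale data */
	return csa->prob.dma_tagstatus_R;
}

int main(void)
{
	struct problem_state hw = { 0x00010000 };
	struct saved_csa csa = { { 0 } };

	save_csa(&csa, &hw);
	printf("tagstatus via backing ops: 0x%08x\n",
	       backing_read_mfc_tagstatus(&csa));
	return 0;
}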
Signed-off-by: Kazunori Asayama Signed-off-by: Jeremy Kerr Signed-off-by: Paul Mackerras --- arch/powerpc/platforms/cell/spufs/backing_ops.c | 6 ++++++ arch/powerpc/platforms/cell/spufs/switch.c | 14 ++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'arch/powerpc/platforms/cell/spufs') diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index d32db9ffc6e..07a0e815abf 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -320,6 +320,12 @@ static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, /* FIXME: what are the side-effects of this? */ prob->dma_querymask_RW = mask; prob->dma_querytype_RW = mode; + /* In the current implementation, the SPU context is always + * acquired in runnable state when new bits are added to the + * mask (tagwait), so it's sufficient just to mask + * dma_tagstatus_R with the 'mask' parameter here. + */ + ctx->csa.prob.dma_tagstatus_R &= mask; out: spin_unlock(&ctx->csa.register_lock); diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 881485847ac..9c506ba08cd 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -387,6 +387,19 @@ static inline void save_ppu_querytype(struct spu_state *csa, struct spu *spu) csa->prob.dma_querytype_RW = in_be32(&prob->dma_querytype_RW); } +static inline void save_ppu_tagstatus(struct spu_state *csa, struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + /* Save the Prxy_TagStatus register in the CSA. + * + * It is unnecessary to restore dma_tagstatus_R; however, + * dma_tagstatus_R in the CSA is accessed via backing_ops, so + * we must save it. + */ + csa->prob.dma_tagstatus_R = in_be32(&prob->dma_tagstatus_R); +} + static inline void save_mfc_csr_tsq(struct spu_state *csa, struct spu *spu) { struct spu_priv2 __iomem *priv2 = spu->priv2; @@ -1812,6 +1825,7 @@ static void save_csa(struct spu_state *prev, struct spu *spu) save_mfc_queues(prev, spu); /* Step 19. */ save_ppu_querymask(prev, spu); /* Step 20. */ save_ppu_querytype(prev, spu); /* Step 21. */ + save_ppu_tagstatus(prev, spu); /* NEW. */ save_mfc_csr_tsq(prev, spu); /* Step 22. */ save_mfc_csr_cmd(prev, spu); /* Step 23. */ save_mfc_csr_ato(prev, spu); /* Step 24. */ -- cgit v1.2.3